Java压缩之LZW算法字典压缩与解压讲解

压缩过程：

前面已经写过一篇哈夫曼压缩。LZW字典压缩与哈夫曼压缩的不同之处在于不需要把编码表写入文件，编码表是在读文件的过程中生成的。首先将0-255这256个ASCII码与对应的数字存入哈希表中，作为基础码表。

这里的后缀为当前读取到的字符，前缀为此前已经匹配到的字符串。

前缀+后缀如果在码表中存在，前缀等于前缀+后缀。如果不存在，将前缀+后缀所表示的字符串写入编码表并分配新的编码，同时将前缀的编码写入压缩文件中。这里重点注意一下，一个字节所能表示的数字范围为0-255，所以我们将每个编码变成两个字节写进去，分别写入它的高八位和低八位，比如256即为00000001 00000000。这里用到DataOutputStream对象dos的dos.writeChar(256)方法。

两个字节所能表示的范围为0-65535。当我们的编码超过这个范围，就需要重置编码表，再重新编码。

解压过程

cw表示当前读取到的编码所对应的字符串，pw为上一次的cw。cw在编码表中存在：p→pw，c→cw的第一个字符，输出cw，并把p+c加入编码表。cw在编码表中不存在：p→pw，c→pw的第一个字符，输出p+c，并把p+c加入编码表。

当我们读到65535的时候，就重置码表，重新编码。

代码部分

								
									 public   class   yasuo { 

									    private   int   bianma =   256  ;  // 编码 

									    private   string perfix =   ""  ;  // 前缀 

									    private   string suffix =   ""  ;  // 后缀 

									    private   string zhongjian =   ""  ;  // 中间变量 

									    hashmap<string, integer> hm =   new   hashmap<string, integer>();  // 编码表 

									    private   static   string path =   "d:\\java\\字典压缩\\zidianyasuo.txt"  ;  // 要压缩的文件 

									    private   static   string path2 =   "d:\\java\\字典压缩\\yasuo.txt"  ;  // 解压后的文件 

									    private   static   string path3 =   "d:\\java\\字典压缩\\jieya.txt"  ;  // 解压后的文件 

									    public   static   void   main(string[] args)   throws   ioexception { 

									    /** 

									     * 压缩 

									     */ 

									    yasuo yasuo =   new   yasuo(); 

									    yasuo.yasuo(); 

									    /** 

									     * 解压 

									     */ 

									    jieya jie =   new   jieya(); 

									    jie.jieya(path2,path3); 

									    } 

									    public   void   yasuo()   throws   ioexception { 

									    // 创建文件输入流 

									    inputstream is =   new   fileinputstream(path); 

									    byte  [] buffer =   new   byte  [is.available()];  // 创建缓存区域 

									    is.read(buffer);  // 读入所有的文件字节 

									    string str =   new   string(buffer);  // 对字节进行处理 

									    is.close();   // 关闭流 

									    // 创建文件输出流 

									    outputstream os =   new   fileoutputstream(path2); 

									    dataoutputstream dos =   new   dataoutputstream(os); 

									 // system.out.println(str); 

									    // 把最基本的256个ascll码放编码表中 

									    for   (  int   i =   0  ; i <   256  ; i++) { 

									     char   ch = (  char  ) i; 

									     string st = ch +   ""  ; 

									     hm.put(st, i); 

									    } 

									    for   (  int   i =   0  ; i < str.length(); i++) { 

									     if  (bianma==  65535  ){ 

									     system.out.println(  "重置"  ); 

									     dos.writechar(  65535  );  //写出一个-1作为重置的表示与码表的打印 

									     hm.clear();  //清空hashmap 

									     for   (  int   j =   0  ; j <   256  ; j++) {  //重新将基本256个编码写入 

									      char   ch = (  char  ) j; 

									      string st = ch +   ""  ; 

									      hm.put(st, j); 

									     } 

									     perfix=  ""  ; 

									     bianma=  0  ; 

									     } 

									     char   ch = str.charat(i); 

									     string s = ch +   ""  ; 

									     suffix = s; 

									     zhongjian = perfix + suffix; 

									     if   (hm.get(zhongjian) ==   null  ) {  // 如果码表中没有 前缀加后缀的码表 

									 //  system.out.print(zhongjian); 

									 //  system.out.println(" 对应的编码为 " + bianma); 

									     hm.put(zhongjian, bianma);  // 向码表添加 前缀加后缀 和 对应的编码 

									 //  system.out.println(" " + perfix); 

									 //  system.out.println("写入的编码 "+hm.get(perfix)); 

									     dos.writechar(hm.get(perfix));   // 把前缀写入压缩文件 

									     bianma++; 

									     perfix = suffix; 

									     }   else   {  // 如果有下一个前缀保存 上一个前缀加后缀 

									     perfix = zhongjian; 

									     } 

									     if   (i == str.length() -   1  ) {  // 把最后一个写进去 

									 //  system.out.print("写入最后一个"+perfix); 

									     dos.writechar(hm.get(perfix)); 

									 //  system.out.println("   "+hm.get(perfix)); 

									     } 

									    } 

									    os.close();  // 关闭流 

									 // system.out.println(hm.tostring());// 输出码表 

									    } 

									 }

								
									 /**
									  * LZW dictionary decompressor.
									  *
									  * <p>The compressed file is read as a sequence of 16-bit codes, high byte first.
									  * The dictionary starts with the 256 single-byte phrases (codes 0-255) and is
									  * rebuilt while decoding. Code 65535 is a marker: when it is read, the
									  * dictionary is reset to the 256 base entries and decoding continues.
									  */
									 public class jieya {

									     /** Marker code that requests a dictionary reset; it never maps to a phrase. */
									     private static final int RESET_CODE = 65535;

									     /** Number of base entries: one per possible byte value. */
									     private static final int BASE_SIZE = 256;

									     HashMap<Integer, String> hm1 = new HashMap<>(); // dictionary: code -> phrase

									     private int bianma = BASE_SIZE; // next free dictionary code

									     /**
									      * Decompresses the code stream in {@code path} and writes the restored bytes
									      * to {@code path1}.
									      *
									      * <p>A trailing odd byte in the input is ignored. An empty input produces an
									      * empty output file.
									      *
									      * @param path  the compressed file to read
									      * @param path1 the file that receives the decompressed bytes
									      * @throws IOException if a file cannot be read or written, or if the stream
									      *                     contains a code that cannot be resolved
									      */
									     public void jieya(String path, String path1) throws IOException {
									         byte[] buffer = readAll(path);
									         // All decoding state is re-initialised here so the instance is reusable.
									         resetDictionary();
									         String pw = ""; // previously decoded phrase; empty right after a (re)start
									         try (OutputStream os = new FileOutputStream(path1)) {
									             for (int i = 0; i + 1 < buffer.length; i += 2) {
									                 // Mask both bytes: Java bytes are signed, so 0x80-0xFF would go negative.
									                 int n = ((buffer[i] & 0xFF) << 8) | (buffer[i + 1] & 0xFF);
									                 if (n == RESET_CODE) {
									                     System.out.println("重置2");
									                     resetDictionary();
									                     pw = "";
									                     continue;
									                 }
									                 String cw = hm1.get(n); // phrase for the current code
									                 if (cw == null) {
									                     // The only legal unknown code is the one about to be defined:
									                     // it stands for pw followed by the first character of pw.
									                     if (pw.isEmpty() || n != bianma) {
									                         throw new IOException(
									                                 "Corrupt LZW stream: unexpected code " + n + " at byte offset " + i);
									                     }
									                     cw = pw + pw.charAt(0);
									                 }
									                 os.write(toBytes(cw));
									                 if (!pw.isEmpty()) {
									                     // New entry: previous phrase + first character of the current one.
									                     hm1.put(bianma, pw + cw.charAt(0));
									                     bianma++;
									                 }
									                 pw = cw;
									             }
									         }
									     }

									     /** Clears the dictionary and reloads the 256 single-byte base entries. */
									     private void resetDictionary() {
									         hm1.clear();
									         for (int i = 0; i < BASE_SIZE; i++) {
									             hm1.put(i, String.valueOf((char) i));
									         }
									         bianma = BASE_SIZE;
									     }

									     /** Turns a phrase of chars 0-255 back into the raw bytes it stands for. */
									     private static byte[] toBytes(String phrase) {
									         byte[] out = new byte[phrase.length()];
									         for (int k = 0; k < out.length; k++) {
									             out[k] = (byte) phrase.charAt(k);
									         }
									         return out;
									     }

									     /** Reads the whole file into memory, looping because one read() may return fewer bytes. */
									     private static byte[] readAll(String file) throws IOException {
									         try (InputStream is = new FileInputStream(file)) {
									             byte[] buffer = new byte[is.available()];
									             int filled = 0;
									             while (filled < buffer.length) {
									                 int n = is.read(buffer, filled, buffer.length - filled);
									                 if (n < 0) {
									                     break;
									                 }
									                 filled += n;
									             }
									             if (filled == buffer.length) {
									                 return buffer;
									             }
									             byte[] trimmed = new byte[filled];
									             System.arraycopy(buffer, 0, trimmed, 0, filled);
									             return trimmed;
									         }
									     }

									 }

不足之处：当编码超过65535的时候，并没有处理好，不能重置码表，还原出的文件在超过65535的部分就开始乱码。还有待改善。

总结

以上就是这篇文章的全部内容了，希望本文的内容对大家的学习或者工作具有一定的参考学习价值，谢谢大家的支持。如果你想了解更多相关内容，请查看下面的相关链接。

原文链接：https://blog.csdn.net/lzq1326253299/article/details/82750568

查看更多关于Java压缩之LZW算法字典压缩与解压讲解的详细内容...

声明：本文来自网络，不代表【好得很程序员自学网】立场，转载请注明出处：http://www.haodehen.cn/did248451

更新时间：2023-06-23 阅读：33次