1. 计算目标字符串中各个字符出现的次数
2. 将步骤1重构成最优二叉树 – 节点的每个权重代表字符出现的次数
3. 字符编码:根节点到某个字符节点的路径(左为0、右为1)
4. 根据3步骤的字符编码,将目标字符串转成二进制编码
OptimalBinaryTree.java
public class OptimalBinaryTree<T> {
Node<T> root;
T[] datas;
OptimalBinaryTree(@NotNull T...datas) {
T[] copyData = Arrays.copyOf(datas, datas.length);
Arrays.sort(copyData);
datas = copyData;
createdOptimalBinaryTree(datas);
}
public Node<T> getRoot() {
return root;
}
public String inOrder() {
StringBuilder sb = new StringBuilder();
sb.append("[");
root.inOrder(root,sb);
sb.replace(sb.lastIndexOf(", "), sb.length(), "");
sb.append("]");
return sb.toString();
}
public String preOrder() {
StringBuilder sb = new StringBuilder();
sb.append("[");
root.preOrder(root,sb);
sb.replace(sb.lastIndexOf(", "), sb.length(), "");
sb.append("]");
return sb.toString();
}
@Override
public String toString() {
return preOrder();
}
private boolean createdOptimalBinaryTree(T[] datas) {
LinkedList<Node<T>> list = new LinkedList<>();
boolean flag = false;
Node<T> newNode;
for(T data : datas) {
newNode = new Node<>();
newNode.setData(data);
list.add( newNode );
}
while(true) {
Node<T> lNode = list.pop();
Node<T> rNode = list.pop();
Node<T> parNode = new Node<>();
parNode.weight = lNode.weight + rNode.weight;
parNode.leftNode = lNode;
parNode.rightNode = rNode;
list.addFirst(parNode);
Collections.sort(list);
if(list.size() == 1) {
root = list.get(0);
flag = true;
break;
}
}
return flag;
}
protected class Node<T> implements Comparable<Node> {
T data;
Integer weight;
Node<T> leftNode;
Node<T> rightNode;
public T getData() {
return data;
}
public Node<T> getLeftNode() {
return leftNode;
}
public void setLeftNode(Node<T> leftNode) {
this.leftNode = leftNode;
}
public Node<T> getRightNode() {
return rightNode;
}
public void setRightNode(Node<T> rightNode) {
this.rightNode = rightNode;
}
public void preOrder(Node node, StringBuilder sb) {
sb.append(node.toString() + ", ");
if(node.leftNode != null) {
preOrder(node.leftNode, sb);
}
if(node.rightNode != null) {
preOrder(node.rightNode, sb);
}
}
public void inOrder(Node node, StringBuilder sb) {
if(node.leftNode != null) {
inOrder(node.leftNode, sb);
}
sb.append(node.toString() + ", ");
if(node.rightNode != null) {
inOrder(node.rightNode, sb);
}
}
public void setData(T data) {
this.data = data;
try {
Field field = data.getClass().getDeclaredField("weight");
field.setAccessible(true);
weight = (Integer) field.get(data);
} catch (Exception e) {
e.printStackTrace();
}
}
@Override
public String toString() {
return "Node{" +
"data=" + data +
", weight=" + weight +
'}';
}
@Override
public int compareTo(Node node) {
return this.weight - node.weight;
}
}
public static void main(String[] args) {
Student student1 = new Student(1, "lrc", 8);
Student student2 = new Student(2, "qcj", 2);
Student student3 = new Student(3, "ert", 10);
Student student4 = new Student(4, "bfg", 5);
Student student5 = new Student(5, "ytu", 25);
Student[] stu = {student1,student2,student3,student4,student5};
OptimalBinaryTree<Student> tree = new OptimalBinaryTree(stu);
System.out.println(tree);
}
}
HuffmanCode.java
package top.linruchang.algorithm.tree;
import java.util.*;
public class HuffmanCode {
OptimalBinaryTree<MyCharacter> tree;
String data;
String encodedData;
byte[] encodedDataArray;
HashMap<Character, MyCharacter> charMap;
HashMap<Character, String> codeMap = new HashMap<>();
HuffmanCode(String str) {
this.data = str;
initCharMap(str);
System.out.println("每个字符出现的次数:" + charMap);
MyCharacter[] myCharacters = new MyCharacter[charMap.size()];
charMap.values().toArray(myCharacters);
tree = new OptimalBinaryTree<>(myCharacters);
System.out.println("前序遍历赫夫曼树:" + tree);
initHuffmanCode(tree.getRoot(), "", new StringBuilder());
System.out.println("编码转换规则" + codeMap);
EncodingData();
System.out.println("原字符串:" + data);
System.out.println("赫夫曼编码后的字符串:" + encodedData);
zipCode();
}
public HashMap<Character, String> getCodeMap() {
return codeMap;
}
public static String decode(byte[] binaryMessg, Map< Character, String> codeMap) {
String encodedDataByBytes = getEncodedDataByBytes(binaryMessg, codeMap);
return decode(encodedDataByBytes, codeMap);
}
public static String decode(String binaryMessg, Map< Character, String> codeMap) {
StringBuilder textContent = new StringBuilder();
Set<Map.Entry<Character, String>> entries = codeMap.entrySet();
Integer startIndex = 0;
while(true) {
String code;
Integer tempEndIndex;
for(Map.Entry<Character, String> map: entries) {
code = map.getValue();
tempEndIndex = startIndex + code.length();
if(tempEndIndex > binaryMessg.length()) {
continue;
}
String substr = binaryMessg.substring(startIndex, tempEndIndex);
if(code.equals(substr)) {
textContent.append(map.getKey());
startIndex = tempEndIndex;
break;
}
}
if(startIndex >= binaryMessg.length()) {
break;
}
}
return textContent.toString();
}
public static String getEncodedDataByBytes(byte[] bytes, Map< Character, String> codeMap) {
StringBuilder sb = new StringBuilder();
int j = 0;
for(byte b : bytes) {
String binStr = Integer.toBinaryString(b);
if(binStr.length() <= 8) {
if(j == bytes.length -1) {
sb = adjustString(sb, new StringBuilder(binStr), codeMap);
break;
}
for(int i = 0; i<8-binStr.length(); i++) {
sb.append("0");
}
sb.append(binStr);
}
if(binStr.length() > 8) {
sb.append(binStr.substring(binStr.length()-8));
}
j++;
}
return sb.toString();
}
public static StringBuilder adjustString(StringBuilder prefixSb, StringBuilder suffixSb, Map< Character, String> codeMap) {
Integer startIndex = 0;
Integer endIndex = 0;
String prefixStr = prefixSb.toString();
String suffixStr = suffixSb.toString();
Collection<String> codes = codeMap.values();
Integer count = 0;
while(true) {
++count;
if(count == 2) {
break;
}
for(String code : codes) {
Integer curEndIndex = startIndex + code.length();
if(curEndIndex > prefixSb.length()) {
continue;
}
String substr = prefixStr.substring(startIndex, curEndIndex);
if(code.equals(code)) {
startIndex = curEndIndex;
endIndex = curEndIndex;
count = 0;
break;
}
}
if(startIndex >= prefixSb.length()) {
break;
}
}
String waitPrefixStr = null;
List<String> waitPrefixStrs = new ArrayList<String>();
if(startIndex != prefixStr.length()) {
waitPrefixStr = prefixStr.substring(startIndex);
System.out.println("waitPrefix:" + waitPrefixStr);
for(String code : codes) {
if(code.length() > waitPrefixStr.length()) {
boolean isMatch = code.substring(0,waitPrefixStr.length()).matches(waitPrefixStr);
if(isMatch) {
if(code.charAt(waitPrefixStr.length()) == '0') {
waitPrefixStrs.add(code);
}
}
}
}
}
if(waitPrefixStrs.size() > 0) {
String copyWaitPrefixStr = new String(waitPrefixStr);
for(String repairTarget : waitPrefixStrs) {
for(int index = waitPrefixStr.length(); index < repairTarget.length(); index++) {
if(repairTarget.charAt(index) == '0') {
copyWaitPrefixStr += "0";
}else {
break;
}
}
String tempSuffixStr = copyWaitPrefixStr + suffixStr;
Integer sufixStartIndex = 0;
Integer suffixCount = 0;
boolean isValid = false;
while(true) {
++suffixCount;
if(suffixCount == 2) {
break;
}
for(String code : codes) {
Integer curSuffixEndIndex = sufixStartIndex + code.length();
if(curSuffixEndIndex > tempSuffixStr.length()) {
continue;
}
if(tempSuffixStr.substring(sufixStartIndex, curSuffixEndIndex).equals(code)) {
sufixStartIndex = curSuffixEndIndex;
suffixCount = 0;
break;
}
}
if(sufixStartIndex == tempSuffixStr.length()) {
isValid = true;
break;
}
}
if(isValid) {
for(int index = waitPrefixStr.length(); index < repairTarget.length(); index++) {
if(repairTarget.charAt(index) == '0') {
prefixSb.append("0");
}else {
break;
}
}
break;
}
}
}
return prefixSb.append(suffixSb);
}
public String getEncodedData() {
return encodedData;
}
private void zipCode() {
byte[] bytes;
Integer length = encodedData.length();
if (length % 8 == 0) {
bytes = new byte[length / 8];
} else {
bytes = new byte[length / 8 + 1];
}
Integer startIndex = 0;
Integer endIndex = 8;
Integer byteIndex = 0;
while (startIndex < length - 1) {
String binaryNum = encodedData.substring(startIndex, endIndex);
System.out.println(binaryNum);
Integer num = Integer.valueOf(binaryNum, 2);
bytes[byteIndex++] = num.byteValue();
startIndex += 8;
endIndex += 8;
if (endIndex > length) {
endIndex = length;
}
}
encodedDataArray = bytes;
}
public byte[] getEncodedDataBytes() {
return encodedDataArray;
}
public void EncodingData() {
char[] chars = new char[data.length()];
data.getChars(0, data.length(), chars, 0);
StringBuilder sb = new StringBuilder();
for (char curChar : chars) {
String code = codeMap.get(curChar);
sb.append(code);
}
encodedData = sb.toString();
}
private void initHuffmanCode(OptimalBinaryTree.Node node, String signLR, StringBuilder sb) {
StringBuilder sb2 = new StringBuilder(sb);
sb2.append(signLR);
if (node != null) {
if (node.data == null) {
initHuffmanCode(node.leftNode, "0", sb2);
initHuffmanCode(node.rightNode, "1", sb2);
} else {
Character curCharacter = ((MyCharacter) node.getData()).getCharacter();
codeMap.put(curCharacter, sb2.toString());
}
}
}
private void initCharMap(String str) {
char[] chars = new char[str.length()];
str.getChars(0, str.length(), chars, 0);
charMap = new HashMap<>();
for (char curChar : chars) {
boolean hasKey = charMap.containsKey(curChar);
MyCharacter curCharacter;
if (hasKey) {
curCharacter = charMap.get(curChar);
Integer count = curCharacter.getCount();
curCharacter.setCount(++count);
} else {
curCharacter = new MyCharacter();
curCharacter.setCharacter(curChar);
curCharacter.setCount(1);
charMap.put(curChar, curCharacter);
}
}
}
private class MyCharacter implements Comparable<MyCharacter> {
Character character;
Integer asciiValue;
Integer count;
Integer weight;
public Character getCharacter() {
return character;
}
public void setCharacter(Character character) {
this.character = character;
this.asciiValue = (int) character.charValue();
}
public int getCount() {
return count;
}
public void setCount(int count) {
this.count = count;
this.weight = count;
}
@Override
public String toString() {
return "MyCharacter{" +
"character=" + character +
", asciiValue=" + asciiValue +
", count=" + count +
'}';
}
@Override
public int compareTo(MyCharacter orderCharacter) {
return this.count - orderCharacter.count;
}
}
public static void main(String[] args) {
String str = "I love you";
HuffmanCode code = new HuffmanCode(str);
System.out.println("赫夫曼编码后二进制转成字节数组:" + Arrays.toString(code.getEncodedDataBytes()));
byte[] bytes = code.getEncodedDataBytes();
String encodeStr = HuffmanCode.getEncodedDataByBytes(bytes, code.getCodeMap());
System.out.println("赫夫曼编码二进制字符串:" + encodeStr);
System.out.println(code.decode(encodeStr, code.getCodeMap()));
System.out.println(code.decode(bytes, code.getCodeMap()));
Integer length1 = str.length() * 8;
Integer length2 = code.getEncodedData().length();
double redPer = (length1 - length2) / (double) length1;
System.out.printf("固定长度ascii编码长度length1:%d,赫夫曼编码后长度length2:%d,所以减少率为:%f\n", length1, length2, redPer);
}
}
![[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-u01K8VFh-1587795622115)(en-resource://database/32602:1)]](https://www.bmabk.com/wp-content/uploads/2022/05/post-loading.gif)
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。
文章由极客之音整理,本文链接:https://www.bmabk.com/index.php/post/46516.html
极客之音——专业性很强的中文编程技术网站,欢迎收藏到浏览器,订阅我们!