数据结构与算法14 – 树的应用

导读:本篇文章讲解 数据结构与算法14 – 树的应用,希望对大家有帮助,欢迎收藏,转发!站点地址:www.bmabk.com

1. 赫夫曼编码 – 最优二叉树

大幅减少字符串编码成二进制数的长度 – 比固定长度ASCII编码减少50%


字符编码:将字符转成二进制进行网络传输

字符编码方法

1. 纯ascii编码 – 每个字符都转成固定8位二进制 — 占用空间大

2. 计算句子同样字符出现个数,自己定义不定长的二进制位数进行表示字符,一啊不般出现次数越多,位数越少 – 不固定编码,占用空间小,但编码会冲突

3. 赫夫曼编码:利用赫夫曼树进行编码字符 — 不固定编码,大大减少空间,但编码不冲突


赫夫曼编码流程

1. 计算目标字符串中各个字符出现的次数

2. 将步骤1重构成最优二叉树 – 节点的每个权重代表字符出现的次数

3. 字符编码:根节点到某个字符节点的路径(左为0、右为1)

4. 根据3步骤的字符编码,将目标字符串转成二进制编码



OptimalBinaryTree.java

/**
 * @Classname OptimalBinaryTree
 * @Description
 * @Date 2020/4/13 10:48
 * @Created by lrc
 */
public class OptimalBinaryTree<T> {

    //二叉树根节点 - 根据datas进行组建
    Node<T> root;

    //构造器传入的数据
    T[] datas;


    //传数组构造最优二叉树
    OptimalBinaryTree(@NotNull T...datas) {


        T[] copyData = Arrays.copyOf(datas, datas.length);
        Arrays.sort(copyData);
//        System.out.println(Arrays.toString(copyData));
        datas = copyData;

        createdOptimalBinaryTree(datas);

    }


    public Node<T> getRoot() {
        return root;
    }

    // 最优二叉树 - 中序遍历
    public String  inOrder() {

        StringBuilder sb = new StringBuilder();

        sb.append("[");

        //真正的中序遍历
        root.inOrder(root,sb);

        sb.replace(sb.lastIndexOf(", "), sb.length(), "");

        sb.append("]");

        return sb.toString();


    }


    // 最优二叉树 - 前序遍历
    public String  preOrder() {

        StringBuilder sb = new StringBuilder();

        sb.append("[");

        //真正的前序遍历
        root.preOrder(root,sb);

        sb.replace(sb.lastIndexOf(", "), sb.length(), "");

        sb.append("]");

        return sb.toString();


    }


    @Override
    public String toString() {
        return preOrder();
    }

    private boolean createdOptimalBinaryTree(T[] datas) {

        LinkedList<Node<T>> list = new LinkedList<>();

        boolean flag = false;

        Node<T> newNode;
        for(T data : datas) {
            newNode = new Node<>();
            newNode.setData(data);
            list.add( newNode );
        }


        while(true) {

            //弹出两个子节点,构成左右子树
            Node<T> lNode = list.pop();
            Node<T> rNode = list.pop();

            //父节点权重是上面两个子节点的权重
            Node<T> parNode = new Node<>();
            parNode.weight = lNode.weight + rNode.weight;
            parNode.leftNode = lNode;
            parNode.rightNode = rNode;

            //父节点必须往链表头添加 - 否则遇到weigth相同时,组件二叉树会混乱
            list.addFirst(parNode);
            Collections.sort(list);

            //打印当前链表情况
//            System.out.println("遍历" + list);

            //如果链表中只有一个元素,说明这个节点就是根节点
            if(list.size() == 1) {
                root = list.get(0);
                flag = true;
                break;
            }

        }


        return flag;
    }



    protected class Node<T> implements Comparable<Node> {
        T data;
        Integer weight;
        Node<T> leftNode;
        Node<T> rightNode;

        public T getData() {
            return data;
        }

        public Node<T> getLeftNode() {
            return leftNode;
        }

        public void setLeftNode(Node<T> leftNode) {
            this.leftNode = leftNode;
        }

        public Node<T> getRightNode() {
            return rightNode;
        }

        public void setRightNode(Node<T> rightNode) {
            this.rightNode = rightNode;
        }

        public  void preOrder(Node node, StringBuilder sb) {


            sb.append(node.toString() + ", ");

            if(node.leftNode != null) {

                preOrder(node.leftNode, sb);
            }

            if(node.rightNode != null) {
                preOrder(node.rightNode, sb);
            }

        }


        public  void inOrder(Node node, StringBuilder sb) {

            if(node.leftNode != null) {

                inOrder(node.leftNode, sb);
            }

            sb.append(node.toString() + ", ");


            if(node.rightNode != null) {
                inOrder(node.rightNode, sb);
            }

        }

        public void setData(T data) {
            this.data = data;

            //节点的权重硬性规定为数据data内部的权重
            try {
                Field field = data.getClass().getDeclaredField("weight");
                field.setAccessible(true);

                weight = (Integer) field.get(data);
            } catch (Exception e) {
                e.printStackTrace();
            }

        }

        @Override
        public String toString() {
            return "Node{" +
                    "data=" + data +
                    ", weight=" + weight +
                    '}';
        }

        // node排序必须要实现这个函数 大于实参则大于0 小于则小于0 等于则等于0
        @Override
        public int compareTo(Node node) {
            return this.weight - node.weight;
        }
    }


    public static void main(String[] args) {


        //1. 数据准备 - 为什么写那么麻烦,主要是写熟悉一些数组、链表之间的转换函数
        Student student1 = new Student(1, "lrc", 8);
        Student student2 = new Student(2, "qcj", 2);
        Student student3 = new Student(3, "ert", 10);
        Student student4 = new Student(4, "bfg", 5);
        Student student5 = new Student(5, "ytu", 25);
        Student[] stu = {student1,student2,student3,student4,student5};
        // 2  5 8 10 25
        // 7 8 10 25
        // 10 15 25
        // 25 25
        // 50

        //2. 构造最优二叉树
        OptimalBinaryTree<Student> tree = new OptimalBinaryTree(stu);

        //3. 前序遍历二叉树
        System.out.println(tree);


    }

}


HuffmanCode.java

package top.linruchang.algorithm.tree;

import java.util.*;

/**
 * @Classname HuffmanCode
 * @Description
 * @Date 2020/4/13 21:27
 * @Created by lrc
 */
public class HuffmanCode {

    // 根据data字符个数构建赫夫曼二叉树
    OptimalBinaryTree<MyCharacter> tree;

    //需要被赫夫曼编码的原字符串
    String data;

    //赫夫曼编码字符串data
    String encodedData;

    //压缩encodedData的数据
    byte[] encodedDataArray;

    //存储data字符串对应的每个字符个数记录
    HashMap<Character, MyCharacter> charMap;

    //根据tree获取对应字符的赫夫曼编码
    HashMap<Character, String> codeMap = new HashMap<>();


    HuffmanCode(String str) {

        //需要被赫夫曼编码的原字符串
        this.data = str;

        // 1. 计算字符串各个字符出现的次数
        initCharMap(str);
        System.out.println("每个字符出现的次数:" + charMap);

        // 2. 构造最优二叉树
        MyCharacter[] myCharacters = new MyCharacter[charMap.size()];
        charMap.values().toArray(myCharacters);
        tree = new OptimalBinaryTree<>(myCharacters);
        System.out.println("前序遍历赫夫曼树:" + tree);


        //3. 根据tree获取对应字符的赫夫曼编码
        initHuffmanCode(tree.getRoot(), "", new StringBuilder());
        System.out.println("编码转换规则" + codeMap);


        //4. 根据codeMap字符编码规则将data字符串转成二进制编码
        EncodingData();
        System.out.println("原字符串:" + data);
        System.out.println("赫夫曼编码后的字符串:" + encodedData);


        //5. 压缩encodedData
        zipCode();

    }


    public HashMap<Character, String> getCodeMap() {
        return codeMap;
    }

    /**
     *  解码
     * @param binaryMessg 字节数组
     * @param codeMap  赫夫曼编码规则
     * @return  返回原字符串
     */
    public static String decode(byte[] binaryMessg, Map< Character, String> codeMap) {

        String encodedDataByBytes = getEncodedDataByBytes(binaryMessg, codeMap);

        return decode(encodedDataByBytes, codeMap);

    }


    /**
     *
     * @param binaryMessg 二进制字符串
     * @param codeMap  赫夫曼编码规则
     * @return 返回原字符串
     */
    public static String decode(String binaryMessg, Map< Character, String> codeMap) {

        StringBuilder textContent = new StringBuilder();

        Set<Map.Entry<Character, String>> entries = codeMap.entrySet();

        Integer startIndex = 0;

        while(true) {
            String code;
            Integer tempEndIndex;
            for(Map.Entry<Character, String> map: entries) {

                code = map.getValue();
                tempEndIndex = startIndex + code.length();
                if(tempEndIndex > binaryMessg.length()) {
                    continue;
                }

                String substr = binaryMessg.substring(startIndex, tempEndIndex);


                if(code.equals(substr)) {
                    textContent.append(map.getKey());
                    startIndex = tempEndIndex;
                    break;
                }
            }

            if(startIndex >= binaryMessg.length()) {
                break;
            }

        }
        return textContent.toString();
    }

    /**
     *
     * @param bytes  字节数组 - 存储了赫夫曼编码的信息
     * @param codeMap  匹配规则
     * @return  返回赫夫曼编码的二进制信息 - 根据这段信息进行解码
     */
    public static String getEncodedDataByBytes(byte[] bytes, Map< Character, String> codeMap) {

        StringBuilder sb = new StringBuilder();

        int j = 0;
        for(byte b : bytes) {

            String binStr = Integer.toBinaryString(b);

            if(binStr.length() <= 8) {

                if(j == bytes.length -1) {
                    sb = adjustString(sb, new StringBuilder(binStr), codeMap);
                    break;
                }

                for(int i = 0; i<8-binStr.length(); i++) {
                    sb.append("0");
                }
                sb.append(binStr);
            }
            if(binStr.length() > 8) {

                sb.append(binStr.substring(binStr.length()-8));
            }
            j++;
        }
        return sb.toString();

    }


    /**
     * 这段代码别看了,实在是太冗余,写完我都不想在看它,懒得重构
     * 修改最后一段二进制与前面一段二进制 - 因为Integer.toBinaryString得到的补码会忽略到前面的0,故需要补全
     * @param prefixSb
     * @param suffixSb
     * @param codeMap
     * @return
     */
    public static StringBuilder adjustString(StringBuilder prefixSb, StringBuilder suffixSb, Map< Character, String> codeMap) {

        //1. 找到前半段没有匹配的二进制字符
        Integer startIndex = 0;
        Integer endIndex = 0;

        String prefixStr = prefixSb.toString();
        String suffixStr = suffixSb.toString();

        Collection<String> codes = codeMap.values();

        Integer count = 0;
        while(true) {
            ++count;
            //遍历查询前缀字符对应赫夫曼编码

            //说明持续验证最后一段都不能找到对应的赫夫曼编码
            if(count == 2) {
                break;
            }

            for(String code : codes) {
                Integer curEndIndex = startIndex + code.length();

                //截图的结束索引大于被截取字段本身 - 则忽略该次查询
                if(curEndIndex > prefixSb.length()) {
                    continue;
                }

                String substr = prefixStr.substring(startIndex, curEndIndex);

                if(code.equals(code)) {
                    startIndex = curEndIndex;
                    endIndex = curEndIndex;
                    count = 0;
                    break;
                }
            }

            //前半段都符合编码情况
            if(startIndex >= prefixSb.length()) {
                break;
            }

        }

        // 2.找到赫夫曼编码符合修复条件的几个编码
        String waitPrefixStr = null;
        List<String> waitPrefixStrs = new ArrayList<String>();


        if(startIndex != prefixStr.length()) {
            waitPrefixStr = prefixStr.substring(startIndex);
            System.out.println("waitPrefix:" + waitPrefixStr);


            //查找修复前缀字段等待字段
            for(String code : codes) {

                //只有短过赫夫曼编码的才是需要被修复,否则不用修复
                if(code.length() > waitPrefixStr.length()) {
                    boolean isMatch = code.substring(0,waitPrefixStr.length()).matches(waitPrefixStr);

                    if(isMatch) {

                        //waitPrefixStr只有末尾等于有0才用修复,否则不用修复
                        if(code.charAt(waitPrefixStr.length()) == '0') {
                            waitPrefixStrs.add(code);
                        }
                    }
                }
            }

        }



        //3. 真正的修复前半段 - 利用waitPrefixStrs进行依依比对
        if(waitPrefixStrs.size() > 0) {

            String copyWaitPrefixStr = new String(waitPrefixStr);
            for(String repairTarget : waitPrefixStrs) {

                for(int index = waitPrefixStr.length(); index < repairTarget.length(); index++) {

                    if(repairTarget.charAt(index) == '0') {
                        copyWaitPrefixStr += "0";
                    }else {
                        break;
                    }
                }

                String tempSuffixStr = copyWaitPrefixStr + suffixStr;

                Integer sufixStartIndex = 0;

                Integer suffixCount = 0;

                //验证当前的copyWaitPrefixStr是否是需要的
                boolean isValid = false;
                while(true) {
                    ++suffixCount;

                    if(suffixCount == 2) {
                        break;
                    }

                    for(String code : codes) {

                        Integer curSuffixEndIndex = sufixStartIndex + code.length();

                        if(curSuffixEndIndex > tempSuffixStr.length()) {
                            continue;
                        }

                        if(tempSuffixStr.substring(sufixStartIndex, curSuffixEndIndex).equals(code)) {
                            sufixStartIndex = curSuffixEndIndex;
                            suffixCount = 0;
                            break;
                        }

                    }

                    if(sufixStartIndex == tempSuffixStr.length()) {
                        isValid = true;
                        break;
                    }

                }

                if(isValid) {

                    for(int index = waitPrefixStr.length(); index < repairTarget.length(); index++) {

                        if(repairTarget.charAt(index) == '0') {
                            prefixSb.append("0");
                        }else {
                            break;
                        }
                    }
                    break;

                }

            }

        }

        return prefixSb.append(suffixSb);
    }

    public String getEncodedData() {
        return encodedData;
    }

    private void zipCode() {
        byte[] bytes;

        //1.获取数组长度
        Integer length = encodedData.length();
        if (length % 8 == 0) {
            bytes = new byte[length / 8];
        } else {
            bytes = new byte[length / 8 + 1];
        }


        //2.每8位截取一段字符(补码),将其转成10进制
        Integer startIndex = 0;
        Integer endIndex = 8;
        Integer byteIndex = 0;
        while (startIndex < length - 1) {

            String binaryNum = encodedData.substring(startIndex, endIndex);
            System.out.println(binaryNum);
            Integer num = Integer.valueOf(binaryNum, 2);
            bytes[byteIndex++] = num.byteValue();
            startIndex += 8;
            endIndex += 8;

            if (endIndex > length) {
                endIndex = length;
            }
        }

        //3.返回字节数组
        encodedDataArray = bytes;
    }

    public byte[] getEncodedDataBytes() {
        return encodedDataArray;
    }

    public void EncodingData() {

        char[] chars = new char[data.length()];
        data.getChars(0, data.length(), chars, 0);

        StringBuilder sb = new StringBuilder();
        for (char curChar : chars) {
            String code = codeMap.get(curChar);
            sb.append(code);
        }
        encodedData = sb.toString();

    }

    /**
     * 根据赫夫曼树 - 获取字符赫夫曼编码
     *
     * @param node   当前节点
     * @param signLR 标记是左节点还是右节点  0:左节点  1:右节点
     * @param sb     存储到达路径  --根据上述signLR存储 -- 字符赫夫曼编码
     */
    private void initHuffmanCode(OptimalBinaryTree.Node node, String signLR, StringBuilder sb) {

        StringBuilder sb2 = new StringBuilder(sb);
        sb2.append(signLR);

        if (node != null) {

            if (node.data == null) {

                initHuffmanCode(node.leftNode, "0", sb2);

                initHuffmanCode(node.rightNode, "1", sb2);

            } else {
                Character curCharacter = ((MyCharacter) node.getData()).getCharacter();
                codeMap.put(curCharacter, sb2.toString());
            }

        }

    }

    //计算原字符串各个字符出现的次数
    private void initCharMap(String str) {
        char[] chars = new char[str.length()];
        str.getChars(0, str.length(), chars, 0);

        charMap = new HashMap<>();
        for (char curChar : chars) {

            boolean hasKey = charMap.containsKey(curChar);

            MyCharacter curCharacter;
            if (hasKey) {
                curCharacter = charMap.get(curChar);
                Integer count = curCharacter.getCount();
                curCharacter.setCount(++count);
            } else {

                curCharacter = new MyCharacter();
                curCharacter.setCharacter(curChar);
                curCharacter.setCount(1);
                charMap.put(curChar, curCharacter);
            }

        }
    }

    //内部类 - 用来存储原字符串对应的字符以及出现次数、以及节点权重
    private class MyCharacter implements Comparable<MyCharacter> {

        Character character;
        Integer asciiValue;
        Integer count;
        Integer weight;

        public Character getCharacter() {
            return character;
        }

        public void setCharacter(Character character) {
            this.character = character;
            this.asciiValue = (int) character.charValue();
        }

        public int getCount() {
            return count;
        }

        public void setCount(int count) {
            this.count = count;
            this.weight = count;
        }

        @Override
        public String toString() {
            return "MyCharacter{" +
                    "character=" + character +
                    ", asciiValue=" + asciiValue +
                    ", count=" + count +
                    '}';
        }

        @Override
        public int compareTo(MyCharacter orderCharacter) {
            return this.count - orderCharacter.count;
        }
    }


    public static void main(String[] args) {


//        String str = "abcfdef tratr egcs";
        String str = "I love you";

        HuffmanCode code = new HuffmanCode(str);

        System.out.println("赫夫曼编码后二进制转成字节数组:" + Arrays.toString(code.getEncodedDataBytes()));

        //通过字节数组进行节码
        byte[] bytes = code.getEncodedDataBytes();

        //将字节数组转成二进制字符串 - 通过二进制字符串转成原字符串明文
        String encodeStr = HuffmanCode.getEncodedDataByBytes(bytes, code.getCodeMap());
        System.out.println("赫夫曼编码二进制字符串:" + encodeStr);
        System.out.println(code.decode(encodeStr, code.getCodeMap()));
        System.out.println(code.decode(bytes, code.getCodeMap()));


        Integer length1 = str.length() * 8;
        Integer length2 = code.getEncodedData().length();
        double redPer = (length1 - length2) / (double) length1;
        System.out.printf("固定长度ascii编码长度length1:%d,赫夫曼编码后长度length2:%d,所以减少率为:%f\n", length1, length2, redPer);

    }

}

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-u01K8VFh-1587795622115)(en-resource://database/32602:1)]

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。

文章由极客之音整理,本文链接:https://www.bmabk.com/index.php/post/46516.html

(0)
小半的头像小半

相关推荐

极客之音——专业性很强的中文编程技术网站,欢迎收藏到浏览器,订阅我们!