首页 > 代码库 > trie数的实现

trie数的实现

      Trie树又称单词查找树,Trie树,是一种树形结构,是一种哈希树的变种。典型应用是用于统计,排序和保存大量的字符串(但不仅限于字符串),所以经常被搜索引擎系统用于文本词频统计。它的优点是:利用字符串的公共前缀来减少查询时间,最大限度地减少无谓的字符串比较,查询效率比哈希表高。

      这里的需求是,给如一组词汇,北京人,北京,武汉,武汉话等等。能够统计“武汉”这个词的词频,输入"武"的时候,能够得到以“武”开头的所有词。实现的代码如下:

package com.dong.util;import java.util.ArrayList;import java.util.List;public class Trie {    // 根节点    private final TrieNode root = new TrieNode(‘ ‘);    // 向trie树中插入一个词    public void insert(String word) {        if (word == null || word.length() == 0) {            return;        }        String left = word;        TrieNode cur = root;        while (left.length() > 0) {            char toInert = left.charAt(0);            TrieNode next = null;            // 如果那个节点不存在的话,将当前节点插入            if (containCharNode(cur.getChild(), toInert) == null) {                next = new TrieNode(toInert);                cur.getChild().add(next);            } else {                next = containCharNode(cur.getChild(), toInert);            }            cur = next;            left = left.substring(1);            if (left.length() == 0) {                cur.setFreq(cur.getFreq() + 1);            }        }    }    // 通过前缀查找词,返回包括前缀的所有词。    public List<String> search(String prefix) {        List<String> retList = new ArrayList<String>();        List<String> tempList = new ArrayList<String>();        if (prefix == null || prefix.length() == 0) {            return null;        }        String left = prefix;        TrieNode cur = root;        while (left.length() > 0) {            char toFind = left.charAt(0);            TrieNode next = null;            // 如果那个节点不存在的话,将当前节点插入            if (containCharNode(cur.getChild(), toFind) == null) {                return null;            } else {                next = containCharNode(cur.getChild(), toFind);                if (left.length() == 1) {                    cur = next;                    break;                }            }            cur = next;            left = left.substring(1);        }        dfs(cur, new ArrayList<Character>(), tempList);        for (String s : tempList) {            retList.add(prefix + s);        }        if (getFreq(prefix) > 0) {            retList.add(prefix);        }        return retList;    }    // 深度搜索一个trieNode节点下的所有的词    private void dfs(TrieNode root, List<Character> stack, List<String> retList) {        if (root.getChild().size() == 0) {            StringBuffer sb = new StringBuffer();            for (char c : stack) {                sb.append(c);            }            retList.add(sb.toString());        } else {            for (TrieNode r : root.getChild()) {                stack.add(r.getVal());                dfs(r, stack, retList);                stack.remove(stack.size() - 1);            }        }    }    // 查看一个节点的的子节点是否包含一个字符    public TrieNode containCharNode(List<TrieNode> child, char c) {        TrieNode ret = null;        for (TrieNode temp : child) {            if (temp.getVal() == c) {                ret = temp;            }        }        return ret;    }    // 得到一个词的词频    public int getFreq(String word) {        if (word == null || word.length() == 0) {            return 0;        }        String left = word;        TrieNode cur = root;        while (left.length() > 0) {            char toFind = left.charAt(0);            TrieNode next = null;            // 如果不存在此节点,返回0            if (containCharNode(cur.getChild(), toFind) == null) {                return 0;            } else {                next = containCharNode(cur.getChild(), toFind);                if (left.length() == 1) {                    cur = next;                    break;                }            }            cur = next;            left = left.substring(1);        }        return cur.getFreq();    }    public static String fill(String prefix, ArrayList<Character> stack) {        ArrayList<String> retList = new ArrayList<String>();        StringBuffer sb = new StringBuffer();        sb.append(prefix);        for (char c : stack) {            sb.append(c);        }        return sb.toString();    }    public static void main(String[] args) {        Trie t = new Trie();        String[] wordList = { "我晕", "我晕啊", "我不信啊", "我信了", "也是", "这不对啊", "我晕" };        for (String word : wordList) {            t.insert(word);        }        System.out.println(t.search("我晕"));    }}class TrieNode {    // 节点下存放的字符    private char val;    // 一个节点下面的子节点    private List<TrieNode> child;    // 该词的词频    private int freq;    public TrieNode(char val) {        child = new ArrayList();        freq = 0;        this.val = val;    }    public char getVal() {        return val;    }    public void setVal(char val) {        this.val = val;    }    public List<TrieNode> getChild() {        return child;    }    public void setChild(List<TrieNode> child) {        this.child = child;    }    public int getFreq() {        return freq;    }    public void setFreq(int freq) {        this.freq = freq;    }}

 

trie数的实现