首页 > 代码库 > Java实现的一个词频统计程序

Java实现的一个词频统计程序

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/**
 * Word-frequency demo: counts how often each whitespace-separated word occurs
 * in a fixed set of sample sentences and prints the totals to standard output.
 */
public class WordCount {
	/**
	 * Counts the words of the built-in sample sentences and prints one
	 * {@code word:count} line per distinct word. The lines appear in the
	 * iteration order of the underlying {@link HashMap}.
	 *
	 * @param args command-line arguments; not used
	 */
	public static void main(String[] args) {
		String[] text=new String[]{"the weather is good ","today is good","today has good weather","good weather is good"};
		Map<String, Integer> counts=countWords(text);
		for (Map.Entry<String, Integer> entry : counts.entrySet()) {
			System.out.println(entry.getKey()+":"+entry.getValue());
		}
	}

	/**
	 * Tallies every whitespace-separated word found in the given lines.
	 *
	 * @param lines the lines of text to scan
	 * @return a map from each distinct word to its number of occurrences
	 */
	private static Map<String, Integer> countWords(String[] lines) {
		Map<String, Integer> counts=new HashMap<String, Integer>();
		for (String line : lines) {
			// Split on runs of whitespace so repeated blanks do not yield empty tokens.
			for (String word : line.split("\\s+")) {
				// A line that is empty or starts with whitespace still produces one
				// leading empty token; it is not a word, so it must not be counted.
				if (word.isEmpty()) {
					continue;
				}
				Integer previous=counts.get(word);
				counts.put(word, previous==null ? 1 : previous+1);
			}
		}
		return counts;
	}
}
海量数据可以使用MapReduce来做。