首页 > 代码库 > Lucene实战-Indexer索引创建
Lucene实战-Indexer索引创建
package com.lin.util;

import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Builds a Lucene index from the files located directly inside a data directory
 * (sub-directories are not traversed).
 *
 * <p>Each accepted file becomes one document with three fields: {@code contents}
 * (tokenized with {@link IKAnalyzer}, not stored), {@code filename} and
 * {@code fullpath} (both stored and indexed verbatim).
 */
public class Indexer {

    /** Index location used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_INDEX_DIR = "d:\\index";

    /** Data location used by {@link #main(String[])} when no second argument is given. */
    private static final String DEFAULT_DATA_DIR = "D:\\Program Files\\TortoiseSVN";

    /** Kept so it can be released in {@link #close()}; closing the writer does not close it. */
    private final Directory directory;

    private final IndexWriter writer;

    /**
     * Indexes every {@code .txt} file directly under {@code dataDir} into the
     * index at {@code indexDir} and prints how many files were indexed and how
     * long it took.
     *
     * @param indexDir directory that holds (or will hold) the index
     * @param dataDir  directory containing the files to index
     * @throws IllegalArgumentException if either argument is {@code null}
     * @throws Exception                if the index cannot be opened or written
     */
    public static void index(String indexDir, String dataDir) throws Exception {
        if (indexDir == null || dataDir == null) {
            throw new IllegalArgumentException("请检查你的参数是否正确");
        }

        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed;
        try {
            numIndexed = indexer.index(dataDir, new TextFilesFilter());
        } finally {
            indexer.close();
        }
        long end = System.currentTimeMillis();

        System.out.println("Indexing " + numIndexed + " files took " + (end - start) + " milliseconds");
    }

    /**
     * Opens the index directory and creates the writer used to build the index.
     *
     * @param indexDir directory that holds (or will hold) the index
     * @throws IOException if the directory or the writer cannot be opened
     */
    private Indexer(String indexDir) throws IOException {
        directory = FSDirectory.open(new File(indexDir));
        boolean opened = false;
        try {
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_2, new IKAnalyzer());
            writer = new IndexWriter(directory, config);
            opened = true;
        } finally {
            if (!opened) {
                // The writer could not be created (e.g. the index is locked):
                // release the directory, but let the original failure propagate.
                try {
                    directory.close();
                } catch (IOException ignored) {
                    // Secondary failure; the exception already in flight matters more.
                }
            }
        }
    }

    /**
     * Indexes every readable, non-hidden regular entry directly under
     * {@code dataDir} that is accepted by {@code filter}.
     *
     * @param dataDir directory containing the files to index
     * @param filter  selects the files to index; {@code null} accepts every file
     * @return the number of files indexed by this call
     * @throws IOException if {@code dataDir} cannot be listed (it does not exist
     *                     or is not a directory) or a file cannot be indexed
     */
    public int index(String dataDir, FileFilter filter) throws IOException {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null instead of throwing when the path is not
            // a directory or an I/O error occurs.
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }

        // Counted locally: writer.numDocs() would also include documents that
        // were already in the index before this call.
        int indexed = 0;
        for (File f : files) {
            // canRead() already implies that the file exists.
            if (!f.isDirectory() && !f.isHidden() && f.canRead()
                    && (filter == null || filter.accept(f))) {
                indexFile(f);
                indexed++;
            }
        }
        return indexed;
    }

    /**
     * Adds a single file to the index and releases any reader held by its
     * document, whether or not indexing succeeded.
     */
    private void indexFile(File f) throws IOException {
        System.out.println("indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        try {
            writer.addDocument(doc);
        } finally {
            closeReaders(doc);
        }
    }

    /** Closes every reader-valued field of {@code doc}; failures to close are ignored. */
    private static void closeReaders(Document doc) {
        for (IndexableField field : doc.getFields()) {
            Reader reader = field.readerValue();
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done; the document was already handled.
                }
            }
        }
    }

    /**
     * Builds the Lucene document for a file.
     *
     * <p>The {@code contents} field wraps an open reader; it is consumed and
     * closed when the document is indexed by this class.
     *
     * @param f file to convert
     * @return document with {@code contents}, {@code filename} and {@code fullpath} fields
     * @throws IOException if the file cannot be opened or its canonical path
     *                     cannot be resolved
     */
    protected Document getDocument(File f) throws IOException {
        // Resolve the path before opening the reader so that a failure here
        // cannot leave an open file handle behind.
        String fullPath = f.getCanonicalPath();

        Document doc = new Document();
        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that matches the encoding of the indexed files.
        doc.add(new TextField("contents", new FileReader(f)));
        doc.add(new StringField("filename", f.getName(), Field.Store.YES));
        doc.add(new StringField("fullpath", fullPath, Field.Store.YES));
        return doc;
    }

    /** Accepts files whose name ends with {@code .txt}, ignoring case. */
    private static class TextFilesFilter implements FileFilter {
        @Override
        public boolean accept(File f) {
            return f.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Commits pending changes and releases the writer and the index directory.
     *
     * @throws IOException if closing the writer or the directory fails
     */
    public void close() throws IOException {
        try {
            writer.close();
        } finally {
            directory.close();
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the index directory and
     *             {@code args[1]} the data directory; the built-in defaults
     *             are used for any argument that is missing
     * @throws Exception if indexing fails
     */
    public static void main(String[] args) throws Exception {
        String indexDir = args.length > 0 ? args[0] : DEFAULT_INDEX_DIR;
        String dataDir = args.length > 1 ? args[1] : DEFAULT_DATA_DIR;
        Indexer.index(indexDir, dataDir);
    }
}
Lucene实战-Indexer索引创建
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。