lucene学习-3 - 代码重构 - 程序员工具箱

首页 > 代码库 > lucene学习-3 - 代码重构

lucene学习-3 - 代码重构

2024-08-09 08:32:28 219人阅读

内容如标题所示：这一节要重构上一节的代码，大体上按如下的思路进行：

功能拆分；
创建必要的工具类；

先创建两个工具类：StringUtils 和 TxtUtils。

StringUtils，主要是获取当前系统的换行符：

package com.zhyea.util;public class StringUtils {    public static final String NEWLINE = System.getProperty("line.separator");    }

TxtUtils，主要用于读取txt文件。这里用到了一个自定义类FileCharsetDetector来检测文件编码，其实现可以点击这个超链接查看：

package com.zhyea.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Helper methods for reading plain-text (txt) files.
 *
 * @author robin
 */
public final class TxtUtils {

    /** Static-only holder; prevents instantiation. */
    private TxtUtils() {
    }

    /**
     * Determines the charset name to use when reading a txt file.
     *
     * <p>The detection itself is delegated to
     * {@code FileCharsetDetector.checkEncoding(File)}. A result of
     * {@code "windows-1252"} is replaced by {@code "Unicode"}.
     * NOTE(review): presumably the detector reports UTF-16 files as
     * windows-1252 - confirm against FileCharsetDetector.
     *
     * @param file
     *            the txt file to inspect
     * @return the charset name to read the file with; whatever the detector
     *         returned (possibly {@code null}) unless it was
     *         {@code "windows-1252"}
     * @throws IOException
     *             if the detector fails to read the file
     */
    public static String checkEncode(File file) throws IOException {
        String encode = FileCharsetDetector.checkEncoding(file);
        // Constant-first comparison so a null detection result cannot raise
        // a NullPointerException here.
        return "windows-1252".equals(encode) ? "Unicode" : encode;
    }

    /**
     * Reads the whole content of a txt file into a string.
     *
     * <p>The file is read line by line and every line, including the last
     * one, is terminated with {@link StringUtils#NEWLINE}; the original line
     * terminators are therefore not preserved.
     *
     * @param file
     *            the txt file to read
     * @return the file content with normalized line terminators
     * @throws IOException
     *             if the encoding cannot be determined or the file cannot be
     *             read
     */
    public static String readTxt(File file) throws IOException {
        String encode = checkEncode(file);
        if (null == encode) {
            throw new IOException("Unable to detect the encoding of " + file);
        }
        // try-with-resources closes both streams on every path and never
        // dereferences a reader that failed to open. The stream is declared
        // separately so it is closed even if the charset name is rejected.
        try (FileInputStream in = new FileInputStream(file);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, encode))) {
            StringBuilder builder = new StringBuilder();
            String line;
            while (null != (line = reader.readLine())) {
                builder.append(line).append(StringUtils.NEWLINE);
            }
            return builder.toString();
        }
    }
}

然后是拆分后的Lucene操作类：

package com.zhyea.doggie;import java.io.File;import java.io.IOException;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.StringField;import org.apache.lucene.document.TextField;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;import com.zhyea.util.TxtUtils;public class DoggieLucene {    /**     * 分词器     */    private static Analyzer analyzer;    /**     * 创建分词器实例     *      * @param clazz     *            创建分词器使用的类     * @return     * @throws InstantiationException     * @throws IllegalAccessException     */    public static Analyzer createAnalyzer(Class<?> clazz)            throws InstantiationException, IllegalAccessException {        if (null != analyzer && analyzer.getClass().equals(clazz)) {            return analyzer;        }        return analyzer = (Analyzer) clazz.newInstance();    }    /**     * 创建索引写出器     *      * @param analyzer     *            分词器     * @param indexPath     *            索引存储路径     * @return     * @throws IOException     */    public static IndexWriter createIndexWriter(Analyzer analyzer,            String indexPath) throws IOException {        // 创建索引存储目录        Directory dir = FSDirectory.open(new File(indexPath));        // 创建索引写入器配置        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST,                analyzer);        // 创建索引写入器        return new IndexWriter(dir, config);    }    /**     * 写入索引，索引文件为本地文本文件     *      * @param writer     *            索引写出器     * @param localDocPath     *     
       本地文本文件存储地址     * @throws IOException     */    public static void addLocalDocument(IndexWriter writer, String localDocPath)            throws IOException {        File directory = new File(localDocPath);        for (File tmp : directory.listFiles()) {            Document doc = new Document();            doc.add(new StringField("path", tmp.getCanonicalPath(),                    Field.Store.YES));            doc.add(new TextField("content", TxtUtils.readTxt(tmp),                    Field.Store.YES));            writer.addDocument(doc);            writer.commit();        }    }    /**     * 创建索引写入器     *      * @param indexPath     *            索引存储路径     * @return     * @throws IOException     */    public static IndexReader createIndexReader(String indexPath)            throws IOException {        return DirectoryReader.open(FSDirectory.open(new File(indexPath)));    }    /**     * 创建索引搜索器     *      * @param reader     *            索引写入器     * @return     */    public static IndexSearcher createIndexSearcher(IndexReader reader) {        return new IndexSearcher(reader);    }    /**     * 执行搜索     *      * @param searcher     *            搜索器     * @param target     *            搜索对象     * @return     * @throws IOException     */    public static TopDocs executeSearch(IndexSearcher searcher, Query query)            throws IOException {        return searcher.search(query, 10000);    }    /**     * 展示查询结果     *      * @param docs     *            查询结果文档     * @throws IOException     */    public static void showResult(TopDocs docs, IndexReader reader)            throws IOException {        Document doc = null;        for (ScoreDoc tmp : docs.scoreDocs) {            doc = reader.document(tmp.doc);            System.out.println(tmp.score + "  " + doc.get("path"));            // System.out.println(doc.getField("path").stringValue());        }    }}

拆得比较琐碎了，凑合看吧。

创建索引的类：

package com.zhyea.doggie;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.index.IndexWriter;

import com.zhyea.util.FileUtil;

/**
 * Command-line driver that builds the index from the local document
 * directory.
 */
public class IndexTest {

    /** Directory in which the index is stored. */
    String indexPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\index";

    /** Directory containing the text files to index. */
    String docPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\docs";

    /**
     * Builds the index and prints the stack trace of any exception.
     *
     * @param args
     *            ignored
     */
    public static void main(String[] args) {
        try {
            IndexTest indexer = new IndexTest();
            indexer.createIndex();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the index: obtains a SmartChineseAnalyzer, opens a writer on
     * {@code indexPath}, indexes the files under {@code docPath} and closes
     * the writer.
     *
     * @throws IOException
     *             if the index cannot be written or a document cannot be read
     * @throws InstantiationException
     *             if the analyzer cannot be instantiated
     * @throws IllegalAccessException
     *             if the analyzer constructor is not accessible
     */
    private void createIndex() throws IOException,
                                      InstantiationException,
                                      IllegalAccessException {
        // Nothing needs closing until the writer has actually been opened,
        // so both set-up steps run before the guarded section.
        Analyzer chineseAnalyzer = DoggieLucene.createAnalyzer(SmartChineseAnalyzer.class);
        IndexWriter indexWriter = DoggieLucene.createIndexWriter(chineseAnalyzer, indexPath);
        try {
            DoggieLucene.addLocalDocument(indexWriter, docPath);
        } finally {
            indexWriter.close();
        }
    }
}

执行搜索的类：

package com.zhyea.doggie;import java.io.IOException;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;import org.apache.lucene.index.IndexReader;import org.apache.lucene.queryparser.classic.ParseException;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.TopDocs;public class SearchTest {    String indexPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\index";        public static void main(String[] args){        try{            new SearchTest().executeSearch();        }catch(Exception e){            e.printStackTrace();        }    }        public void executeSearch() throws ParseException,                                        IOException,                                        InstantiationException,                                        IllegalAccessException{        IndexReader reader = null;        try{            reader = DoggieLucene.createIndexReader(indexPath);            IndexSearcher searcher = DoggieLucene.createIndexSearcher(reader);            Analyzer analyzer = DoggieLucene.createAnalyzer(SmartChineseAnalyzer.class);            Query query = new QueryParser("content", analyzer).parse("杨过");            TopDocs docs = DoggieLucene.executeSearch(searcher, query);            DoggieLucene.showResult(docs, reader);        }finally{            if(null!=reader)reader.close();        }    }}

OK。

lucene学习-3 - 代码重构

声明：以上内容来自用户投稿及互联网公开渠道收集整理发布，本网站不拥有所有权，未作人工编辑处理，也不承担相关法律责任。若内容有误或涉及侵权，可进行投诉（投诉/举报），工作人员会在5个工作日内联系你；一经查实，本站将立刻删除涉嫌侵权内容。

联系我们

首页 > 代码库 > lucene学习-3 - 代码重构

lucene学习-3 - 代码重构

看完仍有疑问？有类似问题直接问程序猿