lucene学习 - 2 - 一个示例 - 程序员工具箱

首页 > 代码库 > lucene学习 - 2 - 一个示例

lucene学习 - 2 - 一个示例

2024-08-08 22:50:33 218人阅读

接下来我会写一个lucene的实例。实际上在搜索引擎上随便搜索下都能找到这样的东西。不过还是写一下吧，这也是我学习的经历。

package com.zhyea.doggie;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene example: indexes every text file found in a directory and
 * then runs a single term query against the resulting index.
 */
public class LuceneTest {

    /**
     * Line separator appended after every line read by {@link #readTxt(File)}.
     */
    static final String NEWLINE = System.getProperty("line.separator");

    /**
     * Charset used to decode the sample files. It defaults to the platform
     * charset (the same charset {@code FileReader} would use); if the sample
     * files are stored in a different encoding, set this to that encoding,
     * otherwise the indexed content will be garbled.
     */
    private static final Charset DOC_CHARSET = Charset.defaultCharset();

    /**
     * Builds the index from the sample directory, then searches the
     * {@code content} field and prints the name and path of every hit.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Directory holding the sample files to be indexed.
        String docPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\docs";
        // Directory in which the index files are stored.
        String indexPath = "D:\\aqsiqDevelop\\workspace3\\doggie\\WebContent\\index";

        // The analyzer (standard analyzer here) and the index directory are
        // shared by the indexing and the searching phase; try-with-resources
        // guarantees both are closed even when an exception is thrown.
        try (Analyzer analyzer = new StandardAnalyzer();
             Directory dir = FSDirectory.open(new File(indexPath))) {

            // Writer configuration: Lucene version plus the analyzer to use.
            IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);

            // The writer is closed before the reader is opened so that the
            // index write lock is released and nothing is left pending.
            try (IndexWriter writer = new IndexWriter(dir, config)) {
                addDocuments(docPath, writer);
            }

            /* ---------- indexing finished, searching starts here ---------- */
            try (IndexReader reader = DirectoryReader.open(dir)) {
                IndexSearcher searcher = new IndexSearcher(reader);
                // A TermQuery is not analyzed: it only matches a term that is
                // stored in the index exactly as written here.
                Query query = new TermQuery(new Term("content", "杨过"));
                TopDocs topDocs = searcher.search(query, 10000);

                for (ScoreDoc hit : topDocs.scoreDocs) {
                    Document doc = reader.document(hit.doc);
                    System.out.println("书名：" + doc.get("name")
                            + "---------------------"
                            + "路径：" + doc.get("path"));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Walks the sample directory and adds one document per regular file.
     * The original samples are three Jin Yong novels.
     *
     * @param docPath path of the directory holding the sample files
     * @param writer  index writer that receives the documents
     * @throws IOException if the directory cannot be listed or a file cannot
     *                     be read or indexed
     */
    private static void addDocuments(String docPath, IndexWriter writer)
            throws IOException {
        File[] files = new File(docPath).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or
            // cannot be read; fail with a clear message instead of an NPE.
            throw new IOException("Not a readable directory: " + docPath);
        }

        for (File file : files) {
            if (!file.isFile()) {
                // Sub-directories cannot be read as text; skip them.
                continue;
            }
            // A Document is the basic unit that gets indexed.
            Document doc = new Document();
            doc.add(new StringField("name", file.getName(), Field.Store.YES));
            doc.add(new StringField("path", file.getCanonicalPath(), Field.Store.YES));
            doc.add(new TextField("content", readTxt(file), Field.Store.YES));
            writer.addDocument(doc);
        }

        // Commit once after all documents are added rather than per file.
        writer.commit();
    }

    /**
     * Reads a whole text file into a string, decoding it with
     * {@link #DOC_CHARSET} and terminating every line with {@link #NEWLINE}.
     *
     * @param file the text file to read
     * @return the file content
     * @throws IOException if the file cannot be opened or read
     */
    private static String readTxt(File file) throws IOException {
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), DOC_CHARSET))) {
            StringBuilder builder = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                builder.append(line).append(NEWLINE);
            }
            return builder.toString();
        }
    }
}

执行代码，发现没有任何输出。用Luke查看索引目录，发现content字段里存的是乱码：

技术分享

读取txt文件时，需要指定与文件实际编码一致的字符集；或者直接把txt文件转换成与工作空间默认编码相同的编码也可以。

这里就不写出了。

调整乱码后，再次执行程序，发现还是不能检索出什么东西。再次查看索引目录：

技术分享

所有的中文字符都被StandardAnalyzer切分成了单独的Term，而TermQuery要找的是“杨过”这个完整的词项，索引里并没有这样的Term，所以检索不到。这次需要调整分析器，将分析器换成CJKAnalyzer（它会把相邻的两个汉字组成一个Term）。调整后就能检索出结果了：

技术分享

实际上，影响查询结果的不只是分析器，还有这一句：

new TermQuery(new Term("content", "杨过"));

好了，这些可以留到以后再说。

以上就是全部内容。

lucene学习 - 2 - 一个示例

声明：以上内容来自用户投稿及互联网公开渠道收集整理发布，本网站不拥有所有权，未作人工编辑处理，也不承担相关法律责任，若内容有误或涉及侵权可进行投诉：投诉/举报工作人员会在5个工作日内联系你，一经查实，本站将立刻删除涉嫌侵权内容。

联系我们

首页 > 代码库 > lucene学习 - 2 - 一个示例

lucene学习 - 2 - 一个示例

看完仍有疑问？有类似问题直接问程序猿