首页 > 代码库 > lucene 建立CRUD操作

lucene 建立CRUD操作

IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.getDirectory()); // 指定所用的索引库
这句会引发线程安全问题:在全局中 IndexSearcher 只能有一个对象才可以,所以在 ArticleDocumentUtils 中保存一个,并且引用它。
IndexWriter 为了提高效率,会先把更改缓存在内存中,所以需要调用 commit() 才能把数据写入索引库文件中。


数据库优化
每次添加数据都会在索引文件夹下产生很多小文件,为了提高效率,需要把这些小文件合并。

//优化,合并多个小文件为一个大文件
LuceneUtils.getIndexWriter().optimize();


//配置当小文件的数量达到多少个后就自动合并为一个大文件,最小2,默认10
LuceneUtils.getIndexWriter().setMergeFactor(3);
当增加数据的时候自动触发。

LuceneUtils.java

 1 package cn.itcast._util; 2  3 import java.io.File; 4 import java.io.IOException; 5  6 import org.apache.lucene.analysis.Analyzer; 7 import org.apache.lucene.analysis.standard.StandardAnalyzer; 8 import org.apache.lucene.index.CorruptIndexException; 9 import org.apache.lucene.index.IndexWriter;10 import org.apache.lucene.index.IndexWriter.MaxFieldLength;11 import org.apache.lucene.store.Directory;12 import org.apache.lucene.store.FSDirectory;13 import org.apache.lucene.store.LockObtainFailedException;14 import org.apache.lucene.util.Version;15 16 public class LuceneUtils {17 18     private static Directory directory; // 索引库目录19     private static Analyzer analyzer; // 分词器20 21     private static IndexWriter indexWriter;22 23     static {24         try {25             // 这里应是读取配置文件得到的索引库目录26             directory = FSDirectory.open(new File("./indexDir"));27             analyzer = new StandardAnalyzer(Version.LUCENE_30);28         } catch (IOException e) {29             throw new RuntimeException(e);30         }31     }32 33     /**34      * 获取全局唯一的IndexWriter对象35      * 36      * @return37      */38     public static IndexWriter getIndexWriter() {39         // 在第一次使用IndexWriter是进行初始化40         if (indexWriter == null) {41             synchronized (LuceneUtils.class) { // 注意线程安全问题42                 if (indexWriter == null) {43                     try {44                         indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);45                         System.out.println("=== 已经初始化 IndexWriter ===");46                     } catch (Exception e) {47                         throw new RuntimeException(e);48                     }49                 }50             }51 52             // 指定一段代码,会在JVM退出之前执行。53             Runtime.getRuntime().addShutdownHook(new Thread() {54                 public void run() {55                     try {56                         indexWriter.close();57                         System.out.println("=== 已经关闭 
IndexWriter ===");58                     } catch (Exception e) {59                         throw new RuntimeException(e);60                     }61                 }62             });63         }64 65         return indexWriter;66     }67 68     public static Directory getDirectory() {69         return directory;70     }71 72     public static Analyzer getAnalyzer() {73         return analyzer;74     }75 76 }

 

 

ArticleDocumentUtils.java

 1 package cn.itcast._util; 2  3 import org.apache.lucene.document.Document; 4 import org.apache.lucene.document.Field; 5 import org.apache.lucene.document.Field.Index; 6 import org.apache.lucene.document.Field.Store; 7 import org.apache.lucene.util.NumericUtils; 8  9 import cn.itcast._domain.Article;10 11 public class ArticleDocumentUtils {12 13     /**14      * 把Article转为Document15      * 16      * @param article17      * @return18      */19     public static Document articleToDocument(Article article) {20         Document doc = new Document();21 22         String idStr = NumericUtils.intToPrefixCoded(article.getId()); // 一定要使用Lucene的工具类把数字转为字符串!23         24         doc.add(new Field("id", idStr, Store.YES, Index.NOT_ANALYZED)); // 注意:唯一标示符一般选择Index.NOT_ANALYZED25         doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));26         doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));27 28         return doc;29     }30 31     /**32      * 把Document转为Article33      * 34      * @param doc35      * @return36      */37     public static Article documentToArticle(Document doc) {38         Article article = new Article();39         40         Integer id = NumericUtils.prefixCodedToInt(doc.get("id")); // 一定要使用Lucene的工具类把字符串转为数字!41         42         article.setId(id);43         article.setTitle(doc.get("title"));44         article.setContent(doc.get("content"));45         46         return article;47     }48 49 }
View Code

QueryResult.java

 1 package cn.itcast._domain; 2  3 import java.util.List; 4  5 public class QueryResult { 6     private List list; // 一段数据列表 7     private int count; // 总记录数 8  9     public QueryResult(List list, int count) {10         this.list = list;11         this.count = count;12     }13 14     public List getList() {15         return list;16     }17 18     public void setList(List list) {19         this.list = list;20     }21 22     public int getCount() {23         return count;24     }25 26     public void setCount(int count) {27         this.count = count;28     }29 30 }
View Code

 

ArticleIndexDao.java

  1 package cn.itcast.b_indexdao;  2   3 import java.io.IOException;  4 import java.util.ArrayList;  5 import java.util.List;  6   7 import org.apache.lucene.document.Document;  8 import org.apache.lucene.index.Term;  9 import org.apache.lucene.queryParser.MultiFieldQueryParser; 10 import org.apache.lucene.queryParser.QueryParser; 11 import org.apache.lucene.search.IndexSearcher; 12 import org.apache.lucene.search.Query; 13 import org.apache.lucene.search.TopDocs; 14 import org.apache.lucene.util.NumericUtils; 15 import org.apache.lucene.util.Version; 16  17 import cn.itcast._domain.Article; 18 import cn.itcast._domain.QueryResult; 19 import cn.itcast._util.ArticleDocumentUtils; 20 import cn.itcast._util.LuceneUtils; 21  22 public class ArticleIndexDao { 23  24     /** 25      * 保存到索引库(建立索引) 26      *  27      * @param article 28      */ 29     public void save(Article article) { 30         // 1,把Article转为Document 31         Document doc = ArticleDocumentUtils.articleToDocument(article); 32  33         // 2,添加到索引库中 34         try { 35             LuceneUtils.getIndexWriter().addDocument(doc); // 添加 36             LuceneUtils.getIndexWriter().commit(); // 提交更改 37         } catch (Exception e) { 38             throw new RuntimeException(e); 39         } 40     } 41  42     /** 43      * 删除索引 44      *  45      * Term :某字段中出现的某一个关键词(在索引库的目录中) 46      *  47      * @param id 48      */ 49     public void delete(Integer id) { 50         try { 51             String idStr = NumericUtils.intToPrefixCoded(id); // 一定要使用Lucene的工具类把数字转为字符串! 
52             Term term = new Term("id", idStr); 53  54             LuceneUtils.getIndexWriter().deleteDocuments(term); // 删除所有含有这个Term的Document 55             LuceneUtils.getIndexWriter().commit(); // 提交更改 56         } catch (Exception e) { 57             throw new RuntimeException(e); 58         } 59     } 60  61     /** 62      * 更新索引 63      *  64      * @param article 65      */ 66     public void update(Article article) { 67         try { 68             Term term = new Term("id", NumericUtils.intToPrefixCoded(article.getId())); // 一定要使用Lucene的工具类把数字转为字符串! 69             Document doc = ArticleDocumentUtils.articleToDocument(article); 70  71             LuceneUtils.getIndexWriter().updateDocument(term, doc); // 更新就是先删除再添加 72             LuceneUtils.getIndexWriter().commit(); // 提交更改 73  74             // indexWriter.deleteDocuments(term); 75             // indexWriter.addDocument(doc); 76         } catch (Exception e) { 77             throw new RuntimeException(e); 78         } 79     } 80  81     /** 82      * * 搜索   用于分页的 83      *  84      * @param queryString 85      *            查询条件 86      * @param first 87      *            从结果列表的哪个索引开始获取数据 88      * @param max 89      *            最多获取多少条数据(如果没有这么多,就把剩余的都返回) 90      *  91      * @return 一段数据列表 + 符合条件的总记录数 92      */ 93     public QueryResult search(String queryString, int first, int max) { 94         IndexSearcher indexSearcher = null; 95         try { 96             // 1,把查询字符串转为Query对象(在title与content中查询) 97             QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30, new String[] { "title", "content" }, LuceneUtils.getAnalyzer()); 98             Query query = queryParser.parse(queryString); 99 100             // 2,执行查询,得到中间结果101             indexSearcher = new IndexSearcher(LuceneUtils.getDirectory());102             TopDocs topDocs = indexSearcher.search(query, first + max); // 最多返回前n条数据,这里要计算好,要返回足够数量的数据103             int count = topDocs.totalHits; // 符合条件的总记录数104 105        
     // 3,处理数据106             List<Article> list = new ArrayList<Article>();107             int endIndex = Math.min(first + max, topDocs.scoreDocs.length); // 计算结束的边界108 109             for (int i = first; i < endIndex; i++) { // 应只取一段数据110                 // 根据内部编号获取真正的Document数据111                 int docId = topDocs.scoreDocs[i].doc;112                 Document doc = indexSearcher.doc(docId);113                 // 把Document转换为Article114                 Article article = ArticleDocumentUtils.documentToArticle(doc);115                 list.add(article);116             }117 118             // 4,封装结果并返回119             return new QueryResult(list, count);120 121         } catch (Exception e) {122             throw new RuntimeException(e);123         } finally {124             // 关闭IndexSearcher125             if (indexSearcher != null) {126                 try {127                     indexSearcher.close();128                 } catch (IOException e) {129                     throw new RuntimeException(e);130                 }131             }132         }133     }134 }

不分页的查询

LuceneUtils.getIndexWriter()
 1     public List<Article> searchArticle(String condition) { 2         // 执行搜索 3         List<Article> list = new ArrayList<Article>(); 4         IndexSearcher indexSearcher = null; 5         try { 6             // 1,把查询字符串转为Query对象(默认只从title中查询) 7             QueryParser queryParser = new MultiFieldQueryParser( 8                     Version.LUCENE_30, new String[] { "title", "content" }, 9                     LuceneUtils.getAnalyzer());10             Query query = queryParser.parse(condition);11 12             // 2,执行查询,得到中间结果13             //indexSearcher = new IndexSearcher(LuceneUtils.getDirectory()); // 指定所用的索引库,会引发线程安全问题
         indexSearcher=LuceneUtils.getIndexWriter();
14 TopDocs topDocs = indexSearcher.search(query, 1000); // 最多返回前n条结果15 int count = topDocs.totalHits;16 System.out.println("scoreDocs.length"+topDocs.scoreDocs.length); //一样17 System.out.println("count"+count); //一样18 ScoreDoc[] scoreDocs = topDocs.scoreDocs;19 20 // 3,处理结果21 for (int i = 0; i < scoreDocs.length; i++) {22 ScoreDoc scoreDoc = scoreDocs[i];23 float score = scoreDoc.score; // 相关度得分24 int docId = scoreDoc.doc; // Document的内部编号25 26 // 根据编号拿到Document数据27 Document document = indexSearcher.doc(docId);28 29 // 把Document转为Article30 Article article=ArticleDocumentUtils.documentToArticle(document);31 32 list.add(article);33 }34 } catch (Exception e) {35 throw new RuntimeException();36 } finally {37 try {38 if (null != indexSearcher)39 indexSearcher.close();40 } catch (Exception e) {41 e.printStackTrace();42 }43 }44 return list;45 }

 

 

ArticleIndexDaoTest.java

 1 package cn.itcast.b_indexdao; 2  3 import java.util.List; 4  5 import org.junit.Test; 6  7 import cn.itcast._domain.Article; 8 import cn.itcast._domain.QueryResult; 9 10 public class ArticleIndexDaoTest {11 12     private ArticleIndexDao indexDao = new ArticleIndexDao();13 14     @Test15     public void testSave() {16         // 准备数据17         Article article = new Article();18         article.setId(1);19         article.setTitle("准备Lucene的开发环境");20         article.setContent("如果信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。");21 22         // 放到索引库中23         indexDao.save(article);24     }25 26     @Test27     public void testSave_25() {28         for (int i = 1; i <= 25; i++) {29             // 准备数据30             Article article = new Article();31             article.setId(i);32             article.setTitle("准备Lucene的开发环境");33             article.setContent("如果信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。");34 35             // 放到索引库中36             indexDao.save(article);37         }38     }39 40     @Test41     public void testDelete() {42         indexDao.delete(1);43     }44 45     @Test46     public void testUpdate() {47         // 准备数据48         Article article = new Article();49         article.setId(1);50         article.setTitle("准备Lucene的开发环境");51         article.setContent("这是更新后的内容");52 53         // 更新到索引库中54         indexDao.update(article);55     }56     //用于分页的57     @Test58     public void testSearch() {59         // 准备查询条件60         String queryString = "lucene";61         // String queryString = "hibernate";62 63         // 执行搜索64         // QueryResult qr = indexDao.search(queryString, 0, 10000);65 66         // QueryResult qr = indexDao.search(queryString, 0, 10); // 第1页,每页10条67         // QueryResult qr = indexDao.search(queryString, 10, 10); // 第2页,每页10条68         QueryResult qr = indexDao.search(queryString, 20, 10); // 第3页,每页10条69 70         // 显示结果71         System.out.println("总结果数:" + qr.getCount());72         for (Article a : 
(List<Article>) qr.getList()) {73             System.out.println("------------------------------");74             System.out.println("id = " + a.getId());75             System.out.println("title = " + a.getTitle());76             System.out.println("content = " + a.getContent());77         }78     }79 80 }

 不分页查询测试

 1 @Test 2     public void testSearchArticle() { 3         // 准备查询条件 4         String queryString = "lucene的"; 5         // String queryString = "hibernate"; 6  7         // 执行搜索 8         List<Article> list =dao.searchArticle(queryString); 9         10         // 显示结果11         System.out.println("总结果数:" + list.size());12         for (Article a : list) {13             System.out.println("------------------------------");14             System.out.println("id = " + a.getId());15             System.out.println("title = " + a.getTitle());16             System.out.println("content = " + a.getContent());17         }18     }