首页 > 代码库 > lucene 建立CRUD操作
lucene 建立CRUD操作
IndexSearcher indexSearcher = new IndexSearcher(LuceneUtils.getDirectory()); // 指定所用的索引库
这句会引发线程安全问题,在全局中 IndexSearcher 只能有一个对象才可以,所以在ArticleDocumentUtils中保存一个并且引用它。
IndexWriter为了提高效率,也会先把更改缓存在内存中,所以需要调用commit()才能把数据写入索引库文件中。
数据库优化
每次添加数据都会在索引文件夹下产生很多小文件,合并这些小文件可以提高效率:
// Optimize: merge the many small index files into one large file.
LuceneUtils.getIndexWriter().optimize();
// Configure how many small files may accumulate before they are merged
// automatically into one large file (minimum 2, default 10).
LuceneUtils.getIndexWriter().setMergeFactor(3);
当增加数据的时候自动触发。
LuceneUtils.java
1 package cn.itcast._util; 2 3 import java.io.File; 4 import java.io.IOException; 5 6 import org.apache.lucene.analysis.Analyzer; 7 import org.apache.lucene.analysis.standard.StandardAnalyzer; 8 import org.apache.lucene.index.CorruptIndexException; 9 import org.apache.lucene.index.IndexWriter;10 import org.apache.lucene.index.IndexWriter.MaxFieldLength;11 import org.apache.lucene.store.Directory;12 import org.apache.lucene.store.FSDirectory;13 import org.apache.lucene.store.LockObtainFailedException;14 import org.apache.lucene.util.Version;15 16 public class LuceneUtils {17 18 private static Directory directory; // 索引库目录19 private static Analyzer analyzer; // 分词器20 21 private static IndexWriter indexWriter;22 23 static {24 try {25 // 这里应是读取配置文件得到的索引库目录26 directory = FSDirectory.open(new File("./indexDir"));27 analyzer = new StandardAnalyzer(Version.LUCENE_30);28 } catch (IOException e) {29 throw new RuntimeException(e);30 }31 }32 33 /**34 * 获取全局唯一的IndexWriter对象35 * 36 * @return37 */38 public static IndexWriter getIndexWriter() {39 // 在第一次使用IndexWriter是进行初始化40 if (indexWriter == null) {41 synchronized (LuceneUtils.class) { // 注意线程安全问题42 if (indexWriter == null) {43 try {44 indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);45 System.out.println("=== 已经初始化 IndexWriter ===");46 } catch (Exception e) {47 throw new RuntimeException(e);48 }49 }50 }51 52 // 指定一段代码,会在JVM退出之前执行。53 Runtime.getRuntime().addShutdownHook(new Thread() {54 public void run() {55 try {56 indexWriter.close();57 System.out.println("=== 已经关闭 IndexWriter ===");58 } catch (Exception e) {59 throw new RuntimeException(e);60 }61 }62 });63 }64 65 return indexWriter;66 }67 68 public static Directory getDirectory() {69 return directory;70 }71 72 public static Analyzer getAnalyzer() {73 return analyzer;74 }75 76 }
ArticleDocumentUtils.java
1 package cn.itcast._util; 2 3 import org.apache.lucene.document.Document; 4 import org.apache.lucene.document.Field; 5 import org.apache.lucene.document.Field.Index; 6 import org.apache.lucene.document.Field.Store; 7 import org.apache.lucene.util.NumericUtils; 8 9 import cn.itcast._domain.Article;10 11 public class ArticleDocumentUtils {12 13 /**14 * 把Article转为Document15 * 16 * @param article17 * @return18 */19 public static Document articleToDocument(Article article) {20 Document doc = new Document();21 22 String idStr = NumericUtils.intToPrefixCoded(article.getId()); // 一定要使用Lucene的工具类把数字转为字符串!23 24 doc.add(new Field("id", idStr, Store.YES, Index.NOT_ANALYZED)); // 注意:唯一标示符一般选择Index.NOT_ANALYZED25 doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));26 doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));27 28 return doc;29 }30 31 /**32 * 把Document转为Article33 * 34 * @param doc35 * @return36 */37 public static Article documentToArticle(Document doc) {38 Article article = new Article();39 40 Integer id = NumericUtils.prefixCodedToInt(doc.get("id")); // 一定要使用Lucene的工具类把字符串转为数字!41 42 article.setId(id);43 article.setTitle(doc.get("title"));44 article.setContent(doc.get("content"));45 46 return article;47 }48 49 }
QueryResult.java
1 package cn.itcast._domain; 2 3 import java.util.List; 4 5 public class QueryResult { 6 private List list; // 一段数据列表 7 private int count; // 总记录数 8 9 public QueryResult(List list, int count) {10 this.list = list;11 this.count = count;12 }13 14 public List getList() {15 return list;16 }17 18 public void setList(List list) {19 this.list = list;20 }21 22 public int getCount() {23 return count;24 }25 26 public void setCount(int count) {27 this.count = count;28 }29 30 }
ArticleIndexDao.java
1 package cn.itcast.b_indexdao; 2 3 import java.io.IOException; 4 import java.util.ArrayList; 5 import java.util.List; 6 7 import org.apache.lucene.document.Document; 8 import org.apache.lucene.index.Term; 9 import org.apache.lucene.queryParser.MultiFieldQueryParser; 10 import org.apache.lucene.queryParser.QueryParser; 11 import org.apache.lucene.search.IndexSearcher; 12 import org.apache.lucene.search.Query; 13 import org.apache.lucene.search.TopDocs; 14 import org.apache.lucene.util.NumericUtils; 15 import org.apache.lucene.util.Version; 16 17 import cn.itcast._domain.Article; 18 import cn.itcast._domain.QueryResult; 19 import cn.itcast._util.ArticleDocumentUtils; 20 import cn.itcast._util.LuceneUtils; 21 22 public class ArticleIndexDao { 23 24 /** 25 * 保存到索引库(建立索引) 26 * 27 * @param article 28 */ 29 public void save(Article article) { 30 // 1,把Article转为Document 31 Document doc = ArticleDocumentUtils.articleToDocument(article); 32 33 // 2,添加到索引库中 34 try { 35 LuceneUtils.getIndexWriter().addDocument(doc); // 添加 36 LuceneUtils.getIndexWriter().commit(); // 提交更改 37 } catch (Exception e) { 38 throw new RuntimeException(e); 39 } 40 } 41 42 /** 43 * 删除索引 44 * 45 * Term :某字段中出现的某一个关键词(在索引库的目录中) 46 * 47 * @param id 48 */ 49 public void delete(Integer id) { 50 try { 51 String idStr = NumericUtils.intToPrefixCoded(id); // 一定要使用Lucene的工具类把数字转为字符串! 52 Term term = new Term("id", idStr); 53 54 LuceneUtils.getIndexWriter().deleteDocuments(term); // 删除所有含有这个Term的Document 55 LuceneUtils.getIndexWriter().commit(); // 提交更改 56 } catch (Exception e) { 57 throw new RuntimeException(e); 58 } 59 } 60 61 /** 62 * 更新索引 63 * 64 * @param article 65 */ 66 public void update(Article article) { 67 try { 68 Term term = new Term("id", NumericUtils.intToPrefixCoded(article.getId())); // 一定要使用Lucene的工具类把数字转为字符串! 
69 Document doc = ArticleDocumentUtils.articleToDocument(article); 70 71 LuceneUtils.getIndexWriter().updateDocument(term, doc); // 更新就是先删除再添加 72 LuceneUtils.getIndexWriter().commit(); // 提交更改 73 74 // indexWriter.deleteDocuments(term); 75 // indexWriter.addDocument(doc); 76 } catch (Exception e) { 77 throw new RuntimeException(e); 78 } 79 } 80 81 /** 82 * * 搜索 用于分页的 83 * 84 * @param queryString 85 * 查询条件 86 * @param first 87 * 从结果列表的哪个索引开始获取数据 88 * @param max 89 * 最多获取多少条数据(如果没有这么多,就把剩余的都返回) 90 * 91 * @return 一段数据列表 + 符合条件的总记录数 92 */ 93 public QueryResult search(String queryString, int first, int max) { 94 IndexSearcher indexSearcher = null; 95 try { 96 // 1,把查询字符串转为Query对象(在title与content中查询) 97 QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30, new String[] { "title", "content" }, LuceneUtils.getAnalyzer()); 98 Query query = queryParser.parse(queryString); 99 100 // 2,执行查询,得到中间结果101 indexSearcher = new IndexSearcher(LuceneUtils.getDirectory());102 TopDocs topDocs = indexSearcher.search(query, first + max); // 最多返回前n条数据,这里要计算好,要返回足够数量的数据103 int count = topDocs.totalHits; // 符合条件的总记录数104 105 // 3,处理数据106 List<Article> list = new ArrayList<Article>();107 int endIndex = Math.min(first + max, topDocs.scoreDocs.length); // 计算结束的边界108 109 for (int i = first; i < endIndex; i++) { // 应只取一段数据110 // 根据内部编号获取真正的Document数据111 int docId = topDocs.scoreDocs[i].doc;112 Document doc = indexSearcher.doc(docId);113 // 把Document转换为Article114 Article article = ArticleDocumentUtils.documentToArticle(doc);115 list.add(article);116 }117 118 // 4,封装结果并返回119 return new QueryResult(list, count);120 121 } catch (Exception e) {122 throw new RuntimeException(e);123 } finally {124 // 关闭IndexSearcher125 if (indexSearcher != null) {126 try {127 indexSearcher.close();128 } catch (IOException e) {129 throw new RuntimeException(e);130 }131 }132 }133 }134 }
不分页的查询
LuceneUtils.getIndexWriter()
1 public List<Article> searchArticle(String condition) { 2 // 执行搜索 3 List<Article> list = new ArrayList<Article>(); 4 IndexSearcher indexSearcher = null; 5 try { 6 // 1,把查询字符串转为Query对象(默认只从title中查询) 7 QueryParser queryParser = new MultiFieldQueryParser( 8 Version.LUCENE_30, new String[] { "title", "content" }, 9 LuceneUtils.getAnalyzer());10 Query query = queryParser.parse(condition);11 12 // 2,执行查询,得到中间结果13 //indexSearcher = new IndexSearcher(LuceneUtils.getDirectory()); // 指定所用的索引库,会引发线程安全问题
indexSearcher=LuceneUtils.getIndexWriter();
14 TopDocs topDocs = indexSearcher.search(query, 1000); // 最多返回前n条结果15 int count = topDocs.totalHits;16 System.out.println("scoreDocs.length"+topDocs.scoreDocs.length); //一样17 System.out.println("count"+count); //一样18 ScoreDoc[] scoreDocs = topDocs.scoreDocs;19 20 // 3,处理结果21 for (int i = 0; i < scoreDocs.length; i++) {22 ScoreDoc scoreDoc = scoreDocs[i];23 float score = scoreDoc.score; // 相关度得分24 int docId = scoreDoc.doc; // Document的内部编号25 26 // 根据编号拿到Document数据27 Document document = indexSearcher.doc(docId);28 29 // 把Document转为Article30 Article article=ArticleDocumentUtils.documentToArticle(document);31 32 list.add(article);33 }34 } catch (Exception e) {35 throw new RuntimeException();36 } finally {37 try {38 if (null != indexSearcher)39 indexSearcher.close();40 } catch (Exception e) {41 e.printStackTrace();42 }43 }44 return list;45 }
ArticleIndexDaoTest.java
1 package cn.itcast.b_indexdao; 2 3 import java.util.List; 4 5 import org.junit.Test; 6 7 import cn.itcast._domain.Article; 8 import cn.itcast._domain.QueryResult; 9 10 public class ArticleIndexDaoTest {11 12 private ArticleIndexDao indexDao = new ArticleIndexDao();13 14 @Test15 public void testSave() {16 // 准备数据17 Article article = new Article();18 article.setId(1);19 article.setTitle("准备Lucene的开发环境");20 article.setContent("如果信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。");21 22 // 放到索引库中23 indexDao.save(article);24 }25 26 @Test27 public void testSave_25() {28 for (int i = 1; i <= 25; i++) {29 // 准备数据30 Article article = new Article();31 article.setId(i);32 article.setTitle("准备Lucene的开发环境");33 article.setContent("如果信息检索系统在用户发出了检索请求后再去互联网上找答案,根本无法在有限的时间内返回结果。");34 35 // 放到索引库中36 indexDao.save(article);37 }38 }39 40 @Test41 public void testDelete() {42 indexDao.delete(1);43 }44 45 @Test46 public void testUpdate() {47 // 准备数据48 Article article = new Article();49 article.setId(1);50 article.setTitle("准备Lucene的开发环境");51 article.setContent("这是更新后的内容");52 53 // 更新到索引库中54 indexDao.update(article);55 }56 //用于分页的57 @Test58 public void testSearch() {59 // 准备查询条件60 String queryString = "lucene";61 // String queryString = "hibernate";62 63 // 执行搜索64 // QueryResult qr = indexDao.search(queryString, 0, 10000);65 66 // QueryResult qr = indexDao.search(queryString, 0, 10); // 第1页,每页10条67 // QueryResult qr = indexDao.search(queryString, 10, 10); // 第2页,每页10条68 QueryResult qr = indexDao.search(queryString, 20, 10); // 第3页,每页10条69 70 // 显示结果71 System.out.println("总结果数:" + qr.getCount());72 for (Article a : (List<Article>) qr.getList()) {73 System.out.println("------------------------------");74 System.out.println("id = " + a.getId());75 System.out.println("title = " + a.getTitle());76 System.out.println("content = " + a.getContent());77 }78 }79 80 }
不分页查询测试
1 @Test 2 public void testSearchArticle() { 3 // 准备查询条件 4 String queryString = "lucene的"; 5 // String queryString = "hibernate"; 6 7 // 执行搜索 8 List<Article> list =dao.searchArticle(queryString); 9 10 // 显示结果11 System.out.println("总结果数:" + list.size());12 for (Article a : list) {13 System.out.println("------------------------------");14 System.out.println("id = " + a.getId());15 System.out.println("title = " + a.getTitle());16 System.out.println("content = " + a.getContent());17 }18 }
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。