准备好最新的庖丁解牛(Paoding)分词包和 Lucene 3.3 或 Lucene 3.4 之后,即可运行以下示例。(源码、用到的文件以及生成的索引文件均已上传:http://download.csdn.net/detail/a_2cai/3671272)
为了简单起见,我建了一个 txt 文件,每行一条记录(相当于数据库中的一个条目),各字段之间用“::”分隔。内容如下:
172.7.14.198::172.7.19.71::DS-2DF1-4010020090611AACH290005648WC::移动侦测::2011/9/1
172.7.14.198::172.7.24.51::DS-9016HF-S1620100809BBRR401273372WCVU::移动::2011/9/1
172.7.14.198::172.7.24.51::DS-9016HF-S1620100809BBRR401273372WCVUC::移动侦测::2011/9/1
172.7.14.198::172.7.19.71::DS-2DF1-4010020090611AACH290005648WC::Lucene测试::2011/9/1
172.7.14.198::172.7.19.71::遮挡一下::遮挡报警::2011/9/1
172.7.14.198::172.7.19.71::遮经挡报警::遮挡报警::2011/9/1
172.7.14.198::172.7.19.71::多域测试::移动侦测::2011/9/1
172.7.14.198::172.7.19.71::多域测试::移动侦测::2011/9/1
172.7.14.198::172.7.19.71::DS-2DF1-4010020090611AACH290005648WC::移动侦测::2011/9/1
172.7.14.198::172.7.24.51::DS-9016HF-S1620100809BBRR401273372WCVU::磁盘已满::2011/8/31
172.7.14.198::172.7.19.71::DS-2DF1-4010020090611AACH290005648WC::移动侦测::2011/9/1
172.7.14.198::172.7.19.71::DS-2DF1-4010020090611AACH290005648WC::移动侦测::2011/9/1
172.7.14.198::172.7.24.51::DS-9016HF-S1620100809BBRR401273372WCVU::移动::2011/9/1
172.7.14.198::172.7.24.51::DS-9016HF-S1620100809BBRR401273372WCVUC::移动侦测::2011/9/1
172.7.14.198::172.7.19.71::DS-2DF1-4010020090611AACH290005648WC::Lucene测试::2011/9/1
172.7.14.198::172.7.19.71::DS-2DF1-4010020090611AACH290005648WC::遮挡报警::2011/9/1
172.7.14.198::172.7.19.71::DS-2DF1-4010020090611AACH290005648WC::移动侦测指的是将一个汉字序列切分成一个一个单独的词::2011/9/1
172.7.14.198::172.7.24.51::DS-9016HF-S1620100809BBRR401273372WCVU::查询方式总体来讲分两类:查询API查询和语法查询::2011/8/31
172.7.14.198::172.7.24.51::DS-9016HF-S1620100809BBRR401273372WCVU::对于查询时的Field名一定要大小写对应,默认情况下要查询的关键字要转成小写,这在lucene建索引的时候做过特殊处理::2011/8/31

建索引:
public static void testIndex() throws Exception{ String itemFilePath = "TestDocs/luceneDemoTest.txt"; boolean isCreate = false; Date start = new Date(); try { System.out.println("Indexing to directory '" + indexFilePath + "'..."); Directory dir = FSDirectory.open(new File(indexFilePath)); //Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_33); //Analyzer analyzer = new PaodingAnalyzer("etc/paoding-analysis-default.properties"); //Analyzer analyzer = new PaodingAnalyzer("F:/paodingetc/paoding-analysis-default.properties"); //Analyzer analyzer = new PaodingAnalyzer("ifexists:paoding-dic-home.properties"); Analyzer analyzer = new PaodingAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_33, analyzer); if (isCreate) { //创建新索引删除旧索引 iwc.setOpenMode(OpenMode.CREATE); } else { // 向索引中添加新的Document iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); FileInputStream fis; try { fis = new FileInputStream(itemFilePath); } catch (FileNotFoundException fnfe) { return; } try { //new BufferedReader(); BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(fis, "UTF-8")); String item = ""; String[] fields; item = bufferedReader.readLine(); Integer id = 0; while(!"".equals(item) && item != null) { id++; fields = item.split("::"); // 创建空文档 Document doc = new Document(); // NumericField idField = new NumericField("ID", Field.Store.YES, true); // idField.setIntValue(id); // doc.add(idField); doc.add(new Field("ID", id.toString(),Field.Store.YES, Field.Index.NO)); doc.add(new Field("IDSTR", id.toString(),Field.Store.YES, Field.Index.NOT_ANALYZED));//不分词索引,其实没必要索引为了测试所以索引 NumericField idField = new NumericField("IDNUM", Field.Store.YES, true); idField.setIntValue(id); doc.add(idField);//用于测试数字 doc.add(new Field("PCIP", fields[0],Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("DeviceIP", fields[1],Field.Store.YES, Field.Index.ANALYZED)); doc.add(new 
Field("DeviceSerialNum", fields[2],Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("AlarmType", fields[3],Field.Store.YES, Field.Index.ANALYZED)); //一个域可以有子集 doc.add(new Field("MultiFields", fields[2],Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("MultiFields", fields[3],Field.Store.YES, Field.Index.ANALYZED)); NumericField alarmDatetime = new NumericField("alarmDatetime", Field.Store.YES, true); SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy/MM/dd"); alarmDatetime.setLongValue(simpleDateFormat.parse(fields[4]).getTime()); doc.add(alarmDatetime); if (id == 5) { doc.setBoost(1.2f); } if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { writer.addDocument(doc); } else { writer.updateDocument(new Term("ID" , id.toString()), doc); } item = bufferedReader.readLine(); } } finally { fis.close(); } //writer.optimize();//优化索引 writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
检索:
public static void testSearch() throws Exception{ String field = "AlarmType"; boolean raw = false; String queryString = "移动"; int hitsPerPage = 10; IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(indexFilePath))); //Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_33); //Analyzer analyzer = new PaodingAnalyzer(); BufferedReader in = null; //QueryParser parser = new QueryParser(Version.LUCENE_33, field, analyzer); //Query query = parser.parse(queryString.trim()); Query query = new TermQuery(new Term(field,"查询")); System.out.println("Searching for: " + query.toString(field)); // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 5 * hitsPerPage, Sort.RELEVANCE); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); for (int i = 0; i < hits.length; i++) { Document doc = searcher.doc(hits[i].doc); System.out.println("ID:" + doc.get("ID") + "\tPCIP:" + doc.get("PCIP") + "\tDeviceIP:" + doc.get("DeviceIP") + "\tDeviceSerialNum:" + doc.get("DeviceSerialNum") + "\tAlarmType:" + doc.get("AlarmType") + "\tAlarmDatetime:" + new Date(Long.parseLong(doc.get("alarmDatetime"))).toString()); } }
以上是建索引和检索的简单示例,大家可以下载源码运行、调试一下,对刚接触 Lucene 的读者应该会有帮助。Demo 源码以及相关配置信息见 Eclipse 工程:http://download.csdn.net/detail/a_2cai/3671272