Lucene之——搜索实例

时间:2021-07-20 03:08:50

转载请注明出处:http://blog.csdn.net/l1028386804/article/details/49287663

需求是实现一个搜索功能:将所有内容中包含搜索关键字的文章的标题列出来(文章的内容存储在Oracle的CLOB字段中),也就是要用Lucene实现对数据库大字段的索引和搜索。Lucene索引的创建通过Java定时任务来完成。

定时调用建立索引方法

package com.qqw.index;

import java.util.Timer;

public class IndexerServer {

    /** Name of the properties resource handed to {@code Config} before anything else runs. */
    private static final String CONFIG_RESOURCE = "directory.properties";

    /**
     * Stand-alone entry point that schedules the periodic index build.
     * <p>
     * The configuration file name is registered first, then a timer is created
     * and the shared {@code LuceneDBIndexerTask} is scheduled at a fixed rate:
     * the first run starts immediately and the period is taken from
     * {@code Constant.CREATE_INDEX_SLEEP_TIME}.
     *
     * @param args command-line arguments (not used)
     * @author liuyazhuang (created 2015-10-20)
     */
    public static void main(String[] args) {
        Config.setConfigFileName(CONFIG_RESOURCE);

        final Timer scheduler = new Timer();
        final LuceneDBIndexerTask indexingJob = LuceneDBIndexerTask.getInstance();
        final long periodMillis = DataTypeUtil.toLong(Constant.CREATE_INDEX_SLEEP_TIME);

        scheduler.scheduleAtFixedRate(indexingJob, 0, periodMillis);
    }
}
建立索引的核心实现
package com.qqw.index;
import java.io.BufferedReader;  
import java.io.File;  
import java.io.IOException;  
import java.io.StringWriter;  
import java.sql.Connection;  
import java.sql.DriverManager;  
import java.sql.ResultSet;  
import java.sql.SQLException;  
import java.sql.Statement;  
import java.text.SimpleDateFormat;  
import java.util.Arrays;  
import java.util.Date;  
import java.util.TimerTask;  
  
import oracle.sql.CLOB;  
  
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field;  
import org.apache.lucene.index.IndexWriter;  
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
  
  /**
 * Timer task that rebuilds the Lucene index from the rows returned by
 * {@code Constant.DB_QUERY_STRING}.
 * <p>
 * Each run opens a JDBC connection, turns every row into a Lucene document
 * (ARTICLEID, TITLE, USERNAME, USERID, CREATEDATE, CONTENT) and writes the
 * documents with {@code OpenMode.CREATE}, i.e. the index is rebuilt from
 * scratch. A timestamped directory is also created under the configured parent
 * directory and only the three newest entries of that parent are kept.
 * <p>
 * The class is a singleton; use {@link #getInstance()}.
 *
 * @author liuyazhuang (created 2015-10-20)
 */
public class LuceneDBIndexerTask extends TimerTask {
    // Fallback parent directory used when index_store_directory is not configured.
    private static final String DEFAULT_INDEX_DIR = "C:\\IndexDB";
    // Directory the IndexWriter actually writes to.
    // NOTE(review): this path is hard-coded and independent of parentDir, so the
    // timestamped directories created in run() stay empty - confirm whether the
    // index was meant to live under parentDir before changing it.
    private static final String INDEX_DIR = "d:\\fileIndex\\blogs";
    // Number of entries kept in the parent directory by checkFiles().
    private static final int MAX_KEPT_DIRS = 3;

    // Parent directory of the timestamped per-run directories.
    private File parentDir = null;
    // The single shared instance.
    private static LuceneDBIndexerTask index = new LuceneDBIndexerTask();

    /**
     * Resolves the parent directory from {@code Constant.INDEX_STORE_DIRECTORY}
     * (falling back to {@code DEFAULT_INDEX_DIR}) and creates it if needed.
     */
    private LuceneDBIndexerTask() {
        String dirStr = Constant.INDEX_STORE_DIRECTORY;
        if (dirStr != null && !"".equals(dirStr)) {
            this.parentDir = new File(dirStr);
        } else {
            this.parentDir = new File(DEFAULT_INDEX_DIR);
        }
        if (!this.parentDir.exists()) {
            // mkdirs: the configured path may be nested and its parents may not exist yet.
            this.parentDir.mkdirs();
        }
    }

    /**
     * Returns the shared task instance.
     *
     * @return the singleton task
     */
    public static LuceneDBIndexerTask getInstance() {
        return index;
    }

    /**
     * Builds the index from the database.
     * <p>
     * Checked failures (I/O, SQL, driver loading) are printed and end the run;
     * in that case the writer is rolled back so its lock is released and no
     * partially built index is committed. JDBC resources are always closed.
     */
    @Override
    public void run() {
        System.out.println("====LuceneDBIndexerTask$run()===============");
        System.out.println("~~~开始建立索引文件~~~~~~~~~~~~~~~");
        Connection conn = null;
        Statement stmt = null;
        ResultSet rs = null;
        IndexWriter writer = null;
        File indexDir = new File(INDEX_DIR);
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_33, analyzer);
        conf.setOpenMode(OpenMode.CREATE);
        try {
            Class.forName(Constant.DB_DRIVER_STRING).newInstance();
            conn = DriverManager.getConnection(Constant.DB_URI_STRING, Constant.DB_USERNAME, Constant.DB_PWD);
            stmt = conn.createStatement();
            rs = stmt.executeQuery(Constant.DB_QUERY_STRING);

            File file = new File(parentDir + File.separator
                    + new SimpleDateFormat("yyyyMMddHHmmss").format(new Date()) + File.separator);
            if (!file.exists()) {
                file.mkdirs();
            }

            writer = new IndexWriter(FSDirectory.open(indexDir), conf);
            long startTime = new Date().getTime();
            // One formatter per run; SimpleDateFormat is not shared between threads here.
            SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
            while (rs.next()) {
                writer.addDocument(toDocument(rs, dayFormat));
            }
            writer.optimize();
            writer.close();
            // Mark as cleanly closed so the finally block does not roll back.
            writer = null;

            long endTime = new Date().getTime();
            System.out.println("索引文件" + file.getPath() + "建立成功...");
            System.out.println("这花费了" + (endTime - startTime) + " 毫秒来把文档增加到索引里面去!");
            // Keep only the newest entries of the parent directory.
            checkFiles(parentDir);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (InstantiationException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        } finally {
            rollbackQuietly(writer);
            closeJdbc(rs, stmt, conn);
        }
    }

    /**
     * Converts the current row of {@code rs} into a Lucene document.
     * Null column values are indexed as empty strings.
     */
    private Document toDocument(ResultSet rs, SimpleDateFormat dayFormat)
            throws SQLException, IOException {
        Document doc = new Document();
        addText(doc, "ARTICLEID", rs.getString("ARTICLEID"));
        addText(doc, "TITLE", rs.getString("TITLE"));
        addText(doc, "USERNAME", rs.getString("USERNAME"));
        addText(doc, "USERID", rs.getString("USERID"));
        // The creation date is indexed as a yyyy-MM-dd string.
        Date created = rs.getTimestamp("CREATEDATE");
        addText(doc, "CREATEDATE", created == null ? null : dayFormat.format(created));
        // Large-object column: read completely into memory.
        addText(doc, "CONTENT", readClob(rs.getClob("CONTENT")));
        return doc;
    }

    /** Adds a stored, analyzed field; a null value is replaced by an empty string. */
    private void addText(Document doc, String name, String value) {
        doc.add(new Field(name, value == null ? "" : value, Field.Store.YES, Field.Index.ANALYZED));
    }

    /** Reads the whole CLOB as a string; returns an empty string for a null CLOB. */
    private String readClob(java.sql.Clob clob) throws SQLException, IOException {
        if (clob == null) {
            return "";
        }
        BufferedReader in = new BufferedReader(clob.getCharacterStream());
        try {
            StringWriter out = new StringWriter();
            char[] buffer = new char[4096];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            return out.toString();
        } finally {
            in.close();
        }
    }

    /** Rolls back a writer that was not closed normally, releasing its lock. */
    private void rollbackQuietly(IndexWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.rollback();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes each JDBC resource independently so one failure does not leak the others. */
    private void closeJdbc(ResultSet rs, Statement stmt, Connection conn) {
        if (rs != null) {
            try {
                rs.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Trims {@code dir} to at most three entries by deleting the entries whose
     * path names sort first. The per-run directories are named
     * {@code yyyyMMddHHmmss}, so the lowest names are the oldest ones.
     *
     * @param dir directory whose children are pruned; nothing happens if it
     *            cannot be listed
     */
    public void checkFiles(File dir) {
        File[] files = dir.listFiles();
        if (files == null) {
            return;
        }
        // File's natural order is by path name, which matches age for timestamp names.
        Arrays.sort(files);
        for (int i = 0; i < files.length - MAX_KEPT_DIRS; i++) {
            deleteDirectory(files[i]);
        }
    }

    /**
     * Recursively deletes a directory together with everything below it.
     *
     * @param path file or directory to delete
     * @return {@code true} if {@code path} itself was deleted
     */
    public boolean deleteDirectory(File path) {
        if (path.exists()) {
            // listFiles() returns null for a regular file or on an I/O error.
            File[] files = path.listFiles();
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    if (files[i].isDirectory()) {
                        deleteDirectory(files[i]);
                    } else {
                        files[i].delete();
                    }
                }
            }
        }
        boolean hasdelete = path.delete();
        if (hasdelete) {
            System.out.println("删除索引目录" + path);
        }
        return hasdelete;
    }

    /** Runs a single index build without a timer (manual test entry point). */
    public static void main(String[] args) {
        new LuceneDBIndexerTask().run();
    }

}
配置文件管理类:
package com.qqw.index;
import java.io.IOException;  
import java.io.InputStream;  
import java.util.Properties;  
/**
 * Lazily loaded, process-wide holder of the indexer's properties.
 * <p>
 * The properties are read once, on the first {@link #getInstance()} call, from
 * the classpath resource set via {@link #setConfigFileName(String)}; when no
 * name has been set, {@code directory.properties} is used. A missing or
 * unreadable resource leaves the configuration empty instead of failing.
 *
 * @author liuyazhuang (created 2015-10-20)
 */
public class Config {
    // Resource used when no explicit file name was registered.
    private static final String DEFAULT_CONFIG_FILE = "directory.properties";

    private static Config cfg = null;
    private static String configFileName = null;
    private Properties props;

    /** Creates an empty configuration; nothing is loaded here. */
    public Config() {
        props = new java.util.Properties();
    }

    /**
     * Returns the shared instance, loading the properties on first use.
     *
     * @return the singleton configuration, never {@code null}
     */
    public synchronized static Config getInstance() {
        if (cfg == null) {
            Config loaded = new Config();
            loaded.loadConfig();
            cfg = loaded;
        }
        return cfg;
    }

    /**
     * Loads the properties from the classpath.
     *
     * @return 1 if the resource was found and read, 0 otherwise
     */
    private int loadConfig() {
        String resource = DEFAULT_CONFIG_FILE;
        if (configFileName != null && configFileName.length() > 0) {
            resource = configFileName;
        }
        System.out.println("configFileName=" + resource);
        InputStream inputStream = Config.class.getClassLoader().getResourceAsStream(resource);
        if (inputStream == null) {
            // Properties.load(null) would throw; report and keep the empty defaults.
            System.err.println("Config resource not found on classpath: " + resource);
            return 0;
        }
        try {
            props.load(inputStream);
            return 1;
        } catch (IOException e) {
            e.printStackTrace();
            return 0;
        } finally {
            try {
                inputStream.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a read-only stream fails.
            }
        }
    }

    /**
     * Registers the classpath resource to load. Must be called before the
     * first {@link #getInstance()} call to have any effect.
     *
     * @param cfg resource name, e.g. {@code directory.properties}
     */
    public static void setConfigFileName(String cfg) {
        configFileName = cfg;
    }

    /**
     * Looks up a property.
     *
     * @param keyName property key
     * @return the value, or {@code null} if the key is not defined
     */
    public String getProperty(String keyName) {
        return props.getProperty(keyName);
    }

}
常量配置

package com.qqw.index;
/**
 * Configuration values resolved once, at class-initialisation time, from the
 * shared {@code Config} instance. A value is {@code null} when its key is not
 * present in the loaded properties.
 *
 * @author liuyazhuang (created 2015-10-20)
 */
public class Constant {

    // Single lookup of the configuration singleton, reused by every constant below.
    private static final Config SETTINGS = Config.getInstance();

    /** Interval between index builds; callers parse it and use it as the timer period. */
    public static final String CREATE_INDEX_SLEEP_TIME =
            SETTINGS.getProperty("create_index_sleep_time");

    /** Directory under which index directories are stored. */
    public static final String INDEX_STORE_DIRECTORY =
            SETTINGS.getProperty("index_store_directory");

    /** Fully-qualified class name of the JDBC driver. */
    public static final String DB_DRIVER_STRING =
            SETTINGS.getProperty("db_driver_string");

    /** JDBC connection URI. */
    public static final String DB_URI_STRING =
            SETTINGS.getProperty("db_uri_string");

    /** Database user name. */
    public static final String DB_USERNAME =
            SETTINGS.getProperty("db_username");

    /** Database password. */
    public static final String DB_PWD =
            SETTINGS.getProperty("db_pwd");

    /** SQL query whose rows are indexed. */
    public static final String DB_QUERY_STRING =
            SETTINGS.getProperty("db_query_string");

}
数据类型处理类:
package com.qqw.index;

/**
 * Data type conversion helpers.
 *
 * @author liuyazhuang (created 2015-10-20)
 */
public class DataTypeUtil {
    /**
     * Converts an object to a {@code long} by parsing its string form.
     * Leading and trailing whitespace is ignored, so values read from a
     * properties file (which keeps trailing blanks) parse as expected.
     *
     * @param o source object; its {@code toString()} is parsed
     * @return the parsed value, or {@code Long.MAX_VALUE} if the text is not a
     *         valid long
     * @throws IllegalArgumentException if {@code o} is {@code null}
     */
    public static long toLong(Object o) {
        if (o == null) {
            throw new IllegalArgumentException("该对象为空");
        }
        String s = o.toString();
        if (s == null) {
            return Long.MAX_VALUE;
        }
        try {
            return Long.parseLong(s.trim());
        } catch (NumberFormatException ex) {
            return Long.MAX_VALUE;
        }
    }
}
配置文件
#== the directory for store lucene-index ========#  
index_store_directory=D\:/lucene/indexDB/  
  
#======== two hours ========#  
#create_index_sleep_time=7200000  
  
#======== two hours (7200000 ms); use 120000 for two minutes ========#
create_index_sleep_time=7200000  
db_driver_string=oracle.jdbc.driver.OracleDriver
db_uri_string=jdbc\:oracle\:thin\:@localhost\:1521\:orcl  
db_username=test  
db_pwd=test
db_query_string=SELECT  * from journalarticle
核心搜索类:
package com.qqw.search;
import java.io.File;  
import java.io.IOException;  
import java.util.ArrayList;  
import java.util.HashMap;
import java.util.List;  
import java.util.Map;
  
import org.apache.lucene.document.Document;  
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.Query;  
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;  
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;
  
/**
 * Search facade over a Lucene index: plain search, paged search and hit count,
 * with HTML highlighting of selected fields.
 * <p>
 * Every call opens a read-only searcher on the given index directory and
 * closes it again before returning. At most 100 hits are requested per query,
 * so result lists and counts never exceed 100.
 * <p>
 * The class is a singleton; use {@link #getInstance()}.
 *
 * @author liuyazhuang (created 2015-10-20)
 */
public class LuceneDBQuery {

    // Size of the hit queue requested from the searcher.
    private static final int MAX_HITS = 100;

    private static LuceneDBQuery search = new LuceneDBQuery();

    // Singleton: not instantiable from outside.
    private LuceneDBQuery() {

    }

    /**
     * Returns the shared instance.
     *
     * @return the singleton
     */
    public static LuceneDBQuery getInstance() {
        return search;
    }

    /**
     * Searches all {@code searchFields}, requiring a match in every one of
     * them, and returns one map per hit.
     *
     * @param indexFields       stored fields copied verbatim into each result map
     * @param searchFields      fields the query is run against
     * @param queryString       user query text
     * @param searchdictory     path of the index directory
     * @param highlighterFields fields whose value is replaced by the best
     *                          highlighted fragment (raw value if no fragment)
     * @return the hits in ranking order; {@code null} if the search failed
     *         before any hit was read, possibly partial if it failed later
     */
    public List<Map<String,Object>> seacherStr(String[] indexFields,String[] searchFields,String queryString,
            String searchdictory,String[] highlighterFields) {
        List<Map<String,Object>> list = null;
        IndexSearcher searcher = null;
        try {
            searcher = new IndexSearcher(FSDirectory.open(new File(searchdictory)), true);// read-only

            // One MUST flag per search field; the parser rejects arrays of different length.
            BooleanClause.Occur[] flags = new BooleanClause.Occur[searchFields.length];
            for (int i = 0; i < flags.length; i++) {
                flags[i] = BooleanClause.Occur.MUST;
            }
            Query query = IKQueryParser.parseMultiField(searchFields, queryString, flags);
            searcher.setSimilarity(new IKSimilarity());

            Highlighter highlighter = newHighlighter(query);
            ScoreDoc[] hits = searcher.search(query, MAX_HITS).scoreDocs;
            System.out.println("共有" + searcher.maxDoc() + "条索引,命中"
                    + hits.length + "条");
            list = new ArrayList<Map<String,Object>>();
            appendHits(list, searcher, hits, 0, hits.length, indexFields, highlighterFields, highlighter);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(searcher);
        }
        return list;
    }

    /**
     * Paged variant of the search: returns the hits numbered
     * {@code firstResult} to {@code firstResult + maxResult - 1} (1-based),
     * clipped to the hits actually available.
     *
     * @param indexFields       stored fields copied verbatim into each result map
     * @param searchFields      fields the query is run against
     * @param queryString       user query text
     * @param searchdictory     path of the index directory
     * @param firstResult       1-based position of the first hit of the page
     * @param maxResult         page size
     * @param highlighterFields fields whose value is replaced by the best
     *                          highlighted fragment (raw value if no fragment)
     * @return the hits of the requested page (empty if the page lies beyond
     *         the available hits); {@code null} if the search failed before
     *         any hit was read
     */
    public List<Map<String,Object>> seacherStrbyPage(String[] indexFields,String[] searchFields,String queryString,
            String searchdictory, int firstResult, int maxResult,String[] highlighterFields) {
        List<Map<String,Object>> list = null;
        IndexSearcher searcher = null;
        try {
            searcher = new IndexSearcher(FSDirectory.open(new File(searchdictory)), true);// read-only

            Query query = IKQueryParser.parseMultiField(searchFields, queryString);
            searcher.setSimilarity(new IKSimilarity());

            Highlighter highlighter = newHighlighter(query);
            ScoreDoc[] hits = searcher.search(query, MAX_HITS).scoreDocs;
            System.out.println("共有" + searcher.maxDoc() + "条索引,命中"
                    + hits.length + "条");
            list = new ArrayList<Map<String,Object>>();

            // Clip the page window to the hits that exist; long math avoids int overflow.
            int from = Math.max(0, firstResult - 1);
            long requestedEnd = (long) firstResult - 1 + maxResult;
            int to = (int) Math.min((long) hits.length, requestedEnd);
            appendHits(list, searcher, hits, from, to, indexFields, highlighterFields, highlighter);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(searcher);
        }
        return list;
    }

    /**
     * Counts the hits for a query so callers can compute the number of pages.
     *
     * @param searchFields  fields the query is run against
     * @param queryString   user query text
     * @param searchdictory path of the index directory
     * @return number of hits (at most 100), or 0 if the index could not be searched
     * @throws Exception declared for backward compatibility with existing callers
     */
    public int getResultCount(String[] searchFields,String queryString, String searchdictory)
            throws Exception {
        TopDocs topDocs = null;
        IndexSearcher searcher = null;
        try {
            searcher = new IndexSearcher(FSDirectory.open(new File(searchdictory)), true);// read-only
            Query query = IKQueryParser.parseMultiField(searchFields, queryString);
            searcher.setSimilarity(new IKSimilarity());
            topDocs = searcher.search(query, MAX_HITS);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(searcher);
        }
        return topDocs == null ? 0 : topDocs.scoreDocs.length;
    }

    /** Builds a highlighter that wraps matches in a span and uses 100-character fragments. */
    private Highlighter newHighlighter(Query query) {
        Formatter formatter = new SimpleHTMLFormatter(
                "<span class=\"highlighter\">", "</span>");
        Scorer fragmentScorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
        Fragmenter fragmenter = new SimpleFragmenter(100);
        highlighter.setTextFragmenter(fragmenter);
        return highlighter;
    }

    /** Appends the result maps for {@code hits[from..to)} to {@code target}. */
    private void appendHits(List<Map<String,Object>> target, IndexSearcher searcher, ScoreDoc[] hits,
            int from, int to, String[] indexFields, String[] highlighterFields, Highlighter highlighter)
            throws IOException, InvalidTokenOffsetsException {
        // One analyzer is enough for all highlight calls of this request.
        IKAnalyzer analyzer = new IKAnalyzer();
        for (int i = from; i < to; i++) {
            Document document = searcher.doc(hits[i].doc);
            target.add(toResultMap(document, indexFields, highlighterFields, highlighter, analyzer));
        }
    }

    /**
     * Copies the stored fields of one document into a map, then overwrites the
     * highlight fields with their best highlighted fragment where one exists.
     */
    private Map<String,Object> toResultMap(Document document, String[] indexFields,
            String[] highlighterFields, Highlighter highlighter, IKAnalyzer analyzer)
            throws IOException, InvalidTokenOffsetsException {
        Map<String,Object> map = new HashMap<String, Object>();
        for (int k = 0; k < indexFields.length; k++) {
            map.put(indexFields[k], document.get(indexFields[k]));
        }
        for (int j = 0; j < highlighterFields.length; j++) {
            String field = highlighterFields[j];
            String raw = document.get(field);
            String shown = raw;
            if (raw != null) {
                // getBestFragment returns null when the value contains no query term.
                String fragment = highlighter.getBestFragment(analyzer, "\"" + field + "\"", raw);
                if (fragment != null) {
                    shown = fragment;
                }
            }
            map.put(field, shown);
        }
        return map;
    }

    /** Closes the searcher, reporting but not propagating a failure. */
    private void closeQuietly(IndexSearcher searcher) {
        if (searcher == null) {
            return;
        }
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

配置文件管理类:
package com.qqw.search;

import java.io.IOException;  

import org.jdom.Document;  
import org.jdom.Element;  
import org.jdom.JDOMException;  
import org.jdom.input.SAXBuilder;  
 /** 
 * 配置文件的管理类
 * @author liuyazhuang
 * @create 2015-10-20
 */  
public class LuceneDBQueryUtil {  
      
 public static String getIndexPath(){  
  
        String filePath = "zxt_index.xml";  
        String indexPath="";  
        SAXBuilder builder = new SAXBuilder(false);  
        try {  
            Document doc = builder.build(Thread.currentThread().getContextClassLoader().getResource(filePath));  
            Element rootElement = doc.getRootElement();  
             Element index=rootElement.getChild("index");  
             indexPath=index.getText();  
             System.out.println(indexPath);  
        } catch (JDOMException e) {  
            e.printStackTrace();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
        return indexPath;  
      
 }  
}  
通过ServletContextListener配置定时任务
package com.qqw.timer;

import java.util.Timer;

import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;

import com.qqw.index.Constant;
import com.qqw.index.LuceneDBIndexerTask;
/** 
 * 定时操作
 * @author liuyazhuang
 * @create 2015-10-20
 */  
public class MyListener implements ServletContextListener {
	  private Timer timer = null;

	  public void contextInitialized(ServletContextEvent event) {
	    timer = new Timer(true);
	    //设置任务计划,启动和间隔时间
	    timer.schedule(LuceneDBIndexerTask.getInstance(), 0,Long.valueOf(Constant.CREATE_INDEX_SLEEP_TIME));
	  }

	  public void contextDestroyed(ServletContextEvent event) {
	    timer.cancel();
	  }
}
web.xml 配置
<?xml version="1.0" encoding="UTF-8"?>
<web-app version="2.4" 
	xmlns="http://java.sun.com/xml/ns/j2ee" 
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
	xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee 
	http://java.sun.com/xml/ns/j2ee/web-app_2_4.xsd">
  <!-- Deployment descriptor (Servlet 2.4): welcome page, the search servlet
       and the listener that schedules the periodic index build. -->
  <welcome-file-list>
    <welcome-file>index.jsp</welcome-file>
  </welcome-file-list>
  
  <!-- Servlet configuration: SearchServlet is mapped to /SearchServlet.
       NOTE(review): the class dataFromOracle.servlet.SearchServlet is not among
       the sources shown in this article - confirm the package name. -->
 <servlet>
    <servlet-name>SearchServlet</servlet-name>
    <servlet-class>dataFromOracle.servlet.SearchServlet</servlet-class>
  </servlet>
  <servlet-mapping>
    <servlet-name>SearchServlet</servlet-name>
    <url-pattern>/SearchServlet</url-pattern>
  </servlet-mapping>
  
  <!-- MyListener schedules the index task when the context is initialized and
       cancels its timer when the context is destroyed. -->
  <listener>
	<listener-class>com.qqw.timer.MyListener</listener-class>
</listener>
</web-app>
数据库表文件
-- Create table
-- Source table of the indexer: the configured query (SELECT * from journalarticle)
-- reads these columns and each row becomes one Lucene document.
create table JOURNALARTICLE
(
  -- Primary key of the article
  ARTICLEID  NUMBER(10) not null,
  -- Article title
  TITLE      VARCHAR2(255) not null,
  -- Name of the author
  USERNAME   VARCHAR2(4000) not null,
  -- Identifier of the author
  USERID     VARCHAR2(255) not null,
  -- Creation time; the indexer stores it formatted as yyyy-MM-dd
  CREATEDATE TIMESTAMP(6) not null,
  -- Article body; nullable, the indexer stores an empty string when it is null
  CONTENT    CLOB
);
-- Create/Recreate primary, unique and foreign key constraints
-- The primary-key constraint is named after the column it covers.
alter table JOURNALARTICLE
  add constraint ARTICLEID primary key (ARTICLEID);
通过以上的代码,移植到新项目时只需要修改配置文件即可。Lucene索引由定时任务自动建立,不需要考虑什么时候进行,只要保证数据库连接处于正常状态即可。索引字段和搜索字段都可以通过配置的形式表现出来,分页功能和高亮功能也都包含在其中。