elasticsearch-搜索引擎-简单开发(3)

时间:2022-05-28 17:12:59

  最近在网络爬虫、nlp群320349384中,关于es搜索应用研发的讨论有所增加,故在此进一步介绍搜索引擎应用研发中的经典操作——增删改查。由于在这类应用中“改”的场景相对很少,本文只讲增、查、删,即添加索引、条件检索、索引删除。

一、添加索引

  添加索引的步骤包括

   A、es搜索客户端创建

   B、要添加的目标数据

   C、将目标数据转化为可索引对象

   D、将索引对象(集合)通过A中的client发送给服务器端进行实际的索引操作。

   下面仅以第三方包jest的api为例,实现以上提到的各个步骤

  A步代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
//传入ip、port、构建client创建工厂类
     public   ESCommonOperatorUtil(String es_index_server_ip,  int   data_port,
             int   admin_port) {
         this .server_ip = es_index_server_ip;
         this .data_port = data_port;
         this .admin_port = admin_port;
         factory.setHttpClientConfig( new   HttpClientConfig.Builder(
                 StaticValue.prefix_http + es_index_server_ip +  ":"   + data_port)
                 .multiThreaded( true ).build());
     }
     /**
      * 获取一个jest的对象
      *
      * @return
      */
     public   JestClient getJestClient() {
         JestClient client = factory.getObject();
         return   client;
     }
 

    B步代码(将B中的目标数据转化为可索引对象):    

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
//将任意对象要索引的数据对象,转化为可索引地象
 
    private   static   List<Index> convertToIndexList(List pojoList) {
 
        List<Index> indexList =  new   ArrayList<Index>();
 
        Index index =  null ;
 
        for   (Object obj : pojoList) {
 
            index =  new   Index.Builder(obj).build();
 
            indexList.add(index);
 
        }
 
        return   indexList;
 
    }

    C和D步代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// 批量增加对象集合数据加进ES索引中,indexName为索引名称,typeName为索引类型,indexList为批量可索引对象集合
 
     public   void   addIndexToES(String indexName, String typeName,
 
             List<Index> indexList) {
 
         if   (indexList ==  null   || indexList.isEmpty()) {
 
             return ;
 
         }
 
         // 通过manager类得到jest client
 
         ESJestClientPojo esJestClientPojo = esClientManager.getJestClient();
 
         // 构建索引名称、索引类型的builder,此为加入索引数据的前提条件
 
         Bulk.Builder builder =  new   Bulk.Builder().defaultIndex(indexName)
 
                 .defaultType(typeName);
 
         // 每个index就是一个可索引对象,将其批量构建成json串数据
 
         builder.addAction(indexList).build();
 
         // 构造进可执行实质索引操作的批量操作工具类中
 
         Bulk bulk2 =  new   Bulk(builder);
 
  
 
         // 遇到time out时,进行重新请求
 
         int   repeat_time =  0 ;
 
         int   continious_exception_count =  0 ;
 
         while   (repeat_time <= SystemParas.es_index_fail_max_time) {
 
             try   {
 
                 //实质执行将数据转化为成索引的索引操作
 
                 esJestClientPojo.getJestClient().execute(bulk2);
 
                 logger.info(esJestClientPojo.getPrefix_sign() +  "index ok!" );
 
                 break ;
 
             }  catch   (SocketTimeoutException timeout) {
 
                 repeat_time++;
 
                 logger.info(esJestClientPojo.getPrefix_sign()
 
                         +  "occur timeout when indexing,will do it try again!" );
 
                 try   {
 
                     Thread.sleep(SystemParas.es_index_fail_waitting_time);
 
                 }  catch   (Exception e) {
 
                     e.printStackTrace();
 
                 }
 
                 continious_exception_count++;
 
  
 
                 // 更换新的jest client
 
                 if   (continious_exception_count == SystemParas.es_client_continious_exception_count_max) {
 
                     esJestClientPojo = esClientManager
 
                             .getNewClient(esJestClientPojo);
 
                 }
 
             }  catch   (Exception e) {
 
                 repeat_time++;
 
                 logger.info(esJestClientPojo.getPrefix_sign()
 
                         +  "occur unknown error when indexing,will do it try again!" );
 
                 e.printStackTrace();
 
                 try   {
 
                     Thread.sleep(SystemParas.es_index_fail_waitting_time);
 
                 }  catch   (Exception sleepException) {
 
                     sleepException.printStackTrace();
 
                 }
 
                 continious_exception_count++;
 
                 // 更换新的jest client
 
                 if   (continious_exception_count == SystemParas.es_client_continious_exception_count_max) {
 
                     esJestClientPojo = esClientManager
 
                             .getNewClient(esJestClientPojo);
 
                 }
 
             }
 
             if   (repeat_time > SystemParas.es_index_fail_max_time) {
 
                 logger.info( "the error index times is to the max try,will abandom this data!" );
 
                 break ;
 
             }
 
         }
 
         esClientManager.pushJestClient(esJestClientPojo);


二、条件检索

    步骤包括

    A、创建客户端client对象,做法与第一部分(添加索引)中的A步相同。

    B、构建查询对象SearchSourceBuilder

    C、搜索操作及结果转化

   其中B步代码:   

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
// 通过各种查询条件构建可查询对象SearchSourceBuilder,其逻辑类似于sql查询
 
     private   static   SearchSourceBuilder getSearchSourceBuilder(
 
             SearchConditionPojo searchConditionPojo) {
 
         List<SearchConditionItem> searchConditionItemList =  null ;
 
         if   (searchConditionPojo ==  null
 
                 || StringOperatorUtil
 
                         .isBlankCollection(searchConditionItemList = searchConditionPojo
 
                                 .getSearchConditionList())) {
 
             return   null ;
 
         }
 
         // 布尔查询
 
         BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
 
         // 把list转化为es searchbuilder
 
         for   (SearchConditionItem conditionPojo : searchConditionItemList) {
 
             // and查询构建
 
             if   (conditionPojo.getSearchType() == SearchType.AND) {
 
                 boolQueryBuilder = boolQueryBuilder.must(QueryBuilders
 
                         .termQuery(conditionPojo.getName(),
 
                                 conditionPojo.getValue()));
 
             }  else   if   (conditionPojo.getSearchType() == SearchType.RANGE) { // 范围查询构建
 
                 boolQueryBuilder = boolQueryBuilder.must(QueryBuilders
 
                         .rangeQuery(conditionPojo.getName())
 
                         .from(conditionPojo.getFromObj())
 
                         .to(conditionPojo.getToObj())
 
                         .includeLower(conditionPojo.isIncludeLower())
 
                         .includeUpper(conditionPojo.isIncludeHigher()));
 
             }
 
         }
 
         SearchSourceBuilder searchSourceBuilder =  new   SearchSourceBuilder();
 
         searchSourceBuilder.query(boolQueryBuilder);
 
         // 排序字段构建
 
         for   (SearchConditionItem conditionPojo : searchConditionItemList) {
 
             if   (conditionPojo.getSearchType() == SearchType.SORT) {
 
                 if   (conditionPojo.getSortOrderEnum().toString()
 
                         .equals(SortOrder.ASC.toString())) {
 
                     searchSourceBuilder.sort(conditionPojo.getName(),
 
                             SortOrder.ASC);
 
                 }  else   {
 
                     searchSourceBuilder.sort(conditionPojo.getName(),
 
                             SortOrder.DESC);
 
                 }
 
             }
 
         }
 
  
 
         // 选择搜索命中结果集的起始偏移量及多少个元素
 
         searchSourceBuilder.from(searchConditionPojo.getStart());
 
         searchSourceBuilder.size(searchConditionPojo.getPageSize());
 
  
 
         return   searchSourceBuilder;
 
     }

C步代码:

   有两种结果的返回形式,即对象和json串形式  

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
// 通过搜索返回结果,以对象形式
 
     public   List<T> search4RetObj(String indexName, String indexType,
 
             SearchSourceBuilder searchSourceBuilder, Class<T> resultClass) {
 
         SearchResult searchResult = search(indexName, indexType,
 
                 searchSourceBuilder);
 
         if   (searchResult ==  null ) {
 
             return   null ;
 
         }
 
         List<SearchResult.Hit<T, Void>> hits = searchResult
 
                 .getHits(resultClass);
 
         List<T> resultClassList =  new   LinkedList<T>();
 
  
 
         Iterator<Hit<T, Void>> hitsList = hits.iterator();
 
         while   (hitsList.hasNext()) {
 
             resultClassList.add(hitsList.next().source);
 
         }
 
         return   resultClassList;
 
     }
 
  
 
     // 通过搜索返回结果,以json串形式
 
     public   String search4RetJson(String indexName, String indexType,
 
             SearchSourceBuilder searchSourceBuilder, Class<T> resultClass) {
 
         SearchResult searchResult = search(indexName, indexType,
 
                 searchSourceBuilder);
 
         if   (searchResult ==  null ) {
 
             return   null ;
 
         }
 
         String jsonResult = searchResult.getJsonString();
 
  
 
         return   jsonResult;
 
     }

三、索引删除

 有多种情况,如直接删除某索引库,或是按指定条件删除索引

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
//直接删除指定索引库
 
    public   void   deleteByIndexName(String indexName) {
 
        ESJestClientPojo esJestClientPojo = esClientManager.getJestClient();
 
        DeleteIndex dIndex =  new   DeleteIndex( new   DeleteIndex.Builder(indexName));
 
 
 
        // 遇到time out时,进行重新请求
 
        int   repeat_time =  0 ;
 
        while   (repeat_time < SystemParas.es_index_fail_max_time) {
 
            try   {
 
                esJestClientPojo.getJestClient().execute(dIndex);
 
                logger.info(esJestClientPojo.getPrefix_sign()
 
                        +  "delete index ok!" );
 
                break ;
 
            }  catch   (SocketTimeoutException timeout) {
 
                repeat_time++;
 
                logger.info(esJestClientPojo.getPrefix_sign()
 
                        +  "occur timeout when deleteByIndexName,will do it try again!" );
 
                try   {
 
                    Thread.sleep(SystemParas.es_index_fail_waitting_time);
 
                }  catch   (Exception e) {
 
                    e.printStackTrace();
 
                }
 
                continue ;
 
            }  catch   (Exception e) {
 
                e.printStackTrace();
 
                logger.info(esJestClientPojo.getPrefix_sign()
 
                        +  "to add one item index occur error,will jump the item!" );
 
                break ;
 
            }
 
        }
 
        esClientManager.pushJestClient(esJestClientPojo);
 
    }
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
/**
 
      * 以下是ES端提供的client来操作索引端,通过构建QueryBuilder来达到按指定条件来删除索引
 
      * @param args
 
      */
 
     public   void   deleteIndexByQuery(String indexName, QueryBuilder queryBuilder) {
 
         ESSelfClientPojo esClientPojo = esClientManager.getESClient();
 
  
 
         // 遇到time out时,进行重新请求
 
         int   repeat_time =  0 ;
 
         while   (repeat_time < SystemParas.es_index_fail_max_time) {
 
             try   {
 
                 esClientPojo.getEsClient().prepareDeleteByQuery(indexName)
 
                         .setQuery(queryBuilder).execute().actionGet();
 
                 logger.info(esClientPojo.getPrefix_sign() +  "delete index ok!" );
 
                 break ;
 
             }  catch   (Exception e) {
 
                 e.printStackTrace();
 
                 logger.info(esClientPojo.getPrefix_sign()
 
                         +  "delete index fail,please try again!" );
 
             }
 
             repeat_time++;
 
         }
 
         esClientManager.pushESClient(esClientPojo);
 
     }


时间原因就暂写到这,有发现问题欢迎交流。交流促进成长,共享成就未来,欢迎加入网络技术、nlp群320349384,共同交流进步。