--test:
DATABASE:dwtest(210开发库)
SQL> desc iquery.lpx_kw_tmp02
Name Type Nullable Default Comments
------------------ ------------- -------- ------- --------
KEYWORDS VARCHAR2(256) Y
COUNTRY_ID VARCHAR2(10) Y
COUNTRY_NAME VARCHAR2(256) Y
REGION_ID NUMBER Y
REGIN_NAME VARCHAR2(128) Y
PRE_REGIN_ID NUMBER Y
PRE_REGIN_ENAME VARCHAR2(128) Y
SRH_PV_THIS_5MON NUMBER Y
CATEGORY_LEAF_ID NUMBER Y
CATEGORY_ROOT_ID NUMBER Y
CATEGORY_LEVEL2_ID NUMBER Y
CATEGORY_LEVEL3_ID NUMBER Y
--从业务上利用
CREATE TABLE iquery.lpx_kw_tmp02_bak TABLESPACE TBS_EN_DSS_2009 AS
SELECT keywords,
REGIN_NAME,
CATEGORY_ROOT_ID,
SUM(SRH_PV_THIS_5MON) AS SRH_PV_THIS_5MON
FROM iquery.lpx_kw_tmp02
WHERE CATEGORY_ROOT_ID IS NOT NULL
GROUP BY keywords,
REGIN_NAME,
CATEGORY_ROOT_ID;
SQL> desc iquery.lpx_kw_tmp02_bak
Name Type Nullable Default Comments
---------------- ------------- -------- ------- --------
KEYWORDS VARCHAR2(256) Y
REGIN_NAME VARCHAR2(128) Y
CATEGORY_ROOT_ID NUMBER Y
SRH_PV_THIS_5MON NUMBER Y
SQL> select count(1) from iquery.lpx_kw_tmp02_bak;
COUNT(1)
----------
7799710
--模糊匹配'mechanical'
SQL> SELECT a.regin_name
2 ,SUM(a.srh_pv_this_5mon) AS srh_pv_this_5mon
3 FROM iquery.lpx_kw_tmp02_bak a
4 WHERE keywords LIKE '%mechanical%'
5 AND category_root_id = 80
6 group by regin_name
7 ;
REGIN_NAME SRH_PV_THIS_5MON
-------------------------------------------------------------------------------- ----------------
Western Europe 3
North America 10
Middle East 1
OTHER 1
Asia 21
--耗时:2.828S
SQL> SELECT /*+parallel(a 4) */a.regin_name
2 ,SUM(a.srh_pv_this_5mon) AS srh_pv_this_5mon
3 FROM iquery.lpx_kw_tmp02_bak a
4 WHERE keywords LIKE '%mechanical%'
5 AND category_root_id = 80
6 group by regin_name
7 ;
REGIN_NAME SRH_PV_THIS_5MON
-------------------------------------------------------------------------------- ----------------
North America 10
OTHER 1
Western Europe 3
Middle East 1
Asia 21
--耗时1.11S
SQL> EXPLAIN PLAN FOR SELECT a.regin_name
2 ,SUM(a.srh_pv_this_5mon) AS srh_pv_this_5mon
3 FROM iquery.lpx_kw_tmp02_bak a
4 WHERE keywords LIKE '%mechanical%'
5 AND category_root_id = 80
6 group by regin_name
7 ;
Explained
SQL> select * from table(dbms_xplan.display());
PLAN_TABLE_OUTPUT
--------------------------------------------------------------------------------
Plan hash value: 3331152321
--------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Tim
--------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 5390 | 1168K| 60651 (2)| 00:
| 1 | HASH GROUP BY | | 5390 | 1168K| 60651 (2)| 00:
|* 2 | TABLE ACCESS FULL| LPX_KW_TMP02_BAK | 5390 | 1168K| 60649 (2)| 00:
--------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("CATEGORY_ROOT_ID"=80 AND "KEYWORDS" LIKE '%mechanical%')
Note
-----
- 'PLAN_TABLE' is old version
- dynamic sampling used for this statement
19 rows selected
--创建全文索引:
1、授权
以sys登录
SQL> grant ctxapp to etl;
Grant succeeded
以ctxsys登录
grant execute on ctx_ddl to etl;
2、建立索引
以etl连接
SQL> exec ctx_ddl.create_preference('etl_kw1','BASIC_LEXER');
PL/SQL procedure successfully completed
--其中:第一个参数可以随便命名,第二个参数必须是oracle规定的几个值之一。
--第二个参数可以是CHINESE_LEXER、chinese_vgram_lexer;CHINESE_LEXER建索引时间长、但查询速度、查询准确度都比chinese_vgram_lexer高
--建立后可以用如下方式查询:
SQL> SELECT * FROM CTX_PREFERENCES;
PRE_OWNER PRE_NAME PRE_CLASS PRE_OBJECT
------------------------------ ------------------------------ ------------------------------ ------------------------------
CTXSYS URL_DATASTORE DATASTORE URL_DATASTORE
CTXSYS FILE_DATASTORE DATASTORE FILE_DATASTORE
WKSYS WK_DATASTORE DATASTORE FILE_DATASTORE
CTXSYS DEFAULT_DATASTORE DATASTORE DIRECT_DATASTORE
CTXSYS DIRECT_DATASTORE DATASTORE DIRECT_DATASTORE
CTXSYS AUTO_FILTER FILTER AUTO_FILTER
CTXSYS MAIL_FILTER FILTER MAIL_FILTER
CTXSYS INSO_FILTER FILTER INSO_FILTER
CTXSYS NULL_FILTER FILTER NULL_FILTER
WKSYS WK_FILTER FILTER NULL_FILTER
CTXSYS DEFAULT_EXTRACT_LEXER LEXER AUTO_LEXER
WKSYS WK_KOREAN_LEXER LEXER KOREAN_MORPH_LEXER
WKSYS WK_LEXER LEXER MULTI_LEXER
ETL ETL_LEXER LEXER CHINESE_LEXER
ETL ETL_LPXUAN LEXER CHINESE_LEXER
ETL ETL_KW LEXER CHINESE_LEXER
CTXSYS MY_LEXER1 LEXER CHINESE_VGRAM_LEXER
WKSYS WK_CHINESE_LEXER LEXER CHINESE_VGRAM_LEXER
WKSYS WK_JAPANESE_LEXER LEXER JAPANESE_VGRAM_LEXER
CTXSYS MY_LEXER LEXER BASIC_LEXER
PRE_OWNER PRE_NAME PRE_CLASS PRE_OBJECT
------------------------------ ------------------------------ ------------------------------ ------------------------------
CTXSYS DEFAULT_LEXER LEXER BASIC_LEXER
CTXSYS BASIC_LEXER LEXER BASIC_LEXER
WKSYS WK_BASIC_LEXER LEXER BASIC_LEXER
CTXSYS DEFAULT_WORDLIST WORDLIST BASIC_WORDLIST
CTXSYS BASIC_WORDLIST WORDLIST BASIC_WORDLIST
WKSYS WK_WORDLIST WORDLIST BASIC_WORDLIST
CTXSYS MYSTORE STORAGE BASIC_STORAGE
CTXSYS DEFAULT_STORAGE STORAGE BASIC_STORAGE
CTXSYS MYSTORE1 STORAGE BASIC_STORAGE
WKSYS WK_STORAGE STORAGE BASIC_STORAGE
CTXSYS ENTITY_STORAGE_DR STORAGE ENTITY_STORAGE
CTXSYS ENTITY_STORAGE_D STORAGE ENTITY_STORAGE
CTXSYS ENTITY_STORAGE_R STORAGE ENTITY_STORAGE
CTXSYS ENTITY_STORAGE STORAGE ENTITY_STORAGE
CTXSYS DEFAULT_CLASSIFIER CLASSIFIER RULE_CLASSIFIER
CTXSYS DEFAULT_CLUSTERING CLUSTERING KMEAN_CLUSTERING
36 rows selected
SQL> create index idx_item_kw ON iquery.lpx_kw_tmp02_bak(keywords) indextype is ctxsys.context parameters('lexer etl_kw1');
Index created
--parameters括号中都第一个值lexer是固定的,第二个值是上一步建立的preference名称
--耗时:5056S
3、定期同步索引(同步索引为增量同步)
--context这种类型的索引需要手工同步
--同步:根据新增记录的文本内容更新全文搜索的索引。
SQL> create or replace procedure rel_kw_sync_index as
begin
ctx_ddl.sync_index('idx_item_rel_kw');
end;
/
Procedure created.
Elapsed: 00:00:00.08
SQL> VARIABLE jobno number;
SQL> BEGIN
2 DBMS_JOB.SUBMIT(:jobno,'rel_kw_sync_index();',
3 SYSDATE, 'SYSDATE + (1/24/4)');
4 commit;
5 END;
6 /
PL/SQL procedure successfully completed.
Elapsed: 00:00:00.27
4、定期优化索引
--优化:根据被删除记录清除全文搜索索引中的垃圾
SQL> create or replace procedure rel_kw_optimize_index as
2 begin
3 ctx_ddl.optimize_index('rel_kw_sync_index','FULL');
4 end;
5 /
SQL> VARIABLE jobno number;
SQL> BEGIN
2 DBMS_JOB.SUBMIT(:jobno,'rel_kw_optimize_index();',
3 SYSDATE, 'SYSDATE + 1');
4 commit;
5 END;
6 /
Procedure created.
全文检索的缺点是不能实时保证查询的正确性,所以要合理地定期同步索引和优化索引。
--造测试数据
DROP TABLE iquery.lpx_kw_test;
CREATE TABLE iquery.lpx_kw_test TABLESPACE TBS_EN_DSS_2009
AS
SELECT SUBSTR(keywords, INSTR(keywords, ' ', 1)+1, INSTR(keywords, ' ', 1, 2) - INSTR(keywords, ' ', 1) - 1) AS kw, a.*
FROM iquery.lpx_kw_tmp02_bak1 a
WHERE length(keywords) - length(REPLACE(keywords, ' ', '')) > 2 AND ROWNUM < 1000 + 1;
--根据行业和大洲进行匹配
DROP TABLE iquery.lpx_kw_tmp03;
CREATE TABLE iquery.lpx_kw_tmp03(
KW VARCHAR2(256)
,REGIN_NAME VARCHAR2(128)
,CATEGORY_ROOT_ID NUMBER
,SRH_PV_THIS_5MON NUMBER
);
set serveroutput ON;
DECLARE
cnt NUMBER;
start_date DATE;
end_date DATE;
CURSOR c1
IS
select kw, keywords, regin_name, category_root_id FROM iquery.lpx_kw_test;
BEGIN
cnt := 0;
start_date := SYSDATE;
FOR ref_cursor IN c1
LOOP
INSERT INTO iquery.lpx_kw_tmp03
SELECT ref_cursor.kw
,ref_cursor.regin_name
,ref_cursor.category_root_id
,SUM(a.srh_pv_this_5mon) AS srh_pv_this_5mon
FROM iquery.lpx_kw_tmp02_bak a --全文索引
WHERE contains(a.keywords, ref_cursor.kw) > 0
AND a.category_root_id = ref_cursor.category_root_id
AND a.regin_name = ref_cursor.regin_name
GROUP BY ref_cursor.kw
,ref_cursor.regin_name
,ref_cursor.category_root_id;
COMMIT;
cnt := cnt + 1;
END LOOP;
end_date := SYSDATE;
DBMS_OUTPUT.PUT_LINE('total records:'||cnt);
DBMS_OUTPUT.PUT_LINE('total time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000));
DBMS_OUTPUT.PUT_LINE('avg time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000)/cnt);
END;
/
total records:100
total time(ms):34000
avg time(ms):340
PL/SQL procedure successfully completed.