Milvus向量检索Demo初探

时间:2025-03-03 15:51:54
# Minimal Milvus walkthrough: connect, define a collection, insert random
# vectors, build an IVF_FLAT index, then run a plain and a filtered search.
# Needs a Milvus server listening on localhost:19530.
import random
import time

from pymilvus import (
    Collection,
    CollectionSchema,
    DataType,
    FieldSchema,
    connections,
)

NUM_ENTITIES = 2000  # number of rows inserted below
VECTOR_DIM = 2  # dimensionality of the book_intro vectors
PARTITION_NAME = "novel"

# 1. Open a connection and register it under the alias "default".
connections.connect(
    alias="default",
    host="localhost",
    port="19530",
)

# 2. Create the collection. A collection plays the role of a table (the
#    same idea as a MongoDB collection).
book_id = FieldSchema(
    name="book_id",
    dtype=DataType.INT64,
    is_primary=True,
)
word_count = FieldSchema(
    name="word_count",
    dtype=DataType.INT64,
)
book_intro = FieldSchema(
    name="book_intro",
    dtype=DataType.FLOAT_VECTOR,
    dim=VECTOR_DIM,
)
schema = CollectionSchema(
    fields=[book_id, word_count, book_intro],
    description="Test book search",
)
collection_name = "book"  # comparable to a table name

collection = Collection(
    name=collection_name,
    schema=schema,
    using="default",  # alias of the connection registered in step 1
    shards_num=2,
    consistency_level="Strong",
)

# 3. Add a partition to the collection, next to the one Milvus provides by
#    default. The existence check keeps the script re-runnable:
#    create_partition raises if the partition is already there.
collection = Collection(collection_name)  # get a handle on the existing collection
if not collection.has_partition(PARTITION_NAME):
    collection.create_partition(PARTITION_NAME)

# 4. Insert data. The payload is column-oriented, in schema field order:
#    data[0] = book_id, data[1] = word_count, data[2] = vectors,
#    NUM_ENTITIES items each.
data = [
    list(range(NUM_ENTITIES)),
    list(range(10000, 10000 + NUM_ENTITIES)),
    [[random.random() for _ in range(VECTOR_DIM)] for _ in range(NUM_ENTITIES)],
]
mr = collection.insert(data)

# 5. Build an index on the vector field.
index_params = {
    "metric_type": "L2",
    "index_type": "IVF_FLAT",  # inverted-file index over clustered vectors
    "params": {"nlist": 1024},  # number of clusters to partition into
}
collection.create_index(
    field_name="book_intro",  # the field that stores the vectors
    index_params=index_params,
)

# 6. Vector search. The collection must be loaded into memory first.
collection = Collection(collection_name)  # get a handle on the existing collection
collection.load()

# nprobe is the number of clusters probed per query; it is NOT the number of
# results. The result count (top-K) is set by `limit` below.
search_params = {"metric_type": "L2", "params": {"nprobe": 100}}

# perf_counter is monotonic, so the interval cannot be skewed by wall-clock
# adjustments the way time.time() can.
started = time.perf_counter()
results = collection.search(
    data=[[0.1, 0.2]],  # query vector; VECTOR_DIM is 2 in this example
    anns_field="book_intro",
    param=search_params,
    limit=10,  # top-K results to return
    expr=None,
    consistency_level="Strong",
)
elapsed = time.perf_counter() - started

# Search results: matched ids, their distances, and the elapsed seconds.
print(results[0].ids, results[0].distances, elapsed)
# Sample output from one run (vectors are random, so values differ per run):
# ([1073, 448, 489, 23, 752, 1496, 10, 1965, 1534, 107],
#  [3.3897744287969545e-05, 0.000469639606308192, 0.0006169890984892845,
#   0.0006427020998671651, 0.000682430574670434, 0.000686873565427959,
#   0.0007882573409006, 0.0013403998455032706, 0.0016471443232148886,
#   0.0016562778037041426],
#  0.2413792610168457)

# 7. Hybrid search: a vector search restricted by a scalar range filter.
search_param = {
    "data": [[0.1, 0.2]],
    "anns_field": "book_intro",
    "param": {"metric_type": "L2", "params": {"nprobe": 10}},
    "limit": 2,
    "expr": "word_count <= 11000",  # scalar filter expression
}
res = collection.search(**search_param)
print(res[0])
# Sample output from one run:
# ['(distance: 0.000469639606308192, id: 448)',
#  '(distance: 0.0006169890984892845, id: 489)']