# Milvus vector search demo: a first look
from pymilvus import connections
# Step 1: open a connection to the Milvus server at localhost:19530 and
# register it under the alias "default".
connections.connect(alias="default", host="localhost", port="19530")
# 2 创建 collection,相当于一个表,类似 mongo 中 collection 概念
from pymilvus import CollectionSchema, FieldSchema, DataType
# Field definitions for the collection.
book_id = FieldSchema(name="book_id", dtype=DataType.INT64, is_primary=True)  # primary key
word_count = FieldSchema(name="word_count", dtype=DataType.INT64)  # scalar attribute
book_intro = FieldSchema(name="book_intro", dtype=DataType.FLOAT_VECTOR, dim=2)  # 2-dimensional float vector

# Schema: the ordered list of fields plus a free-text description.
schema = CollectionSchema(
    fields=[book_id, word_count, book_intro],
    description="Test book search",
)

# Name of the collection (comparable to a table name).
collection_name = "book"
from pymilvus import Collection
# Instantiate the collection from its name and schema.
collection = Collection(
    collection_name,
    schema,
    using="default",  # connection alias (matches the alias passed to connections.connect)
    shards_num=2,
    consistency_level="Strong",
)
# Step 3: add a partition named "novel" to the collection.
# NOTE: shards_num=2 (passed when the collection is created) configures
# shards, not partitions; partitions are created explicitly, as done here.
collection = Collection("book")  # Get an existing collection.
# create_partition() raises if the partition already exists, so check first
# to keep the script re-runnable against the same server.
if not collection.has_partition("novel"):
    collection.create_partition("novel")
# 4 插入数据到 collection
import random
# Column-oriented payload, one inner list per schema field:
#   data[0] -> book_id    (0 .. 1999)
#   data[1] -> word_count (10000 .. 11999)
#   data[2] -> book_intro (one 2-dimensional vector of random floats per entity)
num_entities = 2000
vector_dim = 2  # must match the `dim` declared for the book_intro field
data = [
    list(range(num_entities)),
    list(range(10000, 10000 + num_entities)),
    [[random.random() for _ in range(vector_dim)] for _ in range(num_entities)],
]
# Insert the column-oriented payload into the collection.
mr = collection.insert(data)

# Step 5: build an index on the vector field.
index_params = {
    "metric_type": "L2",
    "index_type": "IVF_FLAT",  # inverted-file index over clustered vectors
    "params": {"nlist": 1024},  # partition the vectors into 1024 clusters
}
# "book_intro" is the field of the collection that stores the vectors.
collection.create_index("book_intro", index_params)
# Step 6: vector search. The collection has to be loaded into memory before
# it can be searched.
collection = Collection(name="book")  # Get an existing collection.
collection.load()
import time
# Search parameters. nprobe is the number of index clusters probed per query;
# it is NOT the number of hits returned (that is controlled by `limit` below).
search_params = {"metric_type": "L2", "params": {"nprobe": 100}}
# perf_counter() is a monotonic, high-resolution clock, which makes it the
# appropriate choice for measuring an elapsed interval.
start = time.perf_counter()
results = collection.search(
    data=[[0.1, 0.2]],  # query vectors; the vector dimension is 2 in this example
    anns_field="book_intro",  # vector field to search against
    param=search_params,
    limit=10,  # top-K hits returned per query vector
    expr=None,  # no scalar filter
    consistency_level="Strong",
)
end = time.perf_counter()
# Print the ids and distances of the hits for the first query vector,
# followed by the elapsed time in seconds.
print(results[0].ids, results[0].distances, end - start)
# Sample output from one run (kept as a no-op string literal):
'''
([1073, 448, 489, 23, 752, 1496, 10, 1965, 1534, 107],
[3.3897744287969545e-05, 0.000469639606308192, 0.0006169890984892845, 0.0006427020998671651, 0.000682430574670434, 0.000686873565427959, 0.0007882573409006, 0.0013403998455032706, 0.0016471443232148886, 0.0016562778037041426],
0.2413792610168457)
'''
# Step 7: hybrid search -- a vector search combined with a range filter on a
# scalar field.
search_param = dict(
    data=[[0.1, 0.2]],
    anns_field="book_intro",
    param=dict(metric_type="L2", params=dict(nprobe=10)),
    limit=2,
    expr="word_count <= 11000",  # filter expression applied to the scalar field
)
res = collection.search(**search_param)
print(res[0])
# Sample output from one run (kept as a no-op string literal):
'''
['(distance: 0.000469639606308192, id: 448)', '(distance: 0.0006169890984892845, id: 489)']
'''