// Create a Redis connection
static ConnectionMultiplexer mux = ConnectionMultiplexer.Connect("localhost");
// Get a Redis database
static IDatabase db = mux.GetDatabase();
// Create a RediSearch client
static SearchCommands ft = new SearchCommands(db, null);
在进行向量搜索之前,首先需要定义并创建索引,并指定相似性算法。
/// <summary>
/// Creates the search index named <c>indexName</c> over HASH keys that start with <c>prefix</c>.
/// </summary>
/// <remarks>
/// The schema declares a tag field "tag", a text field "content" and an HNSW vector field
/// "vector" configured as FLOAT32, DIM 2, with the COSINE distance metric.
/// </remarks>
public static async Task CreateIndexAsync()
{
    // Attributes of the vector field, passed through to the index definition.
    var vectorAttributes = new Dictionary<string, object>()
    {
        ["TYPE"] = "FLOAT32",
        ["DIM"] = 2,
        ["DISTANCE_METRIC"] = "COSINE"
    };

    var createParams = new FTCreateParams()
        .On(IndexDataType.HASH)
        .Prefix(prefix);

    var schema = new Schema()
        .AddTagField("tag")
        .AddTextField("content")
        .AddVectorField("vector", VectorField.VectorAlgo.HNSW, vectorAttributes);

    await ft.CreateAsync(indexName, createParams, schema);
}
这段代码的意思是:
1.使用了一个异步方法 ft.CreateAsync 来创建索引。它接受三个参数:索引名称 indexName,一个 FTCreateParams 对象和一个 Schema 对象;
2.FTCreateParams 类提供了一些参数选项,用于指定索引的参数。这里使用 .On(IndexDataType.HASH) 方法来指定索引数据类型为哈希,并使用 .Prefix(prefix) 方法来指定索引数据的前缀;
/// <summary>
/// Writes one document as a Redis hash stored under the key <c>{prefix}{docId}</c>.
/// </summary>
/// <param name="docId">Document identifier, appended to <paramref name="prefix"/> to form the key.</param>
/// <param name="prefix">Key prefix.</param>
/// <param name="tag">Value stored in the "tag" hash field.</param>
/// <param name="content">Value stored in the "content" hash field.</param>
/// <param name="vector">Vector stored in the "vector" hash field as the concatenated bytes of each float.</param>
public async Task SetAsync(string docId, string prefix, string tag, string content, float[] vector)
{
    // Each float is turned into its bytes via BitConverter and the results are concatenated.
    byte[] vectorBytes = vector
        .SelectMany(component => BitConverter.GetBytes(component))
        .ToArray();

    var fields = new HashEntry[]
    {
        new HashEntry("tag", tag),
        new HashEntry("content", content),
        new HashEntry("vector", vectorBytes)
    };

    await db.HashSetAsync($"{prefix}{docId}", fields);
}
SetAsync方法用于将一个具有指定文档ID、前缀、标签、内容及内容的向量存储到索引库中。并使用SelectMany()方法和BitConverter.GetBytes()方法将向量转换为一个字节数组。
/// <summary>
/// Runs a KNN query against the "vector" field of the index <c>indexName</c> and streams the matches.
/// </summary>
/// <param name="vector">Query vector; sent as the concatenated bytes of each float in the "vector" query parameter.</param>
/// <param name="limit">Used both as the K of the KNN clause and as the page size (offset 0).</param>
/// <returns>For every returned document, its "content" field and its "score" field converted to <see cref="double"/>.</returns>
public async IAsyncEnumerable<(string Content, double Score)> SearchAsync(float[] vector, int limit)
{
    byte[] queryVector = vector
        .SelectMany(component => BitConverter.GetBytes(component))
        .ToArray();

    // "$vector" is a query parameter reference resolved by AddParam below, not a C# interpolation.
    var knnQuery = new Query($"*=>[KNN {limit} @vector $vector AS score]")
        .AddParam("vector", queryVector)
        .SetSortBy("score")
        .ReturnFields("content", "score")
        .Limit(0, limit)
        .Dialect(2);

    var searchResult = await ft.SearchAsync(indexName, knnQuery).ConfigureAwait(false);

    foreach (var doc in searchResult.Documents)
    {
        double score = Convert.ToDouble(doc["score"]);
        yield return (doc["content"], score);
    }
}
这段代码的意思是:
/// <summary>
/// Runs a KNN query against the "vector" field of the index <c>indexName</c>, restricted to
/// documents whose "tag" field matches <paramref name="tag"/>, and streams the matches.
/// </summary>
/// <param name="tag">Tag value used as the filter in front of the KNN clause.</param>
/// <param name="vector">Query vector; sent as the concatenated bytes of each float in the "vector" query parameter.</param>
/// <param name="limit">Used both as the K of the KNN clause and as the page size (offset 0).</param>
/// <returns>For every returned document, its "tag" and "content" fields and its "score" field converted to <see cref="double"/>.</returns>
public static async IAsyncEnumerable<(string Tag, string Content, double Score)> SearchAsync(string tag, float[] vector, int limit)
{
    byte[] queryVector = vector.SelectMany(BitConverter.GetBytes).ToArray();

    // Tag filters use the syntax @field:{value}. Inside an interpolated string a single
    // "{tag}" is consumed as an interpolation hole and the braces are lost, so the literal
    // braces are escaped ("{{" / "}}") around the hole: "{{{tag}}}" renders as "{<tag>}".
    // NOTE(review): tag values containing spaces or punctuation still need escaping per
    // the RediSearch tag syntax; that is not handled here.
    var query = new Query($"(@tag:{{{tag}}})=>[KNN {limit} @vector $vector AS score]")
        .AddParam("vector", queryVector)
        .SetSortBy("score")
        .ReturnFields("tag", "content", "score")
        .Limit(0, limit)
        .Dialect(2);

    var result = await ft.SearchAsync(indexName, query).ConfigureAwait(false);

    foreach (var document in result.Documents)
    {
        yield return (document["tag"], document["content"], Convert.ToDouble(document["score"]));
    }
}
这段代码使用了标签过滤与KNN相结合的混合查询,与上一段代码相比,新增了@tag过滤条件,将限制结果仅包含给定标签的内容。这样做可以增加查询的准确性,提高查询效率。
/// <summary>
/// Deletes the Redis key <c>{prefix}{docId}</c>.
/// </summary>
/// <param name="docId">Document identifier, appended to <paramref name="prefix"/> to form the key.</param>
/// <param name="prefix">Key prefix.</param>
public async Task DeleteAsync(string docId, string prefix)
{
    string documentKey = $"{prefix}{docId}";
    await db.KeyDeleteAsync(documentKey);
}
这个方法通过删除与指定向量相关联的哈希缓存键,来实现从索引库中删除指定向量数据。
/// <summary>
/// Drops the index named <c>indexName</c>, passing <c>true</c> as the second argument.
/// </summary>
/// <remarks>
/// NOTE(review): the second argument is understood to map to the DD option of FT.DROPINDEX
/// (also delete the indexed documents) - confirm against the NRedisStack API reference.
/// </remarks>
public async Task DropIndexAsync()
{
    const bool deleteDocuments = true;
    await ft.DropIndexAsync(indexName, deleteDocuments);
}
这个方法 await ft.DropIndexAsync接受两个参数: indexName 和 true 。indexName 表示索引库的名称, true 表示在删除索引的同时删除已被索引的文档(即对应的哈希键)。
/// <summary>
/// Fetches the information the search client reports for the index named <c>indexName</c>.
/// </summary>
/// <returns>The <c>InfoResult</c> returned by the search client.</returns>
public async Task<InfoResult> InfoAsync()
{
    InfoResult indexInfo = await ft.InfoAsync(indexName);
    return indexInfo;
}
通过 await ft.InfoAsync(indexName) 方法,我们可以获取到指定索引库的大小,文档数量等相关索引库信息。
using NRedisStack;
using NRedisStack.Search;
using NRedisStack.Search.DataTypes;
using NRedisStack.Search.Literals.Enums;
using StackExchange.Redis;
using static NRedisStack.Search.Schema;

namespace RedisVectorExample
{
    /// <summary>
    /// End-to-end sample: creates a vector index, stores a few documents, deletes one,
    /// runs a plain KNN search and a tag-filtered KNN search, then drops the index.
    /// </summary>
    class Program
    {
        // duidaima.com
        // Redis connection
        static readonly ConnectionMultiplexer mux = ConnectionMultiplexer.Connect("localhost");
        // Redis database
        static readonly IDatabase db = mux.GetDatabase();
        // RediSearch client
        static readonly SearchCommands ft = new SearchCommands(db, null);
        // Index name
        static readonly string indexName = "test:index";
        // Key prefix of the indexed hashes
        static readonly string prefix = "test:data";

        static async Task Main(string[] args)
        {
            // Create the vector index
            await CreateIndexAsync();

            // Add some vectors to the index
            await SetAsync("1", "A", "测试数据A1", new float[] { 0.1f, 0.2f });
            await SetAsync("2", "A", "测试数据A2", new float[] { 0.3f, 0.4f });
            await SetAsync("3", "B", "测试数据B1", new float[] { 0.5f, 0.6f });
            await SetAsync("4", "C", "测试数据C1", new float[] { 0.7f, 0.8f });

            // Delete one vector
            await DeleteAsync("4");

            // KNN search
            await foreach (var (Content, Score) in SearchAsync(new float[] { 0.1f, 0.2f }, 2))
            {
                Console.WriteLine($"内容:{Content},相似度得分:{Score}");
            }

            // Hybrid search (tag filter + KNN)
            await foreach (var (Tag, Content, Score) in SearchAsync("A", new float[] { 0.1f, 0.2f }, 2))
            {
                Console.WriteLine($"标签:{Tag},内容:{Content},相似度得分:{Score}");
            }

            // Fetch the index info and drop the index when info was returned
            var info = await InfoAsync();
            if (info != null)
            {
                await DropIndexAsync();
            }
        }

        /// <summary>
        /// Creates the index <c>indexName</c> over HASH keys starting with <c>prefix</c>, with a
        /// tag field "tag", a text field "content" and an HNSW vector field "vector"
        /// (FLOAT32, DIM 2, COSINE).
        /// </summary>
        public static async Task CreateIndexAsync()
        {
            await ft.CreateAsync(indexName,
                new FTCreateParams()
                    .On(IndexDataType.HASH)
                    .Prefix(prefix),
                new Schema()
                    .AddTagField("tag")
                    .AddTextField("content")
                    .AddVectorField("vector", VectorField.VectorAlgo.HNSW,
                        new Dictionary<string, object>()
                        {
                            ["TYPE"] = "FLOAT32",
                            ["DIM"] = 2,
                            ["DISTANCE_METRIC"] = "COSINE"
                        }));
        }

        /// <summary>
        /// Writes one document as a Redis hash stored under the key <c>{prefix}{docId}</c>.
        /// </summary>
        /// <param name="docId">Document identifier, appended to the key prefix.</param>
        /// <param name="tag">Value stored in the "tag" hash field.</param>
        /// <param name="content">Value stored in the "content" hash field.</param>
        /// <param name="vector">Vector stored in the "vector" hash field as the concatenated bytes of each float.</param>
        public static async Task SetAsync(string docId, string tag, string content, float[] vector)
        {
            await db.HashSetAsync($"{prefix}{docId}", new HashEntry[]
            {
                new HashEntry("tag", tag),
                new HashEntry("content", content),
                new HashEntry("vector", vector.SelectMany(BitConverter.GetBytes).ToArray())
            });
        }

        /// <summary>Deletes the Redis key <c>{prefix}{docId}</c>.</summary>
        /// <param name="docId">Document identifier, appended to the key prefix.</param>
        public static async Task DeleteAsync(string docId)
        {
            await db.KeyDeleteAsync($"{prefix}{docId}");
        }

        /// <summary>Drops the index <c>indexName</c>, passing <c>true</c> as the second argument.</summary>
        public static async Task DropIndexAsync()
        {
            await ft.DropIndexAsync(indexName, true);
        }

        /// <summary>Fetches the information the search client reports for the index <c>indexName</c>.</summary>
        public static async Task<InfoResult> InfoAsync()
        {
            return await ft.InfoAsync(indexName);
        }

        /// <summary>
        /// Runs a KNN query against the "vector" field and streams the matches.
        /// </summary>
        /// <param name="vector">Query vector; sent as the concatenated bytes of each float in the "vector" query parameter.</param>
        /// <param name="limit">Used both as the K of the KNN clause and as the page size (offset 0).</param>
        /// <returns>For every returned document, its "content" field and its "score" field converted to <see cref="double"/>.</returns>
        public static async IAsyncEnumerable<(string Content, double Score)> SearchAsync(float[] vector, int limit)
        {
            var query = new Query($"*=>[KNN {limit} @vector $vector AS score]")
                .AddParam("vector", vector.SelectMany(BitConverter.GetBytes).ToArray())
                .SetSortBy("score")
                .ReturnFields("content", "score")
                .Limit(0, limit)
                .Dialect(2);

            var result = await ft.SearchAsync(indexName, query).ConfigureAwait(false);

            foreach (var document in result.Documents)
            {
                yield return (document["content"], Convert.ToDouble(document["score"]));
            }
        }

        /// <summary>
        /// Runs a KNN query against the "vector" field, restricted to documents whose "tag"
        /// field matches <paramref name="tag"/>, and streams the matches.
        /// </summary>
        /// <param name="tag">Tag value used as the filter in front of the KNN clause.</param>
        /// <param name="vector">Query vector; sent as the concatenated bytes of each float in the "vector" query parameter.</param>
        /// <param name="limit">Used both as the K of the KNN clause and as the page size (offset 0).</param>
        /// <returns>For every returned document, its "tag" and "content" fields and its "score" field converted to <see cref="double"/>.</returns>
        public static async IAsyncEnumerable<(string Tag, string Content, double Score)> SearchAsync(string tag, float[] vector, int limit)
        {
            // Tag filters use the syntax @field:{value}. A single "{tag}" inside an interpolated
            // string is an interpolation hole and drops the braces, so the literal braces are
            // escaped around the hole: "{{{tag}}}" renders as "{<tag>}".
            var query = new Query($"(@tag:{{{tag}}})=>[KNN {limit} @vector $vector AS score]")
                .AddParam("vector", vector.SelectMany(BitConverter.GetBytes).ToArray())
                .SetSortBy("score")
                .ReturnFields("tag", "content", "score")
                .Limit(0, limit)
                .Dialect(2);

            var result = await ft.SearchAsync(indexName, query).ConfigureAwait(false);

            foreach (var document in result.Documents)
            {
                yield return (document["tag"], document["content"], Convert.ToDouble(document["score"]));
            }
        }
    }
}
篇幅原因先到这里,下一篇我们接着探讨如何利用ChatGPT Embeddings技术提取文本向量,并基于Redis实现文本相似度匹配。相比传统方法,这种方式能够更好地保留文本的语义和情感信息,从而更准确地反映文本的实质性内容。