Skip to content

Commit

Permalink
feat: add logic duckdb (#407)
Browse files Browse the repository at this point in the history
Co-authored-by: Konstantin S. <[email protected]>
  • Loading branch information
danijerez and HavenDV authored Jul 31, 2024
1 parent 07558f1 commit 384a807
Show file tree
Hide file tree
Showing 8 changed files with 134 additions and 4 deletions.
65 changes: 65 additions & 0 deletions src/Databases/DuckDb/src/DuckDbVectorCollection.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
using Microsoft.SemanticKernel.Connectors.DuckDB;
using Microsoft.SemanticKernel.Memory;

namespace LangChain.Databases.DuckDb
{
/// <summary>
/// <see cref="IVectorCollection"/> implementation that stores vectors in a <see cref="DuckDBMemoryStore"/>
/// collection named after <see cref="VectorCollection.Name"/>.
/// </summary>
/// <remarks>
/// Metadata is flattened into the record's additional-metadata string as <c>key&amp;value</c> entries
/// joined by <c>#</c>. Values are written through string concatenation, so they are read back as strings.
/// Keys or values that themselves contain <c>#</c> (or keys containing <c>&amp;</c>) cannot round-trip
/// with this format.
/// </remarks>
/// <param name="store">DuckDB memory store that performs all reads and writes.</param>
/// <param name="name">Collection name passed to every store call.</param>
/// <param name="id">Optional collection identifier forwarded to the base class.</param>
public class DuckDbVectorCollection(
    DuckDBMemoryStore store,
    string name = VectorCollection.DefaultName,
    string? id = null) : VectorCollection(name, id), IVectorCollection
{
    // Separators of the flattened metadata format; kept as arrays because
    // string.Split(char[], ...) is the only overload available on every target framework.
    private static readonly char[] EntrySeparators = ['#'];
    private static readonly char[] KeyValueSeparators = ['&'];

    /// <summary>
    /// Upserts every vector into the collection, one record at a time.
    /// </summary>
    /// <param name="items">Vectors to store.</param>
    /// <param name="cancellationToken">Token forwarded to the store.</param>
    /// <returns>The keys returned by the store, in the order the items were processed.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
    public async Task<IReadOnlyCollection<string>> AddAsync(IReadOnlyCollection<Vector> items, CancellationToken cancellationToken = default)
    {
        items = items ?? throw new ArgumentNullException(nameof(items));

        List<string> keys = new(items.Count);
        foreach (var item in items)
        {
            //TODO: review way to map metadata
            string? metadata = null;
            if (item.Metadata != null)
            {
                metadata = string.Join("#", item.Metadata.Select(kv => kv.Key + "&" + kv.Value));
            }

            var record = MemoryRecord.LocalRecord(item.Id, item.Text, null, item.Embedding, metadata);
            var key = await store.UpsertAsync(Name, record, cancellationToken).ConfigureAwait(false);
            keys.Add(key);
        }

        return keys;
    }

    /// <summary>
    /// Removes the records with the given ids from the collection.
    /// </summary>
    /// <param name="ids">Ids of the records to remove.</param>
    /// <param name="cancellationToken">Token forwarded to the store.</param>
    /// <returns>Always <c>true</c> once the store call completes.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="ids"/> is null.</exception>
    public async Task<bool> DeleteAsync(IEnumerable<string> ids, CancellationToken cancellationToken = default)
    {
        ids = ids ?? throw new ArgumentNullException(nameof(ids));

        await store.RemoveBatchAsync(Name, ids, cancellationToken).ConfigureAwait(false);
        return true;
    }

    /// <summary>
    /// Loads a single record by id.
    /// </summary>
    /// <param name="id">Id of the record to load.</param>
    /// <param name="cancellationToken">Token forwarded to the store.</param>
    /// <returns>
    /// A vector carrying the requested id, the stored text and the parsed metadata,
    /// or <c>null</c> when the store returns no record.
    /// </returns>
    public async Task<Vector?> GetAsync(string id, CancellationToken cancellationToken = default)
    {
        var record = await store.GetAsync(Name, id, cancellationToken: cancellationToken).ConfigureAwait(false);
        if (record is null)
        {
            return null;
        }

        return new Vector
        {
            Id = id,
            Text = record.Metadata.Text,
            Metadata = ParseMetadata(record.Metadata?.AdditionalMetadata),
        };
    }

    /// <summary>
    /// Reports whether the store currently lists no collections.
    /// </summary>
    /// <param name="cancellationToken">Token forwarded to the store.</param>
    /// <returns><c>true</c> when the store enumerates no collection names.</returns>
    public async Task<bool> IsEmptyAsync(CancellationToken cancellationToken = default)
    {
        // NOTE(review): this inspects the store's collection list, not the records of this
        // collection, so it stays false while any collection exists — confirm this is intended.
        var hasCollections = await store.GetCollectionsAsync(cancellationToken)
            .AnyAsync(cancellationToken).ConfigureAwait(false);
        return !hasCollections;
    }

    /// <summary>
    /// Returns the records nearest to the first embedding of the request.
    /// </summary>
    /// <param name="request">Search request; only its first embedding is used.</param>
    /// <param name="settings">Search settings; a default instance is used when null.</param>
    /// <param name="cancellationToken">Token forwarded to the store.</param>
    /// <returns>A response whose items carry the id, text and parsed metadata of each match.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
    public async Task<VectorSearchResponse> SearchAsync(VectorSearchRequest request, VectorSearchSettings? settings = null, CancellationToken cancellationToken = default)
    {
        request = request ?? throw new ArgumentNullException(nameof(request));
        settings ??= new VectorSearchSettings();

        var matches = await store
            .GetNearestMatchesAsync(Name, request.Embeddings.First(), limit: settings.NumberOfResults, cancellationToken: cancellationToken)
            .ToListAsync(cancellationToken).ConfigureAwait(false);

        // Item1 is the stored record. Item2 (the score reported by the store) is not surfaced here.
        var items = matches
            .Select(match => new Vector
            {
                Id = match.Item1.Metadata.Id,
                Text = match.Item1.Metadata.Text,
                Metadata = ParseMetadata(match.Item1.Metadata.AdditionalMetadata),
            })
            .ToList();

        return new VectorSearchResponse { Items = items };
    }

    /// <summary>
    /// Rebuilds a metadata dictionary from the flattened <c>key&amp;value#key&amp;value</c> string
    /// written by <see cref="AddAsync"/>.
    /// </summary>
    /// <param name="serialized">Flattened metadata string; may be null or empty.</param>
    /// <returns>
    /// <c>null</c> when there is nothing to parse; otherwise a dictionary of string values.
    /// An entry without <c>&amp;</c> maps its whole text to an empty value, and a repeated key keeps the last value.
    /// </returns>
    private static Dictionary<string, object>? ParseMetadata(string? serialized)
    {
        if (serialized is null || serialized.Length == 0)
        {
            return null;
        }

        Dictionary<string, object> metadata = new();
        foreach (var entry in serialized.Split(EntrySeparators, StringSplitOptions.RemoveEmptyEntries))
        {
            // Split on the first '&' only, so a value that contains '&' is kept whole.
            var parts = entry.Split(KeyValueSeparators, 2);
            metadata[parts[0]] = parts.Length > 1 ? parts[1] : string.Empty;
        }

        return metadata;
    }
}
}
43 changes: 43 additions & 0 deletions src/Databases/DuckDb/src/DuckDbVectorDatabase.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
using Microsoft.SemanticKernel.Connectors.DuckDB;

namespace LangChain.Databases.DuckDb
{
/// <summary>
/// <see cref="IVectorDatabase"/> implementation that delegates collection management
/// to a <see cref="DuckDBMemoryStore"/>.
/// </summary>
/// <param name="store">DuckDB memory store that performs all operations.</param>
public class DuckDbVectorDatabase(DuckDBMemoryStore store) : IVectorDatabase
{
    /// <summary>
    /// Creates a collection in the store.
    /// </summary>
    /// <param name="collectionName">Name of the collection to create.</param>
    /// <param name="dimensions">Not used by this implementation; the store call takes only the name.</param>
    /// <param name="cancellationToken">Token forwarded to the store.</param>
    public async Task CreateCollectionAsync(string collectionName, int dimensions, CancellationToken cancellationToken = default)
    {
        await store.CreateCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Deletes a collection from the store.
    /// </summary>
    /// <param name="collectionName">Name of the collection to delete.</param>
    /// <param name="cancellationToken">Token forwarded to the store.</param>
    public async Task DeleteCollectionAsync(string collectionName, CancellationToken cancellationToken = default)
    {
        await store.DeleteCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Returns a handle to an existing collection.
    /// </summary>
    /// <param name="collectionName">Name of the collection to open.</param>
    /// <param name="cancellationToken">Token forwarded to the store.</param>
    /// <returns>A <see cref="DuckDbVectorCollection"/> bound to the named collection.</returns>
    /// <exception cref="InvalidOperationException">The store reports that the collection does not exist.</exception>
    public async Task<IVectorCollection> GetCollectionAsync(string collectionName, CancellationToken cancellationToken = default)
    {
        // Same existence check as GetOrCreateCollectionAsync, instead of listing every collection.
        var exists = await IsCollectionExistsAsync(collectionName, cancellationToken).ConfigureAwait(false);
        return exists
            ? new DuckDbVectorCollection(store, collectionName)
            : throw new InvalidOperationException("Collection not found");
    }

    /// <summary>
    /// Returns a handle to the named collection, creating it first when the store does not have it.
    /// </summary>
    /// <param name="collectionName">Name of the collection to open or create.</param>
    /// <param name="dimensions">Forwarded to <see cref="CreateCollectionAsync"/>, which does not use it.</param>
    /// <param name="cancellationToken">Token forwarded to the store.</param>
    /// <returns>A <see cref="DuckDbVectorCollection"/> bound to the named collection.</returns>
    public async Task<IVectorCollection> GetOrCreateCollectionAsync(string collectionName, int dimensions, CancellationToken cancellationToken = default)
    {
        var exists = await IsCollectionExistsAsync(collectionName, cancellationToken).ConfigureAwait(false);
        if (!exists)
        {
            await CreateCollectionAsync(collectionName, dimensions, cancellationToken).ConfigureAwait(false);
        }

        return new DuckDbVectorCollection(store, collectionName);
    }

    /// <summary>
    /// Asks the store whether a collection with the given name exists.
    /// </summary>
    /// <param name="collectionName">Name of the collection to look for.</param>
    /// <param name="cancellationToken">Token forwarded to the store.</param>
    /// <returns>The store's answer.</returns>
    public async Task<bool> IsCollectionExistsAsync(string collectionName, CancellationToken cancellationToken = default)
    {
        return await store.DoesCollectionExistAsync(collectionName, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Lists the names of all collections reported by the store.
    /// </summary>
    /// <param name="cancellationToken">Token forwarded to the store and to the enumeration.</param>
    /// <returns>The collection names, fully materialised.</returns>
    public async Task<IReadOnlyList<string>> ListCollectionsAsync(CancellationToken cancellationToken = default)
    {
        return await store.GetCollectionsAsync(cancellationToken)
            .ToListAsync(cancellationToken).ConfigureAwait(false);
    }
}
}
8 changes: 6 additions & 2 deletions src/Databases/DuckDb/src/LangChain.Databases.DuckDb.csproj
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>net4.6.2;netstandard2.0;net6.0;net8.0</TargetFrameworks>
<TargetFrameworks>netstandard2.0;net8.0</TargetFrameworks>
<NoWarn>$(NoWarn);SKEXP0020;SKEXP0001;CS3001</NoWarn>
</PropertyGroup>

<PropertyGroup Label="NuGet">
Expand All @@ -11,6 +12,9 @@

<ItemGroup>
<PackageReference Include="Microsoft.SemanticKernel.Connectors.DuckDB" />
<PackageReference Include="System.Linq.Async" />
<ProjectReference Include="..\..\..\Utilities\Pollyfils\src\LangChain.Polyfills.csproj" />
<ProjectReference Include="..\..\Abstractions\src\LangChain.Databases.Abstractions.csproj" />
</ItemGroup>

</Project>
9 changes: 9 additions & 0 deletions src/Databases/IntegrationTests/DatabaseTests.Configure.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
using LangChain.Databases.Postgres;
using LangChain.Databases.Sqlite;
using LangChain.Databases.Mongo;
using LangChain.Databases.DuckDb;
using Testcontainers.MongoDb;
using Testcontainers.PostgreSql;
using Microsoft.SemanticKernel.Connectors.DuckDB;

namespace LangChain.Databases.IntegrationTests;

Expand Down Expand Up @@ -121,6 +123,13 @@ private static async Task<DatabaseTestEnvironment> StartEnvironmentForAsync(Supp
Container = container,
};
}
case SupportedDatabase.DuckDb:
var store = await DuckDBMemoryStore.ConnectAsync("duckdb_test.db", cancellationToken);
return new DatabaseTestEnvironment
{
VectorDatabase = new DuckDbVectorDatabase(store)
};

default:
throw new ArgumentOutOfRangeException(nameof(database), database, null);
}
Expand Down
7 changes: 7 additions & 0 deletions src/Databases/IntegrationTests/DatabaseTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ public partial class DatabaseTests
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.DuckDb)]
public async Task CreateAndDeleteCollection_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -58,6 +59,7 @@ await vectorDatabase.Invoking(y => y.GetCollectionAsync(environment.CollectionNa
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.DuckDb)]
public async Task AddDocuments_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -103,6 +105,7 @@ public async Task AddDocuments_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.DuckDb)]
public async Task AddTexts_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -152,6 +155,7 @@ public async Task AddTexts_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.DuckDb)]
public async Task DeleteDocuments_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -189,6 +193,7 @@ public async Task DeleteDocuments_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.OpenSearch)]
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.DuckDb)]
public async Task SimilaritySearch_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -220,6 +225,7 @@ public async Task SimilaritySearch_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.OpenSearch)]
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.DuckDb)]
public async Task SimilaritySearchByVector_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -247,6 +253,7 @@ public async Task SimilaritySearchByVector_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.OpenSearch)]
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.DuckDb)]
public async Task SimilaritySearchWithScores_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<NoWarn>$(NoWarn);NETSDK1206</NoWarn>
<NoWarn>$(NoWarn);NETSDK1206;SKEXP0020</NoWarn>
</PropertyGroup>

<ItemGroup>
Expand Down
1 change: 1 addition & 0 deletions src/Databases/IntegrationTests/SupportedDatabase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ public enum SupportedDatabase
Postgres,
Redis,
Mongo,
DuckDb
}
3 changes: 2 additions & 1 deletion src/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
<PackageVersion Include="Microsoft.SemanticKernel.Connectors.OpenAI" Version="1.10.0" />
<PackageVersion Include="Microsoft.SemanticKernel.Connectors.AzureAISearch" Version="1.15.1-alpha" />
<PackageVersion Include="Microsoft.SemanticKernel.Connectors.Chroma" Version="1.15.1-alpha" />
<PackageVersion Include="Microsoft.SemanticKernel.Connectors.DuckDB" Version="1.15.1-alpha" />
<PackageVersion Include="Microsoft.SemanticKernel.Connectors.DuckDB" Version="1.16.1-alpha" />
<PackageVersion Include="Microsoft.SemanticKernel.Connectors.Pinecone" Version="1.15.1-alpha" />
<PackageVersion Include="Microsoft.SemanticKernel.Connectors.Postgres" Version="1.10.0-alpha" />
<PackageVersion Include="Microsoft.SemanticKernel.Connectors.Qdrant" Version="1.15.1-alpha" />
Expand All @@ -73,6 +73,7 @@
<PackageVersion Include="System.CommandLine" Version="2.0.0-beta4.22272.1" />
<PackageVersion Include="System.CommandLine.NamingConventionBinder" Version="2.0.0-beta4.22272.1" />
<PackageVersion Include="System.ComponentModel.Annotations" Version="5.0.0" />
<PackageVersion Include="System.Linq.Async" Version="6.0.1" />
<PackageVersion Include="System.Memory.Data" Version="8.0.0" />
<PackageVersion Include="System.Net.Http" Version="4.3.4" />
<PackageVersion Include="System.Net.Http.Json" Version="8.0.0" />
Expand Down

0 comments on commit 384a807

Please sign in to comment.