LanceDB

LanceDB is an embedded vector database for AI applications. It is open source and distributed with an Apache-2.0 license.

LanceDB datasets are persisted to disk and can be shared between Python processes.
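Because the data lives as plain files on disk, a dataset directory can be reopened later or from another process. A minimal sketch of what that looks like with the Python lancedb client (the /tmp/lancedb path is only an example):

import lancedb

# Connect to (or create) a LanceDB dataset stored in a local directory.
db = lancedb.connect("/tmp/lancedb")

# Reopening the same directory later, even from a different process,
# gives access to the same tables because everything is persisted on disk.
db_again = lancedb.connect("/tmp/lancedb")
print(db_again.table_names())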

Setup

pip install lancedb langchain openai
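The examples below use OpenAIEmbeddings, which reads its credentials from the OPENAI_API_KEY environment variable. One way to set it from Python (the key value is a placeholder):

import os

# OpenAIEmbeddings picks up the key from this environment variable.
os.environ["OPENAI_API_KEY"] = "sk-..."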

Usage

Create a new index from texts

import tempfile

import lancedb
from langchain.vectorstores import LanceDB
from langchain.embeddings.openai import OpenAIEmbeddings


def run():
    # Create a temporary directory to hold the LanceDB dataset.
    db_dir = tempfile.mkdtemp(prefix="lancedb-")
    db = lancedb.connect(db_dir)

    # Seed the table so it has a schema: a 1536-dim vector, the text, and an id.
    table = db.create_table(
        "vectors", [{"vector": [0] * 1536, "text": "sample", "id": 1}]
    )

    vector_store = LanceDB.from_texts(
        ["Hello world", "Bye bye", "hello nice world"],
        OpenAIEmbeddings(),
        metadatas=[{"id": 2}, {"id": 1}, {"id": 3}],
        connection=table,
    )

    result_one = vector_store.similarity_search("hello world", 1)
    print(result_one)
    # [ Document(page_content='hello nice world', metadata={'id': 3}) ]


# Run the function
run()

API Reference:

  • LanceDB from langchain.vectorstores

  • OpenAIEmbeddings from langchain.embeddings.openai
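More texts can also be appended after the store has been created. A minimal sketch, assuming the vector_store built inside run() above:

# Append another text (and its metadata) to the same LanceDB table.
vector_store.add_texts(
    ["Another document"],
    metadatas=[{"id": 4}],
)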

Create a new index from a loader

import tempfile

import lancedb
from langchain.vectorstores import LanceDB
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import TextLoader

# Create docs with a loader
loader = TextLoader("src/document_loaders/example_data/example.txt")
docs = loader.load()


def run():
    # Create a temporary directory to hold the LanceDB dataset.
    db_dir = tempfile.mkdtemp(prefix="lancedb-")
    db = lancedb.connect(db_dir)

    # Seed the table so it has a schema: a 1536-dim vector, the text, and a source.
    table = db.create_table(
        "vectors", [{"vector": [0] * 1536, "text": "sample", "source": "a"}]
    )

    vector_store = LanceDB.from_documents(docs, OpenAIEmbeddings(), connection=table)

    result_one = vector_store.similarity_search("hello world", 1)
    print(result_one)
    # [
    #   Document(page_content='Foo\nBar\nBaz\n\n', metadata={'source': 'src/document_loaders/example_data/example.txt'})
    # ]


# Run the function
run()

API Reference:

  • LanceDB from langchain.vectorstores

  • OpenAIEmbeddings from langchain.embeddings.openai

  • TextLoader from langchain.document_loaders
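Loaded documents are often long and are usually split into smaller chunks before being embedded. A minimal sketch using langchain's CharacterTextSplitter (the chunk sizes are illustrative, not part of the original example):

from langchain.text_splitter import CharacterTextSplitter

# Split the loaded documents into ~1000-character chunks before indexing.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
split_docs = splitter.split_documents(docs)

# split_docs can then be passed to LanceDB.from_documents in place of docs.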

Open an existing dataset

import tempfile

import lancedb
from langchain.vectorstores import LanceDB
from langchain.embeddings.openai import OpenAIEmbeddings


def run():
    # Open a dataset that was created earlier and query it.
    uri = create_test_db()
    db = lancedb.connect(uri)
    table = db.open_table("vectors")

    vector_store = LanceDB(connection=table, embedding=OpenAIEmbeddings())

    result_one = vector_store.similarity_search("hello world", 1)
    print(result_one)
    # [ Document(page_content='Hello world', metadata={'id': 1}) ]


def create_test_db():
    # Build a small on-disk dataset that run() opens afterwards.
    db_dir = tempfile.mkdtemp(prefix="lancedb-")
    db = lancedb.connect(db_dir)
    db.create_table(
        "vectors",
        [
            {"vector": [0] * 1536, "text": "Hello world", "id": 1},
            {"vector": [0] * 1536, "text": "Bye bye", "id": 2},
            {"vector": [0] * 1536, "text": "hello nice world", "id": 3},
        ],
    )
    return db_dir


# Run the function
run()

API Reference:

  • LanceDB from langchain.vectorstores

  • OpenAIEmbeddings from langchain.embeddings.openai
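Once opened, the store can also be exposed as a retriever for use in chains. A minimal sketch, assuming the vector_store from the example above:

# Wrap the vector store as a retriever that returns the single best match.
retriever = vector_store.as_retriever(search_kwargs={"k": 1})
docs = retriever.get_relevant_documents("hello world")
print(docs)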