Start LanceDB Server¶
LanceDB is an embedded vector database for AI applications. It is open source and distributed with an Apache-2.0 license.
LanceDB datasets are persisted to disk and can be shared between Node.js and Python.
Setup¶
npm install -S vectordb
Usage¶
Create a new index from texts¶
import os
import tempfile
from langchain.vectorstores import LanceDB
from langchain.embeddings.openai import OpenAIEmbeddings
from vectordb import connect
async def run():
    """Index three texts in a fresh LanceDB table and print the closest match.

    Creates a new temporary directory, connects to it, creates a ``vectors``
    table from a single sample row, builds a ``LanceDB`` vector store from
    three texts with per-text ``id`` metadata, then prints the top-1
    similarity-search result for ``"hello world"``.

    The temporary directory is not removed by this function.
    """
    # Named ``db_dir`` (not ``dir``) so the builtin ``dir()`` is not shadowed.
    db_dir = tempfile.mkdtemp(prefix="lancedb-")
    db = await connect(db_dir)
    # Single sample row used to create the table.
    # NOTE(review): 1536 presumably matches the OpenAI embedding width -- confirm.
    table = await db.create_table(
        "vectors",
        [{"vector": [0] * 1536, "text": "sample", "id": 1}],
    )
    # NOTE(review): arguments are passed as (texts, metadatas, embeddings);
    # LangChain's Python ``VectorStore.from_texts`` documents
    # (texts, embedding, metadatas) -- confirm against the installed version.
    vector_store = await LanceDB.from_texts(
        ["Hello world", "Bye bye", "hello nice world"],
        [{"id": 2}, {"id": 1}, {"id": 3}],
        OpenAIEmbeddings(),
        table=table,
    )
    result_one = await vector_store.similarity_search("hello world", 1)
    print(result_one)
    # [ Document(page_content='hello nice world', metadata={'id': 3}) ]


# Run the function
import asyncio

asyncio.run(run())
API Reference:
LanceDB
from @langchain/community/vectorstores/lancedb
OpenAIEmbeddings
from @langchain/openai
Create a new index from a loader¶
import os
import tempfile
from langchain.vectorstores import LanceDB
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders.fs import TextLoader
from vectordb import connect
# Create docs with a loader
loader = TextLoader("src/document_loaders/example_data/example.txt")
docs = loader.load()


async def run():
    """Index the loaded documents in a fresh LanceDB table and query it.

    Creates a new temporary directory, connects to it, creates a ``vectors``
    table from a single sample row carrying a ``source`` field, builds a
    ``LanceDB`` vector store from the module-level ``docs``, then prints the
    top-1 similarity-search result for ``"hello world"``.

    The temporary directory is not removed by this function.
    """
    # Named ``db_dir`` (not ``dir``) so the builtin ``dir()`` is not shadowed.
    db_dir = tempfile.mkdtemp(prefix="lancedb-")
    db = await connect(db_dir)
    # Single sample row used to create the table.
    # NOTE(review): 1536 presumably matches the OpenAI embedding width -- confirm.
    table = await db.create_table(
        "vectors",
        [{"vector": [0] * 1536, "text": "sample", "source": "a"}],
    )
    vector_store = await LanceDB.from_documents(docs, OpenAIEmbeddings(), table=table)
    result_one = await vector_store.similarity_search("hello world", 1)
    print(result_one)
    # [
    # Document(page_content='Foo\nBar\nBaz\n\n', metadata={'source': 'src/document_loaders/example_data/example.txt'})
    # ]


# Run the function
import asyncio

asyncio.run(run())
API Reference:
LanceDB
from @langchain/community/vectorstores/lancedb
OpenAIEmbeddings
from @langchain/openai
TextLoader
from langchain/document_loaders/fs/text
Open an existing dataset¶
import os
import tempfile
from langchain.vectorstores import LanceDB
from langchain.embeddings.openai import OpenAIEmbeddings
from vectordb import connect
async def run():
    """Open the existing ``vectors`` table and print the closest match.

    Obtains a dataset location from ``create_test_db()``, connects to it,
    opens the ``vectors`` table, wraps it in a ``LanceDB`` vector store and
    prints the top-1 similarity-search result for ``"hello world"``.
    """
    dataset_uri = await create_test_db()
    connection = await connect(dataset_uri)
    vectors_table = await connection.open_table("vectors")
    store = LanceDB(OpenAIEmbeddings(), table=vectors_table)
    matches = await store.similarity_search("hello world", 1)
    print(matches)
    # [ Document(page_content='Hello world', metadata={'id': 1}) ]
async def create_test_db():
    """Create a LanceDB dataset with three rows and return its directory.

    Makes a new temporary directory, connects to it and creates a ``vectors``
    table holding three rows, each with an all-zero 1536-element ``vector``,
    a ``text`` and an integer ``id``.

    Returns:
        The path of the temporary directory that holds the dataset. The
        directory is not removed by this function.
    """
    # Named ``db_dir`` (not ``dir``) so the builtin ``dir()`` is not shadowed.
    db_dir = tempfile.mkdtemp(prefix="lancedb-")
    db = await connect(db_dir)
    rows = [
        {"vector": [0] * 1536, "text": "Hello world", "id": 1},
        {"vector": [0] * 1536, "text": "Bye bye", "id": 2},
        {"vector": [0] * 1536, "text": "hello nice world", "id": 3},
    ]
    await db.create_table("vectors", rows)
    return db_dir


# Run the function
import asyncio

asyncio.run(run())
API Reference:
LanceDB
from @langchain/community/vectorstores/lancedb
OpenAIEmbeddings
from @langchain/openai