Vector database sharding
Multimodal search
JaguarDB quantization
JaguarDB Vector API
Best Vector databases
JaguarDB in Docker
Setup JaguarDB with tar package
Setup JaguarDB on multiple nodes
Vector index sharing
How zeromove works
Video introduction
|
|
Example: RAG with local LLM
This example demonstrates how to deploy a smaller LLM locally and develop RAG applications with it. This approach provides a cost-effective and practical RAG system that is data-centric but still requires solid inference. For more details, see https://github.com/fserv/jaguar-sdk and its DeepSeek directory.
The following Python example shows how to load (add) text into a vector store and answer questions with the local LLM. You will need to run "pip install -U langchain" and install the other necessary packages.
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores.jaguar import Jaguar
from langchain_huggingface import HuggingFaceEmbeddings
import re, sys, requests
### Find relevant text from vector store and send to LLM
def ask(vectorstore, ollamaurl, context, prompt):
    """Answer ``prompt`` with the local LLM, augmented by vector-store hits.

    The three chunks most similar to ``prompt`` are fetched from
    ``vectorstore`` and joined with spaces; that text, then ``context``,
    then ``prompt`` form the prompt posted to ``ollamaurl``. On HTTP 200 the
    model's reply is printed with its ``<think>...</think>`` reasoning
    block removed; otherwise the status code and response text are printed.

    Args:
        vectorstore: Object exposing ``similarity_search(query=..., k=...)``
            that returns documents with a ``page_content`` attribute.
        ollamaurl: URL the JSON payload is POSTed to.
        context: Instruction text placed between the retrieved chunks and
            the question.
        prompt: The question to answer.

    Returns:
        None. All output is written to stdout.
    """
    print(f"Question: {prompt}")
    similar_docs = vectorstore.similarity_search(query=prompt, k=3)
    raginfo = " ".join(doc.page_content for doc in similar_docs)

    # Passing "-a" on the command line shows the retrieved chunks.
    if "-a" in sys.argv:
        for i, doc in enumerate(similar_docs, start=1):
            print(f"Augment {i}: {doc.page_content}")

    payload = {
        "model": "deepseek-r1:1.5b",
        "prompt": raginfo + ' ' + context + ' ' + prompt,
        "stream": False,
    }

    # Send the request
    response = requests.post(ollamaurl, json=payload)

    # Print response
    if response.status_code == 200:
        print("DeepSeek-R1 Response:")
        reply = response.json()['response']
        # Remove the model's reasoning block. The pattern must name the
        # <think> tags: a bare ".*?" would match (and delete) everything.
        # DOTALL lets the block span multiple lines.
        reply = re.sub(r"<think>.*?</think>", "", reply, flags=re.DOTALL)
        print(reply)
    else:
        print(f"Error {response.status_code}: {response.text}")

    print("\n")
def main():
    """Load a text file into a Jaguar vector store and query a local LLM.

    Steps: split ``./milk_price.txt`` into chunks, log in to the vector
    store, create the store and add the chunks, then ask two questions
    through ``ask``. Passing ``-d`` on the command line drops the store
    afterwards. The session is always logged out, even if a step fails.
    """
    ######### prepare to input text file and split it into chunks ########
    loader = TextLoader("./milk_price.txt")
    documents = loader.load()
    text_splitter = CharacterTextSplitter(
        separator='', chunk_size=200, chunk_overlap=80
    )
    docs = text_splitter.split_documents(documents)

    ########## login to jaguardb with an API key #####################
    url = 'http://localhost:8080/fwww/'
    jaguar_api_key = 'demouser'
    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-zh")
    pod = "vdb"
    store = "fish_store"
    vector_index = "v"
    vector_type = "cosine_fraction_float"
    vector_dimension = 1024
    vectorstore = Jaguar(
        pod, store, vector_index,
        vector_type, vector_dimension, url, embeddings,
    )
    vectorstore.login(jaguar_api_key)

    try:
        ### create vector on the database. This should be called only once ###
        metadata = "category char(16)"
        text_size = 300
        vectorstore.create(metadata, text_size)

        ### add the chunks (docs) into jaguar vector store ###
        vectorstore.add_documents(docs)

        ### Ollama's local API endpoint
        OLLAMA_URL = "http://localhost:11434/api/generate"

        ### instruction to LLM
        context = (
            "\n"
            "You are a helpful assistant. The previous information is important.\n"
            "Now answer the following question, without hallucination, giving answer to the specific question:\n"
        )

        ### Your prompt/question
        prompt = "进口奶粉的价格是什么"
        ask(vectorstore, OLLAMA_URL, context, prompt)

        ### ask another question
        prompt = "国产奶粉的价格是什么"
        ask(vectorstore, OLLAMA_URL, context, prompt)

        # Passing "-d" on the command line removes the store after the demo.
        if "-d" in sys.argv:
            vectorstore.drop()
    finally:
        # Release the session even when a step above raised.
        vectorstore.logout()
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
The program should output answers to the questions in the script.
|