Advertisement
Guest User

Untitled

a guest
May 31st, 2024
187
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.15 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. # pip install -U langchain langchain-community fastembed chromadb langchain-groq
  3.  
  4.  
  5. from langchain.text_splitter import RecursiveCharacterTextSplitter
  6. from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
  7. from langchain_community.vectorstores import Chroma
  8. from langchain.prompts import PromptTemplate
  9. from langchain.chains import RetrievalQA
  10. from langchain_groq import ChatGroq
  11. from langchain_community.document_loaders import TextLoader
  12.  
  13.  
  14. import cfg
  15.  
  16.  
  17. # Create vector database
  def create_vector_database(source_path='2.txt',
                             persist_directory="chroma_db_llamaparse1",
                             collection_name="rag"):
      """Open the persisted Chroma collection, populating it on first use.

      The store is always opened from ``persist_directory``. The source text
      file is read, chunked and embedded only when the collection holds no
      entries yet, so repeated runs do not redo (and then discard) that work.

      Args:
          source_path: UTF-8 text file to ingest when the collection is empty.
          persist_directory: Directory passed to Chroma as its on-disk location.
          collection_name: Name of the Chroma collection to open.

      Returns:
          A ``(vector_store, embed_model)`` tuple: the Chroma store and the
          FastEmbed embedding model it was opened with.
      """
      # Initialize Embeddings
      embed_model = FastEmbedEmbeddings(model_name="BAAI/bge-base-en-v1.5")

      # Open (or implicitly create) the collection on disk.
      vs = Chroma(persist_directory=persist_directory,
                  embedding_function=embed_model,
                  collection_name=collection_name)

      # Fetching a single id is enough to tell whether anything is stored.
      if vs.get(limit=1)["ids"]:
          print('Vector DB loaded successfully !')
          return vs, embed_model

      # Empty collection: ingest the source document.
      loader = TextLoader(source_path, encoding='utf-8')
      documents = loader.load()

      text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
      docs = text_splitter.split_documents(documents)

      print(f"length of documents loaded: {len(documents)}")
      print(f"total number of document chunks generated :{len(docs)}")

      if docs:
          vs.add_documents(docs)
          print('Vector DB created successfully !')
      else:
          # Nothing to embed (e.g. an empty source file); say so instead of
          # claiming success on a store that will return no results.
          print(f"No text chunks produced from {source_path!r}; vector DB is empty")

      return vs, embed_model
  50.  
  51.  
  def set_custom_prompt():
      """Build the question-answering prompt used by the retrieval chain.

      Returns:
          A ``PromptTemplate`` with the two placeholders ``context`` and
          ``question``.
      """
      # Each literal below is one line of the template, newline included.
      template_lines = (
          "Use the following pieces of information to answer the user's question.\n"
          "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
          "\n"
          "Context: {context}\n"
          "Question: {question}\n"
          "\n"
          "Only return the helpful answer below and nothing else.\n"
          "Only answer in russian language.\n"
          "Helpful answer:\n"
      )

      return PromptTemplate(
          template=template_lines,
          input_variables=['context', 'question'],
      )
  71.  
  72.  
  if __name__ == '__main__':
      # cfg.GROQ_API_KEY is indexed here, so it is expected to be a sequence
      # of keys; the first entry is used.
      groq_api_key = cfg.GROQ_API_KEY[0]

      # The returned store is already bound to the persisted collection, so it
      # is used directly rather than constructing an identical second one.
      vs, _ = create_vector_database()

      chat_model = ChatGroq(temperature=0,
                            model_name="mixtral-8x7b-32768",
                            api_key=groq_api_key)

      # Retrieve the 3 best-matching chunks for each question.
      retriever = vs.as_retriever(search_kwargs={'k': 3})

      prompt = set_custom_prompt()

      qa = RetrievalQA.from_chain_type(llm=chat_model,
                                       chain_type="stuff",
                                       retriever=retriever,
                                       return_source_documents=True,
                                       chain_type_kwargs={"prompt": prompt})

      # Interactive loop: one question per line until EOF (Ctrl-D) or Ctrl-C.
      while True:
          try:
              q = input('> ')
          except (EOFError, KeyboardInterrupt):
              # Finish the prompt line and leave without a traceback.
              print()
              break

          # Do not spend an LLM call on an empty question.
          if not q.strip():
              continue

          response = qa.invoke({"query": q})
          print(response['result'])
  100.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement