- The /tmp folder of the rk3588 seems to be too small - models need to be downloaded to a local folder to avoid insufficient space in the /tmp folder.
- Create a "models" folder and run inside it:
- git clone https://huggingface.co/BAAI/bge-m3
- git clone https://huggingface.co/BAAI/bge-reranker-large
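- Optional sanity check - a minimal sketch (the filename and the 1024-dim expectation are my assumptions) to confirm the embedding model loads from the local clone rather than a cache in /tmp:
- [check_models.py]
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Load bge-m3 from the local clone, not from the Hugging Face cache
embed_model = HuggingFaceEmbedding(model_name="./models/bge-m3")

vec = embed_model.get_text_embedding("smoke test")
print(len(vec))  # bge-m3 dense embeddings should be 1024-dimensional
- [/check_models.py]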
- Then run the vector.py file first to create the index (some pip packages will be missing - likely llama-index, llama-index-embeddings-huggingface and llama-index-postprocessor-flag-embedding-reranker; ChatGPT will help). The important setting is "Settings.chunk_size = 512", which limits the size of each chunk. If we have to work with the 4096 limit, 7 chunks of 512 (7 x 512 = 3584 tokens) fit within it, leaving roughly 512 tokens for the question and prompt template. A reranker might be necessary.
- [vector.py]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

Settings.chunk_size = 512

# My local documents
documents = SimpleDirectoryReader("data", recursive=True).load_data()

# Embeddings model
Settings.embed_model = HuggingFaceEmbedding(model_name="./models/bge-m3")

# Create index and persist it to disk
index = VectorStoreIndex.from_documents(documents)
index.storage_context.persist(persist_dir="./vector")
- [/vector.py]
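- After the script finishes, the ./vector folder should contain the persisted index stores. A quick retrieval test - a minimal sketch (the filename and the "test question" are my placeholders; the API calls match the ones used in rkllm-gradio.py below):
- [check_index.py]
from llama_index.core import Settings, StorageContext, load_index_from_storage
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Same local embedding model that built the index; no LLM is needed for retrieval
Settings.embed_model = HuggingFaceEmbedding(model_name="./models/bge-m3")
Settings.llm = None

# Reload the persisted index and fetch the top 3 chunks for a test question
storage_context = StorageContext.from_defaults(persist_dir="./vector")
index = load_index_from_storage(storage_context)
for n in index.as_retriever(similarity_top_k=3).retrieve("test question"):
    print(n.score, n.node.get_content()[:80])
- [/check_index.py]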
- Then modify rkllm-gradio to use llama-index and the reranker. Two settings are important. First, the limit of candidate chunks chosen by bge-m3 for the context. This stage is not limited by the 4096 size, so it might be larger, for example 20 chunks of 512 size:
- query_engine = index.as_query_engine(similarity_top_k=20, node_postprocessors=[rerank])
- Second, the limit of chunks the reranker keeps and provides to the Rockchip NPU. These have to be 7 (with a chunk size of 512, 7 x 512 = 3584 tokens) to meet the 4096 limit:
- rerank = FlagEmbeddingReranker(model="./models/bge-reranker-large", top_n=7)
- [rkllm-gradio.py]
import os
import sys
import resource
import gradio as gr
from ctypes_bindings import *
from model_class import *
from mesh_utils import *
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, StorageContext, load_index_from_storage
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.postprocessor.flag_embedding_reranker import (
    FlagEmbeddingReranker,
)
# The reranker keeps the 7 best chunks (7 x 512 = 3584 tokens, within the 4096 limit)
rerank = FlagEmbeddingReranker(model="./models/bge-reranker-large", top_n=7)
# Set environment variables
os.environ["OPENAI_API_KEY"] = "fake"
os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
os.environ["GRADIO_SERVER_PORT"] = "8080"
os.environ["RKLLM_LOG_LEVEL"] = "1"

# Local embedding model; llama-index itself uses no LLM (the NPU handles generation)
Settings.embed_model = HuggingFaceEmbedding(model_name="./models/bge-m3")
Settings.llm = None

# Set resource limit
resource.setrlimit(resource.RLIMIT_NOFILE, (102400, 102400))

history = []
if __name__ == "__main__":
    # Helper function to define initializing model before class is declared
    # Without this, you would need to initialize the class before you select the model
    def initialize_model(model):
        global rkllm_model
        # Have to unload previous model in single-threaded mode
        try:
            rkllm_model.release()
        except NameError:
            print("No model loaded! Continuing with initialization...")
        # Initialize RKLLM model
        init_msg = "=========INITIALIZING==========="
        print(init_msg)
        sys.stdout.flush()
        rkllm_model = RKLLMLoaderClass(model=model)
        model_init = f"RKLLM Model, {rkllm_model.model_name} has been initialized successfully!"
        print(model_init)
        complete_init = "=============================="
        print(complete_init)
        output = [[f"<h4 style=\"text-align:center;\">{model_init}\n</h4>", None]]
        sys.stdout.flush()
        return output

    # Helper function to stream LLM output into the chat box
    def get_RKLLM_output(message, history):
        # Ensure `message` is a dictionary with a `text` key
        if isinstance(message, str):
            message = {"text": message}
        elif not isinstance(message, dict) or "text" not in message:
            raise ValueError("Invalid message format. Expected a dictionary with a 'text' key.")
        # Create storage context from persisted data
        storage_context = StorageContext.from_defaults(persist_dir="./vector")
        # Load index from storage context
        index = load_index_from_storage(storage_context)
        # Query the index: retrieve 20 candidates, let the reranker keep the best 7
        query_engine = index.as_query_engine(similarity_top_k=20, node_postprocessors=[rerank])
        context = str(query_engine.query(message["text"]))
        print(message)
        print(context)
        # Prepend the retrieved context to the user's question
        message_with_context = f"{context}\n{message['text']}"
        try:
            yield from rkllm_model.get_RKLLM_output(message_with_context, history)
        except RuntimeError as e:
            print(f"ERROR: {e}")
        return history
    # Create a Gradio interface
    with gr.Blocks(title="Chat with RKLLM") as chatRKLLM:
        model_list = available_models()
        gr.Markdown("<div align='center'><font size='10'> Rockchip NPU </font></div>")
        with gr.Tabs():
            with gr.TabItem("Select Model"):
                model_dropdown = gr.Dropdown(choices=model_list, label="Select Model", value="None", allow_custom_value=True)
                statusBox = gr.Chatbot(height=100)
                model_dropdown.input(initialize_model, [model_dropdown], [statusBox])
            with gr.TabItem("Txt2Txt"):
                txt2txt = gr.ChatInterface(fn=get_RKLLM_output, type="messages")
            with gr.TabItem("Txt2Mesh"):
                with gr.Row():
                    with gr.Column(scale=2):
                        txt2mesh = gr.ChatInterface(fn=get_RKLLM_output, type="messages")
                    with gr.Column(scale=2):
                        # Add the text box for 3D mesh input and button
                        mesh_input = gr.Textbox(
                            label="3D Mesh Input",
                            placeholder="Paste your 3D mesh in OBJ format here...",
                            lines=5,
                        )
                        visualize_button = gr.Button("Visualize 3D Mesh")
                        output_model = gr.Model3D(
                            label="3D Mesh Visualization",
                            interactive=False,
                        )
                        # Link the button to the visualization function
                        visualize_button.click(
                            fn=apply_gradient_color,
                            inputs=[mesh_input],
                            outputs=[output_model]
                        )

    print("\nNo model loaded yet!\n")
    # Enable the event queue system.
    chatRKLLM.queue()
    # Start the Gradio application.
    chatRKLLM.launch()

    print("====================")
    print("RKLLM model inference completed, releasing RKLLM model resources...")
    rkllm_model.release()
    print("====================")
- [/rkllm-gradio.py]
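- Run it with "python rkllm-gradio.py". With GRADIO_SERVER_NAME=0.0.0.0 and GRADIO_SERVER_PORT=8080 set above, the Gradio UI should be reachable from the LAN at http://<board-ip>:8080.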