Update app.py
app.py CHANGED
@@ -1,41 +1,37 @@
 import gradio as gr
-from
-import os
-os.system("pip install -U huggingface_hub")
-os.system("huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct-GGUF qwen2.5-0.5b-instruct-q2_k.gguf --local-dir . --local-dir-use-symlinks False")
-# Model path or name
-# ./llama-cli -m <gguf-file-path> \
-# -co -cnv -p "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." \
-# -fa -ngl 80 -n 512
-MODEL_PATH = "./qwen2.5-0.5b-instruct-q2_k.gguf"
+from llama_cpp import Llama

-# Load the model
-
+# Load the Qwen GGUF model
+MODEL_PATH = "./qwen2.5-0.5b-instruct-q2_k.gguf"  # Ensure the file exists in this path
+model = Llama(model_path=MODEL_PATH)

+# Define the chat function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Prepare the prompt
-    prompt = system_message
+    # Prepare the full prompt
+    prompt = f"{system_message}\n"
     for user_msg, assistant_msg in history:
         prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
     prompt += f"User: {message}\nAssistant:"

-    # Generate response
+    # Generate response using llama-cpp
     response = model(
-        prompt,
-
-        temperature=temperature,
-        top_p=top_p
+        prompt,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p
     )
-    return response

-#
+    # Extract text response
+    return response["choices"][0]["text"].strip()
+
+# Define Gradio chat interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a
-        gr.Slider(minimum=
-        gr.Slider(minimum=0.1, maximum=1.5, value=0.
-        gr.Slider(minimum=0.1, maximum=0
+        gr.Textbox(value="You are a helpful AI assistant.", label="System message"),
+        gr.Slider(minimum=10, maximum=512, value=100, step=10, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.75, step=0.05, label="Top-p (nucleus sampling)"),
     ],
 )
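Two side notes on the new version, offered as suggestions rather than as part of this commit. First, the rewritten app.py hard-codes MODEL_PATH and assumes the GGUF file is already present in the Space, while the removed lines used to download it at startup. A minimal sketch of fetching it with huggingface_hub instead is below; repo_id and filename are copied from the removed huggingface-cli command, while local_dir="." and the rest are assumptions, not part of the commit:

# Hypothetical replacement for the model-loading lines in the new app.py:
# download the GGUF from the Hub at startup instead of assuming it is
# already checked into the Space repository.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

MODEL_PATH = hf_hub_download(
    repo_id="Qwen/Qwen2.5-0.5B-Instruct-GGUF",   # repo used by the removed download command
    filename="qwen2.5-0.5b-instruct-q2_k.gguf",  # same q2_k quantization
    local_dir=".",                               # assumed target directory
)
model = Llama(model_path=MODEL_PATH)

Second, with the os.system("pip install -U huggingface_hub") call removed, llama-cpp-python (and huggingface_hub, if the sketch above is used) would need to be listed in the Space's requirements.txt, and if the hunk shown here covers the whole file, a closing demo.launch() call is still needed for the Gradio app to start.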