Spaces:

JimmyK300
/

Qwen2.5-0.5B-instruct

Sleeping

Qwen2.5-0.5B-instruct / app.py

Update app.py

fe0631f verified 10 months ago

1.54 kB

	import gradio as gr
	from llama_cpp import Llama
	import os

	# Startup: make sure the quantized Qwen GGUF weights are on disk, then load them.
	# The repo and file name are defined once so the download target and the
	# path handed to Llama cannot drift apart.
	MODEL_REPO = "Qwen/Qwen2.5-0.5B-Instruct-GGUF"
	MODEL_FILE = "qwen2.5-0.5b-instruct-q2_k.gguf"
	MODEL_PATH = f"./{MODEL_FILE}"  # Ensure the file exists in this path

	# Skip the (slow) install + download when the weights are already present.
	if not os.path.isfile(MODEL_PATH):
	    # Best-effort upgrade; a failure here is tolerated because an already
	    # installed huggingface-cli may still be able to perform the download.
	    os.system("pip install -U huggingface_hub")
	    exit_status = os.system(
	        f"huggingface-cli download {MODEL_REPO} {MODEL_FILE} "
	        "--local-dir . --local-dir-use-symlinks False"
	    )
	    # os.system never raises on command failure, so verify the result
	    # explicitly instead of letting the model load fail later.
	    if not os.path.isfile(MODEL_PATH):
	        raise RuntimeError(
	            f"Could not download {MODEL_FILE} from {MODEL_REPO} "
	            f"(huggingface-cli exit status {exit_status})."
	        )

	# Load the Qwen GGUF model
	model = Llama(model_path=MODEL_PATH)

	# Define the chat function
	def respond(message, history, system_message, max_tokens, temperature, top_p):
	    """Generate the assistant's reply for one chat turn.

	    The conversation is flattened into a plain-text transcript
	    ("User: ... / Assistant: ...") that ends with an open "Assistant:" line,
	    and the module-level llama-cpp ``model`` is asked to complete it.

	    Args:
	        message: The new user message.
	        history: Previous turns, either as ``(user_msg, assistant_msg)``
	            pairs or as dicts with ``"role"`` and ``"content"`` keys.
	        system_message: Text placed on the first line of the prompt.
	        max_tokens: Upper bound on the number of generated tokens.
	        temperature: Accepted for interface compatibility; not forwarded.
	        top_p: Accepted for interface compatibility; not forwarded.

	    Returns:
	        The text of the first completion choice with surrounding
	        whitespace stripped.
	    """
	    role_labels = {"user": "User", "assistant": "Assistant"}

	    # Prepare the full prompt
	    parts = [f"{system_message}\n"]
	    for turn in history:
	        if isinstance(turn, dict):
	            # Role/content style history: one entry per message.
	            label = role_labels.get(turn.get("role"))
	            if label is not None:
	                parts.append(f"{label}: {turn.get('content')}\n")
	        else:
	            # Pair style history: one entry per user/assistant exchange.
	            user_msg, assistant_msg = turn
	            parts.append(f"User: {user_msg}\nAssistant: {assistant_msg}\n")
	    parts.append(f"User: {message}\nAssistant:")
	    prompt = "".join(parts)

	    # Generate response using llama-cpp.
	    # The stop sequence keeps the model from continuing the transcript with
	    # an invented next "User:" turn after it has finished its own answer.
	    # temperature / top_p are intentionally not forwarded: the UI labels
	    # both sliders "no effect".
	    response = model(
	        prompt,
	        max_tokens=max_tokens,
	        stop=["\nUser:"],
	    )

	    # Extract text response
	    return response["choices"][0]["text"].strip()

	# Define Gradio chat interface
	# Chat UI wired to `respond`. The widgets in `additional_inputs` are listed
	# in the same order as the extra parameters of `respond`:
	# system_message, max_tokens, temperature, top_p.
	demo = gr.ChatInterface(
	    fn=respond,
	    additional_inputs=[
	        gr.Textbox(
	            label="System message",
	            value="You are a helpful AI assistant.",
	        ),
	        gr.Slider(
	            label="Max new tokens",
	            value=256,
	            minimum=10,
	            maximum=1024,
	            step=10,
	        ),
	        gr.Slider(
	            label="Temperature no effect",
	            value=0.7,
	            minimum=0.1,
	            maximum=1.5,
	            step=0.1,
	        ),
	        gr.Slider(
	            label="Top-p (nucleus sampling) no effect",
	            value=0.9,
	            minimum=0.1,
	            maximum=1.0,
	            step=0.05,
	        ),
	    ],
	)

	# Start the Gradio app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()