# ERNIE-4.5-21B-A3B-Thinking — Gradio chat Space (app.py)
# NOTE(review): the lines below replaced web-scrape residue from the Space's
# file-viewer page (status badges, file size, commit hashes, a line-number
# gutter) that was not part of the source and broke the Python syntax.
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import spaces
import threading
# Hugging Face model id for Baidu's ERNIE 4.5 21B "Thinking" MoE chat model.
model_name = "baidu/ERNIE-4.5-21B-A3B-Thinking"
# Load the tokenizer and the model
# use_fast=False: rely on the slow (Python) tokenizer — presumably the fast
# tokenizer is unavailable/unreliable for this model; TODO confirm.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(
model_name,
device_map="auto",          # let accelerate place layers on available devices
torch_dtype=torch.bfloat16, # half memory footprint vs fp32; bf16 keeps fp32's range
)
@spaces.GPU(duration=120)
def chat(message, history):
    """Stream a model reply for ``message`` given the chat ``history``.

    Generator used as a Gradio event handler: it yields
    ``(updated_history, "")`` tuples so the chatbot updates token-by-token
    while the textbox is cleared.

    Args:
        message: The user's new utterance (plain string from the textbox).
        history: Prior conversation as a list of ``{"role", "content"}``
            dicts (Gradio ``type="messages"`` format).

    Yields:
        Tuples of (history including the in-progress assistant turn, "").
    """
    # Build the message list once; it is both the prompt source and the
    # base of the history we stream back to the UI.
    messages = history + [{"role": "user", "content": message}]
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # add_special_tokens=False: the chat template already inserted them.
    model_inputs = tokenizer(
        [prompt], add_special_tokens=False, return_tensors="pt"
    ).to(model.device)

    # Streamer lets us iterate decoded tokens while generate() runs in a
    # background thread (generate() itself blocks until completion).
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = {
        **model_inputs,
        "streamer": streamer,
        "max_new_tokens": 1024,
        "do_sample": True,
        "temperature": 0.7,
    }
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Show the user's turn plus an empty assistant bubble immediately.
    new_history = messages + [{"role": "assistant", "content": ""}]
    yield new_history, ""

    generated_text = ""
    try:
        for new_token in streamer:
            generated_text += new_token
            new_history[-1]["content"] = generated_text
            yield new_history, ""
    finally:
        # Always reap the generation thread, even if the consumer abandons
        # the generator or the streamer loop raises.
        thread.join()
# UI layout: a message-style chatbot, a textbox that submits on Enter, and a
# clear button. chat() streams (history, "") into [chatbot, msg], so the
# textbox empties as soon as generation starts.
with gr.Blocks(title="ERNIE Chat") as demo:
    gr.Markdown("# ERNIE-4.5-21B-A3B-Thinking Chat App")
    chatbot = gr.Chatbot(
        height=500,
        type="messages",  # history is a list of {"role", "content"} dicts
        show_copy_button=True,
        avatar_images=None,
    )
    msg = gr.Textbox(
        placeholder="Type your message here...",
        show_label=False,
        container=True,
        scale=7,
    )
    with gr.Row():
        clear_btn = gr.Button("Clear", variant="secondary")

    # Enter in the textbox streams a reply; Clear resets both components.
    msg.submit(chat, [msg, chatbot], [chatbot, msg])
    clear_btn.click(lambda: ([], ""), None, [chatbot, msg], queue=False)
# Launch the Gradio server only when run as a script (Spaces runs this file
# directly). Stray " |" scrape artifact removed from the launch line.
if __name__ == "__main__":
    demo.launch()