# Ultiima-q2 / app.py — Hugging Face Space "Sakalti/ultiima-78B-Q2-mlx" demo
# (commit 56b6cfd; the lines above were HF file-viewer residue, not code)
import gradio as gr
import mlx.core as mx
import mlx.nn as nn
import spaces
from mlx_lm import load, generate
from mlx_lm.sample_utils import make_sampler
from transformers import AutoTokenizer
# --- Load the model and tokenizer -------------------------------------------
model_name = "Sakalti/ultiima-78B-Q2-mlx"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# mlx_lm.load() returns a (model, tokenizer) tuple; the original bound the
# whole tuple to `model`, which would break generate(model, ...). Unpack it
# and keep the HF tokenizer loaded above for chat templating.
model, _ = load(model_name)
# NOTE(review): ZeroGPU provides CUDA hardware while MLX targets Apple
# silicon — confirm this Space's hardware actually matches the runtime.
@spaces.GPU(duration=100)  # the ZeroGPU decorator is `spaces.GPU`, not `spaces.gpu`
def chat(message, history, system_prompt, top_p, top_k, max_new_tokens):
    """Generate one assistant reply for the Gradio ChatInterface.

    gr.ChatInterface calls ``fn(message, history, *additional_inputs)``, so the
    parameters here must follow the UI wiring: the additional inputs are
    (system prompt, top-p, top-k, max new tokens). The original signature
    omitted ``history`` and declared the extras in a different order, so every
    slider value landed in the wrong parameter.

    Args:
        message: Latest user message.
        history: Prior chat turns supplied by Gradio (unused — each reply is
            built from the system prompt plus the current message only).
        system_prompt: System-role text prepended to the conversation.
        top_p: Nucleus-sampling probability mass.
        top_k: Top-k sampling cutoff.
        max_new_tokens: Maximum number of tokens to generate.

    Returns:
        The generated assistant text.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    prompt_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # mlx_lm.generate() takes the tokenizer and a prompt string directly; the
    # original tokenized with return_tensors="pt" (torch tensors), which MLX
    # cannot consume. Sampling knobs are passed via a sampler object.
    # temp=1.0 so the top-p/top-k controls actually take effect (temp=0.0,
    # make_sampler's default, would be greedy decoding).
    sampler = make_sampler(temp=1.0, top_p=float(top_p), top_k=int(top_k))
    return generate(
        model,
        tokenizer,
        prompt=prompt_text,
        max_tokens=int(max_new_tokens),
        sampler=sampler,
    )
# Gradio UI: chat interface plus sampling controls, passed to the handler as
# additional inputs in the order (system prompt, top-p, top-k, max tokens).
demo = gr.ChatInterface(
    fn=chat,
    additional_inputs=[
        gr.Textbox(value="あなたはフレンドリーなチャットボットです。", label="System Prompt"),
        gr.Slider(0.0, 1.0, value=0.9, label="Top-p"),
        gr.Slider(1, 100, value=50, label="Top-k"),
        gr.Slider(1, 1024, value=512, step=1, label="Max New Tokens"),
    ],
)
demo.launch()