Spaces:

TeamTonic
/

TonicsYI-6B-200k

Paused

App Files Files Community

Tonic committed on Nov 24, 2023

Commit

ea82e95

1 Parent(s): ca5433e

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -4

app.py CHANGED Viewed

@@ -17,7 +17,8 @@ model = AutoModelForCausalLM.from_pretrained("larryvrh/Yi-34B-200K-Llamafied", d
 # tokenizer = YiTokenizer.from_pretrained("./")
 # model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto", trust_remote_code=True, quantization_config=gptq_config)
-def predict(message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800):
     prompt = message.strip()
     input_ids = tokenizer.encode(prompt, return_tensors='pt')
     input_ids = input_ids.to(model.device)
@@ -28,7 +29,7 @@ def predict(message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800)
         top_p=top_p,
         top_k=top_k,
         pad_token_id=tokenizer.eos_token_id,
-        do_sample=True
     )
     response = tokenizer.decode(response_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
     return [("bot", response)]
@@ -47,14 +48,15 @@ with gr.Blocks(theme='ParityError/Anime') as demo:
         chatbot = gr.Chatbot(label='TonicYi-30B-200K')
     with gr.Accordion(label='Advanced options', open=False):
-        max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=3800)
         temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=1.2)
         top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
         top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=900)
     submit_button.click(
         fn=predict,
-        inputs=[textbox, max_new_tokens, temperature, top_p, top_k],
         outputs=chatbot
     )

 # tokenizer = YiTokenizer.from_pretrained("./")
 # model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto", trust_remote_code=True, quantization_config=gptq_config)
+def predict(message, max_new_tokens=4056, temperature=3.5, top_p=0.9, top_k=800, do_sample=False):
     prompt = message.strip()
     input_ids = tokenizer.encode(prompt, return_tensors='pt')
     input_ids = input_ids.to(model.device)
         top_p=top_p,
         top_k=top_k,
         pad_token_id=tokenizer.eos_token_id,
+        do_sample=do_sample
     )
     response = tokenizer.decode(response_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
     return [("bot", response)]
         chatbot = gr.Chatbot(label='TonicYi-30B-200K')
     with gr.Accordion(label='Advanced options', open=False):
+        max_new_tokens = gr.Slider(label='Max New Tokens', minimum=1, maximum=55000, step=1, value=8000)
         temperature = gr.Slider(label='Temperature', minimum=0.1, maximum=4.0, step=0.1, value=1.2)
         top_p = gr.Slider(label='Top-P (nucleus sampling)', minimum=0.05, maximum=1.0, step=0.05, value=0.9)
         top_k = gr.Slider(label='Top-K', minimum=1, maximum=1000, step=1, value=900)
+        do_sample_checkbox = gr.Checkbox(label='Do Sample', value=True, tooltip="Disable for faster inference")
     submit_button.click(
         fn=predict,
+        inputs=[textbox, max_new_tokens, temperature, top_p, top_k, do_sample_checkbox],
         outputs=chatbot
     )