import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_name = "ajibawa-2023/Young-Children-Storyteller-Mistral-7B"

# 4-bit NF4 quantization so the 7B model fits in a single consumer GPU's memory
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True
)

def generate_story(prompt, max_length=400, temperature=0.7, top_p=0.9):
    # Wrap the topic in the Instruction/Response prompt template the fine-tune expects
    formatted_prompt = f"### Instruction:\nCreate a story for young children about: {prompt}\n\n### Response:\n"
    inputs = tokenizer.encode(formatted_prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        inputs,
        max_length=max_length,  # total token budget, prompt tokens included
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,  # Mistral has no dedicated pad token
        repetition_penalty=1.1
    )
    # Keep only the text that follows the Response marker
    return tokenizer.decode(outputs[0], skip_special_tokens=True).split("### Response:")[-1].strip()

gr.Interface(fn=generate_story, inputs="text", outputs="text").launch()
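
# Optional: generate_story already accepts max_length, temperature, and top_p,
# but the one-line gr.Interface above only wires up the prompt textbox, so those
# arguments always fall back to their defaults. The commented sketch below is a
# drop-in replacement for the final gr.Interface(...).launch() line that exposes
# them as sliders; the ranges, steps, labels, and title are illustrative
# assumptions, not values taken from the model card.
#
# demo = gr.Interface(
#     fn=generate_story,
#     inputs=[
#         gr.Textbox(label="Story topic", placeholder="a brave little turtle"),
#         gr.Slider(100, 1024, value=400, step=10, label="Max length (tokens)"),
#         gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="Temperature"),
#         gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
#     ],
#     outputs=gr.Textbox(label="Story"),
#     title="Young Children Storyteller",
# )
# demo.launch()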