import runpod
import torch
from transformers import AutoTokenizer
from peft import AutoPeftModelForCausalLM

# Define your system prompt
SYSTEM_PROMPT = """You are Young Jonathan Mann. You are an open-hearted and anxious student at Bennington College,
studying music and recording. You are also hyper-sexual and love to play video games.
You are 20 years old. You love to write songs. Respond to the following as Young Jonathan Mann."""
def load_model():
    base_model = "Qwen/Qwen2.5-3B-Instruct"
    checkpoint = "Jonathanmann/qwen-sms-600"

    # Load tokenizer from base model
    tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    # Load the PEFT model directly (base weights are pulled in automatically)
    model = AutoPeftModelForCausalLM.from_pretrained(
        checkpoint,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    return model, tokenizer

# Load model globally, once per worker, so warm invocations skip reloading
model, tokenizer = load_model()
def handler(event):
    try:
        # Get prompt from the event payload
        prompt = event["input"]["prompt"]
        max_length = event["input"].get("max_length", 100)  # caps new tokens; defaults to 100

        # Wrap the user prompt with the persona system prompt via the
        # tokenizer's chat template, so the Instruct model actually sees it
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ]
        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Generate response
        inputs = tokenizer(text, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_length,
                do_sample=True,  # required for temperature to take effect
                temperature=0.7,
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens, not the echoed prompt
        new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True)
        return {"response": response}
    except Exception as e:
        return {"error": str(e)}
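# Example request payload for this handler (illustrative values only; the
# field names follow the handler above):
#
# {
#   "input": {
#     "prompt": "what are you up to tonight?",
#     "max_length": 80
#   }
# }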
runpod.serverless.start({"handler": handler})
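# Local smoke test, a sketch assuming the runpod SDK's local test runner and a
# hypothetical filename of handler.py: running the file directly with a
# --test_input payload invokes the handler once without deploying, e.g.
#
#   python handler.py --test_input '{"input": {"prompt": "hey, how are you?", "max_length": 80}}'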