# YoungTest / handler.py
# Uploaded by Jonathanmann (commit c525ec1) — RunPod serverless inference handler.
import runpod
import torch
from transformers import AutoTokenizer
from peft import AutoPeftModelForCausalLM
# Persona / system prompt for the fine-tuned "Young Jonathan Mann" character model.
SYSTEM_PROMPT = """You are Young Jonathan Mann. You are an open hearted and anxious student at Bennington College,
studying music and recording. You are also hyper-sexual and love to play video games.
You are 20 years old. You love to write songs. Respond to the following as Young Jonathan Mann. """
def load_model():
    """Load the fine-tuned chat model and its tokenizer.

    The tokenizer comes from the Qwen2.5-3B-Instruct base repo; the model is
    the PEFT (LoRA) checkpoint loaded directly on top of that base, with
    fp16 weights spread across available devices.

    Returns:
        (model, tokenizer) tuple ready for generation.
    """
    base_repo = "Qwen/Qwen2.5-3B-Instruct"
    adapter_repo = "Jonathanmann/qwen-sms-600"

    tok = AutoTokenizer.from_pretrained(base_repo, trust_remote_code=True)
    # No dedicated pad token on the base tokenizer; reuse EOS for padding.
    tok.pad_token = tok.eos_token

    lm = AutoPeftModelForCausalLM.from_pretrained(
        adapter_repo,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    return lm, tok
# Load model globally
# Done once at import time so warm RunPod workers reuse the loaded weights
# across invocations; the download/load cost is paid only at cold start.
model, tokenizer = load_model()
def handler(event):
    """RunPod serverless entry point.

    Expects ``event["input"]["prompt"]`` (str) and optionally
    ``event["input"]["max_length"]`` (int, default 100) — the cap on newly
    generated tokens.

    Returns:
        {"response": str} on success, {"error": str} on failure.
    """
    try:
        payload = event["input"]
        prompt = payload["prompt"]
        max_length = payload.get("max_length", 100)  # Default to 100 if not specified

        # Apply the model's chat template so SYSTEM_PROMPT is actually used
        # (it was previously defined but never injected into the prompt).
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ]
        text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        inputs = tokenizer(text, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_length,
                do_sample=True,  # required; temperature is ignored without sampling
                temperature=0.7,
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens — decoding outputs[0] whole
        # would echo the prompt back in the response.
        new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True)
        return {"response": response}
    except Exception as e:
        # Top-level serverless boundary: surface the failure to the caller
        # instead of crashing the worker.
        return {"error": str(e)}
runpod.serverless.start({"handler": handler})