from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

MODEL_NAME = "NousResearch/Hermes-2-Pro-Mistral-7B"  # ✅ Uncensored & efficient

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    # fp16 on GPU to halve memory; fall back to fp32 on CPU
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)
model.eval()

def generate_code(prompt: str, max_tokens: int = 256) -> str:
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,  # passes input_ids and attention_mask together
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning
        )
    return tokenizer.decode(output[0], skip_special_tokens=True)
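
# A minimal usage sketch, assuming the model above loaded successfully.
# The prompt text here is a hypothetical example; since sampling is enabled
# (do_sample=True), the generated output will differ between runs.
if __name__ == "__main__":
    demo_prompt = "Write a Python function that checks whether a string is a palindrome."
    print(generate_code(demo_prompt, max_tokens=128))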