"""Gradio demo that extracts calendar-event fields as JSON from free text.

The app loads the SmolLM-360M base model, applies LoRA adapters on top of it,
and exposes a single text-in / text-out interface.
"""

import functools
import json

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE_MODEL_ID = "HuggingFaceTB/SmolLM-360M"
ADAPTER_ID = "waliaMuskaan011/calendar-event-extractor-smollm"
MAX_NEW_TOKENS = 160


# Memoize with the standard library so repeated calls reuse the same objects
# instead of reloading the weights.
@functools.lru_cache(maxsize=None)
def load_model():
    """Load the base model, its tokenizer and the LoRA adapters.

    Returns:
        A ``(model, tokenizer)`` tuple. The model is switched to eval mode.
    """
    print("Loading model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_ID,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
    if tokenizer.pad_token is None:
        # Fall back to EOS so generation has a padding token to use.
        tokenizer.pad_token = tokenizer.eos_token

    # Load LoRA adapters
    model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
    model.eval()
    print("Model loaded successfully!")
    return model, tokenizer


model, tokenizer = load_model()


def _build_prompt(event_text):
    """Return the instruction prompt wrapping *event_text*."""
    # NOTE(review): the line breaks in this prompt were reconstructed; they
    # should match the format used when the adapters were fine-tuned - confirm.
    return f"""Extract calendar fields from: "{event_text}".
Return ONLY valid JSON with keys [action,date,time,attendees,location,duration,recurrence,notes].
Use null for unknown.
"""


def _first_json_object(text):
    """Return the first JSON object that parses inside *text*, or ``None``."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            parsed, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not a valid object at this brace; try the next candidate.
            start = text.find("{", start + 1)
        else:
            return parsed
    return None


def extract_calendar_event(event_text):
    """Extract calendar information from natural language text.

    Args:
        event_text: Free-form description of a calendar event.

    Returns:
        A string for display: pretty-printed JSON when the model output
        contains a parseable JSON object, the raw generated text prefixed with
        a cleanup notice when it does not, a hint when the input is empty, or
        an error message if tokenization/generation raises.
    """
    if not event_text or not event_text.strip():
        return "Please enter some text describing a calendar event."

    prompt = _build_prompt(event_text)

    try:
        # Tokenize and generate
        inputs = tokenizer(prompt, return_tensors="pt")
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        with torch.no_grad():
            # Greedy decoding; no temperature is passed because it is unused
            # when sampling is disabled.
            outputs = model.generate(
                **inputs,
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id,
            )

        # The returned sequence starts with the prompt tokens, so slice them
        # off and decode only the completion instead of searching the decoded
        # text for the end of the prompt.
        prompt_length = inputs["input_ids"].shape[1]
        generated = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()
    except Exception as e:  # UI boundary: show the failure instead of crashing
        return f"Error processing request: {e}"

    parsed = _first_json_object(generated)
    if parsed is None:
        return f"Generated (may need manual cleanup):\n{generated}"
    return json.dumps(parsed, indent=2, ensure_ascii=False)


# Create Gradio interface
with gr.Blocks(title="Calendar Event Extractor", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 📅 Calendar Event Extractor

    This AI model extracts structured calendar information from natural language text.
    Powered by fine-tuned SmolLM-360M with LoRA adapters.

    **Try it out**: Enter any calendar-related text and get structured JSON output!
    """)

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="📝 Event Description",
                placeholder="e.g., 'Meeting with John tomorrow at 2pm for 1 hour'",
                lines=3,
            )
            extract_btn = gr.Button("🔍 Extract Event Info", variant="primary")

        with gr.Column():
            output_json = gr.Textbox(
                label="📋 Extracted Information (JSON)",
                lines=10,
                max_lines=15,
            )

    # Examples
    gr.Markdown("### 🔍 Try these examples:")
    examples = gr.Examples(
        examples=[
            ["Quick meeting at the coworking space on 10th May 2025 starting at 11:00 am for 45 minutes"],
            ["Coffee chat with Sarah tomorrow at 3pm"],
            ["Weekly standup every Monday at 9am on Zoom"],
            ["Doctor appointment next Friday at 2:30 PM for 30 minutes"],
            ["Team lunch at the new restaurant on 15th December"],
            ["Call with client on 25/12/2024 at 10:00 AM, needs to discuss project timeline"],
        ],
        inputs=[input_text],
        outputs=[output_json],
        fn=extract_calendar_event,
        cache_examples=False,
    )

    extract_btn.click(
        fn=extract_calendar_event,
        inputs=[input_text],
        outputs=[output_json],
    )

    gr.Markdown("""
    ---
    **Model Details**: Fine-tuned SmolLM-360M using LoRA • **Dataset**: ~2500 calendar events • **Training**: Custom augmentation pipeline

    [🔗 Model Card](https://huggingface.co/waliaMuskaan011/calendar-event-extractor-smollm) • [💻 Training Code](https://github.com/muskaanwalia098/Calendar-Event-Entity-Extraction)
    """)

if __name__ == "__main__":
    demo.launch()