import gradio as gr
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Load model and tokenizer
def load_model():
    print("Loading model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        "HuggingFaceTB/SmolLM-360M",
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM-360M")
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Load the LoRA adapters on top of the base model
    model = PeftModel.from_pretrained(base_model, "waliaMuskaan011/calendar-event-extractor-smollm")
    model.eval()
    print("Model loaded successfully!")
    return model, tokenizer


# Loaded once at startup (weights are downloaded from the Hub on first run)
model, tokenizer = load_model()


def extract_calendar_event(event_text):
    """Extract calendar information from natural language text."""
    if not event_text.strip():
        return "Please enter some text describing a calendar event."

    # Build prompt
    prompt = f"""Extract calendar fields from: "{event_text}".
Return ONLY valid JSON with keys [action,date,time,attendees,location,duration,recurrence,notes].
Use null for unknown.
"""
    try:
        # Tokenize and generate (greedy decoding for deterministic output)
        inputs = tokenizer(prompt, return_tensors="pt")
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=160,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode the full sequence, then strip the prompt so only the generation remains
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        if full_response.startswith(prompt):
            json_part = full_response[len(prompt):].strip()
        else:
            # Fallback: take everything after "Use null for unknown."
            prompt_end = full_response.find("Use null for unknown.")
            if prompt_end != -1:
                json_part = full_response[prompt_end + len("Use null for unknown."):].strip()
            else:
                json_part = full_response.split("\n")[-1].strip()

        # Try to parse as JSON for validation
        try:
            parsed = json.loads(json_part)
            return json.dumps(parsed, indent=2, ensure_ascii=False)
        except json.JSONDecodeError:
            return f"Generated (may need manual cleanup):\n{json_part}"
    except Exception as e:
        return f"Error processing request: {str(e)}"


# Create Gradio interface
with gr.Blocks(title="Calendar Event Extractor", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # Calendar Event Extractor

    This AI model extracts structured calendar information from natural language text.
    It is powered by a fine-tuned SmolLM-360M with LoRA adapters.

    **Try it out**: Enter any calendar-related text and get structured JSON output!
    """)

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Event Description",
                placeholder="e.g., 'Meeting with John tomorrow at 2pm for 1 hour'",
                lines=3,
            )
            extract_btn = gr.Button("Extract Event Info", variant="primary")
        with gr.Column():
            output_json = gr.Textbox(
                label="Extracted Information (JSON)",
                lines=10,
                max_lines=15,
            )

    # Examples
    gr.Markdown("### Try these examples:")
    examples = gr.Examples(
        examples=[
            ["Quick meeting at the coworking space on 10th May 2025 starting at 11:00 am for 45 minutes"],
            ["Coffee chat with Sarah tomorrow at 3pm"],
            ["Weekly standup every Monday at 9am on Zoom"],
            ["Doctor appointment next Friday at 2:30 PM for 30 minutes"],
            ["Team lunch at the new restaurant on 15th December"],
            ["Call with client on 25/12/2024 at 10:00 AM, needs to discuss project timeline"],
        ],
        inputs=[input_text],
        outputs=[output_json],
        fn=extract_calendar_event,
        cache_examples=False,
    )

    extract_btn.click(
        fn=extract_calendar_event,
        inputs=[input_text],
        outputs=[output_json],
    )

    gr.Markdown("""
    ---
    **Model Details**: Fine-tuned SmolLM-360M using LoRA • **Dataset**: ~2500 calendar events • **Training**: Custom augmentation pipeline

    [Model Card](https://huggingface.co/waliaMuskaan011/calendar-event-extractor-smollm) • [Training Code](https://github.com/muskaanwalia098/Calendar-Event-Entity-Extraction)
    """)
if __name__ == "__main__":
    demo.launch()