File size: 5,004 Bytes
05215f3
 
 
 
 
 
bdb7d79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05215f3
bdb7d79
05215f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import functools
import json

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer
@functools.lru_cache(maxsize=1)
def load_model():
    """Load the SmolLM-360M base model, its tokenizer and the LoRA adapter.

    The result is memoized, so repeated calls return the same
    ``(model, tokenizer)`` pair instead of reloading the weights.

    Returns:
        tuple: ``(model, tokenizer)`` where ``model`` is the PEFT-wrapped
        causal LM switched to eval mode and ``tokenizer`` is the matching
        tokenizer with a pad token guaranteed to be set.
    """
    print("Loading model...")
    base_model = AutoModelForCausalLM.from_pretrained(
        "HuggingFaceTB/SmolLM-360M",
        # bfloat16 only when a CUDA device is available; float32 otherwise.
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM-360M")
    # Fall back to the EOS token when the tokenizer defines no pad token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Load LoRA adapters on top of the base weights.
    model = PeftModel.from_pretrained(base_model, "waliaMuskaan011/calendar-event-extractor-smollm")
    model.eval()
    print("Model loaded successfully!")
    return model, tokenizer

# Load once at import time; extract_calendar_event reads these module-level
# names on every request.
model, tokenizer = load_model()

def _format_generated_json(generated):
    """Pretty-print the JSON payload found in *generated*, or return None.

    The whole string is tried first. If that fails, the first decodable JSON
    value starting at a ``{`` is used, so extra text emitted after the object
    does not spoil an otherwise valid answer.
    """
    try:
        payload = json.loads(generated)
    except json.JSONDecodeError:
        decoder = json.JSONDecoder()
        brace = generated.find("{")
        while brace != -1:
            try:
                payload, _ = decoder.raw_decode(generated, brace)
                break
            except json.JSONDecodeError:
                brace = generated.find("{", brace + 1)
        else:
            # No "{" position produced a decodable value.
            return None
    return json.dumps(payload, indent=2, ensure_ascii=False)


def extract_calendar_event(event_text):
    """Extract calendar information from natural language text.

    Args:
        event_text: Free-form description of a calendar event. ``None``,
            empty and whitespace-only values are treated as missing input.

    Returns:
        str: Pretty-printed JSON when the model output contains a decodable
        JSON value; otherwise the raw generated text prefixed with a
        "may need manual cleanup" notice, a prompt to enter text when the
        input is blank, or an error message if generation fails.
    """
    if not event_text or not event_text.strip():
        return "Please enter some text describing a calendar event."

    # Build prompt
    prompt = f"""Extract calendar fields from: "{event_text}".
Return ONLY valid JSON with keys [action,date,time,attendees,location,duration,recurrence,notes].
Use null for unknown.
"""

    try:
        # Tokenize and move every tensor to the model's device.
        inputs = tokenizer(prompt, return_tensors="pt")
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        prompt_length = inputs["input_ids"].shape[1]

        with torch.no_grad():
            # Greedy decoding; no temperature is passed because it is unused
            # when do_sample is False.
            outputs = model.generate(
                **inputs,
                max_new_tokens=160,
                do_sample=False,
                pad_token_id=tokenizer.eos_token_id,
            )

        # The output sequence starts with the prompt tokens, so slice them
        # off by length instead of searching the decoded text for a marker
        # that may also occur inside the answer.
        generated = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()

        formatted = _format_generated_json(generated)
        if formatted is not None:
            return formatted
        return f"Generated (may need manual cleanup):\n{generated}"

    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error processing request: {str(e)}"

# Create Gradio interface
# Build the UI: a two-column layout (input + button on the left, result on the
# right), a set of clickable examples, and a footer with project links.
with gr.Blocks(title="Calendar Event Extractor", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 📅 Calendar Event Extractor
    
    This AI model extracts structured calendar information from natural language text.
    Powered by fine-tuned SmolLM-360M with LoRA adapters.
    
    **Try it out**: Enter any calendar-related text and get structured JSON output!
    """)
    
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="📝 Event Description",
                placeholder="e.g., 'Meeting with John tomorrow at 2pm for 1 hour'",
                lines=3
            )
            extract_btn = gr.Button("🔍 Extract Event Info", variant="primary")
            
        with gr.Column():
            output_json = gr.Textbox(
                label="📋 Extracted Information (JSON)",
                lines=10,
                max_lines=15
            )
    
    # Examples
    gr.Markdown("### 🔍 Try these examples:")
    examples = gr.Examples(
        examples=[
            ["Quick meeting at the coworking space on 10th May 2025 starting at 11:00 am for 45 minutes"],
            ["Coffee chat with Sarah tomorrow at 3pm"],
            ["Weekly standup every Monday at 9am on Zoom"],
            ["Doctor appointment next Friday at 2:30 PM for 30 minutes"],
            ["Team lunch at the new restaurant on 15th December"],
            ["Call with client on 25/12/2024 at 10:00 AM, needs to discuss project timeline"],
        ],
        inputs=[input_text],
        outputs=[output_json],
        fn=extract_calendar_event,
        cache_examples=False
    )
    
    # Run the extractor on the textbox contents when the button is clicked.
    extract_btn.click(
        fn=extract_calendar_event,
        inputs=[input_text],
        outputs=[output_json]
    )
    
    gr.Markdown("""
    ---
    **Model Details**: Fine-tuned SmolLM-360M using LoRA • **Dataset**: ~2500 calendar events • **Training**: Custom augmentation pipeline
    
    [🔗 Model Card](https://huggingface.co/waliaMuskaan011/calendar-event-extractor-smollm) • [💻 Training Code](https://github.com/muskaanwalia098/Calendar-Event-Entity-Extraction)
    """)

# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()