#!/usr/bin/env python3
"""
Gradio Interface for Dynamic Forecast System

Interactive interface for time-aware forecasting with run date selection.
"""

import os
from datetime import datetime, timedelta

import gradio as gr
import pandas as pd
import polars as pl
from datasets import load_dataset

from src.forecasting.dynamic_forecast import DynamicForecast
from src.forecasting.feature_availability import FeatureAvailability

# Window sizes shared by the forecaster configuration and the run-date
# validation, kept in one place so the two cannot drift apart.
CONTEXT_HOURS = 512
FORECAST_HOURS = 336  # Fixed at 14 days

# Format expected from the "Run Date" textbox.
RUN_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Prefix that marks target columns; the remainder of the name is the border.
TARGET_PREFIX = 'target_border_'

# Global variables for caching (populated once by load_data()).
dataset = None
forecaster = None
borders = None


def load_data():
    """Load the dataset once at startup and populate the module-level caches.

    Reads the HuggingFace token from the ``HF_TOKEN`` environment variable,
    downloads the ``train`` split, converts it to a polars frame, makes sure
    the ``timestamp`` column is a datetime, builds the ``DynamicForecast``
    instance and derives the border names from the ``target_border_*``
    columns.

    Returns:
        True once ``dataset``, ``forecaster`` and ``borders`` have been set.

    Raises:
        ValueError: If ``HF_TOKEN`` is not set in the environment.
    """
    global dataset, forecaster, borders

    print("[*] Loading dataset from HuggingFace...")

    # Load HF token from environment variable
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise ValueError(
            "HF_TOKEN not found in environment variables. "
            "Please set HF_TOKEN in your environment or .env file."
        )

    ds = load_dataset(
        "evgueni-p/fbmc-features-24month",
        split="train",
        token=hf_token
    )
    frame = pl.from_pandas(ds.to_pandas())

    # Ensure timestamp is datetime
    timestamp_dtype = frame['timestamp'].dtype
    if timestamp_dtype == pl.String:
        frame = frame.with_columns(pl.col('timestamp').str.to_datetime())
    elif timestamp_dtype != pl.Datetime:
        frame = frame.with_columns(pl.col('timestamp').cast(pl.Datetime))

    # Initialize forecaster
    model = DynamicForecast(
        dataset=frame,
        context_hours=CONTEXT_HOURS,
        forecast_hours=FORECAST_HOURS
    )

    # Extract borders: drop only the leading prefix, so a border name that
    # happens to contain the prefix text again is left intact.
    border_names = [
        col[len(TARGET_PREFIX):]
        for col in frame.columns
        if col.startswith(TARGET_PREFIX)
    ]

    # Publish the caches only after every step above succeeded, so a failure
    # part-way through never leaves a half-initialised module state behind.
    dataset = frame
    forecaster = model
    borders = border_names

    print(f"[OK] Loaded {len(dataset)} rows, {len(dataset.columns)} columns")
    print(f"[OK] Found {len(borders)} borders")
    print(f"[OK] Date range: {dataset['timestamp'].min()} to {dataset['timestamp'].max()}")

    return True


def get_dataset_info():
    """Return a short text summary of the loaded dataset for display.

    Returns:
        A multi-line string with row/column counts, the timestamp range and
        the number of borders, or ``"Dataset not loaded"`` when ``load_data``
        has not populated the cache yet.
    """
    if dataset is None:
        return "Dataset not loaded"

    date_min = str(dataset['timestamp'].min())
    date_max = str(dataset['timestamp'].max())
    # borders is only None before load_data() ran; guard so len() cannot fail.
    border_count = len(borders or [])

    info = f"""
**Dataset Information**
- Total rows: {len(dataset):,}
- Total columns: {len(dataset.columns)}
- Date range: {date_min} to {date_max}
- Borders available: {border_count}
"""
    return info


def get_feature_summary():
    """Return a text summary of the forecaster's feature categorization.

    Reads the ``full_horizon_d14``, ``partial_d1``, ``historical`` and
    ``total`` entries of ``forecaster.get_feature_summary()``.

    Returns:
        A multi-line string, or ``"Forecaster not initialized"`` when the
        forecaster cache is still empty.
    """
    if forecaster is None:
        return "Forecaster not initialized"

    summary = forecaster.get_feature_summary()

    text = f"""
**Feature Categorization**
- Full-horizon D+14: {summary['full_horizon_d14']} features (temporal, weather, CNEC outages, LTA)
- Partial D+1: {summary['partial_d1']} features (load forecasts, masked D+2-D+14)
- Historical only: {summary['historical']} features (prices, generation, demand, lags, etc.)
- **Total: {summary['total']} features**
"""
    return text


def _parse_run_date(run_date_str):
    """Parse a run-date string in RUN_DATE_FORMAT; return None if it does not match."""
    try:
        # Surrounding whitespace is a common artefact of typed/pasted input.
        return datetime.strptime(run_date_str.strip(), RUN_DATE_FORMAT)
    except (ValueError, TypeError, AttributeError):
        # ValueError: wrong format; TypeError/AttributeError: non-string input.
        return None


def validate_run_date(run_date_str):
    """Validate that a run date is well-formed and within dataset bounds.

    The run date needs ``CONTEXT_HOURS`` of data before it and
    ``FORECAST_HOURS`` of data after it.

    Args:
        run_date_str: Run date as text in ``YYYY-MM-DD HH:MM:SS`` format.

    Returns:
        A ``(is_valid, message)`` tuple; ``message`` explains the failure or
        reads ``"Run date valid"``.
    """
    if not run_date_str:
        return False, "Please select a run date"

    run_date = _parse_run_date(run_date_str)
    if run_date is None:
        return False, "Invalid date format (use YYYY-MM-DD HH:MM:SS)"

    if dataset is None:
        return False, "Dataset not loaded"

    dataset_min = dataset['timestamp'].min()
    dataset_max = dataset['timestamp'].max()
    if dataset_min is None or dataset_max is None:
        # No usable timestamps: the bounds below cannot be computed.
        return False, "Dataset contains no timestamps"

    # Run date must have CONTEXT_HOURS of context before it
    min_valid = dataset_min + timedelta(hours=CONTEXT_HOURS)

    # Run date must have FORECAST_HOURS of future data after it
    max_valid = dataset_max - timedelta(hours=FORECAST_HOURS)

    if run_date < min_valid:
        return False, f"Run date too early (need {CONTEXT_HOURS}h context). Minimum: {min_valid}"

    if run_date > max_valid:
        return False, f"Run date too late (need {FORECAST_HOURS}h future data). Maximum: {max_valid}"

    return True, "Run date valid"


def prepare_forecast(run_date_str, border):
    """Prepare forecast data for the selected run date and border.

    Validates the inputs, asks the forecaster for the context and future
    frames, runs its leakage validation and formats a summary.

    Args:
        run_date_str: Run date as text in ``YYYY-MM-DD HH:MM:SS`` format.
        border: Border name selected in the dropdown.

    Returns:
        A ``(result, context_preview, future_preview)`` tuple of strings. On
        any failure ``result`` carries the error text and both previews are
        empty strings.
    """
    if dataset is None or forecaster is None:
        return "Error: Dataset not loaded", "", ""

    # Validate inputs
    if not border:
        return "Error: Please select a border", "", ""

    is_valid, msg = validate_run_date(run_date_str)
    if not is_valid:
        return f"Error: {msg}", "", ""

    try:
        run_date = _parse_run_date(run_date_str)

        # Prepare data
        context_data, future_data = forecaster.prepare_forecast_data(run_date, border)

        # Validate no leakage
        leak_free, errors = forecaster.validate_no_leakage(
            context_data, future_data, run_date
        )

        if not leak_free:
            error_msg = "Data leakage detected:\n" + "\n".join(f"- {e}" for e in errors)
            return error_msg, "", ""

        # Build result summary; the horizon is inclusive of its first hour,
        # hence FORECAST_HOURS - 1 to reach the last forecast timestamp.
        forecast_start = run_date + timedelta(hours=1)
        forecast_end = forecast_start + timedelta(hours=FORECAST_HOURS - 1)

        result = f"""
**Forecast Configuration**
- Border: {border}
- Run date: {run_date}
- Forecast horizon: D+1 to D+14 (336 hours, FIXED)
- Forecast period: {forecast_start} to {forecast_end}

**Data Preparation Summary**
- Context shape: {context_data.shape} (historical data)
- Future shape: {future_data.shape} (future covariates)
- Context dates: {context_data['timestamp'].min()} to {context_data['timestamp'].max()}
- Future dates: {future_data['timestamp'].min()} to {future_data['timestamp'].max()}
- Leakage validation: PASSED

**Feature Availability**
- Full-horizon D+14: Available for all 336 hours
- Partial D+1 (load forecasts): Available for first 24 hours, masked 25-336
- Historical features: Not used for forecasting (context only)

**Next Steps**
1. Data has been prepared with time-aware extraction
2. Load forecast masking applied (D+1 only)
3. LTA forward-filling applied (constant across horizon)
4. Ready for Chronos-2 inference (requires GPU)

**Note**: This is a dry-run demonstration. Actual inference requires GPU with Chronos-2 model.
"""

        # NOTE(review): .to_string() assumes the forecaster returns frames
        # that expose it (pandas-style) - confirm against DynamicForecast.
        # Create context preview
        context_preview = context_data.head(10).to_string()

        # Create future preview
        future_preview = future_data.head(10).to_string()

        return result, context_preview, future_preview

    except Exception as e:
        # UI boundary: report the failure in the result box instead of
        # letting the callback raise.
        return f"Error: {str(e)}", "", ""


def create_interface():
    """Create the Gradio interface.

    Calls ``load_data()`` first, so building the interface requires
    ``HF_TOKEN`` and access to the dataset.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    # Load data at startup
    load_data()

    with gr.Blocks(title="FBMC Dynamic Forecast System") as app:
        gr.Markdown("# FBMC Dynamic Forecast System")
        gr.Markdown("""
        **Time-Aware Forecasting with Run Date Selection**

        This interface demonstrates the dynamic forecast pipeline that prevents data leakage
        by using only data available at the selected run date.

        **Key Features**:
        - Dynamic run date selection (prevents data leakage)
        - Fixed 14-day forecast horizon (D+1 to D+14, always 336 hours)
        - Time-aware feature categorization (603 full + 12 partial + 1,899 historical)
        - Availability masking for partial features (load forecasts D+1 only)
        - Built-in leakage validation
        """)

        with gr.Tab("Forecast Configuration"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Dataset Information")
                    dataset_info = gr.Textbox(
                        label="Dataset Info",
                        value=get_dataset_info(),
                        lines=8,
                        interactive=False
                    )

                    feature_summary = gr.Textbox(
                        label="Feature Summary",
                        value=get_feature_summary(),
                        lines=10,
                        interactive=False
                    )

                with gr.Column():
                    gr.Markdown("### Forecast Configuration")

                    run_date_input = gr.Textbox(
                        label="Run Date (YYYY-MM-DD HH:MM:SS)",
                        placeholder="2025-08-15 23:00:00",
                        value="2025-08-15 23:00:00"
                    )

                    border_dropdown = gr.Dropdown(
                        label="Border",
                        choices=borders if borders else [],
                        value=borders[0] if borders else None
                    )

                    gr.Markdown("""
                    **Forecast Horizon**: Fixed at 14 days (D+1 to D+14, 336 hours)

                    **Validation Rules**:
                    - Run date must have 512 hours of historical context
                    - Run date must have 336 hours of future data (for this demo)
                    - Valid range: ~22 days from dataset start to ~14 days before dataset end
                    """)

                    prepare_btn = gr.Button("Prepare Forecast Data", variant="primary")

            with gr.Row():
                result_output = gr.Textbox(
                    label="Forecast Preparation Result",
                    lines=25,
                    interactive=False
                )

        with gr.Tab("Data Preview"):
            with gr.Row():
                context_preview = gr.Textbox(
                    label="Context Data (first 10 rows)",
                    lines=20,
                    interactive=False
                )

                future_preview = gr.Textbox(
                    label="Future Covariates (first 10 rows)",
                    lines=20,
                    interactive=False
                )

        with gr.Tab("About"):
            gr.Markdown("""
            ## About This System

            ### Purpose
            Prevent data leakage in FBMC cross-border flow forecasting by implementing
            time-aware data extraction that respects feature availability windows.

            ### Architecture
            1. **Feature Categorization**: All 2,514 features categorized by availability
               - Full-horizon D+14: 603 features (temporal, weather, outages, LTA)
               - Partial D+1: 12 features (load forecasts, masked D+2-D+14)
               - Historical: 1,899 features (prices, generation, demand, lags)

            2. **Time-Aware Extraction**: DynamicForecast class
               - Extracts context data (all data before run_date)
               - Extracts future covariates (D+1 to D+14 only)
               - Applies availability masking for partial features

            3. **Leakage Validation**: Built-in checks
               - Context timestamps < run_date
               - Future timestamps >= run_date + 1 hour
               - No overlap between context and future
               - Only future covariates in future data

            ### Forecast Horizon
            - **FIXED at 14 days** (D+1 to D+14, 336 hours)
            - No horizon selector needed (always forecasts full 14 days)
            - D+1 starts 1 hour after run_date (ET convention)

            ### Feature Availability
            - **Load Forecasts**: Published day-ahead, available D+1 only
            - **Weather**: Forecasts available for full D+14 horizon
            - **CNEC Outages**: Planned maintenance published weeks ahead
            - **LTA**: Long-term allocations, forward-filled from D+0
            - **Historical**: Prices, generation, demand (context only)

            ### Time Conventions
            - **Electricity Time (ET)**: Hour 1 = 00:00-01:00, Hour 24 = 23:00-00:00
            - **D+1**: Next day, hours 1-24 (24 hours starting at 00:00)
            - **D+14**: 14 days ahead (336 hours total)

            ### Model
            - **Chronos 2 Large** (710M params, zero-shot inference)
            - Supports partial availability via NaN masking
            - Multivariate time series forecasting

            ### Files
            - `src/forecasting/feature_availability.py`: Feature categorization
            - `src/forecasting/dynamic_forecast.py`: Time-aware data extraction
            - `smoke_test.py`, `full_inference.py`: Updated inference scripts
            - `tests/test_feature_availability.py`: Unit tests (27 tests, all passing)

            ### Authors
            Evgueni Poloukarov, 2025-11-13
            """)

        # Wire up the button
        prepare_btn.click(
            fn=prepare_forecast,
            inputs=[run_date_input, border_dropdown],
            outputs=[result_output, context_preview, future_preview]
        )

    return app


if __name__ == "__main__":
    app = create_interface()
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )