import os
import io
import warnings

import numpy as np
import torch
from PIL import Image
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from gradio.routes import mount_gradio_app
import gradio as gr
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    AutoProcessor,
)

# Suppress warnings
warnings.filterwarnings("ignore", category=UserWarning, module="gradio.analytics")
warnings.filterwarnings("ignore", category=FutureWarning)

# Force CPU only
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
torch.cuda.is_available = lambda: False
device = "cpu"
print("Running on CPU ✅")

# ---------------- LOAD CHAT MODEL ----------------
MODEL_ID = "microsoft/Phi-3.5-mini-instruct"

try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    # Add a padding token if the tokenizer doesn't define one
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,  # full precision for CPU inference
        device_map="cpu",
        low_cpu_mem_usage=True,
        trust_remote_code=True,
    ).eval()
    print("Chat model loaded ✅")
except Exception as e:
    print(f"Chat model failed to load: {e}")
    raise

# ---------------- LOAD VISION MODEL ----------------
models = {}
processors = {}

try:
    VISION_ID = "microsoft/Phi-3.5-vision-instruct"

    # Force eager attention to avoid flash-attention errors on CPU
    models[VISION_ID] = AutoModelForCausalLM.from_pretrained(
        VISION_ID,
        trust_remote_code=True,
        torch_dtype=torch.float32,  # full precision for CPU inference
        device_map="cpu",
        low_cpu_mem_usage=True,
        attn_implementation="eager",
        _attn_implementation_internal="eager",  # extra parameter for compatibility
    ).eval()
    processors[VISION_ID] = AutoProcessor.from_pretrained(
        VISION_ID,
        trust_remote_code=True,
    )
    print("Vision model loaded ✅")
except Exception as e:
    print(f"Vision model failed to load: {e}")
    # Don't raise here so the app can still run without vision capabilities

# ---------------- CHAT FUNCTION ----------------
def chat_simple(message, history):
    try:
        conversation = [{"role": "system", "content": "You are a helpful assistant."}]
        for user, assistant in history:
            conversation.append({"role": "user", "content": user})
            conversation.append({"role": "assistant", "content": assistant})
        conversation.append({"role": "user", "content": message})

        input_ids = tokenizer.apply_chat_template(
            conversation,
            add_generation_prompt=True,
            return_tensors="pt",
            padding=True,     # pad for stability
            truncation=True,  # truncate very long conversations
        )

        with torch.no_grad():  # no gradient tracking needed for inference
            output = model.generate(
                input_ids,
                max_new_tokens=256,
                pad_token_id=tokenizer.pad_token_id,
                do_sample=False,
                use_cache=False,
            )

        reply = tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)
        return reply.strip()
    except Exception as e:
        return f"Error in chat: {str(e)}"

# ---------------- VISION FUNCTION ----------------
def run_vision(image, text_input, model_id):
    if image is None:
        return "⚠️ Please upload an image first."
    if model_id not in models:
        return "⚠️ Vision model not loaded."
    try:
        model_vision = models[model_id]
        processor = processors[model_id]

        # Accept numpy arrays, PIL images, or file paths
        if isinstance(image, np.ndarray):
            img = Image.fromarray(image).convert("RGB")
        else:
            img = image.convert("RGB") if hasattr(image, "convert") else Image.open(image).convert("RGB")

        placeholder = "<|image_1|>\n"
        prompt = placeholder + (text_input or "Describe this image")
        messages = [{"role": "user", "content": prompt}]

        template = processor.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        inputs = processor(template, [img], return_tensors="pt")

        with torch.no_grad():
            output = model_vision.generate(
                **inputs,
                max_new_tokens=400,
                do_sample=False,
                pad_token_id=processor.tokenizer.pad_token_id or processor.tokenizer.eos_token_id,
                use_cache=False,
            )

        # Drop the prompt tokens and decode only the generated continuation
        output = output[:, inputs["input_ids"].shape[1]:]
        response = processor.batch_decode(output, skip_special_tokens=True)[0]
        return response.strip()
    except Exception as e:
        return f"Error in vision processing: {str(e)}"

# ---------------- FASTAPI BACKEND ----------------
api = FastAPI(title="Phi-3.5 AI Assistant", version="1.0.0")

@api.get("/")
async def root():
    return {"message": "Phi-3.5 AI Assistant API", "status": "running"}

@api.get("/health")
async def health():
    return {
        "status": "ok",
        "device": device,
        "vision_loaded": len(models) > 0,
        "models_available": list(models.keys()),
    }

@api.post("/api/chat")
async def api_chat(message: str = Form(...)):
    try:
        if not message.strip():
            raise HTTPException(status_code=400, detail="Message cannot be empty")

        conversation = [{"role": "user", "content": message}]
        input_ids = tokenizer.apply_chat_template(
            conversation,
            add_generation_prompt=True,
            return_tensors="pt",
        )

        with torch.no_grad():
            output = model.generate(
                input_ids,
                max_new_tokens=256,
                pad_token_id=tokenizer.pad_token_id,
                use_cache=False,
            )

        reply = tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)
        return {"response": reply.strip()}
    except HTTPException:
        raise  # preserve the 400 status instead of re-wrapping it as a 500
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Chat error: {str(e)}")

@api.post("/api/vision")
async def api_vision(
    image: UploadFile = File(...),
    text_input: str = Form("Describe this image"),
    model_id: str = Form("microsoft/Phi-3.5-vision-instruct"),
):
    try:
        if not image.content_type.startswith("image/"):
            raise HTTPException(status_code=400, detail="File must be an image")
        if model_id not in models:
            raise HTTPException(status_code=400, detail="Vision model not available")

        # Read and decode the uploaded image
        image_data = await image.read()
        img = Image.open(io.BytesIO(image_data)).convert("RGB")

        response = run_vision(np.array(img), text_input, model_id)
        return {"response": response}
    except HTTPException:
        raise  # preserve the 400 status instead of re-wrapping it as a 500
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Vision processing error: {str(e)}")

# ---------------- GRADIO UI ----------------
def create_ui():
    with gr.Blocks(title="Phi-3.5 AI Assistant", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🚀 Phi-3.5 AI Assistant")

        with gr.Tab("💬 Chat"):
            gr.Markdown("### Chat with Phi-3.5 Mini")
            gr.ChatInterface(
                fn=chat_simple,
                title="Phi-3.5 Mini Chat",
                description="Ask me anything! I'm here to help.",
            )

        with gr.Tab("👁️ Vision"):
            gr.Markdown("### Vision Analysis with Phi-3.5 Vision")
            with gr.Row():
                with gr.Column():
                    img = gr.Image(
                        label="Upload Image",
                        type="numpy",
                        height=300,
                    )
                    txt = gr.Textbox(
                        label="Prompt",
                        value="What's in this image?",
                        placeholder="Describe what you see in the image...",
                    )
                    model_sel = gr.Dropdown(
                        choices=list(models.keys()),
                        value=list(models.keys())[0] if models else None,
                        label="Model",
                        interactive=len(models) > 1,
                    )
                    analyze_btn = gr.Button("🔍 Analyze", variant="primary")
                with gr.Column():
                    out = gr.Textbox(
                        label="Analysis Result",
                        placeholder="Results will appear here...",
                        lines=6,
                    )

            gr.Examples(
                examples=[
                    ["What's in this image?", "microsoft/Phi-3.5-vision-instruct"],
                    ["Describe this image in detail", "microsoft/Phi-3.5-vision-instruct"],
                ],
                inputs=[txt, model_sel],
                label="Example Prompts",
            )

            analyze_btn.click(
                run_vision,
                inputs=[img, txt, model_sel],
                outputs=out,
            )

        with gr.Tab("ℹ️ System Info"):
            gr.Markdown("### System Information")
            gr.JSON(value={
                "device": device,
                "vision_loaded": len(models) > 0,
                "available_models": list(models.keys()),
                "chat_model": MODEL_ID,
            })

    return demo

# Create the Gradio UI and mount it on top of the FastAPI backend
gradio_app = create_ui()
app = mount_gradio_app(api, gradio_app, path="/")

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
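
# Example requests against the API endpoints defined above (a minimal sketch:
# the host/port match the uvicorn call in this file, and "photo.jpg" is a
# hypothetical local file used only for illustration):
#
#   curl http://localhost:7860/health
#   curl -X POST -F "message=Hello" http://localhost:7860/api/chat
#   curl -X POST -F "image=@photo.jpg" -F "text_input=Describe this image" \
#        http://localhost:7860/api/vision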