Spaces:

ginipick
/

NH-Korea

Running

App Files Files Community

ginipick committed on Apr 24

Commit

a136f76

verified ·

1 Parent(s): 2a3abbd

Create app.py

Browse files

Files changed (1) hide show

app.py +1225 -0

app.py ADDED Viewed

	@@ -0,0 +1,1225 @@

+# ──────────────────────────────── Imports ────────────────────────────────
+import os, json, re, logging, requests, markdown, time, io
+from datetime import datetime
+import random
+import base64
+from io import BytesIO
+from PIL import Image
+import streamlit as st
+from openai import OpenAI  # OpenAI 라이브러리
+from gradio_client import Client
+import pandas as pd
+import PyPDF2  # For handling PDF files
# ──────────────────────────────── Environment Variables / Constants ─────────────────────────
# API credentials are read from the environment and default to "" when unset;
# the functions that need them check for emptiness and raise RuntimeError.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
# NOTE: the Brave key is read from the SERPHOUSE_API_KEY variable on purpose.
BRAVE_KEY      = os.getenv("SERPHOUSE_API_KEY", "")  # Keep this name
# Brave Search REST endpoints, one per result type (web, images, videos, news).
BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
BRAVE_IMAGE_ENDPOINT = "https://api.search.brave.com/res/v1/images/search"
BRAVE_VIDEO_ENDPOINT = "https://api.search.brave.com/res/v1/videos/search"
BRAVE_NEWS_ENDPOINT = "https://api.search.brave.com/res/v1/news/search"
# Address handed to gradio_client.Client by generate_image().
IMAGE_API_URL  = "http://211.233.58.201:7896"
# NOTE(review): presumably the completion-token cap for chat requests; it is
# not referenced in the part of the file visible here - confirm at the call site.
MAX_TOKENS     = 7999
# Stable fallback image URLs, used when no usable search image is available
FALLBACK_IMAGES = [
    "https://images.pexels.com/photos/2559941/pexels-photo-2559941.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/417074/pexels-photo-417074.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/312839/pexels-photo-312839.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/3844788/pexels-photo-3844788.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/33041/antelope-canyon-lower-canyon-arizona.jpg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/572897/pexels-photo-572897.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/773471/pexels-photo-773471.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/1366630/pexels-photo-1366630.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/1237119/pexels-photo-1237119.jpeg?auto=compress&cs=tinysrgb&w=600",
    "https://images.pexels.com/photos/1429567/pexels-photo-1429567.jpeg?auto=compress&cs=tinysrgb&w=600",
]
# Search modes and style definitions (in English)
# Keys are the internal identifiers (also the `mode` values understood by
# get_system_prompt); values are the labels shown in the sidebar select box.
SEARCH_MODES = {
    "comprehensive": "Comprehensive answer with multiple sources",
    "academic": "Academic and research-focused results",
    "news": "Latest news and current events",
    "technical": "Technical and specialized information",
    "educational": "Educational and learning resources"
}
# Same key/label layout as SEARCH_MODES, for the `style` argument.
RESPONSE_STYLES = {
    "professional": "Professional and formal tone",
    "casual": "Friendly and conversational tone",
    "simple": "Simple and easy to understand",
    "detailed": "Detailed and thorough explanations"
}
# Example search queries
# Wired to the three example buttons in the sidebar.
EXAMPLE_QUERIES = {
    "example1": "What are the latest developments in quantum computing?",
    "example2": "How does climate change affect biodiversity in tropical rainforests?",
    "example3": "What are the economic implications of artificial intelligence in the job market?"
}
# ──────────────────────────────── Logging ────────────────────────────────
# Root logger at INFO with timestamp / level / message on every record.
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s - %(levelname)s - %(message)s")
+# ──────────────────────────────── OpenAI Client ──────────────────────────
+# OpenAI 클라이언트에 타임아웃과 재시도 로직 추가
@st.cache_resource
def get_openai_client():
    """
    Build the OpenAI client used by the app.

    The client is configured with a 60 second request timeout and up to
    three automatic retries. The function is wrapped in
    ``st.cache_resource``.

    Raises:
        RuntimeError: if the OPENAI_API_KEY environment variable is empty.
    """
    if OPENAI_API_KEY:
        client_options = {
            "api_key": OPENAI_API_KEY,
            "timeout": 60.0,   # seconds
            "max_retries": 3,  # automatic retry count
        }
        return OpenAI(**client_options)
    raise RuntimeError("⚠️ OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")
+# ──────────────────────────────── System Prompt ─────────────────────────
def get_system_prompt(mode="comprehensive", style="professional", include_search_results=True, include_uploaded_files=False) -> str:
    """
    Assemble the system prompt for the Perplexity-like assistant.

    The prompt is built from these pieces, concatenated in order:
    1. the base "comprehensive" instructions (always present);
    2. a mode-specific focus section when ``mode`` is not "comprehensive"
       (an unknown mode contributes only a blank line);
    3. a "Tone and Style" sentence when ``style`` is a known style key;
    4. web-search usage guidelines when ``include_search_results`` is true;
    5. uploaded-file usage guidelines when ``include_uploaded_files`` is true;
    6. the fixed "Additional Formatting Requirements" section.
    """
    # Tone sentence for each supported response style
    tone_by_style = {
        "professional": "Use a professional, authoritative voice. Clearly explain technical terms and present data systematically.",
        "casual": "Use a relaxed, conversational style with a friendly tone. Include relatable examples and occasionally use informal expressions.",
        "simple": "Use straightforward language and avoid jargon. Keep sentences and paragraphs short. Explain concepts as if to someone with no background in the subject.",
        "detailed": "Provide thorough explanations with comprehensive background information. Explore nuances and edge cases. Present multiple perspectives and detailed analysis."
    }
    # Focus sections appended after the base prompt for the non-default modes
    mode_addenda = {
        "academic": """
Your focus is on providing academic and research-focused responses:
- Prioritize peer-reviewed research and academic sources
- Include citations in a formal academic format
- Discuss methodologies and research limitations where relevant
- Present different scholarly perspectives on the topic
- Use precise, technical language appropriate for an academic audience
""",
        "news": """
Your focus is on providing the latest news and current events:
- Prioritize recent news articles and current information
- Include publication dates for all news sources
- Present multiple perspectives from different news outlets
- Distinguish between facts and opinions/editorial content
- Update information with the most recent developments
""",
        "technical": """
Your focus is on providing technical and specialized information:
- Use precise technical terminology appropriate to the field
- Include code snippets, formulas, or technical diagrams where relevant
- Break down complex concepts into step-by-step explanations
- Reference technical documentation, standards, and best practices
- Consider different technical approaches or methodologies
""",
        "educational": """
Your focus is on providing educational and learning resources:
- Structure information in a learning-friendly progression
- Include examples, analogies, and visual explanations
- Highlight key concepts and definitions
- Suggest further learning resources at different difficulty levels
- Present information that's accessible to learners at various levels
"""
    }
    # Base instructions shared by every mode
    base_prompt = """
You are an advanced AI assistant that provides comprehensive answers with multiple sources, similar to Perplexity.
Your task is to:
1. Thoroughly analyze the user's query
2. Provide a clear, well-structured answer integrating information from multiple sources
3. Include relevant images, videos, and links in your response
4. Format your answer with proper headings, bullet points, and sections
5. Cite sources inline and provide a references section at the end
Important guidelines:
- Organize information logically with clear section headings
- Use bullet points and numbered lists for clarity
- Include specific, factual information whenever possible
- Provide balanced perspectives on controversial topics
- Display relevant statistics, data, or quotes when appropriate
- Format your response using markdown for readability
"""
    # How the model should use web search results
    search_rules = """
Guidelines for Using Search Results:
- Include source links directly in your response using markdown: [Source Name](URL)
- For each major claim or piece of information, indicate its source
- If sources conflict, explain the different perspectives and their reliability
- Include 3-5 relevant images by writing: ![Image description](image_url)
- Include 1-2 relevant video links when appropriate by writing: [Video: Title](video_url)
- Format search information into a cohesive, well-structured response
- Include a "References" section at the end listing all major sources with links
"""
    # How the model should use uploaded files
    upload_rules = """
Guidelines for Using Uploaded Files:
- Treat the uploaded files as primary sources for your response
- Extract and highlight key information from files that directly addresses the query
- Quote relevant passages and cite the specific file
- For numerical data in CSV files, consider creating summary statements
- For PDF content, reference specific sections or pages
- Integrate file information seamlessly with web search results
- When information conflicts, prioritize file content over general web results
"""
    # Closing formatting requirements, always appended last
    formatting_rules = """
\n\nAdditional Formatting Requirements:
- Use markdown headings (## and ###) to organize your response
- Use bold text (**text**) for emphasis on important points
- Include a "Related Questions" section at the end with 3-5 follow-up questions
- Format your response with proper spacing and paragraph breaks
- Make all links clickable by using proper markdown format: [text](url)
"""
    pieces = [base_prompt]
    if mode != "comprehensive":
        # Unknown modes fall back to the base prompt plus a single newline
        pieces.append("\n" + mode_addenda.get(mode, ""))
    if style in tone_by_style:
        pieces.append(f"\n\nTone and Style: {tone_by_style[style]}")
    if include_search_results:
        pieces.append(f"\n\n{search_rules}")
    if include_uploaded_files:
        pieces.append(f"\n\n{upload_rules}")
    pieces.append(formatting_rules)
    return "".join(pieces)
+# ──────────────────────────────── Brave Search API ────────────────────────
@st.cache_data(ttl=3600)
def brave_search(query: str, count: int = 20):
    """
    Call the Brave Web Search API → list[dict]
    Returns fields: index, title, link, snippet, displayed_link

    Args:
        query: search text sent as the ``q`` parameter.
        count: maximum number of results requested and returned.

    Returns:
        A list of result dicts, or ``[]`` when the API returns no results or
        every attempt fails. Results are cached by Streamlit for one hour.

    Raises:
        RuntimeError: if the Brave API key environment variable is empty.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY
    }
    params = {"q": query, "count": str(count)}
    # Up to three attempts, pausing two seconds between failures.
    for attempt in range(3):
        try:
            r = requests.get(BRAVE_ENDPOINT, headers=headers, params=params, timeout=15)
            r.raise_for_status()
            data = r.json()
            logging.info(f"Brave search result data structure: {list(data.keys())}")
            # Results normally live under web.results; fall back to a
            # top-level "results" list. `or {}` / `or []` guard against the
            # keys being present but null.
            raw = (data.get("web") or {}).get("results") or data.get("results") or []
            if not raw:
                # A successful response with no hits is a final answer, not a
                # transient error: return now instead of retrying.
                logging.warning(f"No Brave search results found. Response: {data}")
                return []
            arts = []
            for i, res in enumerate(raw[:count], 1):
                url = res.get("url", res.get("link", ""))
                # Display host: strip scheme and optional "www.", keep text up to the first "/"
                host = re.sub(r"https?://(www\.)?", "", url).split("/")[0]
                arts.append({
                    "index": i,
                    "title": res.get("title", "No title"),
                    "link": url,
                    "snippet": res.get("description", res.get("text", "No snippet")),
                    "displayed_link": host
                })
            logging.info(f"Brave search success: {len(arts)} results")
            return arts
        except Exception as e:
            # Best-effort search: any failure is logged and retried so the
            # caller can fall back to mock content on [].
            logging.error(f"Brave search failure (attempt {attempt+1}/3): {e}")
            if attempt < 2:
                time.sleep(2)
    return []
@st.cache_data(ttl=3600)
def brave_image_search(query: str, count: int = 10):
    """
    Query the Brave Image Search API and normalise the hits.

    Each returned dict has the keys: index, title, image_url, source_url,
    width, height. The request is tried up to three times with a two second
    pause between failures; ``[]`` is returned when every attempt fails.

    Raises:
        RuntimeError: if the Brave API key environment variable is empty.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
    request_headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY
    }
    request_params = {
        "q": query,
        "count": str(count),
        "search_lang": "en",
        "country": "us",
        "spellcheck": "1"
    }
    attempt = 0
    while attempt < 3:
        attempt += 1
        try:
            resp = requests.get(BRAVE_IMAGE_ENDPOINT, headers=request_headers,
                                params=request_params, timeout=15)
            resp.raise_for_status()
            payload = resp.json()
            hits = payload.get("results", [])[:count]
            results = [
                {
                    "index": position,
                    "title": hit.get("title", "Image"),
                    "image_url": hit.get("image", {}).get("url", ""),
                    "source_url": hit.get("source", ""),
                    "width": hit.get("image", {}).get("width", 0),
                    "height": hit.get("image", {}).get("height", 0)
                }
                for position, hit in enumerate(hits, 1)
            ]
            logging.info(f"Brave image search success: {len(results)} results")
            return results
        except Exception as err:
            logging.error(f"Brave image search failure (attempt {attempt}/3): {err}")
            if attempt < 3:
                time.sleep(2)
    return []
@st.cache_data(ttl=3600)
def brave_video_search(query: str, count: int = 5):
    """
    Query the Brave Video Search API and normalise the hits.

    Each returned dict has the keys: index, title, video_url, thumbnail_url,
    source. The request is tried up to three times with a two second pause
    between failures; ``[]`` is returned when every attempt fails.

    Raises:
        RuntimeError: if the Brave API key environment variable is empty.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
    request_headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY
    }
    request_params = {"q": query, "count": str(count)}
    attempt = 0
    while attempt < 3:
        attempt += 1
        try:
            resp = requests.get(BRAVE_VIDEO_ENDPOINT, headers=request_headers,
                                params=request_params, timeout=15)
            resp.raise_for_status()
            payload = resp.json()
            hits = payload.get("results", [])[:count]
            results = [
                {
                    "index": position,
                    "title": hit.get("title", "Video"),
                    "video_url": hit.get("url", ""),
                    "thumbnail_url": hit.get("thumbnail", {}).get("src", ""),
                    "source": hit.get("provider", {}).get("name", "Unknown source")
                }
                for position, hit in enumerate(hits, 1)
            ]
            logging.info(f"Brave video search success: {len(results)} results")
            return results
        except Exception as err:
            logging.error(f"Brave video search failure (attempt {attempt}/3): {err}")
            if attempt < 3:
                time.sleep(2)
    return []
@st.cache_data(ttl=3600)
def brave_news_search(query: str, count: int = 5):
    """
    Query the Brave News Search API and normalise the hits.

    Each returned dict has the keys: index, title, url, description, source,
    date (taken from the API's ``age`` field). The request is tried up to
    three times with a two second pause between failures; ``[]`` is returned
    when every attempt fails.

    Raises:
        RuntimeError: if the Brave API key environment variable is empty.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")
    request_headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY
    }
    request_params = {"q": query, "count": str(count)}
    attempt = 0
    while attempt < 3:
        attempt += 1
        try:
            resp = requests.get(BRAVE_NEWS_ENDPOINT, headers=request_headers,
                                params=request_params, timeout=15)
            resp.raise_for_status()
            payload = resp.json()
            hits = payload.get("results", [])[:count]
            results = [
                {
                    "index": position,
                    "title": hit.get("title", "News article"),
                    "url": hit.get("url", ""),
                    "description": hit.get("description", ""),
                    "source": hit.get("source", "Unknown source"),
                    "date": hit.get("age", "Unknown date")
                }
                for position, hit in enumerate(hits, 1)
            ]
            logging.info(f"Brave news search success: {len(results)} results")
            return results
        except Exception as err:
            logging.error(f"Brave news search failure (attempt {attempt}/3): {err}")
            if attempt < 3:
                time.sleep(2)
    return []
def mock_results(query: str) -> str:
    """
    Build the placeholder "search results" text used when the search API fails.

    The text carries the current local timestamp and tells the model to
    answer from its own knowledge about ``query``.
    """
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # One entry per output line; "" entries produce the blank separator lines.
    lines = [
        f"# Fallback Search Content (Generated: {generated_at})",
        "",
        f"The search API request failed. Please generate a response based on any pre-existing knowledge about '{query}'.",
        "",
        "You may consider the following points:",
        "",
        f"- Basic concepts and importance of {query}",
        "- Commonly known related statistics or trends",
        "- Typical expert opinions on this subject",
        "- Questions that readers might have",
        "",
        "Note: This is fallback guidance, not real-time data.",
        "",
        "",
    ]
    return "\n".join(lines)
def do_web_search(query: str) -> str:
    """
    Run web, image, video and news searches for ``query`` and render the
    combined results as one markdown document for the model.

    Falls back to ``mock_results(query)`` when the web search returns nothing
    or when any step raises.
    """
    try:
        web_hits = brave_search(query, 20)
        if not web_hits:
            logging.warning("No search results, using fallback content")
            return mock_results(query)
        image_hits = brave_image_search(query, 5)
        video_hits = brave_video_search(query, 2)
        news_hits = brave_news_search(query, 3)

        # Collect markdown fragments and join once at the end
        parts = [
            "# Web Search Results\nUse these results to provide a comprehensive answer with multiple sources. Include relevant images, videos, and links.\n\n",
            "## Web Results\n\n",
        ]
        # Only the top 10 web results are rendered
        for hit in web_hits[:10]:
            parts.append(f"### Result {hit['index']}: {hit['title']}\n\n{hit['snippet']}\n\n")
            parts.append(f"**Source**: [{hit['displayed_link']}]({hit['link']})\n\n---\n")

        if image_hits:
            parts.append("## Image Results\n\n")
            for picture in image_hits:
                # Entries without an image URL are skipped
                if not picture.get('image_url'):
                    continue
                parts.append(f"![{picture['title']}]({picture['image_url']})\n\n")
                parts.append(f"**Source**: [{picture.get('source_url', 'Image source')}]({picture.get('source_url', '#')})\n\n")

        if video_hits:
            parts.append("## Video Results\n\n")
            for clip in video_hits:
                parts.append(f"### {clip['title']}\n\n")
                if clip.get('thumbnail_url'):
                    parts.append(f"![Thumbnail]({clip['thumbnail_url']})\n\n")
                parts.append(f"**Watch**: [{clip['source']}]({clip['video_url']})\n\n")

        if news_hits:
            parts.append("## News Results\n\n")
            for article in news_hits:
                parts.append(f"### {article['title']}\n\n{article['description']}\n\n")
                parts.append(f"**Source**: [{article['source']}]({article['url']}) - {article['date']}\n\n---\n")

        return "".join(parts)
    except Exception as e:
        logging.error(f"Web search process failed: {str(e)}")
        return mock_results(query)
+# ──────────────────────────────── File Upload Handling ─────────────────────
def process_text_file(file):
    """
    Render an uploaded text file as a markdown section.

    The file is read fully, rewound to the start, and decoded as UTF-8 with
    undecodable bytes dropped. Text longer than 10000 characters is cut to
    its first 9700 characters plus a truncation marker. On any error a short
    error string is returned instead of raising.
    """
    try:
        raw_bytes = file.read()
        file.seek(0)  # leave the upload readable for later consumers
        decoded = raw_bytes.decode('utf-8', errors='ignore')
        body = decoded if len(decoded) <= 10000 else decoded[:9700] + "...(truncated)..."
        return f"## Text File: {file.name}\n\n" + body
    except Exception as e:
        logging.error(f"Error processing text file: {str(e)}")
        return f"Error processing text file: {str(e)}"
def process_csv_file(file):
    """
    Render an uploaded CSV file as a markdown section.

    The section lists row/column counts and column names, a preview of the
    first 10 rows, and ``describe()`` statistics (rounded to 2 decimals) for
    numeric columns when there are any. Tables are rendered with
    ``DataFrame.to_markdown``; if that fails the preview falls back to the
    frame's plain-text form. On any other error a short error string is
    returned instead of raising.
    """
    try:
        raw_bytes = file.read()
        file.seek(0)  # leave the upload readable for later consumers
        frame = pd.read_csv(io.BytesIO(raw_bytes))
        chunks = [
            f"## CSV File: {file.name}\n\n",
            f"- Rows: {len(frame)}\n",
            f"- Columns: {len(frame.columns)}\n",
            f"- Column Names: {', '.join(frame.columns.tolist())}\n\n",
            "### Data Preview\n\n",
        ]
        head = frame.head(10)
        try:
            table = head.to_markdown(index=False)
            chunks.append(table + "\n\n" if table else "Unable to display CSV data.\n\n")
        except Exception as e:
            logging.error(f"Markdown table conversion error: {e}")
            chunks.append("Displaying data as text:\n\n")
            chunks.append(str(head) + "\n\n")
        numeric_columns = frame.select_dtypes(include=['number']).columns
        if len(numeric_columns) > 0:
            chunks.append("### Basic Statistical Information\n\n")
            try:
                stats_table = frame[numeric_columns].describe().round(2).to_markdown()
                chunks.append(stats_table + "\n\n" if stats_table
                              else "Unable to display statistical information.\n\n")
            except Exception as e:
                logging.error(f"Statistical info conversion error: {e}")
                chunks.append("Unable to generate statistical information.\n\n")
        return "".join(chunks)
    except Exception as e:
        logging.error(f"CSV file processing error: {str(e)}")
        return f"Error processing CSV file: {str(e)}"
def process_pdf_file(file):
    """
    Render an uploaded PDF file as a markdown section.

    Text is extracted with PyPDF2 from at most the first 5 pages, each page
    capped at 1500 characters; extraction stops early once the collected
    text exceeds 8000 characters. A page that fails to extract is reported
    inline and the remaining pages are still processed. On any other error a
    markdown error section is returned instead of raising.
    """
    try:
        raw_bytes = file.read()
        file.seek(0)  # leave the upload readable for later consumers
        reader = PyPDF2.PdfReader(io.BytesIO(raw_bytes), strict=False)

        header = f"## PDF File: {file.name}\n\n"
        total_pages = len(reader.pages)
        header += f"- Total pages: {total_pages}\n\n"

        page_limit = min(5, total_pages)
        extracted = ""
        for page_no in range(1, page_limit + 1):
            try:
                raw_text = reader.pages[page_no - 1].extract_text()
                section = f"### Page {page_no}\n\n"
                if not raw_text or len(raw_text.strip()) == 0:
                    section += "(No text could be extracted from this page)\n\n"
                elif len(raw_text) > 1500:
                    # Per-page cap of 1500 characters
                    section += raw_text[:1500] + "...(truncated)...\n\n"
                else:
                    section += raw_text + "\n\n"
                extracted += section
                # Overall cap on the collected text
                if len(extracted) > 8000:
                    extracted += "...(truncating remaining pages; PDF is too large)...\n\n"
                    break
            except Exception as page_err:
                logging.error(f"Error processing PDF page {page_no}: {str(page_err)}")
                extracted += f"### Page {page_no}\n\n(Error extracting content: {str(page_err)})\n\n"

        if total_pages > page_limit:
            extracted += f"\nNote: Only the first {page_limit} pages are shown out of {total_pages} total.\n\n"
        return header + "### PDF Content\n\n" + extracted
    except Exception as e:
        logging.error(f"PDF file processing error: {str(e)}")
        return f"## PDF File: {file.name}\n\nError occurred: {str(e)}\n\nThis PDF file cannot be processed."
def process_uploaded_files(files):
    """
    Merge every uploaded file into one markdown document for the model.

    Files are dispatched on their lower-cased extension (txt, csv, pdf); any
    other extension yields an "Unsupported File" entry. Returns ``None``
    when ``files`` is empty or missing.
    """
    if not files:
        return None
    combined = "# Uploaded File Contents\n\n"
    combined += "Below is the content from the files provided by the user. Integrate this data as a main source of information for your response.\n\n"
    divider = "\n\n---\n\n"
    for upload in files:
        try:
            suffix = upload.name.split('.')[-1].lower()
            if suffix == 'txt':
                section = process_text_file(upload) + divider
            elif suffix == 'csv':
                section = process_csv_file(upload) + divider
            elif suffix == 'pdf':
                section = process_pdf_file(upload) + divider
            else:
                section = f"### Unsupported File: {upload.name}\n\n---\n\n"
            combined += section
        except Exception as e:
            logging.error(f"File processing error {upload.name}: {e}")
            combined += f"### File processing error: {upload.name}\n\nError: {e}\n\n---\n\n"
    return combined
+# ──────────────────────────────── Image & Utility ─────────────────────────
def create_placeholder_image(text, width=600, height=400):
    """
    Create a placeholder image with centred text.

    A ``width`` x ``height`` RGB image is filled with a random pastel
    colour, ``text`` is wrapped at 30 characters per line and drawn in white
    in the middle, and the result is JPEG-encoded.

    Args:
        text: caption to draw on the image.
        width: image width in pixels.
        height: image height in pixels.

    Returns:
        The JPEG bytes as a base64 string, or ``None`` if anything fails
        (the error is logged).
    """
    try:
        import textwrap
        from PIL import Image, ImageDraw, ImageFont

        # Random background colour, kept in a mid-bright range so white text stays readable
        background = tuple(random.randint(100, 240) for _ in range(3))
        img = Image.new('RGB', (width, height), color=background)
        draw = ImageDraw.Draw(img)

        # Prefer Arial; fall back to Pillow's built-in font when it is not installed
        try:
            font = ImageFont.truetype("arial.ttf", 20)
        except OSError:
            font = ImageFont.load_default()

        # Wrap on word boundaries at 30 characters. Over-long words are kept
        # whole on their own line rather than producing an empty line.
        wrapped = textwrap.wrap(text, width=30, break_long_words=False,
                                break_on_hyphens=False)
        text_to_draw = '\n'.join(wrapped)

        # Measure the text block. ImageDraw.textsize was removed in
        # Pillow 10, so use multiline_textbbox and fall back to textsize
        # only on older releases that lack it or reject bitmap fonts.
        left = top = 0
        try:
            left, top, right, bottom = draw.multiline_textbbox(
                (0, 0), text_to_draw, font=font)
            text_w, text_h = right - left, bottom - top
        except (AttributeError, ValueError):
            left = top = 0
            text_w, text_h = draw.textsize(text_to_draw, font=font)

        # Centre the block, compensating for the bbox origin offset
        text_x = (width - text_w) / 2 - left
        text_y = (height - text_h) / 2 - top
        draw.text((text_x, text_y), text_to_draw, fill=(255, 255, 255), font=font)

        # Encode the image as base64 JPEG
        buffered = BytesIO()
        img.save(buffered, format="JPEG")
        return base64.b64encode(buffered.getvalue()).decode()
    except Exception as e:
        logging.error(f"Error creating placeholder image: {e}")
        return None
def get_random_fallback_image():
    """Return one URL picked at random from FALLBACK_IMAGES."""
    chosen_url = random.choice(FALLBACK_IMAGES)
    return chosen_url
def extract_image_urls_from_search(image_results, query):
    """
    Build the list of images to show for ``query``.

    The list always starts with three randomly chosen fallback images, then
    is topped up to at most five entries with search results whose
    ``image_url`` is non-empty and starts with "http". Each entry is a dict
    with the keys: url, title, source.
    """
    max_images = 5
    # Start with stable fallback images so at least 3 are always present
    valid_urls = []
    for slot in range(1, 4):
        valid_urls.append({
            'url': get_random_fallback_image(),
            'title': f"Related to: {query} ({slot})",
            'source': "https://www.pexels.com/"
        })
    # Keep only search hits that carry a usable http(s) URL
    usable = []
    for hit in image_results or []:
        candidate = hit.get('image_url')
        if not candidate or not candidate.startswith('http'):
            continue
        usable.append(hit)
    # Fill the remaining slots, in result order
    for hit in usable[:max_images - len(valid_urls)]:
        valid_urls.append({
            'url': hit.get('image_url'),
            'title': hit.get('title', f"Related to: {query}"),
            'source': hit.get('source_url', '')
        })
    return valid_urls
def extract_video_data_from_search(video_results):
    """
    Keep the video search results that have a usable link.

    A result is kept when its ``video_url`` is non-empty and starts with
    "http". Each kept result becomes a dict with the keys: url, title,
    thumbnail, source. Returns ``[]`` for empty or missing input.
    """
    playable = []
    for entry in video_results or []:
        link = entry.get('video_url')
        if not link or not link.startswith('http'):
            continue
        playable.append({
            'url': link,
            'title': entry.get('title', 'Video'),
            'thumbnail': entry.get('thumbnail_url', ''),
            'source': entry.get('source', 'Video source')
        })
    return playable
def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
    """
    Generate an image for ``prompt`` through the remote Gradio endpoint.

    Args:
        prompt: text prompt; an empty prompt short-circuits without a call.
        w, h: requested image width and height.
        g: guidance value.
        steps: number of inference steps.
        seed: seed passed to the endpoint.

    Returns:
        ``(image, "Seed: <n>")`` on success, where both values come from the
        endpoint's response; ``(None, message)`` when the prompt is empty or
        the call fails.
    """
    if not prompt:
        return None, "Insufficient prompt"
    request = dict(
        prompt=prompt, width=w, height=h, guidance=g,
        inference_steps=steps, seed=seed,
        do_img2img=False, init_image=None,
        image2image_strength=0.8, resize_img=True,
        api_name="/generate_image",
    )
    try:
        output = Client(IMAGE_API_URL).predict(**request)
        image, used_seed = output[0], output[1]
        return image, f"Seed: {used_seed}"
    except Exception as e:
        logging.error(e)
        return None, str(e)
def extract_image_prompt(response_text: str, topic: str):
    """
    Ask the chat model for a one-line English image prompt that matches the
    given response text and topic.

    If the API call fails, the error is logged and a generic prompt built
    from ``topic`` is returned instead.
    """
    client = get_openai_client()
    chat_messages = [
        {"role": "system", "content": "Generate a single-line English image prompt from the following text. Return only the prompt text, nothing else."},
        {"role": "user", "content": f"Topic: {topic}\n\n---\n{response_text}\n\n---"},
    ]
    try:
        completion = client.chat.completions.create(
            model="gpt-4.1-mini",
            messages=chat_messages,
            temperature=1,
            max_tokens=80,
            top_p=1,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content.strip()
    except Exception as e:
        logging.error(f"OpenAI image prompt generation error: {e}")
        return f"A professional photo related to {topic}, high quality"
def md_to_html(md: str, title="Perplexity-like Response"):
    """
    Convert Markdown to a standalone HTML document.

    Args:
        md: markdown source rendered into the document body.
        title: text for the ``<title>`` element. It is HTML-escaped because
            callers derive it from model output, which may contain ``<``,
            ``>`` or ``&``.

    Returns:
        The complete HTML document as a string.
    """
    from html import escape  # local import keeps this block self-contained

    safe_title = escape(str(title))
    return f"<!DOCTYPE html><html><head><title>{safe_title}</title><meta charset='utf-8'></head><body>{markdown.markdown(md)}</body></html>"
def keywords(text: str, top=5):
    """
    Return the first ``top`` words of ``text`` joined by single spaces.

    Every character that is not a Hangul syllable, an ASCII letter, a digit
    or whitespace is removed before the text is split into words.
    """
    disallowed = re.compile(r"[^가-힣a-zA-Z0-9\s]")
    tokens = disallowed.sub("", text).split()
    leading = tokens[:top]
    return " ".join(leading)
+# ──────────────────────────────── Streamlit UI ────────────────────────────
+def perplexity_app():
+    st.title("Perplexity-like AI Assistant")
+    # Set default session state
+    if "ai_model" not in st.session_state:
+        st.session_state.ai_model = "gpt-4.1-mini"  # 고정 모델 설정
+    if "messages" not in st.session_state:
+        st.session_state.messages = []
+    if "auto_save" not in st.session_state:
+        st.session_state.auto_save = True
+    if "generate_image" not in st.session_state:
+        st.session_state.generate_image = False
+    if "web_search_enabled" not in st.session_state:
+        st.session_state.web_search_enabled = True
+    if "search_mode" not in st.session_state:
+        st.session_state.search_mode = "comprehensive"
+    if "response_style" not in st.session_state:
+        st.session_state.response_style = "professional"
+    # Sidebar UI
+    sb = st.sidebar
+    sb.title("Search Settings")
+    sb.subheader("Response Configuration")
+    sb.selectbox(
+        "Search Mode",
+        options=list(SEARCH_MODES.keys()),
+        format_func=lambda x: SEARCH_MODES[x],
+        key="search_mode"
+    )
+    sb.selectbox(
+        "Response Style",
+        options=list(RESPONSE_STYLES.keys()),
+        format_func=lambda x: RESPONSE_STYLES[x],
+        key="response_style"
+    )
+    # Example queries
+    sb.subheader("Example Queries")
+    c1, c2, c3 = sb.columns(3)
+    if c1.button("Quantum Computing", key="ex1"):
+        process_example(EXAMPLE_QUERIES["example1"])
+    if c2.button("Climate Change", key="ex2"):
+        process_example(EXAMPLE_QUERIES["example2"])
+    if c3.button("AI Economics", key="ex3"):
+        process_example(EXAMPLE_QUERIES["example3"])
+    sb.subheader("Other Settings")
+    sb.toggle("Auto Save", key="auto_save")
+    sb.toggle("Auto Image Generation", key="generate_image")
+    web_search_enabled = sb.toggle("Use Web Search", value=st.session_state.web_search_enabled)
+    st.session_state.web_search_enabled = web_search_enabled
+    if web_search_enabled:
+        st.sidebar.info("✅ Web search results will be integrated into the response.")
+    # Download the latest response
+    latest_response = next(
+        (m["content"] for m in reversed(st.session_state.messages)
+         if m["role"] == "assistant" and m["content"].strip()),
+        None
+    )
+    if latest_response:
+        # Extract a title from the response - first heading or first line
+        title_match = re.search(r"# (.*?)(\n|$)", latest_response)
+        if title_match:
+            title = title_match.group(1).strip()
+        else:
+            first_line = latest_response.split('\n', 1)[0].strip()
+            title = first_line[:40] + "..." if len(first_line) > 40 else first_line
+        sb.subheader("Download Latest Response")
+        d1, d2 = sb.columns(2)
+        d1.download_button("Download as Markdown", latest_response,
+                           file_name=f"{title}.md", mime="text/markdown")
+        d2.download_button("Download as HTML", md_to_html(latest_response, title),
+                           file_name=f"{title}.html", mime="text/html")
+    # JSON conversation record upload
+    up = sb.file_uploader("Load Conversation History (.json)", type=["json"], key="json_uploader")
+    if up:
+        try:
+            st.session_state.messages = json.load(up)
+            sb.success("Conversation history loaded successfully")
+        except Exception as e:
+            sb.error(f"Failed to load: {e}")
+    # JSON conversation record download
+    if sb.button("Download Conversation as JSON"):
+        sb.download_button(
+            "Save",
+            data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
+            file_name="conversation_history.json",
+            mime="application/json"
+        )
+    # File Upload
+    st.subheader("Upload Files")
+    uploaded_files = st.file_uploader(
+        "Upload files to be used as reference (txt, csv, pdf)",
+        type=["txt", "csv", "pdf"],
+        accept_multiple_files=True,
+        key="file_uploader"
+    )
+    if uploaded_files:
+        file_count = len(uploaded_files)
+        st.success(f"{file_count} files uploaded. They will be used as sources for your query.")
+        with st.expander("Preview Uploaded Files", expanded=False):
+            for idx, file in enumerate(uploaded_files):
+                st.write(f"**File Name:** {file.name}")
+                ext = file.name.split('.')[-1].lower()
+                if ext == 'txt':
+                    preview = file.read(1000).decode('utf-8', errors='ignore')
+                    file.seek(0)
+                    st.text_area(
+                        f"Preview of {file.name}",
+                        preview + ("..." if len(preview) >= 1000 else ""),
+                        height=150
+                    )
+                elif ext == 'csv':
+                    try:
+                        df = pd.read_csv(file)
+                        file.seek(0)
+                        st.write("CSV Preview (up to 5 rows)")
+                        st.dataframe(df.head(5))
+                    except Exception as e:
+                        st.error(f"CSV preview failed: {e}")
+                elif ext == 'pdf':
+                    try:
+                        file_bytes = file.read()
+                        file.seek(0)
+                        pdf_file = io.BytesIO(file_bytes)
+                        reader = PyPDF2.PdfReader(pdf_file, strict=False)
+                        pc = len(reader.pages)
+                        st.write(f"PDF File: {pc} pages")
+                        if pc > 0:
+                            try:
+                                page_text = reader.pages[0].extract_text()
+                                preview = page_text[:500] if page_text else "(No text extracted)"
+                                st.text_area("Preview of the first page", preview + "...", height=150)
+                            except:
+                                st.warning("Failed to extract text from the first page")
+                    except Exception as e:
+                        st.error(f"PDF preview failed: {e}")
+                if idx < file_count - 1:
+                    st.divider()
+    # Display existing messages
+    for m in st.session_state.messages:
+        with st.chat_message(m["role"]):
+            # Process markdown to allow clickable links and properly rendered content
+            st.markdown(m["content"], unsafe_allow_html=True)
+            # Display images if present
+            if "images" in m and m["images"]:
+                st.subheader("Related Images")
+                cols = st.columns(min(3, len(m["images"])))
+                for i, img_data in enumerate(m["images"]):
+                    col_idx = i % len(cols)
+                    with cols[col_idx]:
+                        try:
+                            img_url = img_data.get('url', '')
+                            caption = img_data.get('title', 'Related image')
+                            if img_url:
+                                st.image(img_url, caption=caption, use_column_width=True)
+                                if img_data.get('source'):
+                                    st.markdown(f"[Source]({img_data['source']})")
+                        except Exception as img_err:
+                            st.warning(f"Could not display image: {img_err}")
+            # Display videos if present
+            if "videos" in m and m["videos"]:
+                st.subheader("Related Videos")
+                for video in m["videos"]:
+                    video_title = video.get('title', 'Related video')
+                    video_url = video.get('url', '')
+                    thumbnail = video.get('thumbnail', '')
+                    # Display video information with thumbnail if available
+                    if thumbnail:
+                        col1, col2 = st.columns([1, 3])
+                        with col1:
+                            try:
+                                st.image(thumbnail, width=120)
+                            except:
+                                st.write("🎬")
+                        with col2:
+                            st.markdown(f"**[{video_title}]({video_url})**")
+                            st.write(f"Source: {video.get('source', 'Unknown')}")
+                    else:
+                        st.markdown(f"🎬 **[{video_title}]({video_url})**")
+                        st.write(f"Source: {video.get('source', 'Unknown')}")
+    # User input
+    query = st.chat_input("Enter your query or question here.")
+    if query:
+        process_input(query, uploaded_files)
+    # 사이드바 하단 배지(링크) 추가
+    sb.markdown("---")
+    sb.markdown("Created by [https://ginigen.com](https://ginigen.com) | [YouTube Channel](https://www.youtube.com/@ginipickaistudio)")
+def process_example(topic):
+    """Process the selected example query."""
+    process_input(topic, [])
+def process_input(query: str, uploaded_files):
+    # Add user's message
+    if not any(m["role"] == "user" and m["content"] == query for m in st.session_state.messages):
+        st.session_state.messages.append({"role": "user", "content": query})
+    with st.chat_message("user"):
+        st.markdown(query)
+    with st.chat_message("assistant"):
+        placeholder = st.empty()
+        message_placeholder = st.empty()
+        full_response = ""
+        use_web_search = st.session_state.web_search_enabled
+        has_uploaded_files = bool(uploaded_files) and len(uploaded_files) > 0
+        try:
+            # 상태 표시를 위한 상태 컴포넌트
+            status = st.status("Preparing to answer your query...")
+            status.update(label="Initializing client...")
+            client = get_openai_client()
+            # Web search
+            search_content = None
+            image_results = []
+            video_results = []
+            news_results = []
+            if use_web_search:
+                status.update(label="Performing web search...")
+                with st.spinner("Searching the web..."):
+                    search_content = do_web_search(keywords(query, top=5))
+                # Perform specific searches for media
+                try:
+                    status.update(label="Finding images and videos...")
+                    image_results = brave_image_search(query, 5)
+                    video_results = brave_video_search(query, 2)
+                    news_results = brave_news_search(query, 3)
+                except Exception as search_err:
+                    logging.error(f"Media search error: {search_err}")
+            # Process uploaded files → content
+            file_content = None
+            if has_uploaded_files:
+                status.update(label="Processing uploaded files...")
+                with st.spinner("Analyzing files..."):
+                    file_content = process_uploaded_files(uploaded_files)
+            # Extract usable image and video data with fallbacks
+            valid_images = extract_image_urls_from_search(image_results, query)
+            valid_videos = extract_video_data_from_search(video_results)
+            # Build system prompt
+            status.update(label="Preparing comprehensive answer...")
+            sys_prompt = get_system_prompt(
+                mode=st.session_state.search_mode,
+                style=st.session_state.response_style,
+                include_search_results=use_web_search,
+                include_uploaded_files=has_uploaded_files
+            )
+            # OpenAI API 호출 준비
+            status.update(label="Generating response...")
+            # 메시지 구성
+            api_messages = [
+                {"role": "system", "content": sys_prompt}
+            ]
+            user_content = query
+            # 검색 결과가 있으면 사용자 프롬프트에 추가
+            if search_content:
+                user_content += "\n\n" + search_content
+            # 파일 내용이 있으면 사용자 프롬프트에 추가
+            if file_content:
+                user_content += "\n\n" + file_content
+            # Include specific image information
+            if valid_images:
+                user_content += "\n\n# Available Images\n"
+                for i, img in enumerate(valid_images[:5]):
+                    user_content += f"\n{i+1}. ![{img['title']}]({img['url']})\n"
+                    if img['source']:
+                        user_content += f"   Source: {img['source']}\n"
+            # Include specific video information
+            if valid_videos:
+                user_content += "\n\n# Available Videos\n"
+                for i, vid in enumerate(valid_videos[:2]):
+                    user_content += f"\n{i+1}. **{vid['title']}** - [{vid['source']}]({vid['url']})\n"
+            # 사용자 메시지 추가
+            api_messages.append({"role": "user", "content": user_content})
+            # OpenAI API 스트리밍 호출 - 고정 모델 "gpt-4.1-mini" 사용
+            try:
+                # 스트리밍 방식으로 API 호출
+                stream = client.chat.completions.create(
+                    model="gpt-4.1-mini",  # 고정 모델 사용
+                    messages=api_messages,
+                    temperature=1,
+                    max_tokens=MAX_TOKENS,
+                    top_p=1,
+                    stream=True  # 스트리밍 활��화
+                )
+                # 스트리밍 응답 처리
+                for chunk in stream:
+                    if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None:
+                        content_delta = chunk.choices[0].delta.content
+                        full_response += content_delta
+                        message_placeholder.markdown(full_response + "▌", unsafe_allow_html=True)
+                # 최종 응답 표시 (커서 제거)
+                message_placeholder.markdown(full_response, unsafe_allow_html=True)
+                # Display related images if available
+                if valid_images:
+                    st.subheader("Related Images")
+                    image_cols = st.columns(min(3, len(valid_images)))
+                    for i, img_data in enumerate(valid_images):
+                        col_idx = i % len(image_cols)
+                        try:
+                            with image_cols[col_idx]:
+                                # 이미지 URL 체크
+                                img_url = img_data['url']
+                                caption = img_data['title']
+                                try:
+                                    # 이미지 표시 시도
+                                    st.image(img_url, caption=caption, use_column_width=True)
+                                    if img_data.get('source'):
+                                        st.markdown(f"[Source]({img_data['source']})")
+                                except Exception as img_err:
+                                    # 실패 시 대체 이미지 (Pexels 안정적인 이미지)
+                                    st.image(get_random_fallback_image(),
+                                             caption=f"{caption} (Fallback image)",
+                                             use_column_width=True)
+                                    st.markdown("[Source: Pexels](https://www.pexels.com/)")
+                                    logging.warning(f"Using fallback image: {img_err}")
+                        except Exception as col_err:
+                            logging.error(f"Error displaying image in column: {col_err}")
+                            continue
+                # Display related videos if available
+                if valid_videos:
+                    st.subheader("Related Videos")
+                    for video in valid_videos:
+                        video_title = video.get('title', 'Related video')
+                        video_url = video.get('url', '')
+                        thumbnail = video.get('thumbnail', '')
+                        # Display video information with thumbnail if available
+                        if thumbnail:
+                            try:
+                                col1, col2 = st.columns([1, 3])
+                                with col1:
+                                    try:
+                                        st.image(thumbnail, width=120)
+                                    except:
+                                        st.write("🎬")
+                                with col2:
+                                    st.markdown(f"**[{video_title}]({video_url})**")
+                                    st.write(f"Source: {video.get('source', 'Unknown')}")
+                            except Exception as vid_err:
+                                # 오류시 기본 형식으로 표시
+                                st.markdown(f"🎬 **[{video_title}]({video_url})**")
+                                st.write(f"Source: {video.get('source', 'Unknown')}")
+                        else:
+                            st.markdown(f"🎬 **[{video_title}]({video_url})**")
+                            st.write(f"Source: {video.get('source', 'Unknown')}")
+                status.update(label="Response completed!", state="complete")
+                # Save the response with images and videos in the session state
+                st.session_state.messages.append({
+                    "role": "assistant",
+                    "content": full_response,
+                    "images": valid_images,
+                    "videos": valid_videos
+                })
+            except Exception as api_error:
+                error_message = str(api_error)
+                logging.error(f"API error: {error_message}")
+                status.update(label=f"Error: {error_message}", state="error")
+                raise Exception(f"Response generation error: {error_message}")
+            # Additional image generation if enabled
+            if st.session_state.generate_image and full_response:
+                with st.spinner("Generating custom image..."):
+                    try:
+                        ip = extract_image_prompt(full_response, query)
+                        img, cap = generate_image(ip)
+                        if img:
+                            st.subheader("AI-Generated Image")
+                            st.image(img, caption=cap)
+                    except Exception as img_error:
+                        logging.error(f"Image generation error: {str(img_error)}")
+                        st.warning("Custom image generation failed. Using web images only.")
+            # Download buttons
+            if full_response:
+                st.subheader("Download This Response")
+                c1, c2 = st.columns(2)
+                c1.download_button(
+                    "Markdown",
+                    data=full_response,
+                    file_name=f"{query[:30]}.md",
+                    mime="text/markdown"
+                )
+                c2.download_button(
+                    "HTML",
+                    data=md_to_html(full_response, query[:30]),
+                    file_name=f"{query[:30]}.html",
+                    mime="text/html"
+                )
+            # Auto save
+            if st.session_state.auto_save and st.session_state.messages:
+                try:
+                    fn = f"conversation_history_auto_{datetime.now():%Y%m%d_%H%M%S}.json"
+                    with open(fn, "w", encoding="utf-8") as fp:
+                        json.dump(st.session_state.messages, fp, ensure_ascii=False, indent=2)
+                except Exception as e:
+                    logging.error(f"Auto-save failed: {e}")
+        except Exception as e:
+            error_message = str(e)
+            placeholder.error(f"An error occurred: {error_message}")
+            logging.error(f"Process input error: {error_message}")
+            ans = f"An error occurred while processing your request: {error_message}"
+            st.session_state.messages.append({"role": "assistant", "content": ans})
+# ──────────────────────────────── main ────────────────────────────────────
+def main():
+    perplexity_app()
+if __name__ == "__main__":
+    main()