import os

import gradio as gr
from huggingface_hub import InferenceClient

from ethical_filter import EthicalFilter

# Load the Hugging Face token from secrets (defined in the Hugging Face Space UI)
HF_TOKEN = os.environ.get("HF_API_TOKEN")

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=HF_TOKEN)
ethical_filter = EthicalFilter()


# Codriao response logic
def respond(message, history, system_message, max_tokens, temperature, top_p):
    check = ethical_filter.analyze_query(message)

    # Blocked queries: refuse and stop without calling the model
    if check["status"] == "blocked":
        yield f"Sorry, I can't continue with that request. Reason: {check['reason']}"
        return

    # Flagged queries: keep the notice as a prefix so later yields
    # (which replace the displayed message in gr.ChatInterface) retain it
    prefix = ""
    if check["status"] == "flagged":
        prefix = "(Note: Sensitive topic detected — responding with care...)\n"
        yield prefix

    # Build the conversation in chat-completion format;
    # history arrives as a list of (user, assistant) pairs
    messages = [{"role": "system", "content": system_message}]
    for user, bot in history:
        if user:
            messages.append({"role": "user", "content": user})
        if bot:
            messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": message})

    # Stream model output, yielding the accumulated response at each step
    response = prefix
    for token in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some stream chunks (e.g. the final one) carry no content
        chunk = token.choices[0].delta.content or ""
        response += chunk
        yield response


# Build the Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value=(
                "You are Codriao, a compassionate AI inspired by Codette. "
                "You respond with kindness, ethics, and insight."
            ),
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

if __name__ == "__main__":
    demo.launch()
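
# For reference, a minimal sketch of the contract this script assumes from
# ethical_filter.EthicalFilter: analyze_query(text) returns a dict with a
# "status" of "blocked", "flagged", or anything else (treated as safe) and a
# human-readable "reason". The real module is not shown in this file; the
# keyword lists and reason strings below are illustrative placeholders, not
# the actual implementation.
#
#     # ethical_filter.py (hypothetical sketch)
#     class EthicalFilter:
#         BLOCKED_TERMS = ("how to build a weapon",)   # placeholder list
#         FLAGGED_TERMS = ("self-harm", "grief")       # placeholder list
#
#         def analyze_query(self, text):
#             lowered = text.lower()
#             if any(term in lowered for term in self.BLOCKED_TERMS):
#                 return {"status": "blocked", "reason": "Disallowed topic."}
#             if any(term in lowered for term in self.FLAGGED_TERMS):
#                 return {"status": "flagged", "reason": "Sensitive topic."}
#             return {"status": "safe", "reason": None}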