# GradLLM / app.py
# Hugging Face Space by johnbridges — "trying a openai over rabbitmq test"
# (commit 15d27ef, ~1.92 kB)
# app.py
import asyncio
import logging
import gradio as gr
from config import settings
from openai_server import ChatCompletionsServer, ImagesServer
# Root logger config for the whole process: timestamped, level-tagged records
# (module name included so MQ-server logs are distinguishable from app logs).
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")
log = logging.getLogger("app")
# Probe for Hugging Face Spaces ZeroGPU support.  The `spaces` package only
# exists inside a Space runtime; off-Spaces (or if the decorator itself fails
# at import time) we fall back to a CPU-only stub with the same signature.
try:
    import spaces

    @spaces.GPU(duration=60)
    def gpu_entrypoint() -> str:
        """Return a readiness marker; executes on GPU when ZeroGPU allocates one."""
        return "gpu: ready"
except Exception:
    # Previously this fallback was silent; record why the GPU probe is degraded
    # so the condition is visible in the startup logs.
    logging.getLogger("app").info("`spaces` unavailable; GPU probe will run on CPU")

    def gpu_entrypoint() -> str:
        """CPU-only fallback probe used when ZeroGPU is not available."""
        return "gpu: not available (CPU only)"
# Module-level MQ server instances, one per OpenAI-style operation, each bound
# to its own exchange on the broker at settings.AMQP_URL.  They are constructed
# here but only started by _startup_init() once the UI loads.
chat_srv = ChatCompletionsServer(settings.AMQP_URL, exchange_name="oa.chat.create", routing_key="default")
img_srv = ImagesServer(settings.AMQP_URL, exchange_name="oa.images.generate", routing_key="default")
async def _startup_init() -> str:
    """Start both MQ-backed servers concurrently.

    Returns a human-readable status string that is surfaced in the
    "Startup status" textbox; failures are logged with traceback and
    reported as an ``ERROR: ...`` string rather than raised.
    """
    try:
        await asyncio.gather(chat_srv.start(), img_srv.start())
    except Exception as e:
        log.exception("Startup init failed")
        return f"ERROR: {e}"
    return "OpenAI MQ servers: ready"
async def ping() -> str:
    """Liveness probe wired to the Service tab's Ping button."""
    reply = "ok"
    return reply
# --- Gradio UI -------------------------------------------------------------
# Two tabs: "Service" (ping button + MQ startup status) and a ZeroGPU probe.
with gr.Blocks(title="OpenAI over RabbitMQ", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## OpenAI-compatible server over RabbitMQ")
    with gr.Tabs():
        with gr.Tab("Service"):
            with gr.Row():
                btn = gr.Button("Ping")
                out = gr.Textbox(label="Ping result")
            # Echo ping()'s "ok" into the textbox on click.
            btn.click(ping, inputs=None, outputs=out)
            init_status = gr.Textbox(label="Startup status", interactive=False)
            # Start the MQ consumers when the page loads; show result or error text.
            demo.load(fn=_startup_init, inputs=None, outputs=init_status)
        with gr.Tab("@spaces.GPU Probe"):
            with gr.Row():
                gpu_btn = gr.Button("GPU Ready Probe", variant="primary")
                gpu_out = gr.Textbox(label="GPU Probe Result", interactive=False)
            # Reports "gpu: ready" or the CPU-only fallback message.
            gpu_btn.click(gpu_entrypoint, inputs=None, outputs=gpu_out)
if __name__ == "__main__":
    # Bind on all interfaces so the Space's reverse proxy can reach the app;
    # 7860 is the conventional Gradio/Spaces port.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        debug=True,
    )