```yaml
# docker-compose.yml: HAProxy round-robins requests across two CPU-pinned
# text-generation-inference replicas serving meta-llama/Llama-2-7b-chat-hf.
version: '3.8'

services:
  haproxy:
    image: haproxy:latest
    volumes:
      # load-balancer config, mounted read-only (a sketch of haproxy.cfg follows the file)
      - ./haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg:ro
    ports:
      - "7070:80"
    depends_on:
      - model1
      - model2
  model1:
    image: ghcr.io/huggingface/text-generation-inference:1.0
    command: --model-id meta-llama/Llama-2-7b-chat-hf --disable-custom-kernels --dtype bfloat16
    environment:
      # paste a Hugging Face access token here; the Llama 2 weights are gated
      - HUGGING_FACE_HUB_TOKEN=""
    volumes:
      # shared weight cache (the named volume declared at the bottom of the file)
      - data:/data
    cpus: '16'
    cpuset: '0-15'          # first replica pinned to cores 0-15
    mem_limit: 48g
    privileged: true
  model2:
    image: ghcr.io/huggingface/text-generation-inference:1.0
    command: --model-id meta-llama/Llama-2-7b-chat-hf --disable-custom-kernels --dtype bfloat16
    environment:
      - HUGGING_FACE_HUB_TOKEN=""
    volumes:
      - data:/data
    cpus: '16'
    cpuset: '16-31'         # second replica gets its own cores; assumes at least 32 cores on the host
    mem_limit: 48g
    privileged: true
volumes:
  data:
    external: true
```
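The compose file mounts `./haproxy.cfg` into the load balancer, but the file itself is not shown above. Here is a minimal sketch, assuming plain HTTP round-robin across the two replicas (TGI listens on port 80 inside its container); the section names and timeout values are illustrative, not taken from the original:

```
# haproxy.cfg (sketch): round-robin across the two TGI replicas
defaults
    mode http
    timeout connect 5s
    timeout client  300s    # generation requests can be slow; tune to taste
    timeout server  300s

frontend tgi_front
    bind *:80                       # container port 80, published on the host as 7070
    default_backend tgi_models

backend tgi_models
    balance roundrobin
    server model1 model1:80 check   # Docker's internal DNS resolves the service names
    server model2 model2:80 check
```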
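Bring-up and a quick smoke test might look like the following. The prompt and generation parameters are placeholders; `/generate` is the standard text-generation-inference endpoint, reached here through HAProxy on the published port:

```
# the data volume is declared as external, so create it once up front
docker volume create data

docker compose up -d

# the replicas need time to download and load the model before they pass health checks
curl http://localhost:7070/generate \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{"inputs": "What is load balancing?", "parameters": {"max_new_tokens": 64}}'
```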