Update README.md
README.md
@@ -305,7 +305,7 @@ python benchmarks/benchmark_latency.py --input-len 256 --output-len 256 --model
 
 ### float8dq
 ```Shell
-python benchmarks/benchmark_latency.py --input-len 256 --output-len 256 --model pytorch/Phi-4-mini-instruct-float8dq --batch-size 1
+VLLM_DISABLE_COMPILE_CACHE=1 python benchmarks/benchmark_latency.py --input-len 256 --output-len 256 --model pytorch/Phi-4-mini-instruct-float8dq --batch-size 1
 ```
 
 ## benchmark_serving
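The `VLLM_DISABLE_COMPILE_CACHE=1` prefix added above sets the variable for that single invocation only. A minimal sketch of an equivalent session-wide form follows; the `export` variant is an assumption of convenience, not what the README line itself uses.

```Shell
# Sketch only: exporting the variable once per shell session is assumed to be
# equivalent to prefixing each command inline, as the updated README line does.
export VLLM_DISABLE_COMPILE_CACHE=1
python benchmarks/benchmark_latency.py --input-len 256 --output-len 256 --model pytorch/Phi-4-mini-instruct-float8dq --batch-size 1
```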
@@ -333,7 +333,7 @@ vllm serve pytorch/Phi-4-mini-instruct-float8dq --tokenizer microsoft/Phi-4-mini
 
 Client:
 ```Shell
-python benchmarks/benchmark_serving.py --backend vllm --dataset-name sharegpt --tokenizer microsoft/Phi-4-mini-instruct --dataset-path ./ShareGPT_V3_unfiltered_cleaned_split.json --model jerryzh168/phi4-mini-float8dq --num-prompts 1
+VLLM_DISABLE_COMPILE_CACHE=1 python benchmarks/benchmark_serving.py --backend vllm --dataset-name sharegpt --tokenizer microsoft/Phi-4-mini-instruct --dataset-path ./ShareGPT_V3_unfiltered_cleaned_split.json --model jerryzh168/phi4-mini-float8dq --num-prompts 1
 ```
 
 # Disclaimer
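For context, the client command above pairs with the `vllm serve` command shown truncated in the hunk header. A minimal two-terminal sketch follows; the server's full `--tokenizer` value is an assumption here (completed to match the client's `microsoft/Phi-4-mini-instruct`), and this diff does not show whether the serve command also carries the env-var prefix.

```Shell
# Terminal 1: start the server. The full --tokenizer value is assumed;
# the hunk header above truncates it.
vllm serve pytorch/Phi-4-mini-instruct-float8dq --tokenizer microsoft/Phi-4-mini-instruct

# Terminal 2: run the client exactly as in the updated README line.
VLLM_DISABLE_COMPILE_CACHE=1 python benchmarks/benchmark_serving.py --backend vllm --dataset-name sharegpt --tokenizer microsoft/Phi-4-mini-instruct --dataset-path ./ShareGPT_V3_unfiltered_cleaned_split.json --model jerryzh168/phi4-mini-float8dq --num-prompts 1
```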