Alan Liu committed · Commit 989cd20 · 1 Parent(s): 3698d0a
fix bug
app.py
CHANGED
@@ -132,7 +132,7 @@ with col2:
 with col3: # Prefilling
     prefilling_operation_count = prefilling_operation(model_config, inference_config)
     inference_info['inference_prefilling_time'] = prefilling_operation_count['total'] / (gpu_config['TFLOP']*10**12)
-    inference_info['inference_prefilling_throughput'] = inference_config['input_seq_length']/inference_info['inference_prefilling_time']
+    inference_info['inference_prefilling_throughput'] = inference_config['input_seq_length']*inference_config['batchsize']/inference_info['inference_prefilling_time']
     cached_parameter_count['kv_cache'] = 2 * (inference_config['batchsize'] * (model_config['hidden_size'] * model_config['num_hidden_layers'] * inference_config['input_seq_length']))

     operation_items = {key: "{:,}".format(int(prefilling_operation_count[key])) for key in prefilling_operation_count if key not in subtotal_operations}
@@ -157,8 +157,8 @@ with col3: # Prefilling
 with col4: # Prefilling
     generation_operation_count = generation_operation(model_config, inference_config)
     inference_info['inference_generation_time'] = generation_operation_count['total'] / (gpu_config['TFLOP']*10**12)
-    inference_info['inference_generation_throughput'] = inference_config['output_seq_length']/inference_info['inference_generation_time']
-    inference_info['inference_client_generation_throughput'] = inference_config['output_seq_length'] / (inference_info['inference_prefilling_time'] + inference_info['inference_generation_time'])
+    inference_info['inference_generation_throughput'] = inference_config['output_seq_length']*inference_config['batchsize']/inference_info['inference_generation_time']
+    inference_info['inference_client_generation_throughput'] = inference_config['output_seq_length']*inference_config['batchsize'] / (inference_info['inference_prefilling_time'] + inference_info['inference_generation_time'])
     cached_parameter_count['kv_cache'] = 2 * (inference_config['batchsize'] * (model_config['hidden_size'] * model_config['num_hidden_layers'] * (inference_config['input_seq_length']+inference_config['output_seq_length'])))

     operation_items = {key: "{:,}".format(int(generation_operation_count[key])) for key in generation_operation_count if key not in subtotal_operations}
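
The fix multiplies each throughput by inference_config['batchsize'], so the reported tokens/s counts tokens across the whole batch rather than a single sequence. A minimal standalone sketch of the corrected math (the function names and example numbers below are illustrative, not part of app.py):

def prefilling_throughput(input_seq_length, batchsize, prefilling_time_s):
    # Tokens prefilled across the whole batch, divided by prefill time.
    return input_seq_length * batchsize / prefilling_time_s

def generation_throughput(output_seq_length, batchsize, generation_time_s):
    # Tokens generated across the whole batch, divided by generation time.
    return output_seq_length * batchsize / generation_time_s

def client_generation_throughput(output_seq_length, batchsize,
                                 prefilling_time_s, generation_time_s):
    # Client-visible throughput: the prefill latency is amortized over
    # the generated tokens of the whole batch.
    return output_seq_length * batchsize / (prefilling_time_s + generation_time_s)

# Example: batch of 8, 512 output tokens, 0.2 s prefill, 4.0 s generation.
print(generation_throughput(512, 8, 4.0))               # 1024.0 tokens/s
print(client_generation_throughput(512, 8, 0.2, 4.0))   # ~975.2 tokens/s

With the old formulas the same inputs would report 128.0 and ~121.9 tokens/s, i.e. per-sequence rather than per-batch throughput.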