|
|
|
|
|
from dash import Dash, html, dcc, Input, Output |
|
|
import pandas as pd |
|
|
import plotly.express as px |
|
|
from graphs.model_market_share import create_plotly_stacked_area_chart, create_plotly_world_map, create_plotly_range_slider, create_leaderboard |
|
|
from graphs.model_characteristics import create_plotly_language_concentration_chart, create_plotly_publication_curves_with_legend |
|
|
|
|
|
|
|
|
# Dash application object; the layout and every callback in this module are
# attached to it.
app = Dash()

# Underlying server object, exposed at module level.
# NOTE(review): presumably so a WSGI runner can import ``server`` directly --
# confirm against the deployment configuration.
server = app.server
|
|
|
|
|
|
|
|
# Pre-computed data frames, unpickled once at import time.  Paths are relative
# to the current working directory, so the app must be started from the
# directory that contains ``data_frames/``.
# NOTE(review): unpickling can execute arbitrary code; these files must only
# ever come from a trusted build step.

# Inputs of the stacked-area chart.
model_topk_df = pd.read_pickle("data_frames/model_topk_df.pkl")
model_gini_df = pd.read_pickle("data_frames/model_gini_df.pkl")
model_hhi_df = pd.read_pickle("data_frames/model_hhi_df.pkl")

# Inputs of the "Model Characteristics" charts (language, license, method,
# architecture).
language_concentration_df = pd.read_pickle("data_frames/language_concentration_df.pkl")
license_concentration_df = pd.read_pickle("data_frames/download_license_cumsum_df.pkl")
download_method_cumsum_df = pd.read_pickle("data_frames/download_method_cumsum_df.pkl")
download_arch_cumsum_df = pd.read_pickle("data_frames/download_arch_cumsum_df.pkl")

# Loaded but not referenced anywhere else in this module.
nat_topk_df = pd.read_pickle("data_frames/nat_topk_df.pkl")

# Inputs of the world map, the leaderboard and the time-range slider.
country_concentration_df = pd.read_pickle("data_frames/country_concentration_df.pkl")
author_concentration_df = pd.read_pickle("data_frames/author_concentration_df.pkl")
model_concentration_df = pd.read_pickle("data_frames/model_concentration_df.pkl")
|
|
|
|
|
|
|
|
# Model name -> event date (ISO ``YYYY-MM-DD`` string).  Passed to the
# stacked-area chart builder.
# NOTE(review): the ``TEMP_`` prefix suggests this hard-coded table is a
# placeholder -- confirm whether it should come from data instead.
TEMP_MODEL_EVENTS = {
    # 2024
    "Llama 3": "2024-04-17",
    "Stable Cascade": "2024-02-02",
    "Stable Diffusion 3": "2024-05-30",
    # 2025
    "DeepSeek-R1": "2025-01-20",
    "Gemma-3 12B QAT": "2025-04-15",
    # 2021-2023
    "DALLE2-PyTorch": "2022-06-25",
    "Stable Diffusion": "2022-08-10",
    "CLIP ViT": "2021-01-05",
    "YOLOv8": "2023-04-26",
    "Sentence Transformer MiniLM v2": "2021-08-30",
}
|
|
|
|
|
# Five-colour palette (hex RGB strings) handed to the chart builders below.
PALETTE_0 = [
    "#335C67",  # dark teal
    "#FFF3B0",  # pale yellow
    "#E09F3E",  # amber
    "#9E2A2B",  # red
    "#540B0E",  # dark maroon
]
|
|
|
|
|
# Default stacked-area figure built without a time filter; the
# 'stacked-area-chart' callback returns it when no range is selected.
fig = create_plotly_stacked_area_chart(
    model_topk_df,
    model_gini_df,
    model_hhi_df,
    TEMP_MODEL_EVENTS,
    PALETTE_0,
)
|
|
|
|
|
# Segment labels for the language chart, in the order handed to the builder.
LANG_SEGMENT_ORDER = [
    'Monolingual: EN',
    'Monolingual: HR',
    'Monolingual: M/LR',
    'Multilingual: HR',
    'Multilingual',
    'Unknown',
]

# Language concentration figure; shown for the 'Language Concentration'
# dropdown entry.  The three strings name columns of the data frame.
fig2 = create_plotly_language_concentration_chart(
    language_concentration_df,
    'time',
    'metric',
    'value',
    LANG_SEGMENT_ORDER,
    PALETTE_0,
)
|
|
|
|
|
# Segment labels for the license chart, in the order handed to the builder.
LICENSE_SEGMENT_ORDER = [
    "Open Use",
    "Open Use (Acceptable Use Policy)",
    "Open Use (Non-Commercial Only)",
    "Attribution",
    "Acceptable Use Policy",
    "Non-Commercial Only",
    "Undocumented",
    "Undocumented (Acceptable Use Policy)",
]

# License figure; shown for the 'License' dropdown entry.  It reuses the
# language-concentration builder with the license frame's column names.
fig3 = create_plotly_language_concentration_chart(
    license_concentration_df,
    'period',
    'status',
    'percent',
    LICENSE_SEGMENT_ORDER,
    PALETTE_0,
)
|
|
|
|
|
# Plot options forwarded to the publication-curve builder for the method chart.
METHOD_PLOT_CHOICES = {
    "cumulative": "none",
    "y_col": "percent",
    "y_log": False,
    "period": "W",
}

# Method figure; shown for the 'Method' dropdown entry.
fig4 = create_plotly_publication_curves_with_legend(
    download_method_cumsum_df,
    METHOD_PLOT_CHOICES,
    PALETTE_0,
)
|
|
|
|
|
# Plot options forwarded to the publication-curve builder for the
# architecture chart (currently the same values as the method chart's options).
ARCHITECTURE_PLOT_CHOICES = {
    "cumulative": "none",
    "y_col": "percent",
    "y_log": False,
    "period": "W",
}

# Architecture figure; shown for the 'Architecture' dropdown entry.
fig5 = create_plotly_publication_curves_with_legend(
    download_arch_cumsum_df,
    ARCHITECTURE_PLOT_CHOICES,
    PALETTE_0,
)
|
|
|
|
|
# Default world map built without a time filter; returned by the
# 'world-map-with-slider' callback when no range is selected.
fig6 = create_plotly_world_map(
    country_concentration_df,
    "time",
    "metric",
    "value",
)

# Default leaderboard built without a time filter; returned by the
# 'leaderboard' callback when no range is selected.
fig7 = create_leaderboard(
    country_concentration_df,
    author_concentration_df,
    model_concentration_df,
)
|
|
|
|
|
# Range-slider figure built from the model top-k frame.  It is not placed in
# the layout defined in this module.
slider = create_plotly_range_slider(model_topk_df)

# Range-slider figure built from the country frame; rendered as the
# 'time-slider' graph whose relayout events drive the time-filtered callbacks.
slider2 = create_plotly_range_slider(country_concentration_df)
|
|
|
|
|
|
|
|
# Apply the dashboard font to every pre-built figure.
# ``fig7`` (the default leaderboard) is included so that it matches the
# leaderboards regenerated by the 'leaderboard' callback, which always get the
# same font applied before being returned.
for _prebuilt_figure in (fig, fig2, fig3, fig4, fig5, fig6, fig7, slider, slider2):
    _prebuilt_figure.update_layout(font_family="Inter")
# Do not leave the loop variable behind in the module namespace.
del _prebuilt_figure
|
|
|
|
|
|
|
|
def _build_layout():
    """Assemble the dashboard's component tree.

    The page is a centred title block followed by a white card holding two
    tabs: 'Model Market Share' (time slider, selected-range read-out,
    stacked-area chart, world map and leaderboard) and 'Model Characteristics'
    (one chart plus the dropdown that selects which figure it shows).
    """
    title_block = html.Div(
        [
            html.Div(
                children='Visualizing the Open Model Ecosystem',
                style={'fontSize': 28, 'fontWeight': 'bold', 'marginBottom': 6},
            ),
            html.Div(
                children='An interactive dashboard to explore trends in open models on Hugging Face',
                style={'fontSize': 16, 'marginBottom': 12},
            ),
            html.Hr(style={'marginTop': 8, 'marginBottom': 8}),
        ],
        style={'textAlign': 'center'},
    )

    # Text box filled in by the 'output-container-range-slider' callback.
    range_readout = html.Div(
        id='output-container-range-slider',
        style={
            'textAlign': 'center',
            'fontSize': 20,
            'marginBottom': 15,
            'marginTop': 30,
            'backgroundColor': 'white',
            'borderRadius': '12px',
            'boxShadow': '0 2px 12px rgba(0,0,0,0.10)',
            'padding': '18px',
            'display': 'inline-block',
        },
    )

    time_range_section = html.Div(
        [
            html.Div(
                children='Select time range to update all graphs below:',
                style={'fontSize': 16, 'marginBottom': 6, 'marginTop': 10},
            ),
            dcc.Graph(figure=slider2, id='time-slider', style={'height': '100px'}),
            range_readout,
        ],
        style={'marginBottom': 12, 'justifyContent': 'center', 'textAlign': 'center'},
    )

    # The graphs below carry no initial figure; their callbacks supply one.
    stacked_area_section = html.Div(
        [
            dcc.Graph(id='stacked-area-chart'),
        ],
        style={'marginBottom': 12},
    )

    geography_section = html.Div(
        [
            html.Div(
                dcc.Graph(id='world-map-with-slider'),
                style={'display': 'flex', 'justifyContent': 'center'},
            ),
            dcc.Graph(id='leaderboard'),
        ],
        style={'marginBottom': 12},
    )

    market_share_tab = dcc.Tab(
        label='Model Market Share',
        children=[
            time_range_section,
            stacked_area_section,
            geography_section,
        ],
    )

    characteristics_tab = dcc.Tab(
        label='Model Characteristics',
        children=[
            dcc.Graph(id='language-concentration-chart'),
            html.Div(
                [
                    dcc.Dropdown(
                        ['Language Concentration', 'Architecture', 'License', 'Method'],
                        'Language Concentration',
                        id='dropdown',
                    ),
                ],
                style={'marginTop': 6},
            ),
        ],
    )

    card = html.Div(
        [
            dcc.Tabs([market_share_tab, characteristics_tab]),
        ],
        style={
            'backgroundColor': 'white',
            'borderRadius': '18px',
            'boxShadow': '0 4px 24px rgba(0,0,0,0.10)',
            'padding': '32px',
            'margin': '32px auto',
            'maxWidth': '1250px',
        },
    )

    return html.Div(
        [title_block, card],
        style={'fontFamily': 'Inter', 'backgroundColor': '#f7f7fa', 'minHeight': '100vh'},
    )


app.layout = _build_layout()
|
|
|
|
|
@app.callback(
    Output('output-container-range-slider', 'children'),
    [Input('time-slider', 'relayoutData')]
)
def update_output(relayout_data):
    """Describe the currently selected time range as text.

    Args:
        relayout_data: ``relayoutData`` of the 'time-slider' graph; may be
            empty or ``None``.

    Returns:
        ``'Selected time range: <start> to <end>'`` with ``YYYY-MM-DD`` dates
        when both ``xaxis.range[0]`` and ``xaxis.range[1]`` are present,
        otherwise ``'Selected time range: All data'``.
    """
    bound_keys = ('xaxis.range[0]', 'xaxis.range[1]')
    if not relayout_data or any(key not in relayout_data for key in bound_keys):
        return 'Selected time range: All data'

    # Reduce each bound to a calendar day, dropping any time-of-day part.
    first_day, last_day = (
        pd.to_datetime(relayout_data[key]).strftime('%Y-%m-%d') for key in bound_keys
    )
    return f'Selected time range: {first_day} to {last_day}'
|
|
|
|
|
|
|
|
@app.callback(
    Output('language-concentration-chart', 'figure'),
    [Input('dropdown', 'value')]
)
def update_graph(selected_metric):
    """Return the pre-built figure matching the dropdown selection.

    Args:
        selected_metric: Current value of the 'dropdown' component. It is
            ``None`` when the user clears the dropdown.

    Returns:
        The figure for the selected entry, or ``dash.no_update`` when the
        value is ``None`` or not one of the known entries, so the chart keeps
        showing the last valid figure instead of being handed ``None``.
    """
    # Imported locally so this block does not depend on the file-level imports.
    from dash import no_update

    figures_by_choice = {
        'Language Concentration': fig2,
        'License': fig3,
        'Method': fig4,
        'Architecture': fig5,
    }
    return figures_by_choice.get(selected_metric, no_update)
|
|
|
|
|
@app.callback(
    Output('world-map-with-slider', 'figure'),
    [Input('time-slider', 'relayoutData')]
)
def update_map(relayout_data):
    """Return the world map for the selected time range.

    Args:
        relayout_data: ``relayoutData`` of the 'time-slider' graph; may be
            empty or ``None``.

    Returns:
        A freshly built world map limited to the selected range when both
        ``xaxis.range[0]`` and ``xaxis.range[1]`` are present, otherwise the
        pre-built unfiltered map ``fig6``.
    """
    lower_key, upper_key = 'xaxis.range[0]', 'xaxis.range[1]'
    range_selected = bool(relayout_data) and lower_key in relayout_data and upper_key in relayout_data
    if not range_selected:
        return fig6

    # The builder receives the bounds as ``YYYY-MM-DD`` strings.
    range_start = pd.to_datetime(relayout_data[lower_key]).strftime('%Y-%m-%d')
    range_end = pd.to_datetime(relayout_data[upper_key]).strftime('%Y-%m-%d')

    filtered_map = create_plotly_world_map(
        country_concentration_df,
        "time",
        "metric",
        "value",
        start_time=range_start,
        end_time=range_end,
    )
    filtered_map.update_layout(font_family="Inter")
    return filtered_map
|
|
|
|
|
@app.callback(
    Output('leaderboard', 'figure'),
    [Input('time-slider', 'relayoutData')]
)
def update_leaderboard(relayout_data):
    """Return the leaderboard for the selected time range.

    Args:
        relayout_data: ``relayoutData`` of the 'time-slider' graph; may be
            empty or ``None``.

    Returns:
        A freshly built leaderboard limited to the selected range when both
        ``xaxis.range[0]`` and ``xaxis.range[1]`` are present, otherwise the
        pre-built unfiltered leaderboard ``fig7``.
    """
    bound_keys = ('xaxis.range[0]', 'xaxis.range[1]')
    if not (relayout_data and all(key in relayout_data for key in bound_keys)):
        return fig7

    # The builder receives the bounds as ``YYYY-MM-DD`` strings.
    day_bounds = [
        pd.to_datetime(relayout_data[key]).strftime('%Y-%m-%d') for key in bound_keys
    ]

    filtered_board = create_leaderboard(
        country_concentration_df,
        author_concentration_df,
        model_concentration_df,
        start_time=day_bounds[0],
        end_time=day_bounds[1],
    )
    filtered_board.update_layout(font_family="Inter")
    return filtered_board
|
|
|
|
|
@app.callback(
    Output('stacked-area-chart', 'figure'),
    [Input('time-slider', 'relayoutData')]
)
def update_stacked_area(relayout_data):
    """Return the stacked-area chart for the selected time range.

    Args:
        relayout_data: ``relayoutData`` of the 'time-slider' graph; may be
            empty or ``None``.

    Returns:
        A freshly built chart limited to the selected range when both
        ``xaxis.range[0]`` and ``xaxis.range[1]`` are present, otherwise the
        pre-built unfiltered chart ``fig``.
    """
    if not relayout_data:
        return fig
    if 'xaxis.range[0]' not in relayout_data or 'xaxis.range[1]' not in relayout_data:
        return fig

    def _as_day(raw_bound):
        # Reduce an axis bound to a ``YYYY-MM-DD`` string.
        return pd.to_datetime(raw_bound).strftime('%Y-%m-%d')

    window_start = _as_day(relayout_data['xaxis.range[0]'])
    window_end = _as_day(relayout_data['xaxis.range[1]'])

    filtered_chart = create_plotly_stacked_area_chart(
        model_topk_df,
        model_gini_df,
        model_hhi_df,
        TEMP_MODEL_EVENTS,
        PALETTE_0,
        start_time=window_start,
        end_time=window_end,
    )
    filtered_chart.update_layout(font_family="Inter")
    return filtered_chart
|
|
|
|
|
|
|
|
# Start the app with debug mode enabled, but only when this file is executed
# directly; importing the module does not start a server.
if __name__ == '__main__':
    app.run(debug=True)
|
|
|