Spaces:

economies-open-ai
/

open-model-evolution

Running

File size: 18,702 Bytes

import pandas as pd
from dash import html, dcc
from dash_iconify import DashIconify
import dash_mantine_components as dmc
import base64

# Inline CSS shared by the dark, pill-like action buttons (the leaderboard
# "Show Top 50" toggle spreads this dict into its own style).
button_style = dict(
    display="inline-block",
    marginBottom="10px",
    marginRight="15px",
    marginTop="30px",
    padding="6px 16px",
    backgroundColor="#082030",
    color="white",
    borderRadius="6px",
    textDecoration="none",
    fontWeight="bold",
    fontSize="14px",
)

# Display label -> icon (flag emoji or generic symbol) for country chips.
# Keys are the labels as rendered on the leaderboard: get_top_n_leaderboard
# shortens "United States of America" to "USA" and capitalises "user" to
# "User" before looking a label up here, and falls back to an empty icon for
# labels that are missing. "Unknown", "User" and "International/Online" are
# non-country buckets and therefore carry generic symbols instead of flags.
country_icon_map = {
    "USA": "🇺🇸",
    "China": "🇨🇳",
    "Germany": "🇩🇪",
    "France": "🇫🇷",
    "India": "🇮🇳",
    "Italy": "🇮🇹",
    "Japan": "🇯🇵",
    "South Korea": "🇰🇷",
    "United Kingdom": "🇬🇧",
    "Canada": "🇨🇦",
    "Brazil": "🇧🇷",
    "Australia": "🇦🇺",
    "Unknown": "❓",
    "Finland": "🇫🇮",
    "Lebanon": "🇱🇧",
    "Iceland": "🇮🇸",
    "Singapore": "🇸🇬",
    "Israel": "🇮🇱",
    "Iran": "🇮🇷",
    "Hong Kong": "🇭🇰",
    "Netherlands": "🇳🇱",
    "Chile": "🇨🇱",
    "Vietnam": "🇻🇳",
    "Russia": "🇷🇺",
    "Qatar": "🇶🇦",
    "Switzerland": "🇨🇭",
    "User": "👤",
    "International/Online": "🌐",
}

# Author handle -> logo image path used on developer chips. Several handles
# may share one logo (both Hugging Face-maintained namespaces do).
_ICON_DIR = "../assets/icons"
company_icon_map = {
    handle: f"{_ICON_DIR}/{logo_file}"
    for handle, logo_file in (
        ("google", "google.png"),
        ("distilbert", "hugging-face.png"),
        ("sentence-transformers", "hugging-face.png"),
        ("facebook", "meta.png"),
        ("openai", "openai.png"),
    )
}

# Grouping column -> metadata columns collected for each leaderboard entry.
# The "model" board shows everything the "author" board shows plus modality
# and download counts, so both are derived from one shared base tuple.
_AUTHOR_META_COLS = ("org_country_single", "author", "merged_country_groups_single")
meta_cols_map = {
    "org_country_single": ["org_country_single"],
    "author": list(_AUTHOR_META_COLS),
    "model": [*_AUTHOR_META_COLS, "merged_modality", "downloads"],
}


def chip(text, bg_color="#F0F0F0"):
    """Return a pill-shaped ``html.Span`` showing *text* on *bg_color*."""
    pill_css = {
        "backgroundColor": bg_color,
        "padding": "4px 10px",
        "borderRadius": "12px",
        "margin": "2px",
        "display": "inline-flex",
        "alignItems": "center",
        "fontSize": "14px",
    }
    return html.Span(text, style=pill_css)


def progress_bar(percent, bar_color="#082030"):
    """Return a horizontal bar filled to *percent* with a centred text label.

    *percent* is used as-is for the CSS width of the coloured fill and is
    formatted with one decimal place for the overlaid label.
    """
    # Coloured portion; its width animates when the value changes.
    fill = html.Div(
        style={
            "backgroundColor": bar_color,
            "width": f"{percent}%",
            "height": "100%",
            "borderRadius": "8px",
            "transition": "width 0.5s",
        }
    )

    # Label is absolutely positioned so it stays centred on the whole track,
    # independent of how wide the fill is.
    label = html.Div(
        f"{percent:.1f}%",
        style={
            "position": "absolute",
            "top": 0,
            "left": "50%",
            "transform": "translateX(-50%)",
            "color": "black",
            "fontWeight": "bold",
            "fontSize": "12px",
            "lineHeight": "20px",
            "textAlign": "center",
        },
    )

    # Grey track that clips the fill to rounded corners.
    track_css = {
        "position": "relative",
        "backgroundColor": "#E0E0E0",
        "borderRadius": "8px",
        "height": "20px",
        "width": "100%",
        "overflow": "hidden",
    }
    return html.Div(style=track_css, children=[fill, label])


def df_to_download_link(df, filename):
    """Return a right-aligned download icon that saves *df* as ``<filename>.csv``.

    The frame is serialised to CSV without its index, base64-encoded and
    embedded in the link's ``href`` as a ``data:`` URI.
    """
    csv_bytes = df.to_csv(index=False).encode()
    payload = base64.b64encode(csv_bytes).decode()

    icon_button = dmc.ActionIcon(
        DashIconify(icon="mdi:download", width=24),
        size="lg",
        color="#082030",
    )
    anchor_css = {
        "padding": "6px 12px",
        "display": "inline-flex",
        "alignItems": "center",
        "justifyContent": "center",
    }
    anchor = html.A(
        children=icon_button,
        id=f"download-{filename}",
        download=f"{filename}.csv",
        href=f"data:text/csv;base64,{payload}",
        target="_blank",
        title="Download CSV",
        style=anchor_css,
    )
    return html.Div(anchor, style={"textAlign": "right"})


def render_chips(metadata_list, chip_color):
    """Render ``(icon, name)`` pairs as a wrapping row of chips.

    Args:
        metadata_list: Iterable of ``(icon, name)`` pairs. When ``icon`` is a
            string ending in an image extension it is rendered as an
            ``html.Img`` in front of the name; any other icon (emoji, empty
            string, ...) is placed in front of the name as plain text.
        chip_color: Background colour applied to every chip.

    Returns:
        An ``html.Div`` flex container holding one chip per pair.
    """
    image_suffixes = (".png", ".jpg", ".jpeg", ".svg")
    chips = []
    for icon, name in metadata_list:
        if isinstance(icon, str) and icon.endswith(image_suffixes):
            chips.append(
                html.Span(
                    [
                        html.Img(
                            src=icon, style={"height": "18px", "marginRight": "6px"}
                        ),
                        name,
                    ],
                    style={
                        "backgroundColor": chip_color,
                        "padding": "4px 10px",
                        "borderRadius": "12px",
                        "margin": "2px",
                        "display": "inline-flex",
                        # "left" is not a valid align-items value; "center"
                        # vertically centres logo and text and matches the
                        # styling of the text-only chips built by chip().
                        "alignItems": "center",
                        "fontSize": "14px",
                    },
                )
            )
        else:
            chips.append(chip(f"{icon} {name}", chip_color))
    return html.Div(
        chips, style={"display": "flex", "flexWrap": "wrap", "justifyContent": "left"}
    )


def render_table_content(
    df, download_df, chip_color, bar_color="#082030", filename="data"
):
    """Build the bare leaderboard table: a header row plus one row per entry.

    ``df`` must provide the columns ``Name``, ``Metadata`` and ``% of total``.
    The rank shown is the row's index label plus one, so ``df`` is expected to
    carry a 0-based integer index. ``download_df`` and ``filename`` are
    accepted but not used by this function.
    """
    # (label, extra style) per header cell; every cell shares the base style.
    header_specs = (
        ("Rank", {}),
        ("Name", {}),
        ("Metadata", {"marginRight": "10px"}),
        ("% of Total", {}),
    )
    header_cells = [
        html.Th(
            label,
            style={"backgroundColor": "#F0F0F0", "textAlign": "left", **extra},
        )
        for label, extra in header_specs
    ]

    body_rows = []
    for idx, row in df.iterrows():
        cells = [
            html.Td(idx + 1, style={"textAlign": "center"}),
            html.Td(row["Name"], style={"textAlign": "left"}),
            html.Td(render_chips(row["Metadata"], chip_color)),
            html.Td(
                progress_bar(row["% of total"], bar_color),
                style={"textAlign": "center"},
            ),
        ]
        body_rows.append(html.Tr(cells))

    table = html.Table(
        [html.Thead(html.Tr(header_cells)), html.Tbody(body_rows)],
        style={"borderCollapse": "collapse", "width": "100%"},
    )
    return html.Div([table])


def render_table(
    df, download_df, title, chip_color, bar_color="#AC482A", filename="data"
):
    """Build a complete leaderboard section.

    The section consists of a title row with a CSV download icon for
    ``download_df``, the ranked table rendered from ``df`` (columns ``Name``,
    ``Metadata``, ``% of total``; rank is the index label plus one) and a
    "Show Top 50" toggle button wrapped in a loading indicator. Component ids
    are derived from ``filename``.
    """
    # --- title + download icon ------------------------------------------
    heading = html.H4(
        title,
        style={
            "textAlign": "left",
            "marginBottom": "10px",
            "fontSize": "20px",
            "display": "inline-block",
        },
    )
    title_row = html.Div(
        [heading, df_to_download_link(download_df, filename)],
        style={
            "display": "flex",
            "alignItems": "center",
            "justifyContent": "space-between",
        },
    )

    # --- table ------------------------------------------------------------
    # (label, extra style) per header cell; every cell shares the base style.
    header_specs = (
        ("Rank", {}),
        ("Name", {}),
        ("Metadata", {"marginRight": "10px"}),
        ("% of Total", {}),
    )
    header_cells = [
        html.Th(
            label,
            style={"backgroundColor": "#F0F0F0", "textAlign": "left", **extra},
        )
        for label, extra in header_specs
    ]

    body_rows = []
    for idx, row in df.iterrows():
        body_rows.append(
            html.Tr(
                [
                    html.Td(idx + 1, style={"textAlign": "center"}),
                    html.Td(row["Name"], style={"textAlign": "left"}),
                    html.Td(render_chips(row["Metadata"], chip_color)),
                    html.Td(
                        progress_bar(row["% of total"], bar_color),
                        style={"textAlign": "center"},
                    ),
                ]
            )
        )

    table = html.Table(
        [html.Thead(html.Tr(header_cells)), html.Tbody(body_rows)],
        style={
            "borderCollapse": "collapse",
            "width": "100%",
            "border": "none",
        },
    )
    table_holder = html.Div(id=f"{filename}-table", children=[table])

    # --- expand toggle ------------------------------------------------------
    toggle_button = html.Button(
        "▼ Show Top 50",
        id=f"{filename}-toggle",
        n_clicks=0,
        style={**button_style, "border": "none"},
    )
    toggle = dcc.Loading(
        id=f"loading-{filename}-toggle",
        type="dot",
        color="#082030",
        children=html.Div(
            [toggle_button],
            style={"marginTop": "5px", "textAlign": "left"},
        ),
    )

    return html.Div(
        id=f"{filename}-div",
        children=[title_row, table_holder, toggle],
        style={"marginBottom": "20px"},
    )


def get_top_n_leaderboard(filtered_df, group_col, top_n=10):
    """Rank the values of ``group_col`` by summed downloads and attach metadata.

    Args:
        filtered_df: DataFrame containing ``group_col``, a ``downloads`` column
            and, optionally, the metadata columns that ``meta_cols_map`` lists
            for ``group_col`` (missing columns are skipped).
        group_col: Column to group by; also the lookup key in ``meta_cols_map``.
        top_n: Number of highest-ranked groups to keep.

    Returns:
        A ``(display_df, download_df)`` tuple.

        * ``display_df`` has the columns ``Name``, ``Metadata`` (a list of
          ``(icon, label)`` tuples per row) and ``% of total``.
        * ``download_df`` has ``Name``, ``Total Value`` (int), ``% of total``
          (rounded to 2 decimals) and one text column per collected metadata
          column, holding the unique values joined with ``", "``.
    """
    top = (
        filtered_df.groupby(group_col)["downloads"]
        .sum()
        .nlargest(top_n)
        .reset_index()
        .rename(columns={group_col: "Name", "downloads": "Total Value"})
    )

    # NOTE(review): the denominator is the sum over the top-N rows only, so the
    # shares always add up to 100% within the leaderboard rather than being a
    # share of all downloads in ``filtered_df`` - confirm this is intended.
    total_value = top["Total Value"].sum()
    if total_value:
        top["% of total"] = top["Total Value"] / total_value * 100
    else:
        top["% of total"] = 0

    # Create a downloadable version of the leaderboard
    download_top = top.copy()
    download_top["Total Value"] = download_top["Total Value"].astype(int)
    download_top["% of total"] = download_top["% of total"].round(2)

    # Names are deliberately left untouched here: the lookups below match them
    # against the raw ``group_col`` values. (A former
    # ``top["Name"].replace("User", "user")`` call discarded its result and
    # therefore never had any effect; it has been dropped.)

    # Unique metadata values per leaderboard entry, keyed by name then column.
    # The same mapping feeds both the chips and the CSV columns.
    meta_cols = meta_cols_map.get(group_col, [])
    meta_map = {}
    for name in top["Name"]:
        name_data = filtered_df[filtered_df[group_col] == name]
        meta_map[name] = {
            col: list(name_data[col].unique())
            for col in meta_cols
            if col in name_data.columns
        }

    # Raw country value -> label used for display and icon lookup.
    country_aliases = {"United States of America": "USA", "user": "User"}

    # Function to build metadata chips
    def build_metadata(nm):
        meta = meta_map.get(nm, {})
        chips = []

        # Countries
        for country in meta.get("org_country_single", []):
            label = country_aliases.get(country, country)
            chips.append((country_icon_map.get(label, ""), label))

        # Author: use the company logo when known, otherwise a generic
        # organisation/person symbol depending on the first country group.
        for author in meta.get("author", []):
            icon = company_icon_map.get(author, "")
            if icon == "":
                if meta.get("merged_country_groups_single", ["User"])[0] != "User":
                    icon = "🏢"
                else:
                    icon = "👤"
            chips.append((icon, author))

        # Downloads
        # NOTE(review): this sums the *unique* download values, so identical
        # values on different rows are counted once and the chip can differ
        # from "Total Value" - confirm whether de-duplication is intended.
        total_downloads = sum(
            d for d in meta.get("downloads", []) if pd.notna(d)
        )
        if total_downloads:
            chips.append(("⬇️", f"{int(total_downloads):,}"))

        # Modality
        for modality in meta.get("merged_modality", []):
            chips.append(("", modality))

        # Estimated parameters; only populated when "estimated_parameters" is
        # listed for ``group_col`` in ``meta_cols_map``.
        for p in meta.get("estimated_parameters", []):
            if pd.notna(p):
                if p >= 1e9:
                    p_str = f"{p / 1e9:.1f}B"
                elif p >= 1e6:
                    p_str = f"{p / 1e6:.1f}M"
                elif p >= 1e3:
                    p_str = f"{p / 1e3:.1f}K"
                else:
                    p_str = str(p)
                chips.append(("⚙️", p_str))
        return chips

    # Function to create downloadable dataframe
    def build_download_metadata(nm):
        meta = meta_map.get(nm, {})
        # Columns without any value are omitted entirely; the rest are joined
        # into a single string so they fit in one CSV cell.
        return {
            col: ", ".join(str(v) for v in meta[col])
            for col in meta_cols
            if meta.get(col)
        }

    # Apply metadata builder to top dataframe
    top["Metadata"] = top["Name"].astype(object).apply(build_metadata)
    download_info_df = pd.DataFrame(
        [build_download_metadata(nm) for nm in download_top["Name"]]
    )
    download_top = pd.concat([download_top, download_info_df], axis=1)

    return top[["Name", "Metadata", "% of total"]], download_top


def create_leaderboard(filtered_df, board_type, top_n=10):
    """Render the leaderboard section for one board type.

    Args:
        filtered_df: DataFrame with at least ``org_country_single``,
            ``downloads`` and the grouping column of the requested board.
            It is not modified.
        board_type: ``"countries"`` or ``"developers"``; any other value
            selects the models board.
        top_n: Number of entries to show.

    Returns:
        The component built by ``render_table`` for the requested board, or a
        placeholder ``html.Div`` when ``filtered_df`` is empty.
    """
    if filtered_df.empty:
        return html.Div("No data in selected range")

    # Fold "HF" into the USA and merge "International"/"Online" into a single
    # bucket. ``assign`` returns a new frame, so the caller's DataFrame is left
    # untouched (assigning into ``filtered_df`` would mutate shared data and
    # can trigger pandas' SettingWithCopyWarning when it is a slice).
    country_aliases = {
        "HF": "United States of America",
        "International": "International/Online",
        "Online": "International/Online",
    }
    normalised_df = filtered_df.assign(
        org_country_single=filtered_df["org_country_single"].replace(country_aliases)
    )

    # Pick grouping column, title and id prefix for the requested board.
    if board_type == "countries":
        group_col, title, filename = (
            "org_country_single",
            "Top Countries",
            "top_countries",
        )
    elif board_type == "developers":
        group_col, title, filename = "author", "Top Developers", "top_developers"
    else:
        group_col, title, filename = "model", "Top Models", "top_models"

    # Only the requested leaderboard is computed; the other two would be
    # discarded anyway.
    top, download_top = get_top_n_leaderboard(normalised_df, group_col, top_n)

    return render_table(
        top,
        download_top,
        title,
        chip_color="#F0F9FF",
        bar_color="#082030",
        filename=filename,
    )