Spaces:
Sleeping
Sleeping
Commit
·
3e8cd27
1
Parent(s):
1c82c33
final data
Browse files
- app.py +23 -28
- grid_eval_gpt4o.json +0 -0
app.py
CHANGED
|
@@ -18,15 +18,15 @@ st.set_page_config(layout="wide")
|
|
| 18 |
# config['preauthorized']
|
| 19 |
# )
|
| 20 |
|
| 21 |
-
file_path = '
|
| 22 |
|
| 23 |
# Load your data
|
| 24 |
@st.cache_data()
|
| 25 |
def load_data():
|
| 26 |
with open(file_path, 'r') as file:
|
| 27 |
data = json.load(file)
|
| 28 |
-
random.shuffle(data)
|
| 29 |
-
data = data[
|
| 30 |
return data
|
| 31 |
|
| 32 |
def save_data(data):
|
|
@@ -36,12 +36,13 @@ def save_data(data):
|
|
| 36 |
|
| 37 |
def download_json(data):
|
| 38 |
return json.dumps(data, indent=4)
|
|
|
|
| 39 |
data = load_data()
|
| 40 |
|
| 41 |
for query in data:
|
| 42 |
for result in query['results']:
|
| 43 |
-
if '
|
| 44 |
-
result['
|
| 45 |
|
| 46 |
# State management for current query index
|
| 47 |
if 'current_query_index' not in st.session_state:
|
|
@@ -112,8 +113,18 @@ def display_query():
|
|
| 112 |
mime="application/json"
|
| 113 |
)
|
| 114 |
|
| 115 |
-
st.
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
if st.session_state.graded_queries >= len(data):
|
| 118 |
save_data(st.session_state.data)
|
| 119 |
st.success(f"{len(data)} Queries graded and data saved!")
|
|
@@ -123,36 +134,22 @@ def display_query():
|
|
| 123 |
st.header(f"Query: {current_query['query']}")
|
| 124 |
status_color = 'green' if current_query.get('status', None) is not None else 'red'
|
| 125 |
st.markdown(f"{current_query['grid_pos_str']} | Query Grade: <b style='color: {status_color};'>{'Graded' if status_color == 'green' else 'Ungraded'}</b>", unsafe_allow_html = True)
|
| 126 |
-
|
| 127 |
st.subheader("Results:")
|
| 128 |
for index, result in enumerate(current_query['results']):
|
| 129 |
st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
|
| 130 |
col1, col2 = st.columns([3, 2], gap="small")
|
| 131 |
-
with col1:
|
| 132 |
-
# title_style = f"color: {'green' if result.get('verified') is True else 'red' if result.get('verified') is False else 'white'};"
|
| 133 |
-
|
| 134 |
st.markdown(f"<h5>{result['title']}</h5>", unsafe_allow_html=True)
|
| 135 |
st.markdown(f"[<span style='font-size: 0.8em;'>{truncate_text(result['url'], length = 50)}</span>]({result['url']}) | {result['published_date']}", unsafe_allow_html=True)
|
| 136 |
st.markdown(f"{truncate_text(result['text'], length = len(result['model_trace']))}")
|
| 137 |
with col2:
|
| 138 |
grade_color = 'green' if result['grade'].lower() == 'yes' else 'red'
|
| 139 |
-
st.markdown(f"<b style='color: {grade_color};'>
|
| 140 |
st.write(result['model_trace'])
|
| 141 |
|
| 142 |
-
if st.checkbox("
|
| 143 |
-
result['
|
| 144 |
-
|
| 145 |
-
# btn_cols = st.columns([1, 1])
|
| 146 |
-
# with btn_cols[0]:
|
| 147 |
-
# if st.button('Accept', key=f'accept-{index}'):
|
| 148 |
-
# result['verified'] = True
|
| 149 |
-
# if result.get('verified') is True:
|
| 150 |
-
# st.write('Accepted')
|
| 151 |
-
# with btn_cols[1]:
|
| 152 |
-
# if st.button('Reject', key=f'reject-{index}'):
|
| 153 |
-
# result['verified'] = False
|
| 154 |
-
# if result.get('verified') is False:
|
| 155 |
-
# st.write('Rejected')
|
| 156 |
|
| 157 |
st.markdown("</div>", unsafe_allow_html=True)
|
| 158 |
st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
|
|
@@ -160,8 +157,6 @@ def display_query():
|
|
| 160 |
# Show current query and its results
|
| 161 |
current_query = st.session_state.data[st.session_state.current_query_index]
|
| 162 |
|
| 163 |
-
|
| 164 |
-
|
| 165 |
display_query()
|
| 166 |
|
| 167 |
col1, col2 = st.columns([5, 1], gap="small")
|
|
|
|
| 18 |
# config['preauthorized']
|
| 19 |
# )
|
| 20 |
|
| 21 |
+
file_path = 'grid_eval_gpt4o.json'
|
| 22 |
|
| 23 |
# Load your data
|
| 24 |
@st.cache_data()
|
| 25 |
def load_data():
|
| 26 |
with open(file_path, 'r') as file:
|
| 27 |
data = json.load(file)
|
| 28 |
+
# random.shuffle(data)
|
| 29 |
+
# data = data[]
|
| 30 |
return data
|
| 31 |
|
| 32 |
def save_data(data):
|
|
|
|
| 36 |
|
| 37 |
def download_json(data):
|
| 38 |
return json.dumps(data, indent=4)
|
| 39 |
+
|
| 40 |
data = load_data()
|
| 41 |
|
| 42 |
for query in data:
|
| 43 |
for result in query['results']:
|
| 44 |
+
if 'agree' not in result:
|
| 45 |
+
result['agree'] = True
|
| 46 |
|
| 47 |
# State management for current query index
|
| 48 |
if 'current_query_index' not in st.session_state:
|
|
|
|
| 113 |
mime="application/json"
|
| 114 |
)
|
| 115 |
|
| 116 |
+
index = st.text_input(f"At index {st.session_state.current_query_index + 1}. Graded: {st.session_state.graded_queries}/{len(st.session_state.data)}", placeholder="Go to index:")
|
| 117 |
+
if index:
|
| 118 |
+
try:
|
| 119 |
+
index = int(index) - 1
|
| 120 |
+
if index < 0 or index >= len(data):
|
| 121 |
+
st.error("Invalid index.")
|
| 122 |
+
else:
|
| 123 |
+
st.session_state.current_query_index = index
|
| 124 |
+
st.rerun()
|
| 125 |
+
except ValueError:
|
| 126 |
+
st.error("Please enter a valid integer.")
|
| 127 |
+
|
| 128 |
if st.session_state.graded_queries >= len(data):
|
| 129 |
save_data(st.session_state.data)
|
| 130 |
st.success(f"{len(data)} Queries graded and data saved!")
|
|
|
|
| 134 |
st.header(f"Query: {current_query['query']}")
|
| 135 |
status_color = 'green' if current_query.get('status', None) is not None else 'red'
|
| 136 |
st.markdown(f"{current_query['grid_pos_str']} | Query Grade: <b style='color: {status_color};'>{'Graded' if status_color == 'green' else 'Ungraded'}</b>", unsafe_allow_html = True)
|
| 137 |
+
st.markdown(f"Model's Query Gen Reasoning Trace: {current_query['reasoning_trace'][0]}")
|
| 138 |
st.subheader("Results:")
|
| 139 |
for index, result in enumerate(current_query['results']):
|
| 140 |
st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
|
| 141 |
col1, col2 = st.columns([3, 2], gap="small")
|
| 142 |
+
with col1:
|
|
|
|
|
|
|
| 143 |
st.markdown(f"<h5>{result['title']}</h5>", unsafe_allow_html=True)
|
| 144 |
st.markdown(f"[<span style='font-size: 0.8em;'>{truncate_text(result['url'], length = 50)}</span>]({result['url']}) | {result['published_date']}", unsafe_allow_html=True)
|
| 145 |
st.markdown(f"{truncate_text(result['text'], length = len(result['model_trace']))}")
|
| 146 |
with col2:
|
| 147 |
grade_color = 'green' if result['grade'].lower() == 'yes' else 'red'
|
| 148 |
+
st.markdown(f"Model Grade: <b style='color: {grade_color};'>{result['grade']}</b>", unsafe_allow_html=True)
|
| 149 |
st.write(result['model_trace'])
|
| 150 |
|
| 151 |
+
if st.checkbox("Reject", value= not result.get('agree'), key=f'verify-{index}'):
|
| 152 |
+
result['agree'] = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
st.markdown("</div>", unsafe_allow_html=True)
|
| 155 |
st.markdown(f"<div class='rounded-box'>", unsafe_allow_html=True)
|
|
|
|
| 157 |
# Show current query and its results
|
| 158 |
current_query = st.session_state.data[st.session_state.current_query_index]
|
| 159 |
|
|
|
|
|
|
|
| 160 |
display_query()
|
| 161 |
|
| 162 |
col1, col2 = st.columns([5, 1], gap="small")
|
grid_eval_gpt4o.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|