Commit
·
3960366
1
Parent(s):
f03baa2
Make API calls asynchronous
Browse files- agent/core.py +1 -1
- tools/code_index.py +50 -17
- tools/github_tools.py +1 -1
- tools/utils.py +10 -7
agent/core.py
CHANGED
|
@@ -22,7 +22,7 @@ api_key = MISTRAL_API_KEY
|
|
| 22 |
model = "devstral-small-latest"
|
| 23 |
client = Mistral(api_key=api_key)
|
| 24 |
|
| 25 |
-
def run_agent(issue_url: str, branch_name: str = "main") -> str:
|
| 26 |
"""
|
| 27 |
Run the agent workflow on a given GitHub issue URL.
|
| 28 |
"""
|
|
|
|
| 22 |
model = "devstral-small-latest"
|
| 23 |
client = Mistral(api_key=api_key)
|
| 24 |
|
| 25 |
+
async def run_agent(issue_url: str, branch_name: str = "main") -> str:
|
| 26 |
"""
|
| 27 |
Run the agent workflow on a given GitHub issue URL.
|
| 28 |
"""
|
tools/code_index.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
import os
|
| 3 |
-
import re
|
| 4 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 5 |
import time
|
| 6 |
from typing import List, Dict
|
|
@@ -74,33 +74,60 @@ def select_relevant_files_semantic(issue_description: str, file_paths: List[str]
|
|
| 74 |
# ''',
|
| 75 |
# ['.github/FUNDING.yml', '.github/workflows/process_challenge.yml', '.gitignore', 'README.md', 'annotations/test_annotations_devsplit.json', 'annotations/test_annotations_testsplit.json', 'challenge_config.yaml', 'challenge_data/__init__.py', 'challenge_data/challenge_1/__init__.py', 'challenge_data/challenge_1/main.py', 'evaluation_script/__init__.py', 'evaluation_script/main.py', 'github/challenge_processing_script.py', 'github/config.py', 'github/host_config.json', 'github/requirements.txt', 'github/utils.py', 'logo.jpg', 'remote_challenge_evaluation/README.md', 'remote_challenge_evaluation/eval_ai_interface.py', 'remote_challenge_evaluation/evaluate.py', 'remote_challenge_evaluation/main.py', 'remote_challenge_evaluation/requirements.txt', 'run.sh', 'submission.json', 'templates/challenge_phase_1_description.html', 'templates/challenge_phase_2_description.html', 'templates/description.html', 'templates/evaluation_details.html', 'templates/submission_guidelines.html', 'templates/terms_and_conditions.html', 'worker/__init__.py', 'worker/run.py']))
|
| 76 |
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
model_name = "codestral-embed"
|
| 79 |
embed_model = MistralAIEmbedding(model_name=model_name, api_key=MISTRAL_API_KEY)
|
| 80 |
print(f"[Indexing] Starting to index repository: {owner}/{repo} at ref {ref}...")
|
| 81 |
-
|
|
|
|
| 82 |
|
| 83 |
if issue_description:
|
| 84 |
-
file_paths = select_relevant_files_semantic(issue_description, file_paths)
|
| 85 |
|
| 86 |
documents = []
|
|
|
|
| 87 |
for path in file_paths:
|
| 88 |
_, ext = os.path.splitext(path)
|
| 89 |
if ext.lower() not in INCLUDE_FILE_EXTENSIONS:
|
| 90 |
continue
|
| 91 |
|
| 92 |
try:
|
| 93 |
-
content = fetch_file_content
|
| 94 |
documents.append(Document(text=content, metadata={"file_path": path}))
|
| 95 |
print(f"[Indexing] Added file: {path}")
|
| 96 |
-
|
| 97 |
except Exception as e:
|
| 98 |
print(f"[Warning] Skipping file {path} due to error: {e}")
|
| 99 |
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
print(f"[Indexing] Finished indexing {len(documents)} files.")
|
| 102 |
return index
|
| 103 |
|
|
|
|
| 104 |
# print(build_repo_index("aditi-dsi", "EvalAI-Starters", "master",
|
| 105 |
# '''
|
| 106 |
# 🛠️ Configuration Error: Placeholder values detected in host_config.json
|
|
@@ -113,11 +140,12 @@ def build_repo_index(owner: str, repo: str, ref: str = "main", issue_description
|
|
| 113 |
# '''))
|
| 114 |
|
| 115 |
|
| 116 |
-
def retrieve_context(owner: str, repo: str, ref: str, issue_description: str) -> List[str]:
|
| 117 |
print("Issue Description:", issue_description)
|
| 118 |
-
index = build_repo_index(owner, repo, ref, issue_description)
|
| 119 |
Settings.llm = MistralAI(model="codestral-latest", api_key=MISTRAL_API_KEY)
|
| 120 |
Settings.embed_model = MistralAIEmbedding(model_name="codestral-embed", api_key=MISTRAL_API_KEY)
|
|
|
|
| 121 |
retriever = index.as_retriever(similarity_top_k=3)
|
| 122 |
|
| 123 |
query_engine = RetrieverQueryEngine(
|
|
@@ -127,16 +155,21 @@ def retrieve_context(owner: str, repo: str, ref: str, issue_description: str) ->
|
|
| 127 |
SimilarityPostprocessor(similarity_top_k=3, similarity_cutoff=0.75)
|
| 128 |
],
|
| 129 |
)
|
|
|
|
| 130 |
query = (
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
)
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
print(response)
|
| 141 |
return response
|
| 142 |
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
import numpy as np
|
| 3 |
import os
|
|
|
|
| 4 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 5 |
import time
|
| 6 |
from typing import List, Dict
|
|
|
|
| 74 |
# ''',
|
| 75 |
# ['.github/FUNDING.yml', '.github/workflows/process_challenge.yml', '.gitignore', 'README.md', 'annotations/test_annotations_devsplit.json', 'annotations/test_annotations_testsplit.json', 'challenge_config.yaml', 'challenge_data/__init__.py', 'challenge_data/challenge_1/__init__.py', 'challenge_data/challenge_1/main.py', 'evaluation_script/__init__.py', 'evaluation_script/main.py', 'github/challenge_processing_script.py', 'github/config.py', 'github/host_config.json', 'github/requirements.txt', 'github/utils.py', 'logo.jpg', 'remote_challenge_evaluation/README.md', 'remote_challenge_evaluation/eval_ai_interface.py', 'remote_challenge_evaluation/evaluate.py', 'remote_challenge_evaluation/main.py', 'remote_challenge_evaluation/requirements.txt', 'run.sh', 'submission.json', 'templates/challenge_phase_1_description.html', 'templates/challenge_phase_2_description.html', 'templates/description.html', 'templates/evaluation_details.html', 'templates/submission_guidelines.html', 'templates/terms_and_conditions.html', 'worker/__init__.py', 'worker/run.py']))
|
| 76 |
|
| 77 |
+
|
| 78 |
+
# Assuming these are async now or wrapped appropriately
|
| 79 |
+
# async def fetch_repo_files(...)
|
| 80 |
+
# async def fetch_file_content(...)
|
| 81 |
+
# async def VectorStoreIndex.from_documents(...)
|
| 82 |
+
|
| 83 |
+
async def async_retry_on_429(func, *args, max_retries=3, delay=1, **kwargs):
    """
    Await ``func(*args, **kwargs)``, retrying with exponential backoff on HTTP 429.

    An exception counts as a rate-limit error when it carries a ``response``
    attribute whose ``status_code`` equals 429. Any other exception is
    re-raised immediately.

    Args:
        func: Callable returning an awaitable (e.g. an ``async def`` function).
        *args: Positional arguments forwarded to ``func``.
        max_retries: Maximum number of attempts. If it is less than 1, ``func``
            is never called and ``None`` is returned.
        delay: Seconds to wait before the second attempt; doubled after every
            rate-limited attempt.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever the first successful ``await func(...)`` returns.

    Raises:
        Exception: The original exception, either because it was not a 429 or
            because the final attempt was still rate limited.
    """
    # Not every callable has __name__ (e.g. functools.partial objects).
    func_name = getattr(func, '__name__', repr(func))

    for attempt in range(max_retries):
        try:
            return await func(*args, **kwargs)
        except Exception as e:
            # Compare against None explicitly: some HTTP response objects
            # (e.g. requests.Response) are falsy for error statuses, so a
            # plain truthiness test would hide the 429 we are looking for.
            response = getattr(e, 'response', None)
            status = getattr(response, 'status_code', None)

            is_last_attempt = attempt == max_retries - 1
            if status != 429 or is_last_attempt:
                # Surface the failure instead of falling out of the loop and
                # silently returning None once the retries are exhausted.
                raise

            print(f"[Retry] Rate limit hit while calling {func_name}. Attempt {attempt+1}/{max_retries}. Retrying in {delay} seconds...")
            await asyncio.sleep(delay)
            delay *= 2
|
| 95 |
+
|
| 96 |
+
async def build_repo_index(owner: str, repo: str, ref: str = "main", issue_description: str = "") -> VectorStoreIndex:
    """
    Build a vector index over the files of a GitHub repository.

    Lists the repository files, optionally narrows them with
    ``select_relevant_files_semantic`` when an issue description is given,
    fetches every file whose extension is in ``INCLUDE_FILE_EXTENSIONS`` and
    indexes the resulting documents with the ``codestral-embed`` model.

    Args:
        owner: Repository owner.
        repo: Repository name.
        ref: Git ref to read the files from.
        issue_description: Optional issue text used to pre-select files.

    Returns:
        The ``VectorStoreIndex`` built from the fetched documents.

    Raises:
        Exception: Re-raised when listing the files or building the index
            fails. Failures on individual files are logged and skipped.
    """
    model_name = "codestral-embed"
    embed_model = MistralAIEmbedding(model_name=model_name, api_key=MISTRAL_API_KEY)
    print(f"[Indexing] Starting to index repository: {owner}/{repo} at ref {ref}...")

    file_paths = await async_retry_on_429(fetch_repo_files, owner, repo, ref)

    if issue_description:
        file_paths = select_relevant_files_semantic(issue_description, file_paths)  # stays sync unless heavy

    documents = []

    for path in file_paths:
        _, ext = os.path.splitext(path)
        if ext.lower() not in INCLUDE_FILE_EXTENSIONS:
            continue

        try:
            content = await async_retry_on_429(fetch_file_content, owner, repo, path, ref)
            documents.append(Document(text=content, metadata={"file_path": path}))
            print(f"[Indexing] Added file: {path}")
            # Short pause between files to stay gentle on the GitHub API.
            await asyncio.sleep(0.1)
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole index.
            print(f"[Warning] Skipping file {path} due to error: {e}")

    async def from_documents(docs):
        # VectorStoreIndex.from_documents is a regular blocking classmethod,
        # not a coroutine function: awaiting its result directly raises
        # TypeError. Run it in a worker thread so it can be awaited and does
        # not block the event loop while embeddings are computed.
        return await asyncio.to_thread(VectorStoreIndex.from_documents, docs, embed_model=embed_model)

    try:
        index = await async_retry_on_429(from_documents, documents)
    except Exception as e:
        print(f"[Error] Failed to build index due to: {e}")
        raise

    print(f"[Indexing] Finished indexing {len(documents)} files.")
    return index
|
| 129 |
|
| 130 |
+
|
| 131 |
# print(build_repo_index("aditi-dsi", "EvalAI-Starters", "master",
|
| 132 |
# '''
|
| 133 |
# 🛠️ Configuration Error: Placeholder values detected in host_config.json
|
|
|
|
| 140 |
# '''))
|
| 141 |
|
| 142 |
|
| 143 |
+
async def retrieve_context(owner: str, repo: str, ref: str, issue_description: str) -> List[str]:
|
| 144 |
print("Issue Description:", issue_description)
|
| 145 |
+
index = await build_repo_index(owner, repo, ref, issue_description)
|
| 146 |
Settings.llm = MistralAI(model="codestral-latest", api_key=MISTRAL_API_KEY)
|
| 147 |
Settings.embed_model = MistralAIEmbedding(model_name="codestral-embed", api_key=MISTRAL_API_KEY)
|
| 148 |
+
|
| 149 |
retriever = index.as_retriever(similarity_top_k=3)
|
| 150 |
|
| 151 |
query_engine = RetrieverQueryEngine(
|
|
|
|
| 155 |
SimilarityPostprocessor(similarity_top_k=3, similarity_cutoff=0.75)
|
| 156 |
],
|
| 157 |
)
|
| 158 |
+
|
| 159 |
query = (
|
| 160 |
+
f"Please give relevant information from the codebase that highly matches the keywords of this issue and is useful for solving or understanding this issue: {issue_description}\n"
|
| 161 |
+
"STRICT RULES:\n"
|
| 162 |
+
"- ONLY use information available in the retriever context.\n"
|
| 163 |
+
"- DO NOT generate or assume any information outside the given context.\n"
|
| 164 |
+
f"- ONLY include context that is highly relevant and clearly useful for understanding or solving this issue: {issue_description}\n"
|
| 165 |
+
"- DO NOT include generic, loosely related, or unrelated content.\n"
|
| 166 |
)
|
| 167 |
+
|
| 168 |
+
print("Query:", query)
|
| 169 |
+
|
| 170 |
+
# If query_engine.query is sync, wrap it in a thread
|
| 171 |
+
response = await asyncio.to_thread(query_engine.query, query)
|
| 172 |
+
|
| 173 |
print(response)
|
| 174 |
return response
|
| 175 |
|
tools/github_tools.py
CHANGED
|
@@ -44,4 +44,4 @@ def post_comment(owner, repo, issue_num, comment_body):
|
|
| 44 |
else:
|
| 45 |
raise Exception(f"Failed to post comment: {response.status_code} {response.text}")
|
| 46 |
|
| 47 |
-
# print(post_comment("aditi-dsi", "testing-cryptope", "3", "This is a test comment from OpenSorus."))
|
|
|
|
| 44 |
else:
|
| 45 |
raise Exception(f"Failed to post comment: {response.status_code} {response.text}")
|
| 46 |
|
| 47 |
+
# print(post_comment("aditi-dsi", "testing-cryptope", "3", "This is a test comment from OpenSorus."))
|
tools/utils.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import base64
|
| 2 |
from datetime import datetime, timezone, timedelta
|
| 3 |
import jwt
|
|
@@ -93,7 +94,7 @@ def get_installation_token(installation_id):
|
|
| 93 |
|
| 94 |
# print(get_installation_token(69452220))
|
| 95 |
|
| 96 |
-
def fetch_repo_files(owner: str, repo: str, ref: str = "main") -> List[str]:
|
| 97 |
"""
|
| 98 |
Lists all files in the repository by recursively fetching the Git tree from GitHub API.
|
| 99 |
Returns a list of file paths.
|
|
@@ -105,7 +106,8 @@ def fetch_repo_files(owner: str, repo: str, ref: str = "main") -> List[str]:
|
|
| 105 |
"Authorization": f"Bearer {token}",
|
| 106 |
"Accept": "application/vnd.github.v3+json"
|
| 107 |
}
|
| 108 |
-
|
|
|
|
| 109 |
if response.status_code != 200:
|
| 110 |
raise Exception(f"Failed to list repository files: {response.status_code} {response.text}")
|
| 111 |
|
|
@@ -115,18 +117,20 @@ def fetch_repo_files(owner: str, repo: str, ref: str = "main") -> List[str]:
|
|
| 115 |
|
| 116 |
# print(fetch_repo_files("aditi-dsi", "EvalAI-Starters", "master"))
|
| 117 |
|
| 118 |
-
def fetch_file_content(owner: str, repo: str, path: str, ref: str = "main") -> str:
|
| 119 |
"""
|
| 120 |
Fetches the content of a file from the GitHub repository.
|
| 121 |
"""
|
| 122 |
installation_id = get_installation_id(owner, repo)
|
| 123 |
-
token = get_installation_token
|
|
|
|
| 124 |
url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={ref}"
|
| 125 |
headers = {
|
| 126 |
"Authorization": f"Bearer {token}",
|
| 127 |
"Accept": "application/vnd.github.v3+json"
|
| 128 |
}
|
| 129 |
-
|
|
|
|
| 130 |
if response.status_code != 200:
|
| 131 |
raise Exception(f"Failed to fetch file content {path}: {response.status_code} {response.text}")
|
| 132 |
|
|
@@ -134,5 +138,4 @@ def fetch_file_content(owner: str, repo: str, path: str, ref: str = "main") -> s
|
|
| 134 |
content = base64.b64decode(content_json["content"]).decode("utf-8", errors="ignore")
|
| 135 |
return content
|
| 136 |
|
| 137 |
-
# print(fetch_file_content("aditi-dsi", "testing-cryptope", "frontend/src/lib/buildSwap.ts", "main"))
|
| 138 |
-
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
import base64
|
| 3 |
from datetime import datetime, timezone, timedelta
|
| 4 |
import jwt
|
|
|
|
| 94 |
|
| 95 |
# print(get_installation_token(69452220))
|
| 96 |
|
| 97 |
+
async def fetch_repo_files(owner: str, repo: str, ref: str = "main") -> List[str]:
|
| 98 |
"""
|
| 99 |
Lists all files in the repository by recursively fetching the Git tree from GitHub API.
|
| 100 |
Returns a list of file paths.
|
|
|
|
| 106 |
"Authorization": f"Bearer {token}",
|
| 107 |
"Accept": "application/vnd.github.v3+json"
|
| 108 |
}
|
| 109 |
+
|
| 110 |
+
response = await asyncio.to_thread(github_request, "GET", url, headers=headers)
|
| 111 |
if response.status_code != 200:
|
| 112 |
raise Exception(f"Failed to list repository files: {response.status_code} {response.text}")
|
| 113 |
|
|
|
|
| 117 |
|
| 118 |
# print(fetch_repo_files("aditi-dsi", "EvalAI-Starters", "master"))
|
| 119 |
|
| 120 |
+
async def fetch_file_content(owner: str, repo: str, path: str, ref: str = "main") -> str:
|
| 121 |
"""
|
| 122 |
Fetches the content of a file from the GitHub repository.
|
| 123 |
"""
|
| 124 |
installation_id = get_installation_id(owner, repo)
|
| 125 |
+
token = await asyncio.to_thread(get_installation_token, installation_id)
|
| 126 |
+
|
| 127 |
url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={ref}"
|
| 128 |
headers = {
|
| 129 |
"Authorization": f"Bearer {token}",
|
| 130 |
"Accept": "application/vnd.github.v3+json"
|
| 131 |
}
|
| 132 |
+
|
| 133 |
+
response = await asyncio.to_thread(github_request, "GET", url, headers=headers)
|
| 134 |
if response.status_code != 200:
|
| 135 |
raise Exception(f"Failed to fetch file content {path}: {response.status_code} {response.text}")
|
| 136 |
|
|
|
|
| 138 |
content = base64.b64decode(content_json["content"]).decode("utf-8", errors="ignore")
|
| 139 |
return content
|
| 140 |
|
| 141 |
+
# print(fetch_file_content("aditi-dsi", "testing-cryptope", "frontend/src/lib/buildSwap.ts", "main"))
|
|
|