Upload 4 files
- Dockerfile +18 -0
- agent_collaboratif_avid.py +1084 -0
- chainlit.md +14 -0
- requirements.txt +74 -0
Dockerfile
ADDED
@@ -0,0 +1,18 @@
# Use an official Python image as the base
# (3.11-slim replaces 3.11-slim-buster, which is not a published tag for Python 3.11)
FROM python:3.11-slim

# Set the working directory inside the container
WORKDIR /app

# Copy requirements.txt and install the dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application
COPY . .

# Expose port 7860
EXPOSE 7860

# Launch the Chainlit application
# NOTE: this assumes an app.py entry point at the project root; app.py is not part of this commit.
CMD ["chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
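A minimal local usage sketch for the image above (the image name `agent-avid` and the key values are illustrative):

    docker build -t agent-avid .
    docker run -p 7860:7860 -e PINECONE_API_KEY=... -e OPENAI_API_KEY=... agent-avid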
agent_collaboratif_avid.py
ADDED
@@ -0,0 +1,1084 @@
"""
Collaborative LangGraph agent for the Université Gustave Eiffel
===============================================================

This script implements a collaborative multi-database agent that uses LangGraph to
orchestrate searches across 4 Pinecone vector databases tied to Sustainable City themes.

Architecture:
- LangGraph workflow with specialised nodes
- Langchain-Pinecone retrievers using similarity search with scores
- Per-database category filtering
- Anti-hallucination validation loop
- Intelligent search orchestration

Prerequisites:
- pip install langgraph langchain langchain-pinecone langchain-openai pinecone
- Environment variables: PINECONE_API_KEY, OPENAI_API_KEY
"""

import os
import json
from typing import TypedDict, Annotated, List, Dict, Any, Sequence, Optional
from operator import add

from langchain_openai import ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from langchain_core.embeddings import Embeddings
from langchain_core.documents import Document
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser

from langgraph.graph import StateGraph, END
from langgraph.prebuilt import ToolNode
# from langgraph.checkpoint.memory import MemorySaver

from pinecone import Pinecone
import asyncio

# =============================================================================
# GLOBAL CONFIGURATION
# =============================================================================

# API configuration: keys are read from the environment so no secret lives in
# the code; model name and base URL fall back to the deployment defaults below.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_MODEL_NAME = os.environ.get("OPENAI_MODEL_NAME", "mistralai/Mistral-Small-3.1-24B-Instruct-2503")
OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://ragarenn.eskemm-numerique.fr/sso/ch@t/api")

HUGGINGFACE_MODEL = os.environ.get("HUGGINGFACE_MODEL", "sentence-transformers/all-mpnet-base-v2")
PINECONE_INDEX_NAME = "all-jdlp"

# Model configuration
MAX_VALIDATION_LOOPS = 1
SIMILARITY_TOP_K = 10
SIMILARITY_SCORE_THRESHOLD = 0.5

# Environment variable validation
if not PINECONE_API_KEY:
    raise ValueError("❌ PINECONE_API_KEY non définie. Exécutez: export PINECONE_API_KEY='votre-clé'")
if not OPENAI_API_KEY:
    raise ValueError("❌ OPENAI_API_KEY non définie. Exécutez: export OPENAI_API_KEY='votre-clé'")

# =============================================================================
# HUGGINGFACE EMBEDDINGS
# =============================================================================

class HuggingFaceEmbeddings(Embeddings):
    """
    Embeddings class backed by HuggingFace sentence-transformers.
    """

    def __init__(self, model_name: str = HUGGINGFACE_MODEL):
        """
        Initialise the HuggingFace embeddings.

        Args:
            model_name: Name of the HuggingFace model to use
        """
        from sentence_transformers import SentenceTransformer

        self.model_name = model_name
        print(f"🤗 Chargement du modèle HuggingFace: {model_name}")
        self.model = SentenceTransformer(model_name)
        self.dimension = self.model.get_sentence_embedding_dimension()
        print(f"✅ Modèle chargé (dimension: {self.dimension})")

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Generate embeddings for a list of documents.

        Args:
            texts: List of texts to vectorise

        Returns:
            List of embedding vectors
        """
        embeddings = self.model.encode(texts, convert_to_numpy=True)
        return embeddings.tolist()

    def embed_query(self, text: str) -> List[float]:
        """
        Generate an embedding for a single query.

        Args:
            text: Query text

        Returns:
            Embedding vector
        """
        embedding = self.model.encode(text, convert_to_numpy=True)
        return embedding.tolist()

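# Illustrative usage sketch for the class above (not executed at import time):
#
#   emb = HuggingFaceEmbeddings()                       # loads all-mpnet-base-v2 by default
#   vec = emb.embed_query("mobilité urbaine durable")   # one 768-dimensional vector
#   assert len(vec) == emb.dimension
#   docs = emb.embed_documents(["texte A", "texte B"])  # one vector per document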
# =============================================================================
# GRAPH STATE DEFINITION
# =============================================================================

class AgentState(TypedDict):
    """Global state of the LangGraph workflow."""
    messages: Annotated[Sequence[BaseMessage], add]
    user_query: str
    query_analysis: Dict[str, Any]
    collected_information: List[Dict[str, Any]]
    validation_results: List[Dict[str, Any]]
    final_response: str
    iteration_count: int
    errors: List[str]
    additional_information: List[Dict[str, Any]]  # Newly found related information

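# Note: the `add` annotation on `messages` acts as a LangGraph reducer; values
# returned for this key are concatenated onto the existing sequence rather than
# replacing it, so the conversation history accumulates across nodes.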
# =============================================================================
# PINECONE RETRIEVER INITIALISATION
# =============================================================================

class PineconeRetrieverManager:
    """Centralised manager for the Pinecone retrievers."""

    def __init__(self):
        """Initialise the manager and create the 4 specialised retrievers."""
        print("🔧 Initialisation du gestionnaire Pinecone...")

        self.pc = Pinecone(api_key=PINECONE_API_KEY)
        self.index = self.pc.Index(PINECONE_INDEX_NAME)

        # HuggingFace embeddings shared by every retriever
        self.embeddings = HuggingFaceEmbeddings()

        self.retrievers = {
            "laboratoires": self._create_retriever(
                category="FICHELABOTHEMATIQUEAVID",
                description="Laboratoires et thématiques Ville Durable"
            ),
            "formations": self._create_retriever(
                category="FORMATIONTHEMATIQUEAVID",
                description="Formations liées à la Ville Durable"
            ),
            "recherche": self._create_retriever(
                category="RECHERCHETHEMATIQUEAVID",
                description="Axes de recherche et partenariats"
            ),
            "publications": self._create_retriever(
                category="PUBLICATIONTHEMATIQUEAVID",
                description="Publications scientifiques"
            )
        }

        print("✅ Gestionnaire Pinecone initialisé avec 4 retrievers\n")

    def _create_retriever(self, category: str, description: str):
        """Create a Pinecone retriever filtered on a single category."""
        vectorstore = PineconeVectorStore(
            index=self.index,
            embedding=self.embeddings,
            text_key="text",
            namespace=""
        )

        retriever = vectorstore.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={
                "k": SIMILARITY_TOP_K,
                "score_threshold": SIMILARITY_SCORE_THRESHOLD,
                "filter": {"categorie": {"$eq": category}}
            }
        )

        retriever.metadata = {
            "category": category,
            "description": description
        }

        return retriever

    def get_retriever(self, retriever_name: str):
        """Look up a retriever by name."""
        return self.retrievers.get(retriever_name)

    def search_all_databases(self, query: str, exclude_categories: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """
        Search every database for related information.

        Args:
            query: Search query
            exclude_categories: Categories to leave out of the search

        Returns:
            List of related pieces of information found
        """
        exclude_categories = exclude_categories or []
        similar_info = []

        for db_name, retriever in self.retrievers.items():
            if retriever.metadata["category"] in exclude_categories:
                continue

            try:
                documents = retriever.get_relevant_documents(query)

                if documents:
                    for doc in documents:
                        similar_info.append({
                            "database": db_name,
                            "category": retriever.metadata["category"],
                            "content": doc.page_content,
                            "metadata": doc.metadata,
                            "score": getattr(doc, 'score', None)
                        })
            except Exception as e:
                print(f"⚠️ Erreur recherche similaires dans '{db_name}': {str(e)}")

        return similar_info

retriever_manager = PineconeRetrieverManager()

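# Illustrative sketch of what a retrieval call above boils down to (assuming the
# index stores one record per chunk with a "categorie" metadata field):
#
#   retriever = retriever_manager.get_retriever("formations")
#   docs = retriever.get_relevant_documents("master aménagement urbain")
#   # -> up to 10 chunks matching the filter {"categorie": {"$eq": "FORMATIONTHEMATIQUEAVID"}}
#   #    whose similarity score against the query embedding is >= 0.5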
# =============================================================================
# NODE 1: QUERY ANALYSIS
# =============================================================================

def analyze_query_node(state: AgentState) -> AgentState:
    """Node that analyses the user query."""
    print(f"\n{'='*80}")
    print(f"📊 NODE 1: ANALYSE DE LA REQUÊTE")
    print(f"{'='*80}")
    print(f"🔍 Requête: {state['user_query']}\n")

    llm = ChatOpenAI(
        model=OPENAI_MODEL_NAME,
        base_url=OPENAI_BASE_URL,
        api_key=OPENAI_API_KEY,
        temperature=0
    )

    analysis_prompt = ChatPromptTemplate.from_messages([
        ("system", """Tu es un expert de l'Université Gustave Eiffel spécialisé dans les thématiques de Ville Durable.

Analyse la requête et détermine quelle(s) base(s) de données interroger parmi:

1. **laboratoires** (FICHELABOTHEMATIQUEAVID)
2. **formations** (FORMATIONTHEMATIQUEAVID)
3. **recherche** (RECHERCHETHEMATIQUEAVID)
4. **publications** (PUBLICATIONTHEMATIQUEAVID)

Réponds UNIQUEMENT en JSON valide."""),
        ("human", """{user_query}

Format de réponse attendu:
{{
  "databases_to_query": ["laboratoires", "formations", "recherche", "publications"],
  "priorities": {{
    "laboratoires": "high",
    "formations": "medium",
    "recherche": "low",
    "publications": "high"
  }},
  "optimized_queries": {{
    "laboratoires": "requête optimisée",
    "formations": "requête optimisée",
    "recherche": "requête optimisée",
    "publications": "requête optimisée"
  }},
  "analysis_summary": "résumé de l'analyse"
}}""")
    ])

    json_parser = JsonOutputParser()
    analysis_chain = analysis_prompt | llm | json_parser

    try:
        query_analysis = analysis_chain.invoke({"user_query": state["user_query"]})

        print(f"✅ Bases identifiées: {', '.join(query_analysis['databases_to_query'])}")
        print(f"✅ {query_analysis['analysis_summary']}\n")

        state["query_analysis"] = query_analysis
        state["messages"].append(AIMessage(content=f"Analyse terminée: {query_analysis['analysis_summary']}"))

    except Exception as e:
        error_msg = f"Erreur lors de l'analyse: {str(e)}"
        print(f"❌ {error_msg}")
        state["errors"].append(error_msg)
        # Fall back to the laboratory database with the raw user query
        state["query_analysis"] = {
            "databases_to_query": ["laboratoires"],
            "priorities": {"laboratoires": "high"},
            "optimized_queries": {"laboratoires": state["user_query"]},
            "analysis_summary": "Analyse par défaut suite à erreur"
        }

    return state

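# Illustrative example of a parsed `query_analysis` for a query about master's
# programmes (all values invented for the sketch):
#
#   {
#       "databases_to_query": ["formations", "laboratoires"],
#       "priorities": {"formations": "high", "laboratoires": "medium"},
#       "optimized_queries": {"formations": "master aménagement urbain durable",
#                             "laboratoires": "laboratoires aménagement urbain"},
#       "analysis_summary": "Requête orientée formations de niveau master"
#   }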
# =============================================================================
# NODE 2: INFORMATION COLLECTION
# =============================================================================

def collect_information_node(state: AgentState) -> AgentState:
    """Node that collects information from the Pinecone databases."""
    print(f"\n{'='*80}")
    print(f"🔎 NODE 2: COLLECTE D'INFORMATIONS DEPUIS PINECONE")
    print(f"{'='*80}\n")

    query_analysis = state["query_analysis"]
    collected_info = []

    # Query the databases in priority order: high, then medium, then low
    priorities_order = {"high": 0, "medium": 1, "low": 2}
    databases = sorted(
        query_analysis["databases_to_query"],
        key=lambda db: priorities_order.get(query_analysis["priorities"].get(db, "low"), 2)
    )

    for db_name in databases:
        retriever = retriever_manager.get_retriever(db_name)
        if not retriever:
            print(f"⚠️ Retriever '{db_name}' non trouvé, ignoré.")
            continue

        query = query_analysis["optimized_queries"].get(db_name, state["user_query"])
        priority = query_analysis["priorities"].get(db_name, "low")

        print(f"🔍 Recherche dans '{db_name}' (priorité: {priority})")
        print(f"   Requête: {query[:80]}...")

        try:
            documents = retriever.get_relevant_documents(query)

            if documents:
                print(f"   ✅ {len(documents)} résultat(s) trouvé(s)")

                results = []
                for doc in documents:
                    results.append({
                        "content": doc.page_content,
                        "metadata": doc.metadata,
                        "score": getattr(doc, 'score', None)
                    })

                collected_info.append({
                    "database": db_name,
                    "category": retriever.metadata["category"],
                    "query": query,
                    "priority": priority,
                    "results_count": len(results),
                    "results": results
                })
            else:
                print(f"   ℹ️ Aucun résultat")

        except Exception as e:
            error_msg = f"Erreur lors de la recherche dans '{db_name}': {str(e)}"
            print(f"   ❌ {error_msg}")
            state["errors"].append(error_msg)

    print(f"\n✅ Collecte terminée: {len(collected_info)} base(s) interrogée(s)\n")

    state["collected_information"] = collected_info
    state["messages"].append(AIMessage(
        content=f"Collecte terminée depuis {len(collected_info)} bases Pinecone"
    ))

    return state

# =============================================================================
# NODE 3: RESPONSE GENERATION
# =============================================================================

def generate_response_node(state: AgentState) -> AgentState:
    """Node that generates the final answer."""
    print(f"\n{'='*80}")
    print(f"✏️ NODE 3: GÉNÉRATION DE LA RÉPONSE")
    print(f"{'='*80}\n")

    llm = ChatOpenAI(
        model=OPENAI_MODEL_NAME,
        base_url=OPENAI_BASE_URL,
        api_key=OPENAI_API_KEY,
        temperature=0.3
    )

    # Flatten the collected results into a single textual context for the LLM
    context_parts = []
    for info in state["collected_information"]:
        context_parts.append(f"\n### Base: {info['database']} (Catégorie: {info['category']})")
        context_parts.append(f"Requête: {info['query']}")
        context_parts.append(f"Résultats: {info['results_count']}")

        for idx, result in enumerate(info['results'], 1):
            context_parts.append(f"\nRésultat {idx}:")
            context_parts.append(f"Score: {result.get('score', 'N/A')}")
            context_parts.append(f"Contenu: {result['content'][:500]}...")
            if result['metadata']:
                context_parts.append(f"Métadonnées: {json.dumps(result['metadata'], ensure_ascii=False)}")

    context = "\n".join(context_parts)

    generation_prompt = ChatPromptTemplate.from_messages([
        ("system", """Tu es un assistant expert de l'Université Gustave Eiffel spécialisé en Ville Durable.

RÈGLES STRICTES:
1. Base ta réponse EXCLUSIVEMENT sur les informations fournies dans le contexte Pinecone
2. Ne JAMAIS inventer ou extrapoler d'informations
3. Cite précisément les sources (nom de la base, catégorie Pinecone)
4. Si une information n'est pas dans les sources, indique-le clairement
5. Structure ta réponse de manière claire et professionnelle
6. Mentionne les métadonnées pertinentes (laboratoires, formations, auteurs, etc.)"""),
        ("human", """REQUÊTE UTILISATEUR:
{user_query}

CONTEXTE PINECONE (SOURCES VÉRIFIÉES):
{context}

Génère une réponse professionnelle basée uniquement sur ces sources.""")
    ])

    generation_chain = generation_prompt | llm

    try:
        response = generation_chain.invoke({
            "user_query": state["user_query"],
            "context": context
        })

        final_response = response.content
        print(f"✅ Réponse générée ({len(final_response)} caractères)\n")

        state["final_response"] = final_response
        state["messages"].append(AIMessage(content=final_response))

    except Exception as e:
        error_msg = f"Erreur lors de la génération: {str(e)}"
        print(f"❌ {error_msg}")
        state["errors"].append(error_msg)
        state["final_response"] = f"Erreur lors de la génération de la réponse: {str(e)}"

    return state

# =============================================================================
# NODE 4: ANTI-HALLUCINATION VALIDATION
# =============================================================================

def validate_response_node(state: AgentState) -> AgentState:
    """Node that validates the answer against the sources."""
    print(f"\n{'='*80}")
    print(f"✅ NODE 4: VALIDATION ANTI-HALLUCINATION")
    print(f"{'='*80}")

    iteration = state["iteration_count"] + 1
    print(f"🔄 Itération {iteration}/{MAX_VALIDATION_LOOPS}\n")

    llm = ChatOpenAI(
        model=OPENAI_MODEL_NAME,
        base_url=OPENAI_BASE_URL,
        api_key=OPENAI_API_KEY,
        temperature=0
    )

    validation_prompt = ChatPromptTemplate.from_messages([
        ("system", """Tu es un validateur strict pour l'Université Gustave Eiffel.

Vérifie que CHAQUE élément de la réponse est STRICTEMENT basé sur les sources Pinecone fournies.

Sois IMPITOYABLE: mieux vaut rejeter une bonne réponse que laisser passer une hallucination."""),
        ("human", """RÉPONSE À VALIDER:
{response}

SOURCES PINECONE (VÉRITÉ ABSOLUE):
{sources}

Réponds en JSON valide:
{{
  "is_valid": true/false,
  "confidence_score": 0-100,
  "hallucinations_detected": ["liste précise des hallucinations"],
  "missing_information": ["informations manquantes si dans sources"],
  "incorrect_facts": ["faits incorrects ou mal attribués"],
  "validation_message": "message détaillé avec recommandations"
}}""")
    ])

    json_parser = JsonOutputParser()
    validation_chain = validation_prompt | llm | json_parser

    try:
        sources_json = json.dumps(
            state["collected_information"],
            ensure_ascii=False,
            indent=2
        )

        validation_result = validation_chain.invoke({
            "response": state["final_response"],
            "sources": sources_json
        })

        print(f"📊 Confiance: {validation_result['confidence_score']}%")
        print(f"📊 Valide: {validation_result['is_valid']}")

        if validation_result['hallucinations_detected']:
            print(f"⚠️ Hallucinations détectées: {len(validation_result['hallucinations_detected'])}")
            for hall in validation_result['hallucinations_detected']:
                print(f"   - {hall}")
        else:
            print(f"✅ Aucune hallucination détectée")

        state["validation_results"].append(validation_result)
        state["iteration_count"] = iteration

    except Exception as e:
        error_msg = f"Erreur lors de la validation: {str(e)}"
        print(f"❌ {error_msg}")
        state["errors"].append(error_msg)

        # Treat a validation failure as an invalid answer so the routing can react
        validation_result = {
            "is_valid": False,
            "confidence_score": 0,
            "hallucinations_detected": [f"Erreur de validation: {str(e)}"],
            "missing_information": [],
            "incorrect_facts": [],
            "validation_message": "Erreur lors de la validation"
        }
        state["validation_results"].append(validation_result)
        state["iteration_count"] = iteration

    print()
    return state

# =============================================================================
# NODE 5: REFINEMENT
# =============================================================================

def refine_response_node(state: AgentState) -> AgentState:
    """Node that corrects the answer using the validation feedback."""
    print(f"\n{'='*80}")
    print(f"⚙️ NODE 5: REFINEMENT (CORRECTION)")
    print(f"{'='*80}\n")

    last_validation = state["validation_results"][-1]

    print(f"🔧 Correction des problèmes détectés:")
    print(f"   - Hallucinations: {len(last_validation['hallucinations_detected'])}")
    print(f"   - Faits incorrects: {len(last_validation['incorrect_facts'])}")
    print(f"   - Infos manquantes: {len(last_validation['missing_information'])}\n")

    llm = ChatOpenAI(
        model=OPENAI_MODEL_NAME,
        base_url=OPENAI_BASE_URL,
        api_key=OPENAI_API_KEY,
        temperature=0.2
    )

    refinement_prompt = ChatPromptTemplate.from_messages([
        ("system", """Tu es un correcteur expert pour l'Université Gustave Eiffel.

Corrige la réponse précédente en éliminant TOUTES les hallucinations et erreurs."""),
        ("human", """RÉPONSE PRÉCÉDENTE (AVEC ERREURS):
{previous_response}

PROBLÈMES DÉTECTÉS:
{validation_issues}

SOURCES PINECONE (VÉRITÉ ABSOLUE):
{sources}

Génère une réponse corrigée, précise et vérifiable.""")
    ])

    refinement_chain = refinement_prompt | llm

    try:
        validation_issues = json.dumps({
            "hallucinations": last_validation['hallucinations_detected'],
            "incorrect_facts": last_validation['incorrect_facts'],
            "missing_information": last_validation['missing_information'],
            "validation_message": last_validation['validation_message']
        }, ensure_ascii=False, indent=2)

        sources_json = json.dumps(
            state["collected_information"],
            ensure_ascii=False,
            indent=2
        )

        response = refinement_chain.invoke({
            "previous_response": state["final_response"],
            "validation_issues": validation_issues,
            "sources": sources_json
        })

        refined_response = response.content
        print(f"✅ Réponse corrigée générée ({len(refined_response)} caractères)\n")

        state["final_response"] = refined_response
        state["messages"].append(AIMessage(
            content=f"Réponse corrigée (itération {state['iteration_count']})"
        ))

    except Exception as e:
        error_msg = f"Erreur lors du refinement: {str(e)}"
        print(f"❌ {error_msg}")
        state["errors"].append(error_msg)

    return state

# =============================================================================
# NODE 6: RELATED INFORMATION COLLECTION
# =============================================================================

def collect_similar_information_node(state: AgentState) -> AgentState:
    """
    Node that collects related information from the databases not yet consulted.
    """
    print(f"\n{'='*80}")
    print(f"🔗 NODE 6: COLLECTE D'INFORMATIONS SIMILAIRES")
    print(f"{'='*80}\n")

    # Categories already consulted
    used_categories = [info["category"] for info in state["collected_information"]]

    # Search the remaining databases with the original query
    print(f"🔍 Recherche d'informations similaires dans les bases non consultées...")
    similar_info = retriever_manager.search_all_databases(
        query=state["user_query"],
        exclude_categories=used_categories
    )

    # Also search using the final answer as the query
    if state.get("final_response"):
        print(f"🔍 Recherche basée sur la réponse finale...")
        response_based_info = retriever_manager.search_all_databases(
            query=state["final_response"][:500],  # Cap the query length
            exclude_categories=used_categories
        )

        # Merge and de-duplicate
        for info in response_based_info:
            if info not in similar_info:
                similar_info.append(info)

    print(f"✅ {len(similar_info)} information(s) similaire(s) trouvée(s)\n")

    state["additional_information"] = similar_info

    return state

# =============================================================================
# ROUTING FUNCTIONS
# =============================================================================

def should_collect_information(state: AgentState) -> str:
    """Proceed to collection only if the analysis selected at least one database."""
    if state.get("query_analysis") and state["query_analysis"].get("databases_to_query"):
        return "collect"
    return "end"

def should_generate_response(state: AgentState) -> str:
    """Proceed to generation only if at least one database returned results."""
    if state.get("collected_information"):
        return "generate"
    return "end"

def should_validate(state: AgentState) -> str:
    """Proceed to validation only if a non-empty answer was produced."""
    if state.get("final_response"):
        return "validate"
    return "end"

def should_refine_or_collect_similar(state: AgentState) -> str:
    """Decide between another refinement pass and the final related-info step."""
    if not state.get("validation_results"):
        return "collect_similar"

    last_validation = state["validation_results"][-1]
    iteration = state["iteration_count"]

    is_valid = last_validation.get("is_valid", False)
    confidence = last_validation.get("confidence_score", 0)

    if is_valid and confidence >= 85:
        print(f"✅ Validation réussie (confiance: {confidence}%) - Collecte d'infos similaires\n")
        return "collect_similar"

    if iteration >= MAX_VALIDATION_LOOPS:
        print(f"⚠️ Nombre maximum d'itérations atteint ({MAX_VALIDATION_LOOPS}) - Collecte d'infos similaires\n")
        return "collect_similar"

    print(f"🔄 Refinement nécessaire (confiance: {confidence}%, itération {iteration}/{MAX_VALIDATION_LOOPS})\n")
    return "refine"

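# Control flow wired up below (sketch):
#
#   analyze_query -> collect_information -> generate_response -> validate_response
#   validate_response -> refine_response (which loops back to validate_response)
#                     -> collect_similar_information -> END
#   each of the first three nodes can also route straight to END on empty state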
# =============================================================================
# WORKFLOW CONSTRUCTION
# =============================================================================

def create_agent_workflow() -> StateGraph:
    """Create and configure the full LangGraph workflow."""
    print("\n🗺️ Construction du workflow LangGraph...")

    workflow = StateGraph(AgentState)

    workflow.add_node("analyze_query", analyze_query_node)
    workflow.add_node("collect_information", collect_information_node)
    workflow.add_node("generate_response", generate_response_node)
    workflow.add_node("validate_response", validate_response_node)
    workflow.add_node("refine_response", refine_response_node)
    workflow.add_node("collect_similar_information", collect_similar_information_node)

    workflow.set_entry_point("analyze_query")

    workflow.add_conditional_edges(
        "analyze_query",
        should_collect_information,
        {
            "collect": "collect_information",
            "end": END
        }
    )

    workflow.add_conditional_edges(
        "collect_information",
        should_generate_response,
        {
            "generate": "generate_response",
            "end": END
        }
    )

    workflow.add_conditional_edges(
        "generate_response",
        should_validate,
        {
            "validate": "validate_response",
            "end": END
        }
    )

    workflow.add_conditional_edges(
        "validate_response",
        should_refine_or_collect_similar,
        {
            "refine": "refine_response",
            "collect_similar": "collect_similar_information"
        }
    )

    workflow.add_edge("refine_response", "validate_response")
    workflow.add_edge("collect_similar_information", END)

    # memory = MemorySaver()
    # app = workflow.compile(checkpointer=memory)
    app = workflow.compile()

    print("✅ Workflow LangGraph construit avec succès\n")

    return app

# =============================================================================
# EXECUTION FUNCTION
# =============================================================================

async def run_collaborative_agent(user_query: str) -> Dict[str, Any]:
    """Run the full collaborative agent workflow for one query."""
    print(f"\n{'='*80}")
    print(f"🚀 AGENT COLLABORATIF - UNIVERSITÉ GUSTAVE EIFFEL")
    print(f"{'='*80}")
    print(f"🔍 Requête: {user_query}\n")

    app = create_agent_workflow()

    initial_state = {
        "messages": [HumanMessage(content=user_query)],
        "user_query": user_query,
        "query_analysis": {},
        "collected_information": [],
        "validation_results": [],
        "final_response": "",
        "iteration_count": 0,
        "errors": [],
        "additional_information": []
    }

    print(f"{'='*80}")
    print(f"⚙️ EXÉCUTION DU WORKFLOW")
    print(f"{'='*80}\n")

    try:
        final_state = await app.ainvoke(initial_state)

        print(f"\n{'='*80}")
        print(f"✨ PROCESSUS TERMINÉ")
        print(f"{'='*80}\n")

        result = {
            "query": user_query,
            "query_analysis": final_state.get("query_analysis", {}),
            "collected_information": final_state.get("collected_information", []),
            "validation_results": final_state.get("validation_results", []),
            "final_response": final_state.get("final_response", ""),
            "iteration_count": final_state.get("iteration_count", 0),
            "errors": final_state.get("errors", []),
            "additional_information": final_state.get("additional_information", []),
            "sources_used": [
                info["database"]
                for info in final_state.get("collected_information", [])
            ],
            "pinecone_index": PINECONE_INDEX_NAME
        }

        return result

    except Exception as e:
        error_msg = f"Erreur lors de l'exécution du workflow: {str(e)}"
        print(f"\n❌ {error_msg}\n")

        return {
            "query": user_query,
            "query_analysis": {},
            "collected_information": [],
            "validation_results": [],
            "final_response": f"Erreur: {error_msg}",
            "iteration_count": 0,
            "errors": [error_msg],
            "additional_information": [],
            "sources_used": [],
            "pinecone_index": PINECONE_INDEX_NAME
        }

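# Illustrative standalone invocation of the function above (query text invented):
#
#   result = asyncio.run(run_collaborative_agent(
#       "Quelles formations portent sur la mobilité urbaine durable ?"
#   ))
#   print(result["final_response"])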
# =============================================================================
# RESULT DISPLAY FUNCTION
# =============================================================================

def display_results(result: Dict[str, Any]) -> None:
    """
    Print the workflow results in a formatted, readable way.

    Args:
        result: Result dictionary produced by the workflow
    """
    print(f"\n{'='*80}")
    print(f"📋 RÉPONSE FINALE")
    print(f"{'='*80}")
    print(result["final_response"])

    print(f"\n{'='*80}")
    print(f"📊 MÉTADONNÉES DU TRAITEMENT")
    print(f"{'='*80}")
    print(f"🗄️ Index Pinecone: {result['pinecone_index']}")
    print(f"📚 Sources consultées: {', '.join(result['sources_used']) if result['sources_used'] else 'Aucune'}")
    print(f"🔄 Itérations de validation: {result['iteration_count']}")

    if result['validation_results']:
        last_validation = result['validation_results'][-1]
        print(f"✅ Score de confiance final: {last_validation.get('confidence_score', 0)}%")
        print(f"✅ Validation finale: {'Réussie' if last_validation.get('is_valid') else 'Échouée'}")

        hallucinations = last_validation.get('hallucinations_detected', [])
        print(f"⚠️ Hallucinations détectées: {len(hallucinations)}")

        if hallucinations:
            print(f"\n⚠️ HALLUCINATIONS CORRIGÉES:")
            for i, hall in enumerate(hallucinations, 1):
                print(f"   {i}. {hall}")

    if result['errors']:
        print(f"\n❌ ERREURS RENCONTRÉES:")
        for i, error in enumerate(result['errors'], 1):
            print(f"   {i}. {error}")

    print(f"\n{'='*80}")
    print(f"📈 DÉTAILS DE LA COLLECTE")
    print(f"{'='*80}")
    for info in result['collected_information']:
        print(f"\n📦 Base: {info['database']}")
        print(f"   Catégorie: {info['category']}")
        print(f"   Priorité: {info['priority']}")
        print(f"   Résultats: {info['results_count']}")
        print(f"   Requête: {info['query'][:80]}...")

    # Related-information section
    if result.get('additional_information'):
        print(f"\n{'='*80}")
        print(f"💡 LES INFORMATIONS QUI AURAIENT PU VOUS INTÉRESSER")
        print(f"{'='*80}")
        print(f"\nInformations similaires ou apparentées trouvées dans d'autres bases:\n")

        # Group the results by database
        grouped_info = {}
        for info in result['additional_information']:
            grouped_info.setdefault(info['database'], []).append(info)

        # Metadata fields worth printing, with their display label;
        # key variants are tried in order and the first one present wins.
        metadata_fields = [
            (("titre", "title"), "Titre"),
            (("laboratoire",), "Laboratoire"),
            (("formation",), "Formation"),
            (("auteur", "auteurs", "authors"), "Auteur(s)"),
            (("date", "annee", "year"), "Date/Année"),
            (("thematique", "thematiques"), "Thématique(s)"),
            (("niveau",), "Niveau"),
            (("competences",), "Compétences"),
            (("equipements",), "Équipements"),
            (("axe_recherche",), "Axe de recherche"),
            (("partenaires", "collaborations"), "Partenaires/Collaborations"),
            (("url", "lien"), "Lien"),
            (("doi",), "DOI"),
            (("source",), "Source document"),
        ]
        displayed_keys = {k for keys, _ in metadata_fields for k in keys} | {"categorie", "text"}

        # Print each database's results
        for db_name, items in grouped_info.items():
            print(f"\n{'─'*80}")
            print(f"📚 Base: {db_name.upper()}")
            print(f"   Catégorie Pinecone: {items[0]['category']}")
            print(f"   Nombre de résultats: {len(items)}")
            print(f"{'─'*80}\n")

            for idx, item in enumerate(items, 1):
                print(f"   Résultat {idx}:")
                if item.get('score') is not None:
                    print(f"   ├─ Score de similarité: {item['score']:.4f}")
                else:
                    print(f"   ├─ Score: N/A")

                # Content preview (truncated)
                content_preview = item['content'][:300]
                if len(item['content']) > 300:
                    content_preview += "..."
                print(f"   ├─ Contenu: {content_preview}")

                # Detailed metadata display
                if item.get('metadata'):
                    metadata = item['metadata']
                    print(f"   └─ Sources complètes:")

                    for keys, label in metadata_fields:
                        value = next((metadata[k] for k in keys if k in metadata), None)
                        if value is not None:
                            print(f"        • {label}: {value}")

                    # Any remaining metadata not covered above
                    other_metadata = {k: v for k, v in metadata.items() if k not in displayed_keys}
                    if other_metadata:
                        print(f"        • Autres informations: {json.dumps(other_metadata, ensure_ascii=False, indent=8)}")

                print()  # Blank line between results

        print(f"\n{'='*80}")
        print(f"💬 INTERPRÉTATION DES RÉSULTATS SIMILAIRES")
        print(f"{'='*80}")
        print("Ces informations proviennent de bases qui n'ont pas été prioritaires pour")
        print("votre requête initiale, mais qui contiennent des éléments apparentés.")
        print("Elles peuvent enrichir votre compréhension du sujet ou vous orienter")
        print("vers des domaines connexes intéressants.\n")

# =============================================================================
# MAIN FUNCTION
# =============================================================================

async def main():
    """Main entry point of the application."""

    exemples_requetes = [
        "Quels sont les laboratoires de l'université Gustave Eiffel travaillant sur la mobilité urbaine durable?",
        "Je cherche des formations en master sur l'aménagement urbain et le développement durable",
        "Quels laboratoires ont des axes de recherche similaires en énergie et pourraient collaborer?",
        "Liste les équipements disponibles dans les laboratoires travaillant sur la qualité de l'air",
        "Trouve des publications récentes sur la transition énergétique dans les villes",
        "Qui sont les auteurs qui publient sur la mobilité douce et dans quels laboratoires?",
        "Quelles publications traitent de l'urbanisme durable et quand ont-elles été publiées?",
        "Compare les formations et les laboratoires sur le thème de la ville intelligente",
        "Identifie les opportunités de partenariats entre laboratoires sur la résilience urbaine",
        "Quelles sont les compétences enseignées dans les formations liées à l'économie circulaire?"
    ]

    print(f"\n{'='*80}")
    print(f"🎓 AGENT COLLABORATIF - UNIVERSITÉ GUSTAVE EIFFEL")
    print(f"{'='*80}")
    print(f"🗄️ Index Pinecone: {PINECONE_INDEX_NAME}")
    print(f"🤖 Modèle: {OPENAI_MODEL_NAME}")
    print(f"🌐 Base URL: {OPENAI_BASE_URL}")
    print(f"🤗 Embeddings: {HUGGINGFACE_MODEL}")
    print(f"🔄 Max itérations: {MAX_VALIDATION_LOOPS}")
    print(f"🎯 Top K résultats: {SIMILARITY_TOP_K}")
    print(f"📊 Seuil de similarité: {SIMILARITY_SCORE_THRESHOLD}")
    print(f"{'='*80}\n")

    print("📚 EXEMPLES DE REQUÊTES DISPONIBLES:")
    print("="*80)
    for i, req in enumerate(exemples_requetes, 1):
        print(f"{i:2d}. {req}")
    print("="*80 + "\n")

    # Run the first example query by default
    selected_query = exemples_requetes[0]

    print(f"🎯 Requête sélectionnée: {selected_query}\n")

    result = await run_collaborative_agent(selected_query)

    display_results(result)

    print(f"\n{'='*80}")
    print(f"✅ TRAITEMENT TERMINÉ AVEC SUCCÈS")
    print(f"{'='*80}\n")

    return result

# =============================================================================
# SCRIPT ENTRY POINT
# =============================================================================

if __name__ == "__main__":
    """
    Main entry point of the script.

    Required configuration:
    1. Environment variables:
       export PINECONE_API_KEY="votre-clé-pinecone"
       export OPENAI_API_KEY="votre-clé-openai"
       export OPENAI_BASE_URL="https://votre-endpoint.com/v1"  # Optional
       export OPENAI_MODEL_NAME="gpt-4"  # Optional
       export HUGGINGFACE_MODEL="sentence-transformers/all-mpnet-base-v2"  # Optional

    2. Dependencies:
       pip install langgraph langchain langchain-pinecone langchain-openai pinecone sentence-transformers

    3. Pinecone structure:
       - Index: "all-jdlp"
       - Dimension: compatible with the HuggingFace model (e.g. 768)
       - Metric: cosine
       - Categories: FICHELABOTHEMATIQUEAVID, FORMATIONTHEMATIQUEAVID,
         RECHERCHETHEMATIQUEAVID, PUBLICATIONTHEMATIQUEAVID

    Usage:
    - Development: python agent_collaboratif_avid.py
    - Production: integrate into a FastAPI/Flask API
    - Tests: pytest agent_collaboratif_avid.py --asyncio-mode=auto
    """

    asyncio.run(main())
chainlit.md
ADDED
@@ -0,0 +1,14 @@
# Welcome to Chainlit! 🚀🤖

Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.

## Useful Links 🔗

- **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
- **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬

We can't wait to see what you create with Chainlit! Happy coding! 💻😊

## Welcome screen

To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
requirements.txt
ADDED
@@ -0,0 +1,74 @@
# =============================================================================
# REQUIREMENTS - Agent Collaboratif UGE avec Chainlit 2.8.1
# =============================================================================

# -----------------------------------------------------------------------------
# Chainlit framework
# -----------------------------------------------------------------------------
chainlit==2.8.1
literalai>=0.0.500          # For the official Data Layer

# -----------------------------------------------------------------------------
# LangChain ecosystem
# -----------------------------------------------------------------------------
langchain>=0.1.0
langchain-openai>=0.0.5
langchain-pinecone>=0.0.3
langchain-core>=0.1.23
langgraph>=0.0.25

# -----------------------------------------------------------------------------
# Vector stores & embeddings
# -----------------------------------------------------------------------------
# pinecone-client>=3.0.0    # Superseded: the package is now published as "pinecone"
pinecone
sentence-transformers>=2.3.1
torch>=2.1.0                # Required by sentence-transformers
numpy>=1.24.0

# -----------------------------------------------------------------------------
# LLM providers
# -----------------------------------------------------------------------------
openai>=1.12.0

# -----------------------------------------------------------------------------
# Monitoring & observability
# -----------------------------------------------------------------------------
langsmith>=0.1.0
python-dotenv>=1.0.0

# -----------------------------------------------------------------------------
# Database & data layer
# -----------------------------------------------------------------------------
psycopg2-binary>=2.9.9      # PostgreSQL adapter
supabase>=2.3.0             # Supabase Python client
SQLAlchemy>=2.0.0           # ORM for database access

# -----------------------------------------------------------------------------
# Async & utilities
# -----------------------------------------------------------------------------
# asyncio ships with the Python standard library; the PyPI backport must not be installed
aiohttp>=3.9.0
python-multipart>=0.0.6     # For file uploads

# -----------------------------------------------------------------------------
# Data processing
# -----------------------------------------------------------------------------
pydantic>=2.6.0
python-json-logger>=2.0.7

# -----------------------------------------------------------------------------
# Development & testing
# -----------------------------------------------------------------------------
pytest>=7.4.0
pytest-asyncio>=0.21.0
black>=24.1.0
flake8>=7.0.0
mypy>=1.8.0

# -----------------------------------------------------------------------------
# Production
# -----------------------------------------------------------------------------
uvicorn[standard]>=0.27.0
gunicorn>=21.2.0
watchfiles>=0.21.0          # For hot-reload in development