Spaces:
Sleeping
Sleeping
Ludovic Moncla
committed on
Commit
·
7ef218e
1
Parent(s):
b6fb973
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,9 +14,15 @@ ner = pipeline("token-classification", model="GEODE/camembert-base-edda-span-cla
|
|
| 14 |
placename_classifier = pipeline("text-classification", model="GEODE/bert-base-multilingual-cased-classification-ner", truncation=True)
|
| 15 |
relation_classifier = pipeline("text-classification", model="GEODE/bert-base-multilingual-cased-classification-relation", truncation=True)
|
| 16 |
generator = pipeline("text2text-generation", model="GEODE/mt5-small-coords-norm", truncation=True)
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
def build_visualization(graph, height="1000px", width="100%"):
|
| 22 |
"""Convert the rdflib graph into a pyvis network graph for visualization"""
|
|
@@ -70,13 +76,13 @@ def run_pipeline(text):
|
|
| 70 |
placenames = []
|
| 71 |
relations = []
|
| 72 |
|
| 73 |
-
|
| 74 |
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
|
| 75 |
g = Graph()
|
| 76 |
g.bind("rdf", RDF)
|
| 77 |
g.bind("rdfs", RDFS)
|
| 78 |
g.bind("xsd", XSD)
|
| 79 |
-
g.bind("
|
| 80 |
g.bind("skos", SKOS)
|
| 81 |
|
| 82 |
entity_uris = {}
|
|
@@ -105,31 +111,31 @@ def run_pipeline(text):
|
|
| 105 |
span['uri'] = uri_entity
|
| 106 |
span['label'] = label[0]['label']
|
| 107 |
entity_uris[word] = uri_entity
|
| 108 |
-
add_triplet(g,
|
| 109 |
-
|
| 110 |
if word.startswith("l'"):
|
| 111 |
word = word[2:]
|
| 112 |
-
add_triplet(g,
|
| 113 |
if span['entity_group'] == 'Latlong':
|
| 114 |
latlong = True
|
| 115 |
|
| 116 |
|
| 117 |
entity_uris[head] = uri_article
|
| 118 |
-
add_triplet(g,
|
| 119 |
|
| 120 |
if aliases:
|
| 121 |
if len(aliases) > 0:
|
| 122 |
-
add_triplet(g,
|
| 123 |
for alias in aliases[1:]:
|
| 124 |
-
add_triplet(g,
|
| 125 |
else:
|
| 126 |
-
add_triplet(g,
|
| 127 |
|
| 128 |
if latlong:
|
| 129 |
predicted_coordinates_from_pipeline = generator(text, max_length=128)
|
| 130 |
coords = dms_to_dd(predicted_coordinates_from_pipeline[0]['generated_text'])
|
| 131 |
-
add_triplet(g,
|
| 132 |
-
add_triplet(g,
|
| 133 |
|
| 134 |
# filter out non NP_Spatial or Relation spans
|
| 135 |
filtered_spans = [span for span in spans if span['entity_group'] in ['NP_Spatial', 'Relation']]
|
|
@@ -142,16 +148,15 @@ def run_pipeline(text):
|
|
| 142 |
relations.append({'relation': word, 'label': label[0]['label']})
|
| 143 |
|
| 144 |
if idx > 0 and idx < len(filtered_spans) - 1:
|
| 145 |
-
link_to_subject_object(g, place_type[0]['label'], uri_article, label[0]['label'],word, filtered_spans[idx-1], filtered_spans[idx+1], statement_uris,
|
| 146 |
|
| 147 |
-
#TODO: add triplet like ville de France: NC_Spatial + de + NP_Spatial (type Pays ou Region)
|
| 148 |
span_object = pattern_starting_article(text, spans)
|
| 149 |
if span_object is not None:
|
| 150 |
-
stmt_uri =
|
| 151 |
statement_uris["de"] = stmt_uri
|
| 152 |
-
add_triplet(g, stmt_uri, RDF.subject,
|
| 153 |
-
add_triplet(g, stmt_uri, RDF.object,
|
| 154 |
-
add_triplet(g, stmt_uri, RDF.predicate,
|
| 155 |
|
| 156 |
|
| 157 |
img_path, html_path = create_graph_viz(g)
|
|
@@ -166,9 +171,9 @@ def run_pipeline(text):
|
|
| 166 |
|
| 167 |
|
| 168 |
examples = [
|
|
|
|
| 169 |
"* AACH ou ACH, s. f. petite ville d'Allemagne dans le cercle de Souabe, près de la source de l'Aach. Long. 26. 57. lat. 47. 55.",
|
| 170 |
-
"
|
| 171 |
-
"* ARÉQUIPE, ou ARIQUIPA, (Géog.) ville de l'Amérique méridion. dans le Pérou, sur une riviere, dans un terrein fertile. Long. 308. lat. mérid. 16. 40.",
|
| 172 |
"* AUTAN-KELURAN, (Géog.) ville du Turquestan. Long. 110d. & lat. 46. 45. selon Uluhbeg ; & long. 116. & lat. 45. selon Nassiredden.",
|
| 173 |
"Boston ; c'est le nom qu'on a donné à la ville capitale de la nouvelle Angleterre, dans l'Amérique septentrionale ; elle est grande & a un très-bon port. Lat. 42 degrés, 20 minutes ; long. 306 degrés, 50 & quelques minutes.",
|
| 174 |
]
|
|
@@ -210,7 +215,6 @@ with gr.Blocks() as demo:
|
|
| 210 |
|
| 211 |
out_places = gr.Textbox(label="Placenames and their type (City/River/Mountain/Other...)", lines=4)
|
| 212 |
out_relations = gr.Textbox(label="Spatial relations and their type (Adjacency/Orientation/Distance/Other...)", lines=4)
|
| 213 |
-
#out_map = gr.Map(label="Coordinates on Map")
|
| 214 |
run_btn.click(fn=run_pipeline, inputs=inp, outputs=[img_output, html_output, out_type_entry, out_place_type, out_cardinality, out_places, out_relations])
|
| 215 |
|
| 216 |
|
|
|
|
| 14 |
placename_classifier = pipeline("text-classification", model="GEODE/bert-base-multilingual-cased-classification-ner", truncation=True)
|
| 15 |
relation_classifier = pipeline("text-classification", model="GEODE/bert-base-multilingual-cased-classification-relation", truncation=True)
|
| 16 |
generator = pipeline("text2text-generation", model="GEODE/mt5-small-coords-norm", truncation=True)
|
| 17 |
+
'''
|
| 18 |
+
entry_type_classifier = pipeline("text-classification", model="no-name-research/multilingual-bert-entry-type-classifier", truncation=True)
|
| 19 |
+
place_type_classifier = pipeline("text-classification", model="no-name-research/multilingual-bert-place-type-classifier", truncation=True)
|
| 20 |
+
cardinality_classifier = pipeline("text-classification", model="no-name-research/multilingual-bert-cardinality-classifier", truncation=True)
|
| 21 |
+
ner = pipeline("token-classification", model="no-name-research/camembert-token-classification", aggregation_strategy="simple")
|
| 22 |
+
placename_classifier = pipeline("text-classification", model="no-name-research/multilingual-bert-place-ner-classifier", truncation=True)
|
| 23 |
+
relation_classifier = pipeline("text-classification", model="no-name-research/multilingual-bert-spatial-relations-classifier", truncation=True)
|
| 24 |
+
generator = pipeline("text2text-generation", model="anonymous-research/mt5-coordinates", truncation=True)
|
| 25 |
+
'''
|
| 26 |
|
| 27 |
def build_visualization(graph, height="1000px", width="100%"):
|
| 28 |
"""Convert the rdflib graph into a pyvis network graph for visualization"""
|
|
|
|
| 76 |
placenames = []
|
| 77 |
relations = []
|
| 78 |
|
| 79 |
+
EKG = Namespace("http://encyclokg.geo/")
|
| 80 |
SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")
|
| 81 |
g = Graph()
|
| 82 |
g.bind("rdf", RDF)
|
| 83 |
g.bind("rdfs", RDFS)
|
| 84 |
g.bind("xsd", XSD)
|
| 85 |
+
g.bind("ekg", EKG)
|
| 86 |
g.bind("skos", SKOS)
|
| 87 |
|
| 88 |
entity_uris = {}
|
|
|
|
| 111 |
span['uri'] = uri_entity
|
| 112 |
span['label'] = label[0]['label']
|
| 113 |
entity_uris[word] = uri_entity
|
| 114 |
+
add_triplet(g, EKG[uri_entity], RDF.type, EKG[label[0]['label']])
|
| 115 |
+
|
| 116 |
if word.startswith("l'"):
|
| 117 |
word = word[2:]
|
| 118 |
+
add_triplet(g, EKG[uri_entity], SKOS['prefLabel'], Literal(word))
|
| 119 |
if span['entity_group'] == 'Latlong':
|
| 120 |
latlong = True
|
| 121 |
|
| 122 |
|
| 123 |
entity_uris[head] = uri_article
|
| 124 |
+
add_triplet(g, EKG[uri_article], RDF.type, EKG[place_type[0]['label']])
|
| 125 |
|
| 126 |
if aliases:
|
| 127 |
if len(aliases) > 0:
|
| 128 |
+
add_triplet(g, EKG[uri_article], SKOS['prefLabel'], Literal(aliases[0]))
|
| 129 |
for alias in aliases[1:]:
|
| 130 |
+
add_triplet(g, EKG[uri_article], SKOS['altLabel'], Literal(alias))
|
| 131 |
else:
|
| 132 |
+
add_triplet(g, EKG[uri_article], SKOS['prefLabel'], Literal(head))
|
| 133 |
|
| 134 |
if latlong:
|
| 135 |
predicted_coordinates_from_pipeline = generator(text, max_length=128)
|
| 136 |
coords = dms_to_dd(predicted_coordinates_from_pipeline[0]['generated_text'])
|
| 137 |
+
add_triplet(g, EKG[uri_article], EKG['latitude'], Literal(coords[0], datatype=XSD.float))
|
| 138 |
+
add_triplet(g, EKG[uri_article], EKG['longitude'], Literal(coords[1], datatype=XSD.float))
|
| 139 |
|
| 140 |
# filter out non NP_Spatial or Relation spans
|
| 141 |
filtered_spans = [span for span in spans if span['entity_group'] in ['NP_Spatial', 'Relation']]
|
|
|
|
| 148 |
relations.append({'relation': word, 'label': label[0]['label']})
|
| 149 |
|
| 150 |
if idx > 0 and idx < len(filtered_spans) - 1:
|
| 151 |
+
link_to_subject_object(g, place_type[0]['label'], uri_article, label[0]['label'],word, filtered_spans[idx-1], filtered_spans[idx+1], statement_uris, EKG, RDF)
|
| 152 |
|
|
|
|
| 153 |
span_object = pattern_starting_article(text, spans)
|
| 154 |
if span_object is not None:
|
| 155 |
+
stmt_uri = EKG[f"Statement{len(statement_uris)}"]
|
| 156 |
statement_uris["de"] = stmt_uri
|
| 157 |
+
add_triplet(g, stmt_uri, RDF.subject, EKG[uri_article])
|
| 158 |
+
add_triplet(g, stmt_uri, RDF.object, EKG[entity_uris[span_object['word']]])
|
| 159 |
+
add_triplet(g, stmt_uri, RDF.predicate, EKG["inclusion"])
|
| 160 |
|
| 161 |
|
| 162 |
img_path, html_path = create_graph_viz(g)
|
|
|
|
| 171 |
|
| 172 |
|
| 173 |
examples = [
|
| 174 |
+
"Jean de Luz, S. (Géog.) Lucius Vicus ; le nom basque est Loitzun, petite ville de France en Gascogne, la deuxieme du pays de Labour, & la derniere du côté de l'Espagne, avec un port. Elle est sur une petite riviere, que Piganiol de la Force nomme la Ninette, & M. de Lisle le Nivelet, à 4 lieues N. E. de Fontarabie, 4 S. O. de Bayonne, 174 S. O. de Paris. Long. 15. 59. 28. lat. 43. 23. 15. (D. J.)",
|
| 175 |
"* AACH ou ACH, s. f. petite ville d'Allemagne dans le cercle de Souabe, près de la source de l'Aach. Long. 26. 57. lat. 47. 55.",
|
| 176 |
+
"OCHRIDA, lac d', (Géog.) lac de la Turquie en Europe, entre l'Albanie au couchant, & le Coménolitari au levant. Ce lac n'a qu'une demi-lieue de large sur dix lieues de long, & une seule ville du même nom, autrement dite Giustandil. Les anciens ont connu ce lac sous le nom de lacus Lycuicus. ",
|
|
|
|
| 177 |
"* AUTAN-KELURAN, (Géog.) ville du Turquestan. Long. 110d. & lat. 46. 45. selon Uluhbeg ; & long. 116. & lat. 45. selon Nassiredden.",
|
| 178 |
"Boston ; c'est le nom qu'on a donné à la ville capitale de la nouvelle Angleterre, dans l'Amérique septentrionale ; elle est grande & a un très-bon port. Lat. 42 degrés, 20 minutes ; long. 306 degrés, 50 & quelques minutes.",
|
| 179 |
]
|
|
|
|
| 215 |
|
| 216 |
out_places = gr.Textbox(label="Placenames and their type (City/River/Mountain/Other...)", lines=4)
|
| 217 |
out_relations = gr.Textbox(label="Spatial relations and their type (Adjacency/Orientation/Distance/Other...)", lines=4)
|
|
|
|
| 218 |
run_btn.click(fn=run_pipeline, inputs=inp, outputs=[img_output, html_output, out_type_entry, out_place_type, out_cardinality, out_places, out_relations])
|
| 219 |
|
| 220 |
|