Kaguya-19 committed · Commit 143fca0 · Parent(s): 09daf17

fit for sentence transformers

Files changed:
- 1_Pooling/config.json +10 -0
- README.md +31 -5
- config.json +1 -1
- config_sentence_transformers.json +9 -0
- configuration.json +1 -0
- modeling_minicpm.py +9 -0
- modules.json +14 -0
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+{
+    "word_embedding_dimension": 2304,
+    "pooling_mode_cls_token": false,
+    "pooling_mode_mean_tokens": true,
+    "pooling_mode_max_tokens": false,
+    "pooling_mode_mean_sqrt_len_tokens": false,
+    "pooling_mode_weightedmean_tokens": false,
+    "pooling_mode_lasttoken": false,
+    "include_prompt": false
+}
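This is the standard Sentence Transformers pooling config: plain mean pooling over 2304-dimensional token embeddings, with include_prompt set to false so that instruction tokens (the "Query: " prompt the README attaches to queries) are excluded from the mean when a prompt is passed to encode(). The fields map onto the constructor of sentence_transformers.models.Pooling; a minimal sketch of the equivalent module built by hand (toy tensors are illustrative, and include_prompt requires sentence-transformers >= 2.4):

```python
import torch
from sentence_transformers.models import Pooling

# Same settings as 1_Pooling/config.json: plain mean pooling over 2304-dim token
# embeddings, with prompt/instruction tokens excluded from the mean.
pooling = Pooling(
    word_embedding_dimension=2304,
    pooling_mode_mean_tokens=True,
    include_prompt=False,
)

# Toy forward pass: batch of 2 sequences, 5 tokens each, padding masked out.
features = {
    "token_embeddings": torch.randn(2, 5, 2304),
    "attention_mask": torch.tensor([[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]]),
}
print(pooling(features)["sentence_embedding"].shape)  # torch.Size([2, 2304])
```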
README.md CHANGED
@@ -347,6 +347,7 @@ flash-attn>2.3.5
 
 ### 示例脚本 Demo
 
+#### Huggingface Transformers
 ```python
 
 from transformers import AutoModel, AutoTokenizer
@@ -358,10 +359,11 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModel.from_pretrained(model_name, trust_remote_code=True, attn_implementation="flash_attention_2", torch_dtype=torch.float16).to("cuda")
 model.eval()
 
-
-
-
-
+# 事实上我们用的是weighted mean pooling,但为了部署方便,我们将一部分pooling步骤集成在model.forward中
+# In fact, we use weighted mean pooling, but for deployment convenience part of the pooling is integrated into model.forward
+def mean_pooling(hidden, attention_mask):
+    s = torch.sum(hidden * attention_mask.unsqueeze(-1).float(), dim=1)
+    d = attention_mask.sum(dim=1, keepdim=True).float()
     reps = s / d
     return reps
 
@@ -373,7 +375,7 @@ def encode(input_texts):
     attention_mask = batch_dict["attention_mask"]
     hidden = outputs.last_hidden_state
 
-    reps =
+    reps = mean_pooling(hidden, attention_mask)
     embeddings = F.normalize(reps, p=2, dim=1).detach().cpu().numpy()
     return embeddings
 
@@ -391,6 +393,30 @@ scores = (embeddings_query @ embeddings_doc.T)
 print(scores.tolist()) # [[0.3535913825035095, 0.18596848845481873]]
 ```
 
+#### Sentence Transformers
+
+```python
+import torch
+from sentence_transformers import SentenceTransformer
+
+model_name = "openbmb/MiniCPM-Embedding"
+model = SentenceTransformer(model_name, trust_remote_code=True, model_kwargs={"attn_implementation": "flash_attention_2", "torch_dtype": torch.float16})
+model.max_seq_length = 512
+model.tokenizer.padding_side = "right"
+
+queries = ["中国的首都是哪里?"]
+passages = ["beijing", "shanghai"]
+
+
+INSTRUCTION = "Query: "
+
+embeddings_query = model.encode(queries, prompt=INSTRUCTION, normalize_embeddings=True)
+embeddings_doc = model.encode(passages, normalize_embeddings=True)
+
+scores = (embeddings_query @ embeddings_doc.T)
+print(scores.tolist()) # [[0.3535913825035095, 0.18596848845481873]]
+```
+
 ## 实验结果 Evaluation Results
 
 ### 中文与英文检索结果 CN/EN Retrieval Results
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-    "_name_or_path": "openbmb/
+    "_name_or_path": "openbmb/MiniCPM-Embedding",
     "architectures": [
         "MiniCPM"
     ],
config_sentence_transformers.json ADDED
@@ -0,0 +1,9 @@
+{
+    "__version__": {
+        "sentence_transformers": "2.7.0",
+        "transformers": "4.37.2",
+        "pytorch": "2.0.1+cu121"
+    },
+    "prompts": {},
+    "default_prompt_name": null
+}
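The version block records the stack the checkpoint was exported with. Because "prompts" is empty and "default_prompt_name" is null, the README demo passes the query instruction explicitly via prompt="Query: " at encode time. A hypothetical alternative, not what this commit ships, would be to register the instruction as a named prompt when loading the model (the name "query" below is illustrative):

```python
from sentence_transformers import SentenceTransformer

# Hypothetical variant: register the query instruction under a name instead of
# passing the raw string to every encode() call (sentence-transformers >= 2.4).
model = SentenceTransformer(
    "openbmb/MiniCPM-Embedding",
    trust_remote_code=True,
    prompts={"query": "Query: "},
)

embeddings_query = model.encode(["中国的首都是哪里?"], prompt_name="query", normalize_embeddings=True)
embeddings_doc = model.encode(["beijing", "shanghai"], normalize_embeddings=True)  # no prompt for documents
```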
configuration.json ADDED
@@ -0,0 +1 @@
+{"task":"sentence-embedding"}
modeling_minicpm.py CHANGED
@@ -1043,6 +1043,8 @@ class MiniCPMModel(MiniCPMPreTrainedModel):
         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids) * self.config.scale_emb
 
+        _attention_mask = attention_mask
+
         if self._use_flash_attention_2:
             # 2d mask is passed through the layers
             attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
@@ -1107,6 +1109,13 @@ class MiniCPMModel(MiniCPMPreTrainedModel):
         if output_hidden_states:
             all_hidden_states += (hidden_states,)
 
+        # gen weight before mean pooling
+        attention_mask_ = _attention_mask * _attention_mask.cumsum(dim=1)
+        s = hidden_states * attention_mask_.unsqueeze(-1).float()
+        d = attention_mask_.sum(dim=1, keepdim=True).unsqueeze(1).float() / _attention_mask.sum(dim=1, keepdim=True).unsqueeze(1).float()
+
+        hidden_states = s / d
+
         next_cache = None
         if use_cache:
             next_cache = next_decoder_cache.to_legacy_cache() if use_legacy_cache else next_decoder_cache
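Combined with the plain mean_pooling helper in the README, these lines implement weighted mean pooling: _attention_mask * _attention_mask.cumsum(dim=1) gives each real token its 1-based position among the non-padding tokens as a weight (padding stays at 0), forward() returns the hidden states rescaled by w_i * N / W (N = number of real tokens, W = sum of weights), and the subsequent plain mean yields sum_i(w_i * h_i) / W. A self-contained sketch of that equivalence, with toy shapes and illustrative names:

```python
import torch

torch.manual_seed(0)
hidden = torch.randn(2, 5, 4)                               # [batch, seq_len, dim]
mask = torch.tensor([[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]])     # attention mask, right padding

# Direct weighted mean: weight_i = 1-based token position, 0 on padding.
w = (mask * mask.cumsum(dim=1)).float()
direct = (hidden * w.unsqueeze(-1)).sum(dim=1) / w.sum(dim=1, keepdim=True)

# Stage 1 (what model.forward now does in this commit): rescale the hidden states.
s = hidden * w.unsqueeze(-1)
d = w.sum(dim=1, keepdim=True).unsqueeze(1) / mask.sum(dim=1, keepdim=True).unsqueeze(1).float()
rescaled = s / d

# Stage 2 (the README's mean_pooling): ordinary masked mean over tokens.
two_stage = (rescaled * mask.unsqueeze(-1).float()).sum(dim=1) / mask.sum(dim=1, keepdim=True).float()

print(torch.allclose(direct, two_stage, atol=1e-6))         # True
```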
modules.json ADDED
@@ -0,0 +1,14 @@
+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  }
+]
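modules.json is what lets SentenceTransformer assemble the pipeline: module 0 is the Transformer loaded from the repository root (empty path), module 1 is the Pooling layer configured in 1_Pooling/config.json. A quick sanity check of the assembled stack (a sketch; the exact printed repr depends on the installed sentence-transformers version):

```python
import torch
from sentence_transformers import SentenceTransformer

model = SentenceTransformer(
    "openbmb/MiniCPM-Embedding",
    trust_remote_code=True,
    model_kwargs={"torch_dtype": torch.float16},
)

print(model)     # two-module pipeline: (0) Transformer, (1) Pooling, as listed in modules.json
print(model[1])  # Pooling(..., pooling_mode_mean_tokens=True, include_prompt=False)
```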