Update README.md
README.md (CHANGED)

@@ -101,7 +101,7 @@ Baselines were evaluated using their respective chat-template and system prompts

#### Zero-shot MGSM

-[SeaLLM-7B-v2](https://huggingface.co/SeaLLMs/SeaLLM-7B-v2) also outperforms GPT-3.5 and Qwen-14B on the multilingual MGSM for
+[SeaLLM-7B-v2.5](https://huggingface.co/SeaLLMs/SeaLLM-7B-v2.5) also outperforms GPT-3.5 and Qwen-14B on the multilingual MGSM for Thai.

| Model | MGSM-Zh | MGSM-Th
|-----| ----- | ---
@@ -126,27 +126,6 @@ We evaluate models on 3 benchmarks following the recommended default setups: 5-s
| SeaLLM-7B-v2.5 | Multi | 64.05 | 76.87 | 62.54 | 63.11 | 53.30 | 48.64 | 46.86


-### MT-Bench
-
-**SeaLLM-7B-v2.5 only score 7.40 on MT-bench, better preference tuning is needed**
-On the English [MT-bench](https://arxiv.org/abs/2306.05685) metric, SeaLLM-7B-v2 achieves **7.54** score on the MT-bench (3rd place on the leaderboard for 7B category), outperforms many 70B models and is arguably the only one that handles 10 SEA languages.
-
-Refer to [mt_bench/seallm_7b_v2.jsonl](https://huggingface.co/SeaLLMs/SeaLLM-7B-v2/blob/main/evaluation/mt_bench/seallm_7b_v2.jsonl) for the MT-bench predictions of SeaLLM-7B-v2, and [here](https://github.com/lm-sys/FastChat/issues/3013#issue-2118685341) to reproduce it.
-
-| Model | Access | Langs | MT-Bench
-| --- | --- | --- | --- |
-| GPT-4-turbo | closed | multi | 9.32
-| GPT-4-0613 | closed | multi | 9.18
-| Mixtral-8x7b (46B) | open | multi | 8.3
-| Starling-LM-7B-alpha | open | mono (en) | 8.0
-| OpenChat-3.5-7B | open | mono (en) | 7.81
-| **SeaLLM-7B-v2** | **open** | **multi (10+)** | **7.54**
-| **SeaLLM-7B-v2.5** | **open** | **multi (10+)** | **7.40**
-| [Qwen-14B](https://huggingface.co/Qwen/Qwen-14B-Chat) | open | multi | 6.96
-| [Llama-2-70B](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) | open | mono (en) | 6.86
-| Mistral-7B-instuct | open | mono (en) | 6.84
-
-
### Sea-Bench

Not ready
@@ -165,7 +144,6 @@ Hello world<eos>
<|im_start|>assistant
Hi there, how can I help?<eos>"""

-# NOTE: previous commit has \n between </s> and <|im_start|>, that was incorrect!
# <|im_start|> is not a special token.
# Transformers chat_template should be consistent with vLLM format below.

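The two comments above tie the hand-written prompt format to the tokenizer's built-in chat template. As a quick way to see that correspondence, here is a small sketch of ours (the messages are placeholders) that renders the template as plain text instead of token ids:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLM-7B-v2.5")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello world"},
]
# Render the template without tokenizing, so the raw turn markers are visible.
rendered = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(rendered)  # expected to follow the <|im_start|>{role}\n{content}<eos> layout shown above
```
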
@@ -176,6 +154,9 @@ print(tokenizer.convert_ids_to_tokens(tokenizer.encode(prompt)))
```

#### Using transformers's chat_template
+
+Install the latest transformers (>4.40)
+
```python

from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -183,8 +164,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto

# use bfloat16 to ensure the best performance.
-model = AutoModelForCausalLM.from_pretrained("SeaLLMs/SeaLLM-7B-v2", torch_dtype=torch.bfloat16, device_map=device)
-tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLM-7B-v2")
+model = AutoModelForCausalLM.from_pretrained("SeaLLMs/SeaLLM-7B-v2.5", torch_dtype=torch.bfloat16, device_map=device)
+tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLM-7B-v2.5")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
@@ -195,7 +176,6 @@ messages = [

encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
print(tokenizer.convert_ids_to_tokens(encodeds[0]))
-# ['<s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'system', '<0x0A>', 'You', '▁are', '▁a', '▁helpful', '▁assistant', '.', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'user', '<0x0A>', 'Hello', '▁world', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'ass', 'istant', '<0x0A>', 'Hi', '▁there', ',', '▁how', '▁can', '▁I', '▁help', '▁you', '▁today', '?', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'user', '<0x0A>', 'Ex', 'plain', '▁general', '▁rel', 'ativity', '▁in', '▁details', '.', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'ass', 'istant', '<0x0A>']

model_inputs = encodeds.to(device)
model.to(device)
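The hunks above only show the loading and templating steps; for completeness, here is a minimal end-to-end sketch of the same flow. It is our own composition, not part of the diff: it adds the `import torch` that `torch.bfloat16` relies on, and the generation settings are placeholder assumptions.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"
model = AutoModelForCausalLM.from_pretrained(
    "SeaLLMs/SeaLLM-7B-v2.5", torch_dtype=torch.bfloat16, device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLM-7B-v2.5")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Explain general relativity in details."},
]
# Apply the chat template and append the assistant turn prefix for generation.
encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
generated_ids = model.generate(encodeds.to(device), max_new_tokens=512, do_sample=True, temperature=0.7)
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])
```
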
@@ -210,11 +190,9 @@ print(decoded[0])

```python
from vllm import LLM, SamplingParams
-TURN_TEMPLATE = "<|im_start|>{role}\n{content}</s>"
+TURN_TEMPLATE = "<|im_start|>{role}\n{content}<eos>\n"
TURN_PREFIX = "<|im_start|>{role}\n"

-# There is no \n between </s> and <|im_start|>.
-
def seallm_chat_convo_format(conversations, add_assistant_prefix: bool, system_prompt=None):
    # conversations: list of dict with key `role` and `content` (openai format)
    if conversations[0]['role'] != 'system' and system_prompt is not None:
@@ -228,8 +206,8 @@ def seallm_chat_convo_format(conversations, add_assistant_prefix: bool, system_p
        text += prompt
    return text

-sparams = SamplingParams(temperature=0.1, max_tokens=1024, stop=['</s>', '<|im_start|>'])
-llm = LLM("SeaLLMs/SeaLLM-7B-v2", dtype="bfloat16")
+sparams = SamplingParams(temperature=0.1, max_tokens=1024, stop=['<eos>', '<|im_start|>'])
+llm = LLM("SeaLLMs/SeaLLM-7B-v2.5", dtype="bfloat16")

message = "Explain general relativity in details."
prompt = seallm_chat_convo_format(message, True)
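The helper's comment describes `conversations` as an OpenAI-style list of dicts, so a multi-turn call can also be written out explicitly. The sketch below is ours; it reuses `llm`, `sparams`, and `seallm_chat_convo_format` from the snippet above.

```python
conversations = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the Lorentz factor?"},
    {"role": "assistant", "content": "It is 1 / sqrt(1 - v^2/c^2), the time-dilation factor in special relativity."},
    {"role": "user", "content": "Now explain general relativity in details."},
]
# Format the full history and add the assistant prefix so the model continues the last turn.
prompt = seallm_chat_convo_format(conversations, add_assistant_prefix=True)
gen = llm.generate(prompt, sparams)
print(gen[0].outputs[0].text)
```
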
@@ -238,7 +216,7 @@ gen = llm.generate(prompt, sampling_params)
print(gen[0].outputs[0].text)
```

-#### Fine-tuning SeaLLM-7B-v2
+#### Fine-tuning SeaLLM-7B-v2.5

Should follow the chat format and accurately mask out source tokens. Here is an example.

@@ -250,7 +228,7 @@ conversations = [
    {"role": "user", "content": "Tell me a joke."},
    {"role": "assistant", "content": "Why don't scientists trust atoms? Because they make up everything."},
]
-def seallm_7b_v2_tokenize_multi_turns(tokenizer, conversations, add_assistant_prefix=False):
+def seallm_7b_v25_tokenize_multi_turns(tokenizer, conversations, add_assistant_prefix=False):
    """
    Inputs:
        conversations: list of dict following openai format, eg
@@ -271,7 +249,7 @@ def seallm_7b_v2_tokenize_multi_turns(tokenizer, conversations, add_assistant_pr
        labels = sample['input_ids'].clone()
        labels[sample['token_type_ids'] == 0] = -100
    """
-    TURN_TEMPLATE = "<|im_start|>{role}\n{content}</s>"
+    TURN_TEMPLATE = "<|im_start|>{role}\n{content}<eos>\n"
    TURN_PREFIX = "<|im_start|>{role}\n"
    sample = None
    assistant_prefix_len = None
@@ -304,12 +282,9 @@ def seallm_7b_v2_tokenize_multi_turns(tokenizer, conversations, add_assistant_pr
    return sample

# ! testing
-sample = seallm_7b_v2_tokenize_multi_turns(tokenizer, conversations)
+sample = seallm_7b_v25_tokenize_multi_turns(tokenizer, conversations)
print(tokenizer.convert_ids_to_tokens(sample['input_ids']))
print(sample['token_type_ids'])
-# ['<s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'system', '<0x0A>', 'You', '▁are', '▁hel', 'ful', '▁assistant', '.', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'user', '<0x0A>', 'Hello', '▁world', '.', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'ass', 'istant', '<0x0A>', 'Hi', '▁there', ',', '▁how', '▁can', '▁I', '▁help', '?', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'user', '<0x0A>', 'Tell', '▁me', '▁a', '▁joke', '.', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'ass', 'istant', '<0x0A>', 'Why', '▁don', "'", 't', '▁scientists', '▁trust', '▁atoms', '?', '▁Because', '▁they', '▁make', '▁up', '▁everything', '.', '</s>']
-# [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-


```
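Building on the masking recipe quoted in the docstring (`labels[token_type_ids == 0] = -100`), here is a hedged sketch of a single supervised fine-tuning forward pass. It is our own illustration: it assumes `model`, `tokenizer`, `conversations`, and `seallm_7b_v25_tokenize_multi_turns` as defined in the snippets above, and that the helper returns plain `input_ids`/`token_type_ids` sequences.

```python
import torch

sample = seallm_7b_v25_tokenize_multi_turns(tokenizer, conversations)
input_ids = torch.as_tensor(sample['input_ids']).unsqueeze(0).to(model.device)
token_type_ids = torch.as_tensor(sample['token_type_ids']).unsqueeze(0).to(model.device)

labels = input_ids.clone()
labels[token_type_ids == 0] = -100  # source (non-assistant) tokens are ignored by the loss

outputs = model(input_ids=input_ids, labels=labels)
print(outputs.loss)  # cross-entropy computed over assistant tokens only
```
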
@@ -329,7 +304,7 @@ If you find our project useful, we hope you would kindly star our repo and cite

```
@article{damonlpsg2023seallm,
-  author = {Xuan-Phi Nguyen*, Wenxuan Zhang*, Xin Li*, Mahani Aljunied*,
+  author = {Xuan-Phi Nguyen*, Wenxuan Zhang*, Xin Li*, Mahani Aljunied*, Weiwen Xu, Hou Pong Chan,
            Zhiqiang Hu, Chenhui Shen^, Yew Ken Chia^, Xingxuan Li, Jianyu Wang,
            Qingyu Tan, Liying Cheng, Guanzheng Chen, Yue Deng, Sen Yang,
            Chaoqun Liu, Hang Zhang, Lidong Bing},