Skip to content

Commit b768035

Browse files
authored
Update training.md
1 parent ba7ee0e commit b768035

1 file changed

Lines changed: 4 additions & 4 deletions

File tree

Long_LLM/activation_beacon/new/docs/training.md

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ output_name=beacon-mistral-finetune
5656
torchrun --nproc_per_node 8 $DDP -m main.train \
5757
--output_dir data/outputs/$output_name \
5858
--model_name_or_path data/outputs/beacon-mistral-pretrain/* \
59-
--train_data long-llm:gpt/one_detail_book.train.16K.json long-llm:gpt/one_detail_paper.train.16K.json long-llm:longalpaca/train.json long-llm:booksum/train.16K.json long-llm:needle/train.16K.json long-llm:needle/train.16K.json long-llm:redpajama/train.json[5000] \
59+
--train_data long-llm:gpt/one_detail_book.train.16K.json long-llm:gpt/one_detail_paper.train.16K.json long-llm:longalpaca/train.json long-llm:booksum/train.16K.json long-llm:needle/train.16K.json long-llm:redpajama/train.json[5000] \
6060
--max_length 20000 \
6161
--min_length 7200 \
6262
--group_by_stride strict \
@@ -126,7 +126,7 @@ output_name=beacon-llama3-finetune
126126
torchrun --nproc_per_node 8 $DDP -m main.train \
127127
--output_dir data/outputs/$output_name \
128128
--model_name_or_path data/outputs/beacon-llama3-pretrain/* \
129-
--train_data long-llm:gpt/one_detail_book.train.16K.json long-llm:gpt/one_detail_paper.train.16K.json long-llm:longalpaca/train.json long-llm:booksum/train.16K.json long-llm:needle/train.16K.json long-llm:needle/train.16K.json long-llm:redpajama/train.json[5000] \
129+
--train_data long-llm:gpt/one_detail_book.train.16K.json long-llm:gpt/one_detail_paper.train.16K.json long-llm:longalpaca/train.json long-llm:booksum/train.16K.json long-llm:needle/train.16K.json long-llm:redpajama/train.json[5000] \
130130
--max_length 20000 \
131131
--min_length 7200 \
132132
--group_by_stride strict \
@@ -194,7 +194,7 @@ torchrun --nproc_per_node 8 $DDP -m main.train \
194194
torchrun --nproc_per_node 8 $DDP -m main.train \
195195
--output_dir data/outputs/$output_name \
196196
--model_name_or_path data/outputs/beacon-qwen2-pretrain/* \
197-
--train_data long-llm:gpt/one_detail_book.train.16K.json long-llm:gpt/one_detail_paper.train.16K.json long-llm:longalpaca/train.json long-llm:booksum/train.16K.json long-llm:needle/train.16K.json long-llm:needle/train.16K.json long-llm:redpajama/train.json[5000] \
197+
--train_data long-llm:gpt/one_detail_book.train.16K.json long-llm:gpt/one_detail_paper.train.16K.json long-llm:longalpaca/train.json long-llm:booksum/train.16K.json long-llm:needle/train.16K.json long-llm:redpajama/train.json[5000] \
198198
--max_length 20000 \
199199
--min_length 7200 \
200200
--group_by_stride strict \
@@ -219,4 +219,4 @@ torchrun --nproc_per_node 8 $DDP -m main.train \
219219
--bf16 \
220220
--deepspeed data/deepspeed/stage2.json \
221221
--chat_template qwen
222-
```
222+
```

0 commit comments

Comments
 (0)