@@ -56,7 +56,7 @@ output_name=beacon-mistral-finetune
5656torchrun --nproc_per_node 8 $DDP -m main.train \
5757--output_dir data/outputs/$output_name \
5858--model_name_or_path data/outputs/beacon-mistral-pretrain/* \
59- --train_data long-llm:gpt/one_detail_book.train.16K.json long-llm:gpt/one_detail_paper.train.16K.json long-llm:longalpaca/train.json long-llm:booksum/train.16K.json long-llm:needle/train.16K.json long-llm:needle/train.16K.json long-llm: redpajama/train.json[5000] \
59+ --train_data long-llm:gpt/one_detail_book.train.16K.json long-llm:gpt/one_detail_paper.train.16K.json long-llm:longalpaca/train.json long-llm:booksum/train.16K.json long-llm:needle/train.16K.json long-llm:redpajama/train.json[5000] \
6060--max_length 20000 \
6161--min_length 7200 \
6262--group_by_stride strict \
@@ -126,7 +126,7 @@ output_name=beacon-llama3-finetune
126126torchrun --nproc_per_node 8 $DDP -m main.train \
127127--output_dir data/outputs/$output_name \
128128--model_name_or_path data/outputs/beacon-llama3-pretrain/* \
129- --train_data long-llm:gpt/one_detail_book.train.16K.json long-llm:gpt/one_detail_paper.train.16K.json long-llm:longalpaca/train.json long-llm:booksum/train.16K.json long-llm:needle/train.16K.json long-llm:needle/train.16K.json long-llm: redpajama/train.json[5000] \
129+ --train_data long-llm:gpt/one_detail_book.train.16K.json long-llm:gpt/one_detail_paper.train.16K.json long-llm:longalpaca/train.json long-llm:booksum/train.16K.json long-llm:needle/train.16K.json long-llm:redpajama/train.json[5000] \
130130--max_length 20000 \
131131--min_length 7200 \
132132--group_by_stride strict \
@@ -194,7 +194,7 @@ torchrun --nproc_per_node 8 $DDP -m main.train \
194194torchrun --nproc_per_node 8 $DDP -m main.train \
195195--output_dir data/outputs/$output_name \
196196--model_name_or_path data/outputs/beacon-qwen2-pretrain/* \
197- --train_data long-llm:gpt/one_detail_book.train.16K.json long-llm:gpt/one_detail_paper.train.16K.json long-llm:longalpaca/train.json long-llm:booksum/train.16K.json long-llm:needle/train.16K.json long-llm:needle/train.16K.json long-llm: redpajama/train.json[5000] \
197+ --train_data long-llm:gpt/one_detail_book.train.16K.json long-llm:gpt/one_detail_paper.train.16K.json long-llm:longalpaca/train.json long-llm:booksum/train.16K.json long-llm:needle/train.16K.json long-llm:redpajama/train.json[5000] \
198198--max_length 20000 \
199199--min_length 7200 \
200200--group_by_stride strict \
@@ -219,4 +219,4 @@ torchrun --nproc_per_node 8 $DDP -m main.train \
219219--bf16 \
220220--deepspeed data/deepspeed/stage2.json \
221221--chat_template qwen
222- ```
222+ ```
0 commit comments