From f594099184866cc9459f5846a80c45b162dca13c Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 11:54:51 -0700 Subject: [PATCH 01/15] Add in-loops --- .../recipes/olmo2/train-1b-5xC-dclm-dolma2-augusta.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2-augusta.yaml b/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2-augusta.yaml index 0df71f69..9f7754dd 100644 --- a/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2-augusta.yaml +++ b/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2-augusta.yaml @@ -14,8 +14,9 @@ model: "olmo2_1B_v2" tokenizer: "dolma2" priority: high cluster: ai2/augusta-google-1 -activation_checkpointing: true eval_interval: 10 +downstream_evaluators: + - olmo2_dev_1b dataset: sources: - name: gs-test From a6e7e01c323f0f86d615589c5341979226e22a5c Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 15:19:41 -0700 Subject: [PATCH 02/15] Use saturn to debug image/core/evals --- pyproject.toml | 2 +- .../olmo2/train-1b-5xC-dclm-dolma2.yaml | 22 +++++++++---------- src/cookbook/utils/config.py | 6 +++-- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 931c5c7e..39d95e2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ wandb = [ "wandb", ] all = [ - "ai2-olmo-core @ git+https://github.com/allenai/OLMo-core.git@ef28f2ab956177f4e9bf8cbcc1bb50457ae575cd", + "ai2-olmo-core @ git+https://github.com/allenai/OLMo-core.git@6599c2f7b85ee584e18febdc93ee1833480348e9", "beaker-py>=1,<2", "GitPython>=3.0,<4.0", "wandb", diff --git a/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml b/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml index 8deaa7e3..fd824bde 100644 --- a/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml +++ b/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml @@ -1,25 +1,25 @@ -name: "olmo2-1b-dclm-dolma2" -description: "OLMo2 1b@5xC dclm-baseline" +name: "olmo2-1b-in-loop-debug" +description: "" budget: "ai2/oe-training" workspace: "ai2/oe-data" -nodes: 4 +nodes: 1 gpus: 8 preemptible: true -max_tokens: 127_939_584_000 +max_tokens: 1_000_000_000 global_batch_size: 2097152 +rank_microbatch_size: 32768 sequence_length: 4096 seed: 1337 model: "olmo2_1B_v2" tokenizer: "dolma2" priority: high -eval_interval: 250 -cluster: ai2/jupiter-cirrascale-2 -rank_microbatch_size: 32768 -weka: true +cluster: ai2/saturn-cirrascale +eval_interval: 10 +downstream_evaluators: + - mmlu_stem_val_rc_5shot dataset: sources: - - name: dclm-baseline-20pct-dolma2 + - name: gs-test target_ratio: 1.0 paths: - - weka://oe-training-default/ai2-llm/preprocessed/dclm/baseline_type_topic_classified_20pct/allenai/dolma2-tokenizer/**/**/part-0*-00000.npy - + - gs://ai2-llm/preprocessed/dclm/love2code_codeprose/codeprose/*.npy diff --git a/src/cookbook/utils/config.py b/src/cookbook/utils/config.py index ee8d6bf0..38799336 100644 --- a/src/cookbook/utils/config.py +++ b/src/cookbook/utils/config.py @@ -241,7 +241,7 @@ def mk_launch_configs(group: ExperimentGroup, beaker_user: str) -> list[BeakerLa budget=group.config.budget or "ai2/oe-data", workspace=group.config.workspace, preemptible=group.config.preemptible, - beaker_image="petew/olmo-core-tch270cu126", + beaker_image="petew/olmo-core-tch270cu128", priority=group.config.priority, env_vars=[BeakerEnvVar(name="NCCL_DEBUG", value="INFO" if group.config.nccl_debug else "WARN")], env_secrets=[ @@ -259,7 +259,9 @@ def mk_launch_configs(group: ExperimentGroup, beaker_user: str) -> list[BeakerLa "cd olmo-cookbook", 'git checkout "$GIT_REF"', "git submodule update --init --recursive", - "pip install -e '.[all]'", + "pip install uv", + "uv pip install -e '.[all]'", + "uv pip install torch==2.7.0 torchaudio torchvision --index-url https://download.pytorch.org/whl/test/cu128 --system", "pip freeze", # Move AWS credentials from env to relevant files "mkdir -p ~/.aws", From 710d2c6456277afccb8a824ff9cb21264667d5be Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 15:27:06 -0700 Subject: [PATCH 03/15] tweaks --- src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml b/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml index fd824bde..c34afc60 100644 --- a/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml +++ b/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml @@ -17,9 +17,10 @@ cluster: ai2/saturn-cirrascale eval_interval: 10 downstream_evaluators: - mmlu_stem_val_rc_5shot +weka: true dataset: sources: - name: gs-test target_ratio: 1.0 paths: - - gs://ai2-llm/preprocessed/dclm/love2code_codeprose/codeprose/*.npy + - weka://oe-training-default/ai2-llm/preprocessed/dclm/baseline_type_topic_classified_20pct/allenai/dolma2-tokenizer/**/**/part-0*-00000.npy From dbc27312cdcfa6a906ac3774e37a301b01d76d7b Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 15:39:30 -0700 Subject: [PATCH 04/15] Fix no grouped tasks --- src/cookbook/model/evaluators.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/cookbook/model/evaluators.py b/src/cookbook/model/evaluators.py index 31372099..e19ea51a 100644 --- a/src/cookbook/model/evaluators.py +++ b/src/cookbook/model/evaluators.py @@ -57,9 +57,11 @@ def get_tasks_for_groups(groups: List[str]) -> List[str]: """Return all tasks in a group""" tasks = [] for group in groups: - if group not in TASK_GROUPS: - raise ValueError(f"Group {group} not found") - - tasks.extend(TASK_GROUPS[group]) + if group in TASK_GROUPS: + tasks.extend(TASK_GROUPS[group]) + elif group.upper() in ALL_TASKS_MAP: + tasks.append(ALL_TASKS_MAP[group.upper()]) + else: + raise ValueError(f"Group or task '{group}' not found") return list(set(tasks)) From 55d592d641dd09e7a1c490cccdc5154115455403 Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 15:43:50 -0700 Subject: [PATCH 05/15] oops --- src/cookbook/utils/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cookbook/utils/config.py b/src/cookbook/utils/config.py index 38799336..b40d7631 100644 --- a/src/cookbook/utils/config.py +++ b/src/cookbook/utils/config.py @@ -260,7 +260,7 @@ def mk_launch_configs(group: ExperimentGroup, beaker_user: str) -> list[BeakerLa 'git checkout "$GIT_REF"', "git submodule update --init --recursive", "pip install uv", - "uv pip install -e '.[all]'", + "uv pip install -e '.[all]' --system", "uv pip install torch==2.7.0 torchaudio torchvision --index-url https://download.pytorch.org/whl/test/cu128 --system", "pip freeze", # Move AWS credentials from env to relevant files From 77d11f61d9d3090940f12df7c825d17ab1050279 Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 15:56:52 -0700 Subject: [PATCH 06/15] try them all --- src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml b/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml index c34afc60..657e58ce 100644 --- a/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml +++ b/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml @@ -16,7 +16,7 @@ priority: high cluster: ai2/saturn-cirrascale eval_interval: 10 downstream_evaluators: - - mmlu_stem_val_rc_5shot + - olmo2_dev_1b weka: true dataset: sources: From 666040056aa40215bcc741f644bc9708895c1e5f Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 16:10:27 -0700 Subject: [PATCH 07/15] Maybe code --- src/cookbook/model/evaluators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cookbook/model/evaluators.py b/src/cookbook/model/evaluators.py index e19ea51a..c50ec38d 100644 --- a/src/cookbook/model/evaluators.py +++ b/src/cookbook/model/evaluators.py @@ -30,8 +30,8 @@ "minerva_math_number_theory_gold_bpb_0shot", "minerva_math_prealgebra_gold_bpb_0shot", "minerva_math_precalculus_gold_bpb_0shot", - "codex_humaneval_gold_bpb_0shot", - "codex_mbpp_gold_bpb_0shot", + # "codex_humaneval_gold_bpb_0shot", + # "codex_mbpp_gold_bpb_0shot", # Sanity check for MCQA ability "copycolors_10way", ] From f7ea6db86128a9afee01e0dd4015ff684a91f46e Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 16:20:29 -0700 Subject: [PATCH 08/15] Just MMLU stem again --- src/cookbook/model/evaluators.py | 46 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/cookbook/model/evaluators.py b/src/cookbook/model/evaluators.py index c50ec38d..bf8504ce 100644 --- a/src/cookbook/model/evaluators.py +++ b/src/cookbook/model/evaluators.py @@ -5,35 +5,35 @@ OLMO2_DEV_1B_TASKS = [ # OLMES Core 9(-ish) RC - "arc_challenge_test_rc_5shot", - "arc_easy_test_rc_5shot", - "hellaswag_rc_5shot", # 1K subset of HellaSwag - "winogrande_val_rc_5shot", # Helpful after 750M-5xC scale - "csqa_val_rc_5shot", - "piqa_val_rc_5shot", - "socialiqa_val_rc_5shot", + # "arc_challenge_test_rc_5shot", + # "arc_easy_test_rc_5shot", + # "hellaswag_rc_5shot", # 1K subset of HellaSwag + # "winogrande_val_rc_5shot", # Helpful after 750M-5xC scale + # "csqa_val_rc_5shot", + # "piqa_val_rc_5shot", + # "socialiqa_val_rc_5shot", # MMLU RC "mmlu_stem_val_rc_5shot", - "mmlu_humanities_val_rc_5shot", - "mmlu_social_sciences_val_rc_5shot", - "mmlu_other_val_rc_5shot", - "mmlu_stem_test_rc_5shot", - "mmlu_humanities_test_rc_5shot", - "mmlu_social_sciences_test_rc_5shot", - "mmlu_other_test_rc_5shot", + # "mmlu_humanities_val_rc_5shot", + # "mmlu_social_sciences_val_rc_5shot", + # "mmlu_other_val_rc_5shot", + # "mmlu_stem_test_rc_5shot", + # "mmlu_humanities_test_rc_5shot", + # "mmlu_social_sciences_test_rc_5shot", + # "mmlu_other_test_rc_5shot", # Gen tasks BPB - "gsm8k_gold_bpb_5shot", - "minerva_math_algebra_gold_bpb_0shot", - "minerva_math_counting_and_probability_gold_bpb_0shot", - "minerva_math_geometry_gold_bpb_0shot", - "minerva_math_intermediate_algebra_gold_bpb_0shot", - "minerva_math_number_theory_gold_bpb_0shot", - "minerva_math_prealgebra_gold_bpb_0shot", - "minerva_math_precalculus_gold_bpb_0shot", + # "gsm8k_gold_bpb_5shot", + # "minerva_math_algebra_gold_bpb_0shot", + # "minerva_math_counting_and_probability_gold_bpb_0shot", + # "minerva_math_geometry_gold_bpb_0shot", + # "minerva_math_intermediate_algebra_gold_bpb_0shot", + # "minerva_math_number_theory_gold_bpb_0shot", + # "minerva_math_prealgebra_gold_bpb_0shot", + # "minerva_math_precalculus_gold_bpb_0shot", # "codex_humaneval_gold_bpb_0shot", # "codex_mbpp_gold_bpb_0shot", # Sanity check for MCQA ability - "copycolors_10way", + # "copycolors_10way", ] TASK_GROUPS: Dict[str, List[str]] = { From e490b917098de28daded9a3506fa6edb0bcd69f3 Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 16:27:19 -0700 Subject: [PATCH 09/15] Multiple mmlu --- src/cookbook/model/evaluators.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/cookbook/model/evaluators.py b/src/cookbook/model/evaluators.py index bf8504ce..d8804d8a 100644 --- a/src/cookbook/model/evaluators.py +++ b/src/cookbook/model/evaluators.py @@ -14,13 +14,13 @@ # "socialiqa_val_rc_5shot", # MMLU RC "mmlu_stem_val_rc_5shot", - # "mmlu_humanities_val_rc_5shot", - # "mmlu_social_sciences_val_rc_5shot", - # "mmlu_other_val_rc_5shot", - # "mmlu_stem_test_rc_5shot", - # "mmlu_humanities_test_rc_5shot", - # "mmlu_social_sciences_test_rc_5shot", - # "mmlu_other_test_rc_5shot", + "mmlu_humanities_val_rc_5shot", + "mmlu_social_sciences_val_rc_5shot", + "mmlu_other_val_rc_5shot", + "mmlu_stem_test_rc_5shot", + "mmlu_humanities_test_rc_5shot", + "mmlu_social_sciences_test_rc_5shot", + "mmlu_other_test_rc_5shot", # Gen tasks BPB # "gsm8k_gold_bpb_5shot", # "minerva_math_algebra_gold_bpb_0shot", From 76032070b36298e8f0aa2e0c1f19022350dc4b82 Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 16:35:12 -0700 Subject: [PATCH 10/15] try more cpu mem --- src/cookbook/utils/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cookbook/utils/config.py b/src/cookbook/utils/config.py index b40d7631..66f7be1e 100644 --- a/src/cookbook/utils/config.py +++ b/src/cookbook/utils/config.py @@ -243,6 +243,7 @@ def mk_launch_configs(group: ExperimentGroup, beaker_user: str) -> list[BeakerLa preemptible=group.config.preemptible, beaker_image="petew/olmo-core-tch270cu128", priority=group.config.priority, + shared_memory="25GiB", env_vars=[BeakerEnvVar(name="NCCL_DEBUG", value="INFO" if group.config.nccl_debug else "WARN")], env_secrets=[ BeakerEnvSecret(name="BEAKER_TOKEN", secret=f"{beaker_user}_BEAKER_TOKEN"), From aa1ea1b43f473ff601cd148d8c4766295552769d Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 16:35:41 -0700 Subject: [PATCH 11/15] try only arc --- src/cookbook/model/evaluators.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/cookbook/model/evaluators.py b/src/cookbook/model/evaluators.py index d8804d8a..10903561 100644 --- a/src/cookbook/model/evaluators.py +++ b/src/cookbook/model/evaluators.py @@ -5,7 +5,7 @@ OLMO2_DEV_1B_TASKS = [ # OLMES Core 9(-ish) RC - # "arc_challenge_test_rc_5shot", + "arc_challenge_test_rc_5shot", # "arc_easy_test_rc_5shot", # "hellaswag_rc_5shot", # 1K subset of HellaSwag # "winogrande_val_rc_5shot", # Helpful after 750M-5xC scale @@ -13,14 +13,14 @@ # "piqa_val_rc_5shot", # "socialiqa_val_rc_5shot", # MMLU RC - "mmlu_stem_val_rc_5shot", - "mmlu_humanities_val_rc_5shot", - "mmlu_social_sciences_val_rc_5shot", - "mmlu_other_val_rc_5shot", - "mmlu_stem_test_rc_5shot", - "mmlu_humanities_test_rc_5shot", - "mmlu_social_sciences_test_rc_5shot", - "mmlu_other_test_rc_5shot", + # "mmlu_stem_val_rc_5shot", + # "mmlu_humanities_val_rc_5shot", + # "mmlu_social_sciences_val_rc_5shot", + # "mmlu_other_val_rc_5shot", + # "mmlu_stem_test_rc_5shot", + # "mmlu_humanities_test_rc_5shot", + # "mmlu_social_sciences_test_rc_5shot", + # "mmlu_other_test_rc_5shot", # Gen tasks BPB # "gsm8k_gold_bpb_5shot", # "minerva_math_algebra_gold_bpb_0shot", From ac9740cdbe0f0b85816936bb99698613134f104c Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 16:43:46 -0700 Subject: [PATCH 12/15] try 2 --- src/cookbook/model/evaluators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cookbook/model/evaluators.py b/src/cookbook/model/evaluators.py index 10903561..7903608b 100644 --- a/src/cookbook/model/evaluators.py +++ b/src/cookbook/model/evaluators.py @@ -6,7 +6,7 @@ OLMO2_DEV_1B_TASKS = [ # OLMES Core 9(-ish) RC "arc_challenge_test_rc_5shot", - # "arc_easy_test_rc_5shot", + "arc_easy_test_rc_5shot", # "hellaswag_rc_5shot", # 1K subset of HellaSwag # "winogrande_val_rc_5shot", # Helpful after 750M-5xC scale # "csqa_val_rc_5shot", From 08db69d9de873282b0d004d69f0d24c20e41807a Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 16:52:52 -0700 Subject: [PATCH 13/15] try no workdir --- src/cookbook/model/builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cookbook/model/builder.py b/src/cookbook/model/builder.py index ef5be70d..ea49e4cc 100644 --- a/src/cookbook/model/builder.py +++ b/src/cookbook/model/builder.py @@ -586,7 +586,7 @@ def build(self) -> ModelTrainConfig: load_path=load_path, load_strategy=load_strategy, save_folder=self.checkpoint_dir, - work_dir=self.dataset_cache, + # work_dir=self.dataset_cache, save_overwrite=True, metrics_collect_interval=10, cancel_check_interval=5, From 83b299f25b4b0166cb4bbfd566650f83bbb22ebf Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 17:02:22 -0700 Subject: [PATCH 14/15] just one --- src/cookbook/model/builder.py | 2 +- src/cookbook/model/evaluators.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cookbook/model/builder.py b/src/cookbook/model/builder.py index ea49e4cc..ef5be70d 100644 --- a/src/cookbook/model/builder.py +++ b/src/cookbook/model/builder.py @@ -586,7 +586,7 @@ def build(self) -> ModelTrainConfig: load_path=load_path, load_strategy=load_strategy, save_folder=self.checkpoint_dir, - # work_dir=self.dataset_cache, + work_dir=self.dataset_cache, save_overwrite=True, metrics_collect_interval=10, cancel_check_interval=5, diff --git a/src/cookbook/model/evaluators.py b/src/cookbook/model/evaluators.py index 7903608b..10903561 100644 --- a/src/cookbook/model/evaluators.py +++ b/src/cookbook/model/evaluators.py @@ -6,7 +6,7 @@ OLMO2_DEV_1B_TASKS = [ # OLMES Core 9(-ish) RC "arc_challenge_test_rc_5shot", - "arc_easy_test_rc_5shot", + # "arc_easy_test_rc_5shot", # "hellaswag_rc_5shot", # 1K subset of HellaSwag # "winogrande_val_rc_5shot", # Helpful after 750M-5xC scale # "csqa_val_rc_5shot", From 0b4389a8e0e7a02b2dc40361ab8415bb50b637a5 Mon Sep 17 00:00:00 2001 From: Tyler Murray Date: Mon, 12 May 2025 17:18:06 -0700 Subject: [PATCH 15/15] specific image --- src/cookbook/model/evaluators.py | 2 +- src/cookbook/utils/config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cookbook/model/evaluators.py b/src/cookbook/model/evaluators.py index 10903561..7903608b 100644 --- a/src/cookbook/model/evaluators.py +++ b/src/cookbook/model/evaluators.py @@ -6,7 +6,7 @@ OLMO2_DEV_1B_TASKS = [ # OLMES Core 9(-ish) RC "arc_challenge_test_rc_5shot", - # "arc_easy_test_rc_5shot", + "arc_easy_test_rc_5shot", # "hellaswag_rc_5shot", # 1K subset of HellaSwag # "winogrande_val_rc_5shot", # Helpful after 750M-5xC scale # "csqa_val_rc_5shot", diff --git a/src/cookbook/utils/config.py b/src/cookbook/utils/config.py index 66f7be1e..672934e4 100644 --- a/src/cookbook/utils/config.py +++ b/src/cookbook/utils/config.py @@ -241,7 +241,7 @@ def mk_launch_configs(group: ExperimentGroup, beaker_user: str) -> list[BeakerLa budget=group.config.budget or "ai2/oe-data", workspace=group.config.workspace, preemptible=group.config.preemptible, - beaker_image="petew/olmo-core-tch270cu128", + beaker_image="petew/olmo-core-tch270cu128-v2.1", priority=group.config.priority, shared_memory="25GiB", env_vars=[BeakerEnvVar(name="NCCL_DEBUG", value="INFO" if group.config.nccl_debug else "WARN")],