diff --git a/pyproject.toml b/pyproject.toml
index 931c5c7e..39d95e2f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,7 @@ wandb = [
     "wandb",
 ]
 all = [
-    "ai2-olmo-core @ git+https://github.com/allenai/OLMo-core.git@ef28f2ab956177f4e9bf8cbcc1bb50457ae575cd",
+    "ai2-olmo-core @ git+https://github.com/allenai/OLMo-core.git@6599c2f7b85ee584e18febdc93ee1833480348e9",
     "beaker-py>=1,<2",
     "GitPython>=3.0,<4.0",
     "wandb",
diff --git a/src/cookbook/model/evaluators.py b/src/cookbook/model/evaluators.py
index 31372099..7903608b 100644
--- a/src/cookbook/model/evaluators.py
+++ b/src/cookbook/model/evaluators.py
@@ -7,33 +7,33 @@
     # OLMES Core 9(-ish) RC
     "arc_challenge_test_rc_5shot",
     "arc_easy_test_rc_5shot",
-    "hellaswag_rc_5shot",  # 1K subset of HellaSwag
-    "winogrande_val_rc_5shot",  # Helpful after 750M-5xC scale
-    "csqa_val_rc_5shot",
-    "piqa_val_rc_5shot",
-    "socialiqa_val_rc_5shot",
+    # "hellaswag_rc_5shot",  # 1K subset of HellaSwag
+    # "winogrande_val_rc_5shot",  # Helpful after 750M-5xC scale
+    # "csqa_val_rc_5shot",
+    # "piqa_val_rc_5shot",
+    # "socialiqa_val_rc_5shot",
     # MMLU RC
-    "mmlu_stem_val_rc_5shot",
-    "mmlu_humanities_val_rc_5shot",
-    "mmlu_social_sciences_val_rc_5shot",
-    "mmlu_other_val_rc_5shot",
-    "mmlu_stem_test_rc_5shot",
-    "mmlu_humanities_test_rc_5shot",
-    "mmlu_social_sciences_test_rc_5shot",
-    "mmlu_other_test_rc_5shot",
+    # "mmlu_stem_val_rc_5shot",
+    # "mmlu_humanities_val_rc_5shot",
+    # "mmlu_social_sciences_val_rc_5shot",
+    # "mmlu_other_val_rc_5shot",
+    # "mmlu_stem_test_rc_5shot",
+    # "mmlu_humanities_test_rc_5shot",
+    # "mmlu_social_sciences_test_rc_5shot",
+    # "mmlu_other_test_rc_5shot",
     # Gen tasks BPB
-    "gsm8k_gold_bpb_5shot",
-    "minerva_math_algebra_gold_bpb_0shot",
-    "minerva_math_counting_and_probability_gold_bpb_0shot",
-    "minerva_math_geometry_gold_bpb_0shot",
-    "minerva_math_intermediate_algebra_gold_bpb_0shot",
-    "minerva_math_number_theory_gold_bpb_0shot",
-    "minerva_math_prealgebra_gold_bpb_0shot",
-    "minerva_math_precalculus_gold_bpb_0shot",
-    "codex_humaneval_gold_bpb_0shot",
-    "codex_mbpp_gold_bpb_0shot",
+    # "gsm8k_gold_bpb_5shot",
+    # "minerva_math_algebra_gold_bpb_0shot",
+    # "minerva_math_counting_and_probability_gold_bpb_0shot",
+    # "minerva_math_geometry_gold_bpb_0shot",
+    # "minerva_math_intermediate_algebra_gold_bpb_0shot",
+    # "minerva_math_number_theory_gold_bpb_0shot",
+    # "minerva_math_prealgebra_gold_bpb_0shot",
+    # "minerva_math_precalculus_gold_bpb_0shot",
+    # "codex_humaneval_gold_bpb_0shot",
+    # "codex_mbpp_gold_bpb_0shot",
     # Sanity check for MCQA ability
-    "copycolors_10way",
+    # "copycolors_10way",
 ]
 
 TASK_GROUPS: Dict[str, List[str]] = {
@@ -57,9 +57,11 @@ def get_tasks_for_groups(groups: List[str]) -> List[str]:
     """Return all tasks in a group"""
     tasks = []
     for group in groups:
-        if group not in TASK_GROUPS:
-            raise ValueError(f"Group {group} not found")
-
-        tasks.extend(TASK_GROUPS[group])
+        if group in TASK_GROUPS:
+            tasks.extend(TASK_GROUPS[group])
+        elif group.upper() in ALL_TASKS_MAP:
+            tasks.append(ALL_TASKS_MAP[group.upper()])
+        else:
+            raise ValueError(f"Group or task '{group}' not found")
 
     return list(set(tasks))
diff --git a/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2-augusta.yaml b/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2-augusta.yaml
index 0df71f69..9f7754dd 100644
--- a/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2-augusta.yaml
+++ b/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2-augusta.yaml
@@ -14,8 +14,9 @@ model: "olmo2_1B_v2"
 tokenizer: "dolma2"
 priority: high
 cluster: ai2/augusta-google-1
-activation_checkpointing: true
 eval_interval: 10
+downstream_evaluators:
+  - olmo2_dev_1b
 dataset:
   sources:
     - name: gs-test
diff --git a/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml b/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml
index 8deaa7e3..657e58ce 100644
--- a/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml
+++ b/src/cookbook/recipes/olmo2/train-1b-5xC-dclm-dolma2.yaml
@@ -1,25 +1,26 @@
-name: "olmo2-1b-dclm-dolma2"
-description: "OLMo2 1b@5xC dclm-baseline"
+name: "olmo2-1b-in-loop-debug"
+description: ""
 budget: "ai2/oe-training"
 workspace: "ai2/oe-data"
-nodes: 4
+nodes: 1
 gpus: 8
 preemptible: true
-max_tokens: 127_939_584_000
+max_tokens: 1_000_000_000
 global_batch_size: 2097152
+rank_microbatch_size: 32768
 sequence_length: 4096
 seed: 1337
 model: "olmo2_1B_v2"
 tokenizer: "dolma2"
 priority: high
-eval_interval: 250
-cluster: ai2/jupiter-cirrascale-2
-rank_microbatch_size: 32768
+cluster: ai2/saturn-cirrascale
+eval_interval: 10
+downstream_evaluators:
+  - olmo2_dev_1b
 weka: true
 dataset:
   sources:
-    - name: dclm-baseline-20pct-dolma2
+    - name: gs-test
       target_ratio: 1.0
       paths:
         - weka://oe-training-default/ai2-llm/preprocessed/dclm/baseline_type_topic_classified_20pct/allenai/dolma2-tokenizer/**/**/part-0*-00000.npy
-
diff --git a/src/cookbook/utils/config.py b/src/cookbook/utils/config.py
index ee8d6bf0..672934e4 100644
--- a/src/cookbook/utils/config.py
+++ b/src/cookbook/utils/config.py
@@ -241,8 +241,9 @@ def mk_launch_configs(group: ExperimentGroup, beaker_user: str) -> list[BeakerLa
             budget=group.config.budget or "ai2/oe-data",
             workspace=group.config.workspace,
             preemptible=group.config.preemptible,
-            beaker_image="petew/olmo-core-tch270cu126",
+            beaker_image="petew/olmo-core-tch270cu128-v2.1",
             priority=group.config.priority,
+            shared_memory="25GiB",
             env_vars=[BeakerEnvVar(name="NCCL_DEBUG", value="INFO" if group.config.nccl_debug else "WARN")],
             env_secrets=[
                 BeakerEnvSecret(name="BEAKER_TOKEN", secret=f"{beaker_user}_BEAKER_TOKEN"),
@@ -259,7 +260,9 @@ def mk_launch_configs(group: ExperimentGroup, beaker_user: str) -> list[BeakerLa
                 "cd olmo-cookbook",
                 'git checkout "$GIT_REF"',
                 "git submodule update --init --recursive",
-                "pip install -e '.[all]'",
+                "pip install uv",
+                "uv pip install -e '.[all]' --system",
+                "uv pip install torch==2.7.0 torchaudio torchvision --index-url https://download.pytorch.org/whl/test/cu128 --system",
                 "pip freeze",
                 # Move AWS credentials from env to relevant files
                 "mkdir -p ~/.aws",
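A minimal sketch of the resolution behavior introduced in `get_tasks_for_groups`: a name is first tried as a group in `TASK_GROUPS`, then (uppercased) as a single task in `ALL_TASKS_MAP`, and only rejected if it is neither. The function body mirrors the diff above; the contents of the two dictionaries below are hypothetical stand-ins, not the real mappings in `evaluators.py`.

```python
from typing import Dict, List

# Hypothetical stand-ins for the real mappings in src/cookbook/model/evaluators.py.
TASK_GROUPS: Dict[str, List[str]] = {
    "olmo2_dev_1b": ["arc_challenge_test_rc_5shot", "arc_easy_test_rc_5shot"],
}
ALL_TASKS_MAP: Dict[str, str] = {
    "ARC_EASY_TEST_RC_5SHOT": "arc_easy_test_rc_5shot",
    "COPYCOLORS_10WAY": "copycolors_10way",
}


def get_tasks_for_groups(groups: List[str]) -> List[str]:
    """Return all tasks in a group"""
    tasks = []
    for group in groups:
        if group in TASK_GROUPS:
            # Known group: pull in every task it contains.
            tasks.extend(TASK_GROUPS[group])
        elif group.upper() in ALL_TASKS_MAP:
            # Not a group, but an individual task name: add just that task.
            tasks.append(ALL_TASKS_MAP[group.upper()])
        else:
            raise ValueError(f"Group or task '{group}' not found")

    return list(set(tasks))


# A group name and a bare task name can now be mixed in one request.
print(sorted(get_tasks_for_groups(["olmo2_dev_1b", "copycolors_10way"])))
# -> ['arc_challenge_test_rc_5shot', 'arc_easy_test_rc_5shot', 'copycolors_10way']
```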