From 9aca563489c76d2abb6d2ba1919de5641001e92c Mon Sep 17 00:00:00 2001 From: KINNNNNNG <137853302@qq.com> Date: Tue, 6 Jun 2023 15:37:05 +0800 Subject: [PATCH] Update config.py When I was training with ```chat/train.py```, I got the following error after training: ``` Traceback (most recent call last): File "train.py", line 345, in main() File "train.py", line 313, in main if data_args.dataset_config_name is not None: AttributeError: 'DataArguments' object has no attribute 'dataset_config_name' WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 2461 closing signal SIGTERM WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 2462 closing signal SIGTERM ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 2460) of binary: /usr/bin/python Traceback (most recent call last): File "/usr/local/bin/torchrun", line 8, in sys.exit(main()) File "/usr/local/lib/python3.7/dist-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 346, in wrapper return f(*args, **kwargs) File "/usr/local/lib/python3.7/dist-packages/torch/distributed/run.py", line 762, in main run(args) File "/usr/local/lib/python3.7/dist-packages/torch/distributed/run.py", line 756, in run )(*cmd_args) File "/usr/local/lib/python3.7/dist-packages/torch/distributed/launcher/api.py", line 132, in __call__ return launch_agent(self._config, self._entrypoint, list(args)) File "/usr/local/lib/python3.7/dist-packages/torch/distributed/launcher/api.py", line 248, in launch_agent failures=result.failures, torch.distributed.elastic.multiprocessing.errors.ChildFailedError: ============================================================ train.py FAILED ------------------------------------------------------------ Failures: ------------------------------------------------------------ Root Cause (first observed failure): [0]: time : 2023-06-06_07:00:57 host : ee11o7smse510-0 rank : 0 (local_rank: 0) exitcode : 1 (pid: 2460) 
error_file: traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html ``` I looked at the ```config.py``` file and found that ```DataArguments``` was missing the ```dataset_config_name``` field. I wrote a ```test.py``` to check this; after I added ```dataset_config_name```, the test passed. ``` import logging import math import os import random import sys from itertools import chain import datasets import torch import transformers from config import DataArguments, ModelArguments, TrainingArguments from datasets import load_dataset from dialogues import get_dialogue_template, mask_user_labels, prepare_dialogue from transformers import (AutoModelForCausalLM, AutoTokenizer, Trainer, default_data_collator, set_seed) from transformers.testing_utils import CaptureLogger from transformers.trainer_utils import get_last_checkpoint from utils import StarChatArgumentParser, hf_login if __name__ == '__main__': parser = StarChatArgumentParser((ModelArguments, DataArguments, TrainingArguments)) if len(sys.argv) == 2 and sys.argv[1].endswith(".yaml"): # If we pass only one argument to the script and it's the path to a YAML file, # let's parse it to get our arguments. 
model_args, data_args, training_args = parser.parse_yaml_file(os.path.abspath(sys.argv[1])) # parse command line args and yaml file elif len(sys.argv) > 2 and sys.argv[1].endswith(".yaml"): model_args, data_args, training_args = parser.parse_yaml_and_args(os.path.abspath(sys.argv[1]), sys.argv[2:]) # parse command line args only else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() kwargs = {} if data_args.dataset_name is not None: kwargs["dataset_tags"] = data_args.dataset_name if data_args.dataset_config_name is not None: kwargs["dataset_args"] = data_args.dataset_config_name kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" else: kwargs["dataset"] = data_args.dataset_name kwargs["dataset_args"] = "default" print(data_args.dataset_config_name is not None) print(kwargs) ``` --- chat/config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/chat/config.py b/chat/config.py index fcdc569..390d2ac 100644 --- a/chat/config.py +++ b/chat/config.py @@ -106,6 +106,9 @@ class DataArguments: "help": "The name of the dialogue template to use for conditioning the model. See h4.training.dialogues for choices." }, ) + dataset_config_name: Optional[str] = field( + default=None + ) @dataclass