# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
# https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
with training_args.main_process_first(desc="grouping texts together"):
    if not data_args...
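For context, the multiprocessing speedup in the snippet above comes from the num_proc argument of datasets.Dataset.map, which shards the dataset across worker processes and applies the mapped function to each shard in parallel. Below is a minimal, self-contained sketch of that pattern, not the original run_clm.py: the group_texts function, the toy dataset, and BLOCK_SIZE are stand-ins introduced for illustration.

# Minimal sketch (not the original script): parallel batched map over a toy dataset.
from datasets import Dataset

BLOCK_SIZE = 8  # hypothetical chunk length for this sketch


def group_texts(examples):
    # Toy stand-in for the real group_texts: concatenate all token lists in the
    # batch, then re-split them into fixed-size blocks, dropping the remainder.
    concatenated = sum(examples["tokens"], [])
    total_length = (len(concatenated) // BLOCK_SIZE) * BLOCK_SIZE
    return {
        "tokens": [
            concatenated[i : i + BLOCK_SIZE]
            for i in range(0, total_length, BLOCK_SIZE)
        ]
    }


if __name__ == "__main__":
    ds = Dataset.from_dict({"tokens": [[1, 2, 3, 4, 5]] * 100})
    lm_ds = ds.map(
        group_texts,
        batched=True,    # process many rows per call
        num_proc=2,      # multiprocessing: two worker processes, one shard each
        desc="grouping texts together",
    )
    print(lm_ds)

Because the map is batched and returns a different number of rows than it receives, the output column simply replaces the input one; num_proc only controls how many processes split that work.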
args.distributed_init_method = 'tcp://localhost:{port}'.format(port=port)
args.distributed_rank = None  # set based on device id
if max(args.update_freq) > 1 and args.ddp_backend != 'no_c10d':
    print('| NOTE: you may get better performance with: --ddp-backend=no_c10d')
torch.multiprocessing....
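The call that is cut off above is the standard torch.multiprocessing pattern for launching one worker per device: pick a free local TCP port for the rendezvous, spawn world_size processes, and derive each worker's rank from the process index that spawn passes in. The sketch below shows that pattern under assumed settings (gloo backend, world_size of 2, a hypothetical distributed_main worker); it is not fairseq's actual launcher.

# Minimal sketch (not fairseq itself) of the same launch pattern.
import random

import torch
import torch.distributed as dist
import torch.multiprocessing as mp


def distributed_main(device_id, init_method, world_size):
    # rank is "set based on device id", as in the snippet above
    dist.init_process_group(
        backend="gloo",  # assumption: CPU-friendly backend; use "nccl" on multi-GPU
        init_method=init_method,
        world_size=world_size,
        rank=device_id,
    )
    tensor = torch.tensor([device_id], dtype=torch.float32)
    dist.all_reduce(tensor)  # sanity check: sums the ranks across workers
    print(f"worker {device_id}: all_reduce result = {tensor.item()}")
    dist.destroy_process_group()


if __name__ == "__main__":
    world_size = 2  # hypothetical worker count for this sketch
    port = random.randint(10000, 20000)
    init_method = "tcp://localhost:{port}".format(port=port)
    mp.spawn(distributed_main, args=(init_method, world_size), nprocs=world_size)

torch.multiprocessing.spawn invokes the worker as fn(i, *args), which is why distributed_main takes the device/process index as its first argument and uses it directly as the rank.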