blog/assets/62_pytorch_fsdp/run_clm_no_trainer.py at add277b...
remove_columns=column_names, load_from_cache_file=not args.overwrite_cache, desc="Running tokenizer on dataset", ) if args.block_size is None: block_size = tokenizer.model_max_length if block_size > 1024: logger