1024, "max_samples": 1000, "overwrite_cache": True, "preprocessing_num_workers": 4, "output_dir": "saves/llama3-8b/lora/sft", "logging_steps": 10, "save_steps": 500, "plot_loss": True, "overwrite_output_dir": True, "per_device_train_batch_size": 1, # "gradient_accumulation_...