num_steps = int(num_epochs * num_train_batches)
warmup_steps = int(config.TRAIN.WARMUP_EPOCHS * num_train_batches)
warmup_prefix = True  # so the first step after warmup is at the base LR
t_initial = num_steps - warmup_steps  # so the last step is at the min LR when warmup_prefix=True
lr_scheduler = CosineLRSchedu...
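For reference, here is a minimal sketch of how that truncated constructor call presumably continues, assuming timm's `CosineLRScheduler`. The `lr_min` and `warmup_lr_init` values (read here from hypothetical `config.TRAIN.MIN_LR` / `config.TRAIN.WARMUP_LR` keys) are assumptions and not part of the original snippet:

```python
from timm.scheduler.cosine_lr import CosineLRScheduler

# Sketch only: everything except t_initial, warmup_t and warmup_prefix is assumed.
lr_scheduler = CosineLRScheduler(
    optimizer,
    t_initial=t_initial,                    # num_steps - warmup_steps, since warmup_prefix=True
    lr_min=config.TRAIN.MIN_LR,             # assumed config key
    warmup_t=warmup_steps,
    warmup_lr_init=config.TRAIN.WARMUP_LR,  # assumed config key
    warmup_prefix=warmup_prefix,            # cosine decay starts from the base LR after warmup
    cycle_limit=1,                          # single cosine cycle, no restarts
    t_in_epochs=False,                      # step the schedule per batch, not per epoch
)

# Stepped once per iteration with the global update count, e.g.:
# lr_scheduler.step_update(num_updates=global_step)
```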
logging_steps=1_000,
gradient_accumulation_steps=8,
num_train_epochs=50,
weight_decay=0.1,
warmup_steps=5_000,
lr_scheduler_type="cosine_with_restarts",  # that's actually the only relevant line
learning_rate=5e-4,
save_steps
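For context, a self-contained sketch of these settings in a Hugging Face `TrainingArguments` object; `output_dir` and the `save_steps` value are placeholders I've added, not from the original:

```python
from transformers import TrainingArguments

# Sketch of the arguments above; output_dir and save_steps=1_000 are assumed placeholders.
training_args = TrainingArguments(
    output_dir="out",                          # placeholder
    logging_steps=1_000,
    gradient_accumulation_steps=8,
    num_train_epochs=50,
    weight_decay=0.1,
    warmup_steps=5_000,
    lr_scheduler_type="cosine_with_restarts",  # the relevant line: cosine schedule with hard restarts
    learning_rate=5e-4,
    save_steps=1_000,                          # placeholder value
)
```

Note that, as far as I know, the Trainer maps `"cosine_with_restarts"` to `get_cosine_with_hard_restarts_schedule_with_warmup`, whose `num_cycles` defaults to 1 (i.e. effectively no restart) unless overridden, e.g. via `lr_scheduler_kwargs` in recent transformers versions.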