Merge pull request karpathy#578 from devin-open-source/devin/1733728337-fix-warmup-lr

karpathy · web-flow · commit 93a43d9a5c22 · 2024-12-09T15:53:04.000-08:00
fix: ensure non-zero learning rate during warmup at iteration 0
diff --git a/train.py b/train.py
@@ -231,7 +231,7 @@ def estimate_loss():
 def get_lr(it):
     # 1) linear warmup for warmup_iters steps
     if it < warmup_iters:
-        return learning_rate * it / warmup_iters
+        return learning_rate * (it + 1) / (warmup_iters + 1)
     # 2) if it > lr_decay_iters, return min learning rate
     if it > lr_decay_iters:
         return min_lr