File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree
Original file line number | Diff line number | Diff line change
@@ -173,10 +173,6 @@ def _init_weights(self, module):
173 173   torch.nn.init.zeros_(module.bias)
174 174   elif isinstance(module, nn.Embedding):
175 175   torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
176     - elif isinstance(module, (LayerNorm, nn.LayerNorm)):
177     - torch.nn.init.ones_(module.weight)
178     - if module.bias is not None:
179     - torch.nn.init.zeros_(module.bias)
180 176
181 177   def forward(self, idx, targets=None):
182 178   device = idx.device
Original file line number | Diff line number | Diff line change
57 57   # adamw optimizer
58 58   learning_rate = 6e-4 # max learning rate
59 59   max_iters = 600000 # total number of training iterations
60    - weight_decay = 1e-2
   60 + weight_decay = 1e-1
61 61   beta1 = 0.9
62 62   beta2 = 0.95
63 63   grad_clip = 1.0 # clip gradients at this value, or disable if == 0.0
You can’t perform that action at this time.
0 commit comments