File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 4444 "vocab_size" : 50257
4545}
4646
47- import math
48- from torch import Tensor
49-
5047
5148# from transformers
5249class Conv1D (nn .Module ):
@@ -74,23 +71,12 @@ def forward(self, x):
7471 return x
7572
7673
77- # from transformers
78- class NewGELUActivation (nn .Module ):
79- """
80- Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
81- the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
82- """
83-
84- def forward (self , input : Tensor ) -> Tensor :
85- return 0.5 * input * (1.0 + torch .tanh (math .sqrt (2.0 / math .pi ) * (input + 0.044715 * torch .pow (input , 3.0 ))))
86-
87-
8874class HeadFFN (nn .Module ): # todo rename
8975 def __init__ (self , dim ):
9076 super ().__init__ ()
9177 self .c_fc = Conv1D (dim , config ['n_embd' ])
9278 self .c_proj = Conv1D (config ['n_embd' ], dim )
93- self .act = NewGELUActivation ()
79+ self .act = nn . functional . gelu
9480 self .dropout = nn .Dropout (config ['resid_pdrop' ])
9581
9682 def forward (self , hidden_states ):
You can’t perform that action at this time.
0 commit comments