1+ <!DOCTYPE html>
2+ < html lang ="en ">
3+ < head >
4+ < meta http-equiv ="content-type " content ="text/html;charset=utf-8 "/>
5+ < meta name ="viewport " content ="width=device-width, initial-scale=1.0 "/>
6+ < meta name ="description " content =""/>
7+
8+ < meta name ="twitter:card " content ="summary "/>
9+ < meta name ="twitter:image:src " content ="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4 "/>
10+ < meta name ="twitter:title " content ="configs.py "/>
11+ < meta name ="twitter:description " content =""/>
12+ < meta name ="twitter:site " content ="@labmlai "/>
13+ < meta name ="twitter:creator " content ="@labmlai "/>
14+
15+ < meta property ="og:url " content ="https://nn.labml.ai/RWKV/configs.html "/>
16+ < meta property ="og:title " content ="configs.py "/>
17+ < meta property ="og:image " content ="https://avatars1.githubusercontent.com/u/64068543?s=400&v=4 "/>
18+ < meta property ="og:site_name " content ="configs.py "/>
19+ < meta property ="og:type " content ="object "/>
20+ < meta property ="og:title " content ="configs.py "/>
21+ < meta property ="og:description " content =""/>
22+
23+ < title > configs.py</ title >
24+ < link rel ="shortcut icon " href ="/icon.png "/>
25+ < link rel ="stylesheet " href ="../pylit.css?v=1 ">
26+ < link rel ="canonical " href ="https://nn.labml.ai/RWKV/configs.html "/>
27+ < link rel ="stylesheet " href ="https://cdn.jsdelivr.net/npm/katex@0.13.18/dist/katex.min.css " integrity ="sha384-zTROYFVGOfTw7JV7KUu8udsvW2fx4lWOsCEDqhBreBwlHI4ioVRtmIvEThzJHGET " crossorigin ="anonymous ">
28+
29+ <!-- Global site tag (gtag.js) - Google Analytics -->
30+ < script async src ="https://www.googletagmanager.com/gtag/js?id=G-4V3HC8HBLH "> </ script >
31+ < script >
32+ window . dataLayer = window . dataLayer || [ ] ;
33+
// Queue a Google Analytics command: pushes this call's `arguments` object
// onto the global `dataLayer` array (created on `window` just before this
// function if it did not already exist).
// NOTE(review): gtag.js is understood to expect the raw `arguments` object
// rather than a plain array — confirm before refactoring to rest parameters.
34+ function gtag ( ) {
35+ dataLayer . push ( arguments ) ;
36+ }
37+
38+ gtag ( 'js' , new Date ( ) ) ;
39+
40+ gtag ( 'config' , 'G-4V3HC8HBLH' ) ;
41+ </ script >
42+ </ head >
43+ < body >
44+ < div id ='container '>
45+ < div id ="background "> </ div >
46+ < div class ='section '>
47+ < div class ='docs '>
48+ < p >
49+ < a class ="parent " href ="/ "> home</ a >
50+ < a class ="parent " href ="index.html "> RWKV</ a >
51+ </ p >
52+ < p >
53+ < a href ="https://github.com/labmlai/annotated_deep_learning_paper_implementations " target ="_blank ">
54+ < img alt ="Github "
55+ src ="https://img.shields.io/github/stars/labmlai/annotated_deep_learning_paper_implementations?style=social "
56+ style ="max-width:100%; "/> </ a >
57+ < a href ="https://twitter.com/labmlai " rel ="nofollow " target ="_blank ">
58+ < img alt ="Twitter "
59+ src ="https://img.shields.io/twitter/follow/labmlai?style=social "
60+ style ="max-width:100%; "/> </ a >
61+ </ p >
62+ < p >
63+ < a href ="https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/RWKV/configs.py " target ="_blank ">
64+ View code on Github</ a >
65+ </ p >
66+ </ div >
67+ </ div >
68+ < div class ='section ' id ='section-0 '>
69+ < div class ='docs '>
70+ < div class ='section-link '>
71+ < a href ='#section-0 '> #</ a >
72+ </ div >
73+
74+ </ div >
75+ < div class ='code '>
76+ < div class ="highlight "> < pre > < span class ="lineno "> 1</ span > < span > </ span > < span class ="kn "> from</ span > < span class ="nn "> labml.configs</ span > < span class ="kn "> import</ span > < span class ="n "> BaseConfigs</ span > </ pre > </ div >
77+ </ div >
78+ </ div >
79+ < div class ='section ' id ='section-1 '>
80+ < div class ='docs doc-strings '>
81+ < div class ='section-link '>
82+ < a href ='#section-1 '> #</ a >
83+ </ div >
84+ < h2 > Transformer Configurations</ h2 >
85+ < p > This defines configurations for a transformer. The configurations are calculated using option functions. These are lazily loaded, and therefore only the necessary modules are calculated.</ p >
86+
87+ </ div >
88+ < div class ='code '>
89+ < div class ="highlight "> < pre > < span class ="lineno "> 4</ span > < span class ="k "> class</ span > < span class ="nc "> RWKVConfigs</ span > < span class ="p "> (</ span > < span class ="n "> BaseConfigs</ span > < span class ="p "> ):</ span > </ pre > </ div >
90+ </ div >
91+ </ div >
92+ < div class ='section ' id ='section-2 '>
93+ < div class ='docs '>
94+ < div class ='section-link '>
95+ < a href ='#section-2 '> #</ a >
96+ </ div >
97+ < p > Number of attention heads </ p >
98+
99+ </ div >
100+ < div class ='code '>
101+ < div class ="highlight "> < pre > < span class ="lineno "> 14</ span > < span class ="n "> n_heads</ span > < span class ="p "> :</ span > < span class ="nb "> int</ span > < span class ="o "> =</ span > < span class ="mi "> 8</ span > </ pre > </ div >
102+ </ div >
103+ </ div >
104+ < div class ='section ' id ='section-3 '>
105+ < div class ='docs '>
106+ < div class ='section-link '>
107+ < a href ='#section-3 '> #</ a >
108+ </ div >
109+ < p > Transformer embedding size </ p >
110+
111+ </ div >
112+ < div class ='code '>
113+ < div class ="highlight "> < pre > < span class ="lineno "> 16</ span > < span class ="n "> d_model</ span > < span class ="p "> :</ span > < span class ="nb "> int</ span > < span class ="o "> =</ span > < span class ="mi "> 512</ span > </ pre > </ div >
114+ </ div >
115+ </ div >
116+ < div class ='section ' id ='section-4 '>
117+ < div class ='docs '>
118+ < div class ='section-link '>
119+ < a href ='#section-4 '> #</ a >
120+ </ div >
121+ < p > Number of layers </ p >
122+
123+ </ div >
124+ < div class ='code '>
125+ < div class ="highlight "> < pre > < span class ="lineno "> 18</ span > < span class ="n "> n_layers</ span > < span class ="p "> :</ span > < span class ="nb "> int</ span > < span class ="o "> =</ span > < span class ="mi "> 6</ span > </ pre > </ div >
126+ </ div >
127+ </ div >
128+ < div class ='section ' id ='section-5 '>
129+ < div class ='docs '>
130+ < div class ='section-link '>
131+ < a href ='#section-5 '> #</ a >
132+ </ div >
133+ < p > Dropout probability </ p >
134+
135+ </ div >
136+ < div class ='code '>
137+ < div class ="highlight "> < pre > < span class ="lineno "> 20</ span > < span class ="n "> dropout</ span > < span class ="p "> :</ span > < span class ="nb "> float</ span > < span class ="o "> =</ span > < span class ="mf "> 0.1</ span > </ pre > </ div >
138+ </ div >
139+ </ div >
140+ < div class ='section ' id ='section-6 '>
141+ < div class ='docs '>
142+ < div class ='section-link '>
143+ < a href ='#section-6 '> #</ a >
144+ </ div >
145+ < p > Number of tokens in the source vocabulary (for token embeddings) </ p >
146+
147+ </ div >
148+ < div class ='code '>
149+ < div class ="highlight "> < pre > < span class ="lineno "> 22</ span > < span class ="n "> n_src_vocab</ span > < span class ="p "> :</ span > < span class ="nb "> int</ span > </ pre > </ div >
150+ </ div >
151+ </ div >
152+ < div class ='section ' id ='section-7 '>
153+ < div class ='docs '>
154+ < div class ='section-link '>
155+ < a href ='#section-7 '> #</ a >
156+ </ div >
157+ < p > Number of tokens in the target vocabulary (to generate logits for prediction) </ p >
158+
159+ </ div >
160+ < div class ='code '>
161+ < div class ="highlight "> < pre > < span class ="lineno "> 24</ span > < span class ="n "> n_tgt_vocab</ span > < span class ="p "> :</ span > < span class ="nb "> int</ span > </ pre > </ div >
162+ </ div >
163+ </ div >
164+ < div class ='footer '>
165+ < a href ="https://papers.labml.ai "> Trending Research Papers</ a >
166+ < a href ="https://labml.ai "> labml.ai</ a >
167+ </ div >
168+ </ div >
169+ < script src ="../interactive.js?v=1 "> </ script >
170+ < script >
/**
 * Enhance every image that is a direct child of a paragraph.
 *
 * Selects all elements matching `p>img` and passes each one, in document
 * order, to `handleImage`.
 */
function handleImages() {
    var paragraphImages = document.querySelectorAll('p>img');

    // querySelectorAll returns a static NodeList, so iterating it with
    // Array.prototype.forEach visits the same elements as an index loop.
    Array.prototype.forEach.call(paragraphImages, function (image) {
        handleImage(image);
    });
}
178+
/**
 * Make an image open in a modal overlay when it is clicked.
 *
 * Centers the image's parent element, then builds a detached modal
 * (`div#modal` containing a wrapper `div` with an `img`, plus a
 * `span.close`). Clicking the image appends the modal to `document.body`
 * and shows the same source; clicking the close control detaches it again.
 *
 * @param {HTMLImageElement} img Image to enhance. Its `onclick` property
 *     is overwritten by this function.
 */
function handleImage(img) {
    img.parentElement.style.textAlign = 'center'

    var modal = document.createElement('div')
    modal.id = 'modal'

    var modalContent = document.createElement('div')
    modal.appendChild(modalContent)

    var modalImage = document.createElement('img')
    modalContent.appendChild(modalImage)

    var span = document.createElement('span')
    span.classList.add('close')
    span.textContent = 'x'
    modal.appendChild(span)

    img.onclick = function () {
        document.body.appendChild(modal)
        modalImage.src = img.src
        // Carry the text alternative over so the enlarged copy is not an
        // unlabeled image for assistive technology.
        modalImage.alt = img.alt || ''
    }

    span.onclick = function () {
        // Detach only while attached, so a repeated close never throws.
        if (modal.parentNode) {
            modal.parentNode.removeChild(modal)
        }
    }
}
206+
207+ handleImages ( )
208+ </ script >
209+ </ body >
210+ </ html >
0 commit comments