@@ -174,38 +174,26 @@ def __init__(self, deterministic: bool = False):
174174 def build_tagger (self ):
175175 """
176176 Finite state transducer for classifying date, e.g.
177- jan. 5, 2012 -> date { month: "january" day: "five" year: ", twenty twelve" preserve_order: "true" }
178- jan. 5 -> date { month: "january" day: "five" preserve_order: "true" }
179- 5 january 2012 -> date { day: "five" month: "january" year: "twenty twelve" preserve_order: "true" }
177+ jan. 5, 2012 -> date { month: "january" day: "five" year: ", twenty twelve" }
178+ jan. 5 -> date { month: "january" day: "five" }
179+ 5 january 2012 -> date { day: "five" month: "january" year: "twenty twelve" }
180180 2012-01-05 -> date { year: "twenty twelve" month: "january" day: "five" }
181181 2012.01.05 -> date { year: "twenty twelve" month: "january" day: "five" }
182182 2012/01/05 -> date { year: "twenty twelve" month: "january" day: "five" }
183183 2012 -> date { year: "twenty twelve" }
184184 """
185185 cardinal = Cardinal (self .deterministic )
186- # january
186+ # january, January, JANUARY
187187 month_graph = pynini .string_file (
188- get_abs_path ("english/data/date/month_name.tsv" )).optimize ()
189- # January, JANUARY
190- month_graph |= pynini .compose (
191- self .TO_LOWER + pynini .closure (self .VCHAR ),
192- month_graph ) | pynini .compose (self .TO_LOWER ** (2 , ...), month_graph )
193-
194- # jan
195- month_abbr_graph = pynini .string_file (
196- get_abs_path ("english/data/date/month_abbr.tsv" )).optimize ()
188+ get_abs_path ("english/data/date/month_name.tsv" ))
197189 # jan, Jan, JAN
198- month_abbr_graph = (
199- month_abbr_graph
200- | pynini .compose (self .TO_LOWER + pynini .closure (self .LOWER , 1 ),
201- month_abbr_graph ).optimize ()
202- | pynini .compose (self .TO_LOWER ** (2 , ...),
203- month_abbr_graph ).optimize ()) + pynini .closure (
204- pynutil .delete ("." ), 0 , 1 )
205- month_graph |= month_abbr_graph .optimize ()
190+ month_abbr_graph = pynini .string_file (
191+ get_abs_path ("english/data/date/month_abbr.tsv" ))
192+ month_graph |= month_abbr_graph
193+ month_graph += pynutil .delete (self .PUNCT ).ques
206194
207195 month_numbers_labels = pynini .string_file (
208- get_abs_path ("english/data/date/month_number.tsv" )). optimize ()
196+ get_abs_path ("english/data/date/month_number.tsv" ))
209197 cardinal_graph = cardinal .graph_hundred_component_at_least_one_none_zero_digit
210198
211199 year_graph = _get_year_graph (cardinal_graph = cardinal_graph ,
@@ -235,13 +223,14 @@ def build_tagger(self):
235223 cardinal_graph = cardinal_graph ,
236224 single_digits_graph = cardinal .single_digits_graph )
237225 two_digit_year = pynutil .insert (
238- "year: \" " ) + two_digit_year + pynutil .insert ("\" " )
226+ "year: \" " ) + two_digit_year + self .PUNCT .ques + pynutil .insert (
227+ "\" " )
239228
240229 graph_year = pynutil .insert (" year: \" " ) + pynutil .delete (
241- " " ) + year_graph + pynutil .insert ("\" " )
230+ " " ) + year_graph + self . PUNCT . ques + pynutil .insert ("\" " )
242231 graph_year |= (pynutil .insert (" year: \" " ) + pynini .accep ("," ) +
243232 pynini .closure (pynini .accep (" " ), 0 , 1 ) + year_graph +
244- pynutil .insert ("\" " ))
233+ self . PUNCT . ques + pynutil .insert ("\" " ))
245234 optional_graph_year = pynini .closure (graph_year , 0 , 1 )
246235
247236 year_graph = pynutil .insert ("year: \" " ) + year_graph + pynutil .insert (
@@ -252,134 +241,53 @@ def build_tagger(self):
252241 | (pynini .accep (" " ) + day_graph )
253242 | graph_year
254243 | (self .DELETE_EXTRA_SPACE + day_graph + graph_year ))
255-
256244 graph_mdy |= (month_graph + pynini .cross ("-" , " " ) + day_graph +
257245 pynini .closure (
258246 ((pynini .cross ("-" , " " ) +
259247 pynini .closure (self .VCHAR )) @ graph_year ), 0 , 1 ))
260-
261248 for x in ["-" , "/" , "." ]:
262249 delete_sep = pynutil .delete (x )
263250 graph_mdy |= (month_numbers_graph + delete_sep +
264251 self .INSERT_SPACE +
265252 pynini .closure (pynutil .delete ("0" ), 0 , 1 ) +
266253 day_graph + delete_sep + self .INSERT_SPACE +
267- (year_graph | two_digit_year ))
254+ (pynutil . add_weight ( year_graph , - 1.0 ) ))
268255
269- graph_dmy = day_graph + self .DELETE_EXTRA_SPACE + month_graph + optional_graph_year
256+ graph_dmy = day_graph + self .DELETE_EXTRA_SPACE + self . INSERT_SPACE + month_graph + optional_graph_year
270257 day_ex_month = (self .DIGIT ** 2 - pynini .project (month_numbers_graph ,
271258 "input" )) @ day_graph
272259 for x in ["-" , "/" , "." ]:
273260 delete_sep = pynutil .delete (x )
274261 graph_dmy |= (day_ex_month + delete_sep + self .INSERT_SPACE +
275262 month_numbers_graph + delete_sep +
276- self .INSERT_SPACE + (year_graph | two_digit_year ))
263+ self .INSERT_SPACE +
264+ (pynutil .add_weight (year_graph , - 1.0 )))
277265
278- graph_ymd = pynini . accep ( "" )
266+ graph_ymd = year_graph + self . DELETE_EXTRA_SPACE + self . INSERT_SPACE + month_graph + self . DELETE_EXTRA_SPACE + self . INSERT_SPACE + day_graph
279267 for x in ["-" , "/" , "." ]:
280268 delete_sep = pynutil .delete (x )
281- graph_ymd |= ((year_graph | two_digit_year ) + delete_sep +
269+ graph_ymd |= ((pynutil . add_weight ( year_graph , - 1.0 ) ) + delete_sep +
282270 self .INSERT_SPACE + month_numbers_graph +
283271 delete_sep + self .INSERT_SPACE +
284272 pynini .closure (pynutil .delete ("0" ), 0 , 1 ) +
285273 day_graph )
286274
287- final_graph = graph_mdy | graph_dmy
288-
289- if not self .deterministic :
290- final_graph += pynini .closure (
291- pynutil .insert (" preserve_order: \" true\" " ), 0 , 1 )
292- m_sep_d = (month_numbers_graph +
293- pynutil .delete (pynini .union ("-" , "/" )) +
294- self .INSERT_SPACE +
295- pynini .closure (pynutil .delete ("0" ), 0 , 1 ) + day_graph )
296- final_graph |= m_sep_d
297- else :
298- final_graph += pynutil .insert (" preserve_order: \" true\" " )
275+ final_graph = pynutil .add_weight (graph_mdy | graph_dmy | graph_ymd ,
276+ - 0.1 ) | year_graph
299277
300278 period_fy = pynutil .insert (
301279 "text: \" " ) + _get_financial_period_graph () + pynutil .insert ("\" " )
302280 graph_fy = period_fy + self .INSERT_SPACE + two_digit_year
303281
304- final_graph |= graph_ymd | year_graph | graph_fy
305-
306- ymd_to_mdy_graph = None
307- ymd_to_dmy_graph = None
308- mdy_to_dmy_graph = None
309- md_to_dm_graph = None
310-
311- for month in [
312- x [0 ] for x in load_labels (
313- get_abs_path ("english/data/date/month_name.tsv" ))
314- ]:
315- for day in [
316- x [0 ] for x in load_labels (
317- get_abs_path ("english/data/date/day.tsv" ))
318- ]:
319- ymd_to_mdy_curr = (pynutil .insert ("month: \" " + month +
320- "\" day: \" " + day + "\" " ) +
321- pynini .accep ('year:' ) +
322- pynini .closure (self .VCHAR ) +
323- pynutil .delete (" month: \" " + month +
324- "\" day: \" " + day + "\" " ))
325-
326- # YY-MM-DD -> MM-DD-YY
327- ymd_to_mdy_curr = pynini .compose (graph_ymd , ymd_to_mdy_curr )
328- ymd_to_mdy_graph = (ymd_to_mdy_curr if
329- ymd_to_mdy_graph is None else pynini .union (
330- ymd_to_mdy_curr , ymd_to_mdy_graph ))
331-
332- ymd_to_dmy_curr = (
333- pynutil .insert ("day: \" " + day + "\" month: \" " + month +
334- "\" " ) + pynini .accep ('year:' ) +
335- pynini .closure (self .VCHAR ) +
336- pynutil .delete (" month: \" " + month + "\" day: \" " + day +
337- "\" " ))
338-
339- # YY-MM-DD -> MM-DD-YY
340- ymd_to_dmy_curr = pynini .compose (graph_ymd ,
341- ymd_to_dmy_curr ).optimize ()
342- ymd_to_dmy_graph = (ymd_to_dmy_curr if
343- ymd_to_dmy_graph is None else pynini .union (
344- ymd_to_dmy_curr , ymd_to_dmy_graph ))
345-
346- mdy_to_dmy_curr = (
347- pynutil .insert ("day: \" " + day + "\" month: \" " + month +
348- "\" " ) +
349- pynutil .delete ("month: \" " + month + "\" day: \" " + day +
350- "\" " ) + pynini .accep ('year:' ) +
351- pynini .closure (self .VCHAR )).optimize ()
352- # MM-DD-YY -> verbalize as MM-DD-YY (February fourth 1991) or DD-MM-YY (the fourth of February 1991)
353- mdy_to_dmy_curr = pynini .compose (graph_mdy ,
354- mdy_to_dmy_curr ).optimize ()
355- mdy_to_dmy_graph = (
356- mdy_to_dmy_curr if mdy_to_dmy_graph is None else
357- pynini .union (mdy_to_dmy_curr ,
358- mdy_to_dmy_graph ).optimize ()).optimize ()
359-
360- md_to_dm_curr = pynutil .insert (
361- "day: \" " + day + "\" month: \" " + month +
362- "\" " ) + pynutil .delete ("month: \" " + month + "\" day: \" " +
363- day + "\" " )
364- md_to_dm_curr = pynini .compose (m_sep_d ,
365- md_to_dm_curr ).optimize ()
366-
367- md_to_dm_graph = (
368- md_to_dm_curr if md_to_dm_graph is None else pynini .union (
369- md_to_dm_curr , md_to_dm_graph ).optimize ()).optimize ()
370-
371- if not self .deterministic :
372- final_graph |= pynutil .add_weight (
373- mdy_to_dmy_graph | md_to_dm_graph | ymd_to_dmy_graph , - 0.1 )
374-
375- final_graph = self .add_tokens (final_graph )
376- self .tagger = final_graph .optimize ()
282+ final_graph |= graph_fy
283+
284+ self .tagger = self .add_tokens (final_graph )
377285
378286 def build_verbalizer (self ):
379287 """
380288 Finite state transducer for verbalizing date, e.g.
381- date { month: "february" day: "five" year: "twenty twelve" preserve_order: "true" } -> february fifth twenty twelve
382- date { day: "five" month: "february" year: "twenty twelve" preserve_order: "true" } -> the fifth of february twenty twelve
289+ date { month: "february" day: "five" year: "twenty twelve" } -> the fifth of february twenty twelve
290+ date { day: "five" month: "february" year: "twenty twelve" } -> the fifth of february twenty twelve
383291 """
384292 ordinal = Ordinal (self .deterministic )
385293 phrase = pynini .closure (self .NOT_QUOTE , 1 )
@@ -401,31 +309,10 @@ def build_verbalizer(self):
401309 graph_fy = (pynutil .insert ("the " ) + period + pynutil .insert (" of" ) +
402310 pynini .closure (self .DELETE_EXTRA_SPACE + year , 0 , 1 ))
403311
404- # month (day) year
405- graph_mdy = (month +
406- pynini .closure (self .DELETE_EXTRA_SPACE + day , 0 , 1 ) +
407- pynini .closure (self .DELETE_EXTRA_SPACE + year , 0 , 1 ))
408- # may 5 -> may five
409- if not self .deterministic :
410- graph_mdy |= (
411- month +
412- pynini .closure (self .DELETE_EXTRA_SPACE + day_cardinal , 0 , 1 ) +
413- pynini .closure (self .DELETE_EXTRA_SPACE + year , 0 , 1 ))
414-
415312 # day month year
416313 graph_dmy = (pynutil .insert ("the " ) + day + self .DELETE_EXTRA_SPACE +
417314 pynutil .insert ("of " ) + month +
418315 pynini .closure (self .DELETE_EXTRA_SPACE + year , 0 , 1 ))
419316
420- optional_preserve_order = pynini .closure (
421- pynutil .delete ("preserve_order:" ) + self .DELETE_SPACE +
422- pynutil .delete ("\" true\" " ) + self .DELETE_SPACE
423- | pynutil .delete ("field_order:" ) + self .DELETE_SPACE +
424- pynutil .delete ("\" " ) + self .NOT_QUOTE + pynutil .delete ("\" " ) +
425- self .DELETE_SPACE )
426-
427- final_graph = (
428- (graph_dmy | pynutil .add_weight (graph_mdy , 0.0001 ) | year
429- | graph_fy ) + self .DELETE_SPACE + optional_preserve_order ) # noqa
430- delete_tokens = self .delete_tokens (final_graph )
431- self .verbalizer = delete_tokens .optimize ()
317+ final_graph = ((graph_dmy | year | graph_fy ) + self .DELETE_SPACE )
318+ self .verbalizer = self .delete_tokens (final_graph )
0 commit comments