@@ -231,7 +231,7 @@ static int is_rfc822_special(char ch)
231231 }
232232}
233233
234- static int has_rfc822_specials (const char * s , int len )
234+ static int needs_rfc822_quoting (const char * s , int len )
235235{
236236 int i ;
237237 for (i = 0 ; i < len ; i ++ )
@@ -240,6 +240,17 @@ static int has_rfc822_specials(const char *s, int len)
240240 return 0 ;
241241}
242242
/*
 * Return the number of bytes in sb that come after its last '\n'.
 * If sb contains no '\n' at all, the scan below ends with i == -1 and the
 * result is the whole of sb->len.
 */
243+ static int last_line_length (struct strbuf * sb )
244+ {
245+ int i ;
246+
247+ /* How many bytes are already used on the last line? */
248+ for (i = sb -> len - 1 ; i >= 0 ; i -- )
249+ if (sb -> buf [i ] == '\n' )
250+ break ;
251+ return sb -> len - (i + 1 );
252+ }
253+
243254static void add_rfc822_quoted (struct strbuf * out , const char * s , int len )
244255{
245256 int i ;
@@ -261,57 +272,110 @@ static void add_rfc822_quoted(struct strbuf *out, const char *s, int len)
261272 strbuf_addch (out , '"' );
262273}
263274
264- static int is_rfc2047_special (char ch )
/*
 * Header context for which RFC 2047 encoding is being produced.
 * is_rfc2047_special() applies its extra 'phrase' restrictions only for
 * RFC2047_ADDRESS.
 */
275+ enum rfc2047_type {
276+ RFC2047_SUBJECT , /* used by pp_title_line() for the title text */
277+ RFC2047_ADDRESS , /* used by pp_user_info() for the "From: " display name */
278+ };
279+
/*
 * Decide whether ch has to be escaped inside a Q-encoded word.
 *
 * Returns 1 when ch must be escaped (add_rfc2047() then emits it as "=XX"),
 * and 0 when it may be copied through unchanged.
 *
 * For every type: bytes flagged by non_ascii(), non-printable bytes,
 * whitespace, '=', '?' and '_' are special.
 * For RFC2047_ADDRESS only: everything except alphanumerics and
 * '!', '*', '+', '-', '/' is special as well.
 */
280+ static int is_rfc2047_special (char ch , enum rfc2047_type type )
265281{
266- return (non_ascii (ch ) || (ch == '=' ) || (ch == '?' ) || (ch == '_' ));
282+ /*
283+ * rfc2047, section 4.2:
284+ *
285+ * 8-bit values which correspond to printable ASCII characters other
286+ * than "=", "?", and "_" (underscore), MAY be represented as those
287+ * characters. (But see section 5 for restrictions.) In
288+ * particular, SPACE and TAB MUST NOT be represented as themselves
289+ * within encoded words.
290+ */
291+
292+ /*
293+ * rule out non-ASCII characters and non-printable characters (the
294+ * non-ASCII check should be redundant as isprint() is not localized
295+ * and only knows about ASCII, but be defensive about that)
296+ */
297+ if (non_ascii (ch ) || !isprint (ch ))
298+ return 1 ;
299+
300+ /*
301+ * rule out special printable characters (' ' should be the only
302+ * whitespace character considered printable, but be defensive and use
303+ * isspace())
304+ */
305+ if (isspace (ch ) || ch == '=' || ch == '?' || ch == '_' )
306+ return 1 ;
307+
308+ /*
309+ * rfc2047, section 5.3:
310+ *
311+ * As a replacement for a 'word' entity within a 'phrase', for example,
312+ * one that precedes an address in a From, To, or Cc header. The ABNF
313+ * definition for 'phrase' from RFC 822 thus becomes:
314+ *
315+ * phrase = 1*( encoded-word / word )
316+ *
317+ * In this case the set of characters that may be used in a "Q"-encoded
318+ * 'encoded-word' is restricted to: <upper and lower case ASCII
319+ * letters, decimal digits, "!", "*", "+", "-", "/", "=", and "_"
320+ * (underscore, ASCII 95.)>. An 'encoded-word' that appears within a
321+ * 'phrase' MUST be separated from any adjacent 'word', 'text' or
322+ * 'special' by 'linear-white-space'.
323+ */
324+
/* Outside an address phrase, everything that got this far may stay literal. */
325+ if (type != RFC2047_ADDRESS )
326+ return 0 ;
327+
328+ /* '=' and '_' are special cases and have been checked above */
329+ return !(isalnum (ch ) || ch == '!' || ch == '*' || ch == '+' || ch == '-' || ch == '/' );
267330}
268331
269- static void add_rfc2047 ( struct strbuf * sb , const char * line , int len ,
270- const char * encoding )
/*
 * Report whether the len bytes starting at line need RFC 2047 encoding.
 *
 * Returns 1 if any byte is flagged by non_ascii() or is '\n', or if the
 * two-byte sequence "=?" occurs (presumably because it would look like the
 * start of an encoded-word -- confirm). Returns 0 otherwise.
 *
 * NOTE(review): the added lines of this function never read 'type'.
 */
332+ static int needs_rfc2047_encoding ( const char * line , int len ,
333+ enum rfc2047_type type )
271334{
272- static const int max_length = 78 ; /* per rfc2822 */
273335 int i ;
274- int line_len ;
275-
276- /* How many bytes are already used on the current line? */
277- for (i = sb -> len - 1 ; i >= 0 ; i -- )
278- if (sb -> buf [i ] == '\n' )
279- break ;
280- line_len = sb -> len - (i + 1 );
281336
282337 for (i = 0 ; i < len ; i ++ ) {
283338 int ch = line [i ];
284339 if (non_ascii (ch ) || ch == '\n' )
285- goto needquote ;
340+ return 1 ;
286341 if ((i + 1 < len ) && (ch == '=' && line [i + 1 ] == '?' ))
287- goto needquote ;
342+ return 1 ;
288343 }
289- strbuf_add_wrapped_bytes (sb , line , len , 0 , 1 , max_length - line_len );
290- return ;
291344
292- needquote :
345+ return 0 ;
346+ }
347+
348+ static void add_rfc2047 (struct strbuf * sb , const char * line , int len ,
349+ const char * encoding , enum rfc2047_type type )
350+ {
351+ static const int max_encoded_length = 76 ; /* per rfc2047 */
352+ int i ;
353+ int line_len = last_line_length (sb );
354+
293355 strbuf_grow (sb , len * 3 + strlen (encoding ) + 100 );
294356 strbuf_addf (sb , "=?%s?q?" , encoding );
295357 line_len += strlen (encoding ) + 5 ; /* 5 for =??q? */
296358 for (i = 0 ; i < len ; i ++ ) {
297359 unsigned ch = line [i ] & 0xFF ;
360+ int is_special = is_rfc2047_special (ch , type );
361+
362+ /*
363+ * According to RFC 2047, we could encode the special character
364+ * ' ' (space) with '_' (underscore) for readability. But many
365+ * programs do not understand this and just leave the
366+ * underscore in place. Thus, we do nothing special here, which
367+ * causes ' ' to be encoded as '=20', avoiding this problem.
368+ */
298369
299- if (line_len >= max_length - 2 ) {
370+ if (line_len + 2 + ( is_special ? 3 : 1 ) > max_encoded_length ) {
300371 strbuf_addf (sb , "?=\n =?%s?q?" , encoding );
301372 line_len = strlen (encoding ) + 5 + 1 ; /* =??q? plus SP */
302373 }
303374
304- /*
305- * We encode ' ' using '=20' even though rfc2047
306- * allows using '_' for readability. Unfortunately,
307- * many programs do not understand this and just
308- * leave the underscore in place.
309- */
310- if (is_rfc2047_special (ch ) || ch == ' ' || ch == '\n' ) {
375+ if (is_special ) {
311376 strbuf_addf (sb , "=%02X" , ch );
312377 line_len += 3 ;
313- }
314- else {
378+ } else {
315379 strbuf_addch (sb , ch );
316380 line_len ++ ;
317381 }
@@ -323,6 +387,7 @@ void pp_user_info(const struct pretty_print_context *pp,
323387 const char * what , struct strbuf * sb ,
324388 const char * line , const char * encoding )
325389{
390+ int max_length = 78 ; /* per rfc2822 */
326391 char * date ;
327392 int namelen ;
328393 unsigned long time ;
@@ -340,25 +405,27 @@ void pp_user_info(const struct pretty_print_context *pp,
340405 if (pp -> fmt == CMIT_FMT_EMAIL ) {
341406 char * name_tail = strchr (line , '<' );
342407 int display_name_length ;
343- int final_line ;
344408 if (!name_tail )
345409 return ;
346410 while (line < name_tail && isspace (name_tail [-1 ]))
347411 name_tail -- ;
348412 display_name_length = name_tail - line ;
349413 strbuf_addstr (sb , "From: " );
350- if (!has_rfc822_specials (line , display_name_length )) {
351- add_rfc2047 (sb , line , display_name_length , encoding );
352- } else {
414+ if (needs_rfc2047_encoding (line , display_name_length , RFC2047_ADDRESS )) {
415+ add_rfc2047 (sb , line , display_name_length ,
416+ encoding , RFC2047_ADDRESS );
417+ max_length = 76 ; /* per rfc2047 */
418+ } else if (needs_rfc822_quoting (line , display_name_length )) {
353419 struct strbuf quoted = STRBUF_INIT ;
354420 add_rfc822_quoted (& quoted , line , display_name_length );
355- add_rfc2047 (sb , quoted .buf , quoted .len , encoding );
421+ strbuf_add_wrapped_bytes (sb , quoted .buf , quoted .len ,
422+ -6 , 1 , max_length );
356423 strbuf_release (& quoted );
424+ } else {
425+ strbuf_add_wrapped_bytes (sb , line , display_name_length ,
426+ -6 , 1 , max_length );
357427 }
358- for (final_line = 0 ; final_line < sb -> len ; final_line ++ )
359- if (sb -> buf [sb -> len - final_line - 1 ] == '\n' )
360- break ;
361- if (namelen - display_name_length + final_line > 78 ) {
428+ if (namelen - display_name_length + last_line_length (sb ) > max_length ) {
362429 strbuf_addch (sb , '\n' );
363430 if (!isspace (name_tail [0 ]))
364431 strbuf_addch (sb , ' ' );
@@ -1278,6 +1345,7 @@ void pp_title_line(const struct pretty_print_context *pp,
12781345 const char * encoding ,
12791346 int need_8bit_cte )
12801347{
1348+ static const int max_length = 78 ; /* per rfc2822 */
12811349 struct strbuf title ;
12821350
12831351 strbuf_init (& title , 80 );
@@ -1287,7 +1355,12 @@ void pp_title_line(const struct pretty_print_context *pp,
12871355 strbuf_grow (sb , title .len + 1024 );
12881356 if (pp -> subject ) {
12891357 strbuf_addstr (sb , pp -> subject );
1290- add_rfc2047 (sb , title .buf , title .len , encoding );
1358+ if (needs_rfc2047_encoding (title .buf , title .len , RFC2047_SUBJECT ))
1359+ add_rfc2047 (sb , title .buf , title .len ,
1360+ encoding , RFC2047_SUBJECT );
1361+ else
1362+ strbuf_add_wrapped_bytes (sb , title .buf , title .len ,
1363+ - last_line_length (sb ), 1 , max_length );
12911364 } else {
12921365 strbuf_addbuf (sb , & title );
12931366 }
0 commit comments