[bug-mailutils] [PATCH] new RFC 2047 encoder

bug-mailutils

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[bug-mailutils] [PATCH] new RFC 2047 encoder

From:	이기동\(Kidong Lee\)
Subject:	[bug-mailutils] [PATCH] new RFC 2047 encoder
Date:	Thu, 28 Apr 2005 13:20:13 +0900 (JST)

Hello,
I rewrote the rfc2047_encode() function.

The existing encoder function does not use the filter and
stream functions, so it partially duplicates the
base64/qp encoders in filter_trans.c.

So I rewrote the function using the filter_* and stream_*
functions, removing the duplicated code.

This patch is based on version 0.6, not CVS.
(When I compiled the CVS source, an error occurred while
compiling folder.c.)
Please review it and commit it to CVS.

Cheers,
Kidong


        

        
                
___________________________________ 
기본 250MB 최대 1GB, 더 이상 용량 고민없는 - 야후! 메일 (http://mail.yahoo.co.kr) 
최신 휴대폰 정보, 벨소리, 캐릭터, 문자메세지 - 야후! 모바일 (http://kr.mobile.yahoo.com) 
대한민국 블로거 다 모여라! - 야후가 지원금 100만원을 
쏩니다(http://kr.ring.yahoo.com/FMAIN/event/event1.html)

--- rfc2047.c.orig      2005-04-26 14:48:26.000000000 +0900
+++ rfc2047.c   2005-04-27 21:06:08.000000000 +0900
@@ -207,413 +207,6 @@
 }
 
 
-
-/* ==================================================
-   RFC 2047 Encoder
-   ================================================== */
-
-#define MAX_QUOTE 75
-
-/* Be more conservative in what we quote than in RFC2045, as in some
-   circumstances, additional symbols (like parenthesis) must be quoted
-   in headers. This is never a problem for the recipient, except for
-   the extra overhead in the message size */
-static int
-must_quote (char c)
-{
-  if (((c >  32) && (c <= 57))  || 
-      ((c >= 64) && (c <= 126)))
-    return 0;
-
-  return 1;
-}
-
-
-/* State of the encoder */
-typedef struct _encoder rfc2047_encoder;
-
-struct _encoder {
-  /* Name of the encoding (either B or Q) */
-  char encoding;
-
-  /* Charset of the input stream */
-  const char * charset;
-
-  /* Compute the size of the next character (in bytes), according to
-     the charset */
-  int (* charcount) (const char *);
-
-  /* Size of the next character (in bytes) */
-  int charblock;
-
-  /* TRUE if we need to open a quoted-word at the next byte */
-  int must_open;
-
-  /* Pointer on the current input byte */
-  const unsigned char * src;
-
-  /* Pointer on the current output byte and on the complete output */
-  char * dst, * result;
-
-  /* todo: number of bytes remaining in the input, done: number of
-     bytes written in the output, quotesize: number of bytes in the
-     current quoted-word */
-  int todo, done, quotesize;
-
-  /* Virtual methods implemented for the encoders:
-      
-      count: return how many bytes would be used by inserting the
-             current input and updates 'charblock'
-      next:  quote the current input byte on the output
-      flush: output any pending byte
-  */
-  int  (* count) (rfc2047_encoder * enc);
-  int  (* next)  (rfc2047_encoder * enc);
-  void (* flush) (rfc2047_encoder * enc);
-
-  /* Extra data for the Base64 encoder */
-  unsigned char buffer [4];
-  int  state;
-};
-
-
-/* --------------------------------------------------
-   Quoted-words building blocks 
-   -------------------------------------------------- */
-
-/* Write the opening of a quoted-word and return the minimum number of
-   bytes it will use */
-static int
-_open_quote (const char * charset,
-           char encoding,
-           char ** dst, int * done)
-{
-  int len = strlen (charset) + 5;
-
-  (* done) += len;
-  
-  if (* dst)
-    {
-      sprintf (* dst, "=?%s?%c?", charset, encoding);
-      (* dst) += len;
-    }
-
-  /* in the initial length of the quote we already count the final ?= */
-  return len + 2;
-}
-
-/* Terminate a quoted-word */
-static void
-_close_quote (char ** dst, int * done)
-{
-  * done += 2;
-  
-  if (* dst)
-    {
-      strcpy (* dst, "?=");
-      (* dst) += 2;
-    }
-}
-
-
-/* Call this function before the beginning of a quoted-word */
-static void
-init_quoted (rfc2047_encoder * enc)
-{
-  enc->must_open = 1;
-}
-
-/* Insert the current byte in the quoted-word (handling maximum
-   quoted-word sizes,...) */
-static void
-insert_quoted (rfc2047_encoder * enc)
-{
-  if (enc->must_open)
-    {
-      enc->must_open = 0;
-
-      /* The quotesize holds the known size of the quoted-word, even
-        if all the bytes have not yet been inserted in the output
-        stream. */
-      enc->quotesize = 
-       _open_quote (enc->charset, enc->encoding, 
-                    & enc->dst, & enc->done) + enc->count (enc);
-    }
-  else 
-    {
-      if (enc->charblock == 0)
-       {
-         /* The quotesize holds the known size of the quoted-word,
-            even if all the bytes have not yet been inserted in the
-            output stream. */
-         enc->quotesize += enc->count (enc);
-         if (enc->quotesize > MAX_QUOTE)
-           {
-             /* Start a new quoted-word */
-             _close_quote (& enc->dst, & enc->done);
-             
-             if (enc->dst) * (enc->dst ++) = ' ';
-             enc->done ++;
-             
-             enc->quotesize = _open_quote (enc->charset, enc->encoding, 
-                                           & enc->dst, & enc->done);
-           }
-       }
-    }
-
-  /* We are ready to process one more byte from the input stream */
-  enc->charblock --;
-  enc->next (enc);
-}
-
-/* Flush the current quoted-word */
-static void
-flush_quoted (rfc2047_encoder * enc)
-{
-  if (enc->must_open) return;
-
-  enc->flush (enc);
-  _close_quote (& enc->dst, & enc->done);
-}
-
-
-/* Insert the current byte unquoted */
-static void
-insert_unquoted (rfc2047_encoder * enc)
-{
-  if (enc->dst) * (enc->dst ++) = * (enc->src);
-  enc->src ++;
-  enc->todo --;
-  enc->done ++;
-}
-
-
-/* Check if the next word will need to be quoted */
-static int
-is_next_quoted (const char * src) 
-{
-  while (isspace (* src)) src ++;
-
-  while (* src) 
-    {
-      if (isspace (* src)) return 0;
-      if (must_quote (* src)) return 1;
-
-      src ++;
-    }
-
-  return 0;
-}
-
-
-/* --------------------------------------------------
-   Known character encodings
-   -------------------------------------------------- */
-
-static int
-ce_single_byte (const char * src)
-{
-  return 1;
-}
-
-static int
-ce_utf_8 (const char * src)
-{
-  unsigned char c = * src;
-
-  if (c <= 0x7F) return 1;
-
-  if (c >= 0xFC) return 6;
-  if (c >= 0xF8) return 5;
-  if (c >= 0xF0) return 4;
-  if (c >= 0xE0) return 3;
-  if (c >= 0xC0) return 2;
-
-  /* otherwise, this is not a first byte (and the UTF-8 is possibly
-     broken), continue with a single byte. */
-  return 1;
-}
-
-
-/* --------------------------------------------------
-   Quoted-printable encoder 
-   -------------------------------------------------- */
-
-static void
-qp_init (rfc2047_encoder * enc)
-{
-  return;
-}
-
-static int
-qp_count (rfc2047_encoder * enc)
-{
-  int len = 0, todo;
-  unsigned const char * curr;
-
-  /* count the size of a complete (multibyte) character */
-  enc->charblock = enc->charcount (enc->src);
-
-  for (todo = 0, curr = enc->src ;
-       todo < enc->charblock && * curr; 
-       todo ++, curr ++)
-    {
-      len += must_quote (* curr) ? 3 : 1;
-    }
-
-  return len;
-}
-
-static const char _hexdigit[16] = "0123456789ABCDEF";
-
-static int
-qp_next (rfc2047_encoder * enc)
-{
-  int done;
-
-  if (* enc->src == '_' || must_quote (* enc->src))
-    {
-      /* special encoding of space as a '_' to increase readability */
-      if (* enc->src == ' ')
-       {
-         if (enc->dst)
-           {
-             * (enc->dst ++) = '_';
-           }
-
-         done = 1;
-       }
-      else {
-       /* default encoding */
-       if (enc->dst)
-         {
-           * (enc->dst ++) = '=';
-           * (enc->dst ++) = _hexdigit [* (enc->src) >> 4];
-           * (enc->dst ++) = _hexdigit [* (enc->src) & 0xF];
-         }
-       
-       done = 3;
-      }
-    }
-  else
-    {
-      if (enc->dst)
-       {
-         * (enc->dst ++) = * enc->src;
-       }
-
-      done = 1;
-    }
-
-  enc->src ++;
-
-  enc->done += done;
-  enc->todo --;
-
-  return done;
-}
-
-static void
-qp_flush (rfc2047_encoder * enc)
-{
-  return;
-}
-
-
-/* --------------------------------------------------
-   Base64 encoder 
-   -------------------------------------------------- */
-
-const char *b64 =
-"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
-
-static void
-base64_init (rfc2047_encoder * enc)
-{
-  enc->state = 0;
-  return;
-}
-
-static int
-base64_count (rfc2047_encoder * enc)
-{
-  int len = 0, todo;
-
-  /* Check the size of a complete (multibyte) character */
-  enc->charblock = enc->charcount (enc->src);
-
-  for (todo = 0 ; todo < enc->charblock; todo ++)
-    {
-      /* Count the size of the encoded block only once, at the first
-        byte transmitted. */
-      len += ((enc->state + todo) % 3 == 0) ? 4 : 0;
-    }
-
-  return len;
-}
-
-static int
-base64_next (rfc2047_encoder * enc)
-{
-  enc->buffer [enc->state ++] = * (enc->src ++);
-  enc->todo --;
-
-  if (enc->state < 3) return 0;
-  
-  /* We have a full quantum */
-  if (enc->dst)
-    {
-      * (enc->dst ++) = b64 [(enc->buffer[0] >> 2)];
-      * (enc->dst ++) = b64 [((enc->buffer[0] & 0x3) << 4) | (enc->buffer[1] >> 4)];
-      * (enc->dst ++) = b64 [((enc->buffer[1] & 0xF) << 2) | (enc->buffer[2] >> 6)];
-      * (enc->dst ++) = b64 [(enc->buffer[2] & 0x3F)];
-    }
-  
-  enc->done += 4;
-  
-  enc->state = 0;
-  return 4;
-}
-
-static void
-base64_flush (rfc2047_encoder * enc)
-{
-  if (enc->state == 0) return;
-
-  if (enc->dst) 
-    {
-      switch (enc->state)
-       {
-       case 1:
-         * (enc->dst ++) = b64 [(enc->buffer[0] >> 2)];
-         * (enc->dst ++) = b64 [((enc->buffer[0] & 0x3) << 4)];
-         * (enc->dst ++) = '=';
-         * (enc->dst ++) = '=';
-         break;
-       
-       case 2:
-         * (enc->dst ++) = b64 [(enc->buffer[0] >> 2)];
-         * (enc->dst ++) = b64 [((enc->buffer[0] & 0x3) << 4) | (enc->buffer[1] >> 4)];
-         * (enc->dst ++) = b64 [((enc->buffer[1] & 0xF) << 2)];
-         * (enc->dst ++) = '=';
-         break;
-       }
-    }
-
-  enc->done += 4;
-  enc->state = 0;
-  return;
-}
-
-
-/* States of the RFC2047 encoder */
-enum {
-  ST_SPACE,   /* waiting for non-quoted whitespace */
-  ST_WORD,    /* waiting for non-quoted word */
-  ST_QUOTED,  /* waiting for quoted word */
-  ST_QUOTED_SPACE, /* waiting for quoted whitespace */
-};
-
 /**
    Encode a header according to RFC 2047
    
@@ -630,149 +223,42 @@
 */
 int
 rfc2047_encode (const char *charset, const char *encoding,
-               const char *text, char ** result)
+               const char *text, char **result)
 {
-  rfc2047_encoder enc;
+  stream_t input_stream;
+  stream_t output_stream;
+  int nbytes = 0;
+  char encoding_char = '\0';
+
+  if (charset == NULL || encoding == NULL || text == NULL)
+    return MU_ERR_BAD_2047_INPUT;
+
+  if (strcasecmp (encoding, "base64") == 0)
+    encoding_char = 'B';
+  else if (strcasecmp (encoding, "quoted-printable") == 0)
+    encoding_char = 'Q';
+  else
+    return MU_ERR_BAD_2047_INPUT;
+
+  memory_stream_create (&input_stream, 0, 0);
+  stream_sequential_write (input_stream, text, strlen (text));
+
+  filter_create (&output_stream, input_stream, encoding, MU_FILTER_ENCODE,
+                MU_STREAM_READ);
+
+  /* Assume strlen(qp_encoded_text) <= strlen(text) * 3 */
+  /* malloced length = strlen("=?" + charset + "?" + [BQ] + "?" + encoded_text + "?=") */
+  *result =
+    (char *) malloc (2 + strlen (charset) + 3 + (strlen (text) * 3) + 2);
+  sprintf (*result, "=?%s?%c?", charset, encoding_char);
 
-  int is_compose;
-  int state;
+  stream_sequential_read (output_stream, *result + strlen (*result),
+                         strlen (text) * 3, &nbytes);
 
-  if (!charset || !encoding || !text)
-    return EINVAL;
-  if (!result)
-    return MU_ERR_OUT_PTR_NULL;
-
-  /* Check for a known encoding */
-  do 
-    {
-      if (strcasecmp (encoding, "base64") == 0) 
-       {
-         base64_init (& enc);
-         enc.encoding = 'B';
-         enc.next  = base64_next;
-         enc.count = base64_count;
-         enc.flush = base64_flush;
-         break;
-       }
-      
-      if (strcasecmp (encoding, "quoted-printable") == 0) 
-       {
-         qp_init (& enc);
-         enc.encoding = 'Q';
-         enc.next  = qp_next;
-         enc.count = qp_count;
-         enc.flush = qp_flush;
-         break;
-       }
-      
-      return MU_ERR_NOENT;
-    } 
-  while (0);
-
-  /* Check for a known charset */
-  do
-    {
-      if (strcasecmp (charset, "utf-8") == 0)
-       {
-         enc.charcount = ce_utf_8;
-         break;
-       }
-      
-      enc.charcount = ce_single_byte;
-    }
-  while (0);
-
-  enc.dst = NULL;
-  enc.charset = charset;
-
-  /* proceed in two passes: estimate the required space, then fill */
-  for (is_compose = 0 ; is_compose <= 1 ; is_compose ++) 
-    {
-      state = ST_SPACE;
-
-      enc.src  = text;
-      enc.todo = strlen (text);
-      enc.done = 0;
-
-      while (enc.todo) 
-       {
-         
-         switch (state) 
-           {
-           case ST_SPACE:
-             if (isspace (* enc.src)) 
-               {
-                 insert_unquoted (& enc);
-                 break;
-               }
-
-             if (is_next_quoted (enc.src)) 
-               {
-                 init_quoted (& enc);
-                 state = ST_QUOTED;
-               }
-             else 
-               {
-                 state = ST_WORD;
-               }
-             break;
-
-           case ST_WORD:
-             if (isspace (* enc.src)) 
-               {
-                 state = ST_SPACE;
-                 break;
-               }
-
-             insert_unquoted (& enc);
-             break;
-             
-           case ST_QUOTED:
-             if (isspace (* enc.src))
-               {
-                 if (is_next_quoted (enc.src))
-                   {
-                     state = ST_QUOTED_SPACE;
-                   }
-                 else
-                   {
-                     flush_quoted (& enc);
-                     state = ST_SPACE;
-                   }
-                 break;
-               }
-
-             insert_quoted (& enc);
-             break;
-             
-           case ST_QUOTED_SPACE:
-             if (! isspace (* enc.src))
-               {
-                 state = ST_QUOTED;
-                 break;
-               }
-
-             insert_quoted (& enc);
-             break;
-           }
-       }
-
-      if (state == ST_QUOTED ||
-         state == ST_QUOTED_SPACE)
-       {
-         flush_quoted (& enc);
-       }
-
-      if (enc.dst == NULL) 
-       {
-         enc.dst = malloc (enc.done + 1);
-         if (enc.dst == NULL) return -ENOMEM;
-         enc.result = enc.dst;
-       }
-    }
+  strcpy (*result + 2 + strlen (charset) + 3 + nbytes, "?=");
 
-  * (enc.dst) = '\0';
-  * result = enc.result;
+  stream_destroy (&input_stream, NULL);
+  stream_destroy (&output_stream, NULL);
 
   return 0;
 }

[Prev in Thread]

Current Thread

[Next in Thread]

[bug-mailutils] [PATCH] new RFC 2047 encoder, 이기동\(Kidong Lee\) <=

Prev by Date: Re: [bug-mailutils] sieve & quota
Previous by thread: [bug-mailutils] qp encoding
Index(es):
- Date
- Thread