[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[SCM] GNU Mailutils branch, master, updated. release-2.2-682-g24eb3d6
From: |
Sergey Poznyakoff |
Subject: |
[SCM] GNU Mailutils branch, master, updated. release-2.2-682-g24eb3d6 |
Date: |
Mon, 07 Jul 2014 12:44:34 +0000 |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Mailutils".
http://git.savannah.gnu.org/cgit/mailutils.git/commit/?id=24eb3d6df145f2e301a2484551a395edfd9aa8e0
The branch, master has been updated
via 24eb3d6df145f2e301a2484551a395edfd9aa8e0 (commit)
via ebfd2897f2230f527d1bfdaa507674268dc513e6 (commit)
from 0e406bd797b22703d208a396b2a870a16ae4cf0f (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 24eb3d6df145f2e301a2484551a395edfd9aa8e0
Author: Sergey Poznyakoff <address@hidden>
Date: Mon Jul 7 15:43:37 2014 +0300
New example: mboxidx
* examples/mboxidx.c: New file
* examples/Makefile.am: Add mboxidx
commit ebfd2897f2230f527d1bfdaa507674268dc513e6
Author: Sergey Poznyakoff <address@hidden>
Date: Mon Jul 7 15:41:12 2014 +0300
Fix memory overrun in iconv filter.
* libmailutils/filter/iconvflt.c (format_octal): New function.
(_icvt_decoder): Fix size calculations before returning
mu_filter_moreoutput.
Use format_octal instead of sprintf to avoid memory overwrite with the
terminating nul character.
-----------------------------------------------------------------------
Summary of changes:
examples/mboxidx.c | 532 ++++++++++++++++++++++++++++++++++++++++
libmailutils/filter/iconvflt.c | 17 ++-
libmailutils/filter/xml.c | 4 +-
3 files changed, 549 insertions(+), 4 deletions(-)
create mode 100644 examples/mboxidx.c
diff --git a/examples/mboxidx.c b/examples/mboxidx.c
new file mode 100644
index 0000000..b06f68e
--- /dev/null
+++ b/examples/mboxidx.c
@@ -0,0 +1,532 @@
+/* Convert mailbox to a Sphinx XML input.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+
+ GNU Mailutils is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GNU Mailutils is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GNU Mailutils. If not, see <http://www.gnu.org/licenses/>. */
+
+/* This program takes a mailbox in arbitrary format as its only argument
+ and produces on standard output an XML stream suitable for use with
+ Sphinx for indexing the mailbox content. Example usage in sphinx.conf:
+
+ source mbox_source
+ {
+ type = xmlpipe2
+ xmlpipe_command = /usr/local/bin/mboxidx /var/spool/archive/mbox
+ }
+
+ index mbox_idx
+ {
+ source = mbox_source
+ docinfo = extern
+ charset_type = utf-8
+ ...
+ }
+*/
+
+#include <config.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <mailutils/mailutils.h>
+#include <fnmatch.h>
+
+mu_list_t typelist; /* fnmatch patterns of content types whose bodies are indexed */
+mu_list_t fldlist; /* struct field entries, one per output field */
+int log_to_stderr = 1; /* cleared by -F, which switches diagnostics to syslog */
+char *output_charset = "utf-8"; /* charset the output is converted to; set by -c */
+int no_out_if_empty = 0; /* set by -N: produce no output if nothing is indexed */
+
+/* Help text printed by help(), one output line per element.  The array
+   is terminated by NULL.  No element carries its own newline: help()
+   appends one to each line. */
+char *docstr[] = {
+  "usage: mboxidx [OPTIONS] MBOX",
+  "converts MBOX into a xmlpipe2 stream for Sphinx indexing engine",
+  "OPTIONS are:",
+  "",
+  " -c CHARSET set output charset",
+  " -d SPEC set debug verbosity",
+  " -f FIELD index header FIELD",
+  " -F FACILITY set syslog facility",
+  " -L TAG set syslog tag",
+  " -N no output if nothing to index",
+  " -S N skip first N messages",
+  " -s FILE save and read status of the prior run from FILE",
+  " -t GLOB index message body if its content type matches GLOB",
+  " -h display this help summary",
+  NULL
+};
+
+/* Print the usage summary from docstr and terminate the program with
+   exit code C.  The text goes to stdout when C is zero and to stderr
+   otherwise. */
+void
+help (int c)
+{
+  FILE *out = stdout;
+  char **line;
+
+  if (c)
+    out = stderr;
+
+  for (line = docstr; *line; line++)
+    fprintf (out, "%s\n", *line);
+
+  exit (c);
+}
+
+
+/* Comparator installed on typelist: ITEM is a stored fnmatch pattern and
+   DATA is the string being looked up.  Returns the result of fnmatch,
+   i.e. 0 when the string matches the pattern. */
+static int
+matchstr (void const *item, void const *data)
+{
+  return fnmatch ((char const *) item, (char const *) data, 0);
+}
+
+struct field /* One output field of each generated sphinx document. */
+{
+ char *name; /* XML element name; for header fields also the header to read */
+ int ishdr; /* non-zero: value is a message header; zero: the message body */
+};
+
+/* Append a new field descriptor to fldlist.  NAME is copied; ISHDR tells
+   whether the field is taken from a message header (non-zero) or from
+   the message body (zero). */
+static void
+fldadd (char *name, int ishdr)
+{
+  struct field *fld;
+
+  fld = mu_alloc (sizeof (fld[0]));
+  fld->ishdr = ishdr;
+  fld->name = mu_strdup (name);
+  mu_list_append (fldlist, fld);
+}
+
+/* Fetch the unfolded value of header NAME from HDR into *VALUE and strip
+   trailing whitespace from it.  Returns the status of
+   mu_header_aget_value_unfold; *VALUE is trimmed only when that status
+   is 0. */
+static int
+get_hdr_value (mu_header_t hdr, const char *name, char **value)
+{
+  int rc = mu_header_aget_value_unfold (hdr, name, value);
+
+  if (rc != 0)
+    return rc;
+
+  mu_rtrim_class (*value, MU_CTYPE_SPACE);
+  return 0;
+}
+
+
+/* Store in *VALUE the Content-Transfer-Encoding of HDR, or a copy of
+   "7bit" if the header value cannot be obtained.  Always returns 0. */
+static int
+get_content_encoding (mu_header_t hdr, char **value)
+{
+  char *enc = NULL;
+  int rc = get_hdr_value (hdr, MU_HEADER_CONTENT_TRANSFER_ENCODING, &enc);
+
+  /* Fall back to the default encoding when the header is unavailable. */
+  *value = rc ? mu_strdup ("7bit") : enc;
+  return 0;
+}
+
+/* Put an "iconv" decoding filter on top of *STR, converting from
+   INPUT_CHARSET to output_charset and passing the "copy-octal" option to
+   the filter.
+
+   On success *STR is replaced by the new filter stream, the reference to
+   the underlying stream held by the caller is released, and 0 is
+   returned.  Otherwise the error code of mu_filter_create_args is
+   returned. */
+static int
+set_charset_filter (mu_stream_t * str, char *input_charset)
+{
+  int rc;
+  /* Four arguments plus the terminating NULL: the array needs five
+     slots, otherwise the store to args[4] writes past its end. */
+  char const *args[5];
+
+  mu_stream_t b_stream = *str;
+
+  args[0] = "iconv";
+  args[1] = input_charset;
+  args[2] = output_charset;
+  args[3] = "copy-octal";
+  args[4] = NULL;
+
+  rc = mu_filter_create_args (str, b_stream,
+                              "iconv",
+                              4, args, MU_FILTER_DECODE, MU_STREAM_READ);
+
+  if (rc == 0)
+    mu_stream_unref (b_stream);
+  return rc;
+}
+
+
+/* Write the indexable body text of MSG to mu_strout.
+
+   The Content-Type header (default: "text/plain") is lowercased and split
+   on ';' to obtain the bare media type and an optional charset=
+   parameter.  Multipart messages are handled by recursing into each
+   part, message/rfc822 by recursing into the encapsulated message.  Any
+   other body is output only if its type matches a pattern in typelist;
+   it is then decoded according to Content-Transfer-Encoding, converted
+   to output_charset and passed through the "xml" filter.
+
+   A non-zero DELIM makes the function print a newline before the body
+   text; the recursive calls use it to separate parts. */
+static void
+formatbody (mu_message_t msg, int delim)
+{
+  int rc;
+  char *type;
+  char *input_charset = NULL;
+  int ismime = 0;
+  mu_message_t msgpart;
+  mu_header_t hdr = NULL;
+
+  mu_message_get_header (msg, &hdr);
+  if (get_hdr_value (hdr, MU_HEADER_CONTENT_TYPE, &type))
+    type = mu_strdup ("text/plain");
+  else
+    {
+      struct mu_wordsplit ws;
+
+      mu_strlower (type);
+
+      ws.ws_delim = ";";
+      if (mu_wordsplit (type, &ws,
+                        MU_WRDSF_DELIM | MU_WRDSF_WS
+                        | MU_WRDSF_NOVAR | MU_WRDSF_NOCMD))
+        {
+          /* Carry on with the unsplit header value. */
+          mu_error ("can't split content type \"%s\": %s",
+                    type, mu_wordsplit_strerror (&ws));
+        }
+      else
+        {
+          char *hval = NULL;
+
+          /* With no words at all (e.g. an empty header value) keep the
+             raw value rather than picking up a null pointer. */
+          if (ws.ws_wordc > 0)
+            {
+              int i;
+
+              /* Steal the first word as the media type.  The raw header
+                 value is remembered so it can be released once the
+                 split structure is no longer needed. */
+              hval = type;
+              type = ws.ws_wordv[0];
+              ws.ws_wordv[0] = NULL;
+              for (i = 1; i < ws.ws_wordc; i++)
+                {
+                  if (strncasecmp (ws.ws_wordv[i], "charset=", 8) == 0)
+                    {
+                      input_charset = mu_strdup (ws.ws_wordv[i] + 8);
+                      break;
+                    }
+                }
+            }
+          mu_wordsplit_free (&ws);
+          free (hval);
+        }
+    }
+
+  mu_message_is_multipart (msg, &ismime);
+  if (ismime)
+    {
+      size_t i, nparts;
+
+      rc = mu_message_get_num_parts (msg, &nparts);
+      if (rc)
+        mu_diag_funcall (MU_DIAG_ERROR, "mu_message_get_num_parts", NULL, rc);
+      else
+        for (i = 1; i <= nparts; i++)
+          {
+            if (mu_message_get_part (msg, i, &msgpart) == 0)
+              formatbody (msgpart, 1);
+          }
+    }
+  /* TYPE is already lowercase here, so an exact comparison suffices.
+     Comparing only strlen (type) characters would also accept every
+     prefix of "message/rfc822", the empty string included. */
+  else if (strcmp (type, "message/rfc822") == 0)
+    {
+      rc = mu_message_unencapsulate (msg, &msgpart, NULL);
+      if (rc)
+        mu_diag_funcall (MU_DIAG_ERROR, "mu_message_unencapsulate", NULL, rc);
+      else
+        formatbody (msgpart, 1);
+    }
+  else if (mu_list_locate (typelist, type, NULL) == 0)
+    {
+      char *encoding;
+      mu_body_t body = NULL;
+      mu_stream_t b_stream = NULL;
+      mu_stream_t d_stream = NULL;
+      mu_stream_t stream = NULL;
+
+      get_content_encoding (hdr, &encoding);
+
+      /* Undo the transfer encoding; read the raw body if no decoding
+         filter can be created. */
+      mu_message_get_body (msg, &body);
+      mu_body_get_streamref (body, &b_stream);
+      if (mu_filter_create (&d_stream, b_stream, encoding,
+                            MU_FILTER_DECODE, MU_STREAM_READ) == 0)
+        {
+          mu_stream_unref (b_stream);
+          stream = d_stream;
+        }
+      else
+        stream = b_stream;
+
+      /* Convert to the output charset, assuming US-ASCII when the
+         charset is not declared or cannot be handled. */
+      if (!input_charset || set_charset_filter (&stream, input_charset))
+        set_charset_filter (&stream, "US-ASCII");
+
+      /* Escape the text for inclusion in the XML output. */
+      b_stream = stream;
+      if (mu_filter_create (&stream, b_stream, "xml",
+                            MU_FILTER_ENCODE, MU_STREAM_READ) == 0)
+        mu_stream_unref (b_stream);
+
+      if (delim)
+        mu_printf ("\n");
+      mu_stream_copy (mu_strout, stream, 0, NULL);
+
+      mu_stream_unref (stream);
+      free (encoding);
+    }
+  free (input_charset);
+  free (type);
+}
+
+/* mu_list_foreach callback: output one field of a document.
+
+   ITEM is the struct field to format and DATA is the message.  The
+   value is enclosed in an XML element named after the field.  For a
+   header field the unfolded header value is written, RFC 2047-decoded
+   to output_charset when decoding succeeds and verbatim otherwise; a
+   missing header yields an empty element.  For the body field the text
+   is produced by formatbody.  Always returns 0.
+
+   NOTE(review): header values are written without XML escaping, unlike
+   the body, which formatbody passes through the "xml" filter. */
+static int
+fldfmt (void *item, void *data)
+{
+  struct field *f = item;
+  mu_message_t msg = data;
+  int rc;
+
+  mu_printf (" <%s>", f->name);
+  if (f->ishdr)
+    {
+      mu_header_t hdr;
+      char *hfield;
+
+      mu_message_get_header (msg, &hdr);
+      rc = mu_header_aget_value_unfold (hdr, f->name, &hfield);
+      if (rc == 0)
+        {
+          char *tmp;
+
+          rc = mu_rfc2047_decode (output_charset, hfield, &tmp);
+          if (rc)
+            mu_stream_write (mu_strout, hfield, strlen (hfield), NULL);
+          else
+            {
+              mu_stream_write (mu_strout, tmp, strlen (tmp), NULL);
+              free (tmp);
+            }
+          /* The header value was allocated for us; release it. */
+          free (hfield);
+        }
+    }
+  else
+    formatbody (msg, 0);
+
+  mu_printf ("</%s>\n", f->name);
+  return 0;
+}
+
+/* mu_list_foreach callback: print the schema declaration for the field
+   given by ITEM.  DATA is not used.  Always returns 0. */
+static int
+fldout (void *item, void *data)
+{
+  char const *fldname = ((struct field *) item)->name;
+
+  mu_printf (" <sphinx:field name=\"%s\"/>\n", fldname);
+  return 0;
+}
+
+/* Print the opening part of the xmlpipe2 stream: the XML declaration,
+   the opening sphinx:docset tag and a schema that declares every field
+   in fldlist. */
+void
+xmlpipe2_header ()
+{
+  mu_printf ("<?xml version=\"1.0\"?>\n"
+             "<sphinx:docset>\n"
+             " <sphinx:schema>\n");
+  mu_list_foreach (fldlist, fldout, NULL);
+  mu_printf (" </sphinx:schema>\n");
+}
+
+/* Print the closing sphinx:docset tag that ends the xmlpipe2 stream. */
+void
+xmlpipe2_footer ()
+{
+  mu_printf ("</sphinx:docset>\n");
+}
+
+size_t skip_count; /* set by -S or the state file, incremented once in main; messages numbered below it are skipped */
+size_t nmesg; /* count of MU_EVT_MESSAGE_ADD events so far; also the current message number */
+
+/* Read the number of already processed messages from the state file
+   NAME and store it in skip_count.
+
+   A file that does not exist is not an error: skip_count is left
+   untouched.  The file must consist of a decimal number optionally
+   followed by a newline; anything after that newline is not examined.
+   Any other problem (inaccessible or unreadable file, number too big for
+   size_t, other trailing characters) is reported and terminates the
+   program with exit code 1. */
+void
+read_state_file (const char *name)
+{
+  FILE *fp;
+  size_t count;
+  const size_t max = (size_t) -1;
+  int c;
+
+  if (access (name, F_OK))
+    {
+      if (errno == ENOENT)
+        return;
+      mu_error ("can't access state file \"%s\": %s",
+                name, mu_strerror (errno));
+      exit (1);
+    }
+
+  fp = fopen (name, "r");
+  if (!fp)
+    {
+      mu_error ("can't open state file \"%s\" for reading: %s",
+                name, mu_strerror (errno));
+      exit (1);
+    }
+
+  count = 0;
+  while ((c = fgetc (fp)) != EOF && c >= '0' && c <= '9')
+    {
+      size_t digit = c - '0';
+
+      /* Check before multiplying: comparing the wrapped result with the
+         previous value does not catch every overflow. */
+      if (count > (max - digit) / 10)
+        {
+          mu_error ("%s: message number too big", name);
+          exit (1);
+        }
+      count = count * 10 + digit;
+    }
+  fclose (fp);
+
+  /* The number may only be followed by end of file or a newline. */
+  if (c != EOF && c != '\n')
+    {
+      mu_error ("%s: malformed state file", name);
+      exit (1);
+    }
+
+  skip_count = count;
+}
+
+/* Save the current message count (nmesg) in the state file NAME as a
+   decimal number followed by a newline, replacing any previous content.
+   Failure to open or to write the file is reported and terminates the
+   program with exit code 1. */
+void
+write_state_file (const char *name)
+{
+  FILE *fp;
+  int failed = 0;
+  int ec = 0;
+
+  fp = fopen (name, "w");
+  if (!fp)
+    {
+      mu_error ("can't open state file \"%s\" for writing: %s",
+                name, mu_strerror (errno));
+      exit (1);
+    }
+
+  if (fprintf (fp, "%lu\n", (unsigned long) nmesg) < 0)
+    {
+      failed = 1;
+      ec = errno;
+    }
+
+  /* Buffered data is flushed by fclose, so a write error may show up
+     only here.  The stream is closed even if fprintf failed. */
+  if (fclose (fp) != 0 && !failed)
+    {
+      failed = 1;
+      ec = errno;
+    }
+
+  if (failed)
+    {
+      mu_error ("can't write state file \"%s\": %s",
+                name, mu_strerror (ec));
+      exit (1);
+    }
+}
+
+static int
+action (mu_observer_t o, size_t type, void *data, void *action_data) /* Observer callback attached in main: emits one sphinx:document per message event.  Always returns 0. */
+{
+ mu_mailbox_t mbox = mu_observer_get_owner (o);
+ mu_message_t msg = NULL;
+
+ switch (type)
+ {
+ case MU_EVT_MESSAGE_ADD:
+ ++nmesg; /* nmesg is now the number of the message being handled */
+ if (nmesg < skip_count) /* still within the messages to skip */
+ break;
+ if (no_out_if_empty && nmesg == skip_count) /* first message to be output: print the header main deferred */
+ xmlpipe2_header ();
+
+ MU_ASSERT (mu_mailbox_get_message (mbox, nmesg, &msg));
+ mu_printf (" <sphinx:document id=\"%lu\">\n", (unsigned long) nmesg);
+ mu_list_foreach (fldlist, fldfmt, msg); /* one element per configured field */
+ mu_printf (" </sphinx:document>\n");
+ break;
+ case MU_EVT_MAILBOX_PROGRESS:
+ /* Noop. */
+ break;
+ }
+ return 0;
+}
+
+int
+main (int argc, char **argv) /* Parse the options, open the mailbox named by the only argument and write the xmlpipe2 stream for it to standard output. */
+{
+ int rc;
+ mu_mailbox_t mbox;
+ mu_observer_t observer;
+ mu_observable_t observable;
+ size_t total;
+ char *p;
+ char *state_file = NULL;
+
+ mu_set_program_name (argv[0]);
+ mu_register_all_mbox_formats ();
+ mu_stdstream_setup (MU_STDSTREAM_RESET_NONE);
+
+ MU_ASSERT (mu_list_create (&fldlist));
+
+ MU_ASSERT (mu_list_create (&typelist));
+ mu_list_set_comparator (typelist, matchstr); /* lookups in typelist are fnmatch matches */
+ mu_list_append (typelist, mu_strdup ("text/*")); /* pattern that is always present; -t adds more */
+
+ while ((rc = getopt (argc, argv, "c:d:f:F:hL:NS:s:t:")) != EOF)
+ {
+ switch (rc)
+ {
+ case 'c':
+ output_charset = optarg;
+ break;
+ case 'd':
+ mu_debug_parse_spec (optarg);
+ break;
+ case 'F':
+ if (mu_string_to_syslog_facility (optarg, &mu_log_facility))
+ {
+ mu_error ("unknown facility: %s", optarg);
+ exit (1);
+ }
+ log_to_stderr = 0; /* a facility was given: log to syslog */
+ break;
+ case 'f':
+ fldadd (optarg, 1);
+ break;
+ case 'L':
+ mu_log_tag = optarg;
+ break;
+ case 'N':
+ no_out_if_empty = 1;
+ break;
+ case 'S':
+ skip_count = strtoul (optarg, &p, 10);
+ if (*p)
+ {
+ mu_error ("invalid number: %s", optarg);
+ exit (1);
+ }
+ break;
+ case 's':
+ state_file = optarg;
+ break;
+ case 't':
+ p = mu_strdup (optarg);
+ mu_strlower (p); /* lowercased like the content types it is matched against */
+ mu_list_append (typelist, p);
+ break;
+ case 'h':
+ help (0); /* does not return: help calls exit */
+ default:
+ exit (1);
+ }
+ }
+ fldadd ("content", 0); /* the message body is always the last field */
+
+ if (state_file)
+ read_state_file (state_file); /* may replace a count given with -S */
+
+ skip_count++; /* from "messages to skip" to "number of the first message to output" */
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc != 1)
+ help (1);
+
+ if (log_to_stderr)
+ mu_stdstream_strerr_setup (MU_STRERR_STDERR);
+ else
+ {
+ if (!mu_log_tag)
+ mu_log_tag = (char *) mu_program_name;
+ mu_stdstream_strerr_setup (MU_STRERR_SYSLOG);
+ }
+
+ rc = mu_mailbox_create_default (&mbox, argv[0]);
+ if (rc)
+ {
+ mu_error ("mu_mailbox_create: %s", mu_strerror (rc));
+ exit (EXIT_FAILURE);
+ }
+
+ rc = mu_mailbox_open (mbox, MU_STREAM_READ);
+ if (rc)
+ {
+ mu_error ("mu_mailbox_open: %s", mu_strerror (rc));
+ exit (EXIT_FAILURE);
+ }
+
+ mu_observer_create (&observer, mbox);
+ mu_observer_set_action (observer, action, mbox);
+ mu_observer_set_action_data (observer, NULL, mbox);
+ mu_mailbox_get_observable (mbox, &observable);
+ mu_observable_attach (observable, MU_EVT_MESSAGE_ADD, observer); /* action now receives MU_EVT_MESSAGE_ADD events */
+
+ if (!no_out_if_empty)
+ xmlpipe2_header (); /* with -N the header is printed by action instead */
+ MU_ASSERT (mu_mailbox_scan (mbox, 1, &total)); /* presumably triggers MU_EVT_MESSAGE_ADD for each message found */
+ if (!no_out_if_empty || nmesg >= skip_count) /* i.e. whenever a header has been printed */
+ xmlpipe2_footer ();
+ mu_mailbox_close (mbox);
+ mu_mailbox_destroy (&mbox);
+
+ if (state_file)
+ write_state_file (state_file); /* records nmesg for the next run */
+
+ return EXIT_SUCCESS;
+}
diff --git a/libmailutils/filter/iconvflt.c b/libmailutils/filter/iconvflt.c
index a227dc4..1d0f3db 100644
--- a/libmailutils/filter/iconvflt.c
+++ b/libmailutils/filter/iconvflt.c
@@ -76,6 +76,19 @@ struct _icvt_filter
iconv_t cd; /* Conversion descriptor */
};
+/* Store in OP the four-character escape "\ooo", where ooo is the value
+   of N as three octal digits.  Exactly four bytes are written and no
+   terminating nul is added, so OP needs room for only four characters. */
+static void
+format_octal (char *op, unsigned char n)
+{
+  op[0] = '\\';
+  /* Each digit is converted to its character code; the shifts select
+     the 3-bit group the digit stands for. */
+  op[1] = '0' + ((n >> 6) & 7);
+  op[2] = '0' + ((n >> 3) & 7);
+  op[3] = '0' + (n & 7);
+}
+
static enum mu_filter_result
_icvt_decoder (void *xd,
enum mu_filter_command cmd,
@@ -169,10 +182,10 @@ _icvt_decoder (void *xd,
{
if (olen < 4)
{
- iobuf->osize = 4;
+ iobuf->osize += 4;
return mu_filter_moreoutput;
}
- sprintf (op, "\\%03o", *(unsigned char*)ip);
+ format_octal (op, *(unsigned char*)ip);
op += 4;
olen -= 4;
ip++;
diff --git a/libmailutils/filter/xml.c b/libmailutils/filter/xml.c
index f8bf8b2..e1f1b54 100644
--- a/libmailutils/filter/xml.c
+++ b/libmailutils/filter/xml.c
@@ -27,8 +27,8 @@
/* In the "encode" mode, the "xml" filter takes a stream of UTF-8
characters as its input, converts the three XML entities as per
table below and replaces invalid characters (see
- http://www.w3.org/TR/xml/#dt-charref) with their numeric character
- reference representation as per http://www.w3.org/TR/xml/#charencoding.
+ http://www.w3.org/TR/xml/#charsets) with their numeric character
+ reference representation as per http://www.w3.org/TR/xml/#dt-charref.
In decode mode, the filter does the reverse.
hooks/post-receive
--
GNU Mailutils
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [SCM] GNU Mailutils branch, master, updated. release-2.2-682-g24eb3d6,
Sergey Poznyakoff <=