Re: [Pan-devel] potential memory improvement

pan-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Pan-devel] potential memory improvement

From:	Charles Kerr
Subject:	Re: [Pan-devel] potential memory improvement
Date:	Sun, 13 May 2007 01:18:31 -0500
User-agent:	Thunderbird 1.5.0.10 (X11/20070302)

address@hidden wrote:

I'm running on x64.


Attached is a patch I was working on last weekend that makes a serious
dent in memory use on the x64 architecture.  The improvements aren't
as significant on 32-bit machines, but it does speed up loading groups
slightly.

This patch is for svn r280.

Charles

Index: pan/gui/gui.cc
===================================================================
--- pan/gui/gui.cc      (revision 278)
+++ pan/gui/gui.cc      (working copy)
@@ -1389,22 +1389,22 @@
     char * date = date_maker.get_date_string (a->time_posted);
 
     // article parts
-    std::vector<int> missing_parts;
-    int i = 0;
-    foreach_const (Article::parts_t, a->parts, it) {
-      ++i;
-      if (it->empty())
-        missing_parts.push_back (i);
-    }
+    typedef Parts::number_t number_t;
+    std::set<number_t> missing_parts;
+    const number_t n_parts = a->get_total_part_count ();
+    for (number_t i=1; i<=n_parts; ++i)
+      missing_parts.insert (i);
+    for (Article::part_iterator it(a->pbegin()), e(a->pend()); it!=e; ++it)
+      missing_parts.erase (it.number());
     char msg[512];
     *msg = '\0';
     std::ostringstream s;
-    if (i > 1) {
+    if (n_parts > 1) {
       if (missing_parts.empty())
-        g_snprintf (msg, sizeof(msg), _("This article has all %d parts."), i);
+        g_snprintf (msg, sizeof(msg), _("This article has all %d parts."), 
(int)n_parts);
       else
-        g_snprintf (msg, sizeof(msg), _("This article is missing %d of its %d 
parts:"), (int)missing_parts.size(), i);
-      foreach_const (std::vector<int>, missing_parts, it)
+        g_snprintf (msg, sizeof(msg), _("This article is missing %d of its %d 
parts:"), (int)missing_parts.size(), (int)n_parts);
+      foreach_const (std::set<number_t>, missing_parts, it)
         s << ' ' << *it;
     }
 
Index: pan/gui/server-ui.cc
===================================================================
--- pan/gui/server-ui.cc        (revision 278)
+++ pan/gui/server-ui.cc        (working copy)
@@ -253,7 +253,7 @@
     HIG::workarea_add_section_spacer (t, row, 2);
 
     const int DEFAULT_MAX_PER_SERVER (4);
-    a = GTK_ADJUSTMENT (gtk_adjustment_new (1.0, 1.0, DEFAULT_MAX_PER_SERVER, 
1.0, 1.0, 1.0));
+    a = GTK_ADJUSTMENT (gtk_adjustment_new (1.0, 0.0, DEFAULT_MAX_PER_SERVER, 
1.0, 1.0, 1.0));
     d->connection_limit_spin = w = gtk_spin_button_new (GTK_ADJUSTMENT(a), 
1.0, 0u);
     HIG::workarea_add_row (t, &row, _("Connection _Limit:"), w, NULL);
 
Index: pan/tasks/nzb-test.cc
===================================================================
--- pan/tasks/nzb-test.cc       (revision 278)
+++ pan/tasks/nzb-test.cc       (working copy)
@@ -70,12 +70,16 @@
   const Article a (dynamic_cast<TaskArticle*>(tasks[0])->get_article());
   check (a.author == "Joe Bloggs <address@hidden>")
   check (a.subject == "Here's your file!  abc-mr2a.r01 (1/2)")
-  check (a.parts.size() == 2)
+  check (a.get_total_part_count() == 2)
+  check (a.get_found_part_count() == 2)
   check (a.message_id == "<address@hidden>")
   check (a.time_posted == 1071674882)
   check (a.xref.size() == 3)
-  check (a.parts[0].bytes == 102394)
-  check (a.parts[0].get_message_id(a.message_id) == "<address@hidden>")
+  std::string part_mid;
+  Parts::bytes_t part_bytes;
+  check (a.get_part_info (1u, part_mid, part_bytes))
+  check (part_bytes == 102394)
+  check (part_mid == "<address@hidden>")
   Quark group;
   unsigned long number;
   check (a.xref.find ("cox", group, number))
@@ -84,7 +88,8 @@
   check (a.xref.find ("giganews", group, number))
   check (group=="alt.binaries.newzbin" || group=="alt.binaries.mojo")
   check (number==0)
-  check (a.parts[1].bytes == 4501)
+  check (a.get_part_info (2, part_mid, part_bytes))
+  check (part_bytes == 4501)
 
   std::ostringstream out_stream;
   NZB :: nzb_to_xml (out_stream, tasks);
Index: pan/tasks/nzb.cc
===================================================================
--- pan/tasks/nzb.cc    (revision 278)
+++ pan/tasks/nzb.cc    (working copy)
@@ -40,7 +40,6 @@
 namespace
 {
   typedef std::vector<Task*> tasks_t;
-  typedef std::map<unsigned int,Article::Part> number_to_part_t;
 
   struct MyContext
   {
@@ -48,7 +47,7 @@
     std::string text;
     std::string path;
     Article a;
-    number_to_part_t parts;
+    PartBatch parts;
     tasks_t tasks;
     ArticleCache& cache;
     ArticleRead& read;
@@ -66,7 +65,6 @@
       groups.clear ();
       text.clear ();
       path.clear ();
-      parts.clear ();
       a.clear ();
       bytes = 0;
       number = 0;
@@ -115,22 +113,19 @@
 
     else if (!strcmp(element_name, "segment") && mc.number && 
!mc.text.empty()) {
       const std::string mid ("<" + mc.text + ">");
-      if (mc.a.message_id.empty())
-          mc.a.message_id = mid;
-      Article::Part& part (mc.parts[mc.number]);
-      part.bytes = mc.bytes;
-      part.set_message_id (mc.a.message_id, mid);
+      if (mc.a.message_id.empty()) {
+        mc.a.message_id = mid;
+        mc.parts.init (mid);
+      }
+      mc.parts.add_part (mc.number, mid, mc.bytes);
     }
 
     else if (!strcmp(element_name,"path"))
       mc.path = mc.text;
 
-    else if (!mc.parts.empty() && !strcmp (element_name, "file"))
+    else if (!strcmp (element_name, "file"))
     {
-      // populate mc.a.parts
-      mc.a.set_part_count (mc.parts.rbegin()->first);
-      foreach (number_to_part_t, mc.parts, it)
-        mc.a.get_part(it->first).swap (it->second);
+      mc.a.set_parts (mc.parts);
 
       foreach_const (quarks_t, mc.groups, git) {
         quarks_t servers;
@@ -265,17 +260,11 @@
 
     // now for the parts...
     out << indent(depth++) << "<segments>\n";
-    int part_number (0);
-    foreach_const (Article::parts_t, a.parts, pit)
+    for (Article::part_iterator it(a.pbegin()), end(a.pend()); it!=end; ++it)
     {
-      ++part_number;
+      std::string mid = it.mid ();
 
-      // incomplete multipart...
-      if (pit->empty())
-        continue;
-
       // remove the surrounding < > as per nzb spec
-      std::string mid (pit->get_message_id (a.message_id));
       if (mid.size()>=2 && mid[0]=='<') {
         mid.erase (0, 1);
         mid.resize (mid.size()-1);
@@ -283,8 +272,8 @@
 
       // serialize this part
       out << indent(depth)
-          << "<segment" << " bytes=\"" << pit->bytes << '"'
-                        << " number=\"" << part_number << '"'
+          << "<segment" << " bytes=\"" << it.bytes() << '"'
+                        << " number=\"" << it.number() << '"'
                         << ">";
       escaped(out, mid);
       out  << "</segment>\n";
Index: pan/tasks/task-article.cc
===================================================================
--- pan/tasks/task-article.cc   (revision 278)
+++ pan/tasks/task-article.cc   (working copy)
@@ -66,7 +66,8 @@
                             Progress::Listener        * listener,
                             SaveMode                    save_mode,
                             const Quark               & save_path):
-  Task (save_path.empty() ? "BODIES" : "SAVE", get_description (article, 
!save_path.empty())),
+  Task (save_path.empty() ? "BODIES" : "SAVE",
+        get_description (article, !save_path.empty())),
   _save_path (save_path),
   _server_rank (server_rank),
   _cache (cache),
@@ -96,27 +97,24 @@
   }
 
   unsigned long need_bytes(0), all_bytes(0);
-  foreach_const (Article::parts_t, article.parts, it)
+  for (Article::part_iterator i(article.pbegin()), e(article.pend()); i!=e; 
++i)
   {
-    all_bytes += it->bytes;
+    all_bytes += i.bytes();
+    const std::string mid (i.mid ());
+    if (cache.contains (mid))
+      continue;
 
-    const std::string mid (it->get_message_id (article.message_id));
-    if (!it->empty() && !cache.contains (mid))
-    {
-      need_bytes += it->bytes;
-
-      Needed n;
-      n.part = *it;
-
-      // if we can keep the article-number from the main xref, do so.
-      // otherwise plug in `0' as a null article-number and we'll use
-      // `ARTICLE message-id' instead when talking to the server.
-      foreach_const (quarks_t, servers, sit)
-        foreach_const (quarks_t, groups, git)
-          n.xref.insert (*sit, *git, mid==article.message_id.to_string() ? 
article.xref.find_number(*sit,*git) : 0);
-
-      _needed.push_back (n);
-    }
+    need_bytes += i.bytes();
+    Needed n;
+    n.message_id = mid;
+    n.bytes = i.bytes();
+    // if we can keep the article-number from the main xref, do so.
+    // otherwise plug in `0' as a null article-number and we'll use
+    // `ARTICLE message-id' instead when talking to the server.
+    foreach_const (quarks_t, servers, sit)
+      foreach_const (quarks_t, groups, git)
+        n.xref.insert (*sit, *git, mid==article.message_id.to_string() ? 
article.xref.find_number(*sit,*git) : 0);
+    _needed.push_back (n);
   }
 
   // initialize our progress status...
@@ -180,7 +178,7 @@
 {
   unsigned long bytes (0);
   foreach_const (needed_t, _needed, it) // parts not fetched yet...
-    bytes += (it->part.bytes - it->buf.size());
+    bytes += (it->bytes - it->buf.size());
   return bytes;
 }
 
@@ -215,7 +213,7 @@
     if (number)
       nntp->article (group, number, this);
     else
-      nntp->article (group, 
needed->part.get_message_id(_article.message_id).c_str(), this);
+      nntp->article (group, needed->message_id.c_str(), this);
     update_work ();
   }
 }
@@ -263,11 +261,11 @@
 
   if (health == OK) { // if download succeeded, save it in the cache
     const StringView view (&it->buf.front(), it->buf.size());
-    if (!_cache.add (it->part.get_message_id(_article.message_id), view))
+    if (!_cache.add (it->message_id, view))
       health = ERR_LOCAL;
   }
 
-  // std::cerr << LINE_ID << ' ' << 
it->part.get_message_id(_article.message_id) << " from " << nntp->_server << ": 
health " << health << std::endl;
+  // std::cerr << LINE_ID << ' ' << it->message_id << " from " << 
nntp->_server << ": health " << health << std::endl;
 
   switch (health)
   {
@@ -289,7 +287,7 @@
         Log :: add_err_va (
           _("Article \"%s\" is incomplete -- the news server(s) don't have 
part %s"),
           _article.subject.c_str(),
-          it->part.get_message_id(_article.message_id).c_str());
+          it->message_id.c_str());
         _needed.erase (it);
       }
       break;
Index: pan/tasks/task-article.h
===================================================================
--- pan/tasks/task-article.h    (revision 278)
+++ pan/tasks/task-article.h    (working copy)
@@ -100,7 +100,8 @@
 
     private:
       struct Needed {
-        Article::Part part;
+        std::string message_id;
+        unsigned long bytes;
         NNTP * nntp;
         Xref xref;
         typedef std::vector<char> buf_t;
Index: pan/data-impl/xover.cc
===================================================================
--- pan/data-impl/xover.cc      (revision 278)
+++ pan/data-impl/xover.cc      (working copy)
@@ -271,7 +271,7 @@
       const Article* candidate (h->find_article (candidate_mid));
       if (candidate
           && (candidate->author == author)
-          && ((int)candidate->parts.size() == part_count))
+          && ((int)candidate->get_total_part_count() == part_count))
         art_mid = candidate_mid;
     }
   }
@@ -308,19 +308,15 @@
 
   {
     const int number (part_count<2 ? 1 : part_index);
-    Article::Part part;
-    part.bytes = byte_count;
-    part.set_message_id (art_mid, message_id);
-    load_part (group, art_mid, number, line_count, part);
+    load_part (group, art_mid,
+               number, message_id,
+               line_count, byte_count);
   }
 
   if (!workarea._added_batch.count(art_mid))
     workarea._changed_batch.insert(art_mid);
 
-  /**
-  ***  Maybe flush the batched changes
-  **/
-
+  // maybe flush the batched changes
   if ((time(0) - workarea._last_flush_time) >= 10)
     xover_flush (group);
 
Index: pan/data-impl/data-impl.h
===================================================================
--- pan/data-impl/data-impl.h   (revision 278)
+++ pan/data-impl/data-impl.h   (working copy)
@@ -297,7 +297,11 @@
           local GroupHeaders.  As a side-effect, the value of `part'
           after this call is undefined.  This is an ugly interface,
           but it's fast and only called by one client. */
-      void load_part (const Quark& g, const Quark& mid, size_t number, size_t 
lines, Article::Part& part);
+      void load_part (const Quark& g, const Quark& mid,
+                      int number,
+                      const StringView& part_mid,
+                      unsigned long lines,
+                      unsigned long bytes);
 
       void load_headers (const DataIO&, const Quark& group);
       void save_headers (DataIO&, const Quark& group) const;
Index: pan/data-impl/headers.cc
===================================================================
--- pan/data-impl/headers.cc    (revision 278)
+++ pan/data-impl/headers.cc    (working copy)
@@ -379,20 +379,17 @@
 void
 DataImpl :: load_part (const Quark          & group,
                        const Quark          & mid,
-                       size_t                 number,
-                       size_t                 lines,
-                       Article::Part        & new_part)
+                       int                    number,
+                       const StringView     & part_mid,
+                       unsigned long          lines,
+                       unsigned long          bytes)
 {
    GroupHeaders * h = get_group_headers (group);
    Article * a (h->find_article (mid));
+   pan_return_if_fail (a != 0);
 
-   pan_return_if_fail (a != NULL);
-
-   Article::Part& old_part (a->get_part (number));
-   if (old_part.empty()) {
-       old_part.swap (new_part);
-       a->lines += lines;
-   }
+   if (a->add_part (number, part_mid, bytes))
+     a->lines += lines;
 }
 
 namespace
@@ -472,6 +469,7 @@
       in->getline (line);
       //const unsigned long article_qty = view_to_ul (line); /* unused */
       const time_t now (time (0));
+      PartBatch part_batch;
       for (;;)
       {
         // look for the beginning of an Article record.
@@ -513,7 +511,7 @@
           if (tok.pop_token(server_tok,':') && tok.pop_token(group_tok,':')) {
             target_it->server = server_tok;
             target_it->group = group_tok.len==1 ? 
xref_lookup[(int)*group_tok.str] : Quark(group_tok);
-            target_it->number = view_to_ul (tok);
+            target_it->number = atoi (tok.str);
             const Server * server (find_server (target_it->server));
             if (server && ((!server->article_expiration_age) || (days_old <= 
server->article_expiration_age)))
               ++target_it;
@@ -534,11 +532,10 @@
           s.ltrim(); s.pop_token (tok); total_part_count = atoi(tok.str);
           s.ltrim(); s.pop_token (tok); found_part_count = atoi(tok.str);
         }
-        s.ltrim(); if (s.pop_token (tok)) a.lines = view_to_ul (tok); // this 
field was added in 0.115
-        if (!expired)
-          a.set_part_count (total_part_count);
+        s.ltrim(); if (s.pop_token (tok)) a.lines = atoi (tok.str); // this 
field was added in 0.115
 
         // found parts...
+        part_batch.init (a.message_id, total_part_count, found_part_count);
         for (int i(0), count(found_part_count); i<count; ++i)
         {
           const bool gotline (in->getline (s));
@@ -548,19 +545,25 @@
             StringView tok;
             s.ltrim ();
             s.pop_token (tok);
-            const unsigned long number (view_to_ul (tok));
-            if (!a.has_part(number)) { // corrupted entry
+            const int number (atoi (tok.str));
+            if (number > total_part_count) { // corrupted entry
               expired = true;
               break;
             }
-            Article::Part& p (a.get_part (number));
+            StringView part_mid;
+            unsigned long part_bytes (0);
             s.ltrim ();
-            s.pop_token (tok);
-            p.set_message_id (a.message_id, (tok.len==1 && *tok.str=='"') ? 
a.message_id.to_view() : tok);
-            s.pop_token(tok); p.bytes = view_to_ul (tok);
-            if (s.pop_token(tok)) a.lines += view_to_ul (tok); // this field 
was removed in 0.115
+            s.pop_token (part_mid);
+            if (part_mid.len==1 && *part_mid.str=='"')
+              part_mid = a.message_id.to_view ();
+            s.pop_token(tok); part_bytes = view_to_ul (tok);
+            part_batch.add_part (number, part_mid, part_bytes);
+
+            if (s.pop_token(tok)) a.lines += atoi (tok.str); // this field was 
removed in 0.115
           }
         }
+        if (!expired)
+          a.set_parts (part_batch);
 
         // add the article to the group if it hasn't all expired
         if (expired)
@@ -737,7 +740,6 @@
     // header section
     *out << articles.size() << endl;
     std::string references;
-    std::string part_mid;
     foreach_const (std::vector<Article*>, articles, ait)
     {
       ++article_count;
@@ -764,32 +766,20 @@
 
       // is_binary [total_part_count found_part_count]
       *out << (a->is_binary ? 't' : 'f');
-      if (a->is_binary) {
-        int foundPartCount (0);
-        foreach_const (Article::parts_t, a->parts, pit)
-          if (!pit->empty())
-            ++foundPartCount;
-        *out << ' ' << a->get_part_count() << ' ' << foundPartCount;
-      }
+      if (a->is_binary)
+        *out << ' ' << a->get_total_part_count() << ' ' << 
a->get_found_part_count();
       *out << ' ' << a->lines << '\n';
 
       // one line per foundPartCount (part-index message-id bytes lines)
-      int number (0);
-      bool first (true);
-      foreach_const (Article::parts_t, a->parts, pit) {
-        ++number;
-        if (!pit->empty()) {
-          ++part_count;
-          const Article::Part& p (*pit);
-          *out << '\t' << number << ' ';
-          p.get_message_id (message_id, part_mid);
-          if (message_id.to_view() == part_mid)
-            *out << '"';
-          else
-            *out << part_mid;
-          *out << ' ' << p.bytes << '\n';
-           first = false;
-        }
+      for (Article::part_iterator pit(a->pbegin()), end(a->pend()); pit!=end; 
++pit) {
+        ++part_count;
+        const std::string& part_mid = pit.mid ();
+        *out << '\t' << pit.number() << ' ';
+        if (message_id.to_view() == part_mid)
+          *out << '"';
+        else
+          *out << part_mid;
+        *out << ' ' << pit.bytes() << '\n';
       }
     }
 
Index: pan/data/parts.h
===================================================================
--- pan/data/parts.h    (revision 0)
+++ pan/data/parts.h    (revision 0)
@@ -0,0 +1,165 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * Pan - A Newsreader for Gtk+
+ * Copyright (C) 2002-2007  Charles Kerr <address@hidden>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __Parts_h__
+#define __Parts_h__
+
+#include <ctime>
+#include <vector>
+#include <stdint.h>
+#include <pan/general/sorted-vector.h>
+#include <pan/general/quark.h>
+#include <pan/data/xref.h>
+
+namespace pan
+{
+  class PartBatch;
+
+  /**
+   * Represents a collection of a multipart post's parts.
+   *
+   * Parts are by far the biggest memory hog in large groups,
+   * so their layout has been tweaked to squeeze out extra bytes.
+   * This makes the code fairly inelegant, so the low-level bits
+   * have been encapsulated in this class.
+   *
+   * @see Article
+   * @see PartBatch
+   * @ingroup data
+   */
+  class Parts
+  {
+    public:
+      typedef uint16_t number_t;
+      typedef uint16_t mid_offset_t;
+      typedef uint32_t bytes_t;
+
+    private:
+      struct Part {
+        mid_offset_t mid_offset;
+        number_t number;
+        bytes_t bytes;
+        bool operator< (const Part& that) const { return number < that.number; 
}
+      };
+      number_t n_parts_found;
+      number_t n_parts_total;
+      bytes_t part_mid_buf_len;
+      Part * parts;
+      char * part_mid_buf;
+      void unpack_message_id (std::string  & setme,
+                              const Part   * p,
+                              const Quark  & reference_mid) const;
+
+    public:
+      class const_iterator {
+          const Quark& reference_mid;
+          const Parts& parts;
+          mutable std::string midbuf;
+          int n;
+          const Part* get_part() const { return parts.parts + n; }
+        public:
+          const_iterator (const Quark& q,
+                          const Parts& p,
+                          int pos=0): reference_mid(q), parts(p), n(pos) {}
+          bool operator== (const const_iterator& that)
+                           const { return &parts==&that.parts && n == that.n; }
+          bool operator!= (const const_iterator& that)
+                           const { return !(*this == that); }
+          const_iterator& operator++() { ++n; return *this; }
+          const_iterator operator++(int)
+                             { const_iterator tmp=*this; ++*this; return tmp; }
+          bytes_t bytes() const { return get_part()->bytes; }
+          number_t number() const { return get_part()->number; }
+          const std::string& mid() const {
+            parts.unpack_message_id (midbuf, get_part(), reference_mid);
+            return midbuf;
+          }
+      };
+
+      const_iterator begin(const Quark& q) const
+                                        { return const_iterator(q,*this,0); }
+      const_iterator end(const Quark& q) const
+                            { return const_iterator(q,*this,n_parts_found); }
+
+    public:
+      Parts ();
+      ~Parts () { clear(); }
+      Parts (const Parts& that);
+      Parts& operator= (const Parts&);
+
+    public:
+      void set_part_count (number_t num) { n_parts_total = num; }
+      number_t get_total_part_count () const { return n_parts_total; }
+      number_t get_found_part_count () const { return n_parts_found; }
+      void clear ();
+      bool add_part (number_t num,
+                     const StringView& mid,
+                     bytes_t bytes,
+                     const Quark& reference_mid);
+      void set_parts (const PartBatch& parts);
+
+    public:
+      bool get_part_info (number_t        num,
+                          std::string   & mid,
+                          bytes_t       & bytes,
+                          const Quark   & reference_mid) const;
+  };
+
+  /**
+   * Batches together parts of a multipart post for
+   * efficient adding to a Parts object.
+   *
+   * @see Parts
+   * @ingroup data
+   */
+  class PartBatch
+  {
+    public:
+      typedef Parts::number_t number_t;
+      typedef Parts::bytes_t bytes_t;
+
+    private:
+
+      friend class Parts;
+      struct Part {
+        number_t number;
+        bytes_t bytes;
+        size_t len_used;
+        size_t len_alloced;
+        char * packed_mid;
+        Part(): number(0), bytes(0),
+                len_used(0), len_alloced(0), packed_mid(0) {}
+        ~Part() { delete [] packed_mid; }
+        Part (const Part&);
+        Part& operator= (const Part&);
+      };
+      Quark reference_mid;
+      number_t n_parts_found;
+      number_t n_parts_total;
+      typedef std::vector<Part> parts_t;
+      parts_t parts;
+      size_t packed_mids_len;
+
+    public:
+      void init (const Quark& mid, number_t n_parts=0, number_t n_found=0);
+      void add_part (number_t num, const StringView& mid, bytes_t bytes);
+  };
+}
+
+#endif
Index: pan/data/article-test.cc
===================================================================
--- pan/data/article-test.cc    (revision 278)
+++ pan/data/article-test.cc    (working copy)
@@ -8,19 +8,22 @@
 
 #define test_message_id(s) \
   in = s; \
-  part.set_message_id (key, in); \
-  out = part.get_message_id (key); \
-  check (in == out)
+  a.add_part (++i, in, 1); \
+  check (a.get_part_info (i, out, bytes)) \
+  check (bytes == 1) \
+  check (out == in)
 
 int
 main (void)
 {
+  Parts::number_t i = 0;
+  Parts::bytes_t bytes;
   Quark key = "<address@hidden>";
   std::string in, out;
 
   Article a;
+  a.message_id = key;
   a.set_part_count (2);
-  Article::Part& part (a.get_part(2));
 
   test_message_id (key.to_string()); // two equal strings
   test_message_id ("<address@hidden>"); // extra in the middle...
Index: pan/data/article.cc
===================================================================
--- pan/data/article.cc (revision 278)
+++ pan/data/article.cc (working copy)
@@ -41,16 +41,13 @@
   else if (!is_line_count_ge(250) && has_reply_leader(subject.to_view()))
     part_state = SINGLE;
 
-  // someone's posted a "000/124" info message
-  else if (parts.empty())
-    part_state = SINGLE;
-
-  // a multipart
-  else {
-    part_state = COMPLETE;
-    for (Article::parts_t::const_iterator it(parts.begin()), end(parts.end()); 
part_state==COMPLETE && it!=end; ++it)
-      if (it->empty())
-        part_state = INCOMPLETE;
+  else  {
+    const Parts::number_t total = parts.get_total_part_count ();
+    const Parts::number_t found = parts.get_found_part_count ();
+    if (!found) // someone's posted a "000/124" info message
+      part_state = SINGLE;
+    else // a multipart..
+      part_state = total==found ? COMPLETE : INCOMPLETE;
   }
 
   return part_state;
@@ -66,25 +63,6 @@
 }
 
 bool
-Article :: is_byte_count_ge (unsigned long test) const
-{
-  unsigned long bytes (0);
-  foreach_const (parts_t, parts, it)
-    if (((bytes += it->bytes)) >= test)
-      return true;
-  return false;
-}
-
-unsigned long
-Article :: get_byte_count () const
-{
-  unsigned long bytes (0);
-  foreach_const (parts_t, parts, it)
-    bytes += it->bytes;
-  return bytes;
-}
-
-bool
 Article :: has_reply_leader (const StringView& s)
 {
   return !s.empty()
@@ -95,166 +73,31 @@
     && s.str[3]==' ';
 }
 
-void
-Article :: set_part_count (unsigned int count)
+unsigned long
+Article :: get_byte_count () const
 {
-  assert (count > 0);
-  parts.resize (count);
+  unsigned long bytes = 0;
+  for (part_iterator it(pbegin()), end(pend()); it!=end; ++it)
+    bytes += it.bytes();
+  return bytes;
 }
 
-Article :: Part&
-Article :: get_part (unsigned int number)
+bool
+Article :: is_byte_count_ge (unsigned long test) const
 {
-  //std::cerr << LINE_ID << " parts.size() " << parts.size() << " number " << 
number << std::endl;
-  const unsigned int index (number - 1);
-  assert (parts.size() > index);
-  return parts[index];
+  unsigned long bytes = 0;
+  for (part_iterator it(pbegin()), end(pend()); it!=end; ++it)
+    if (((bytes += it.bytes())) >= test)
+      return true;
+  return false;
 }
 
-const Article :: Part&
-Article :: get_part (unsigned int number) const
-{
-  //std::cerr << LINE_ID << " parts.size() " << parts.size() << " number " << 
number << std::endl;
-  const unsigned int index (number - 1);
-  assert (parts.size() > index);
-  return parts[index];
-}
-
-/* Message-IDs in multipart articles are usually nearly identical, like this:
-**
-**   <address@hidden>
-**   <address@hidden>
-**   <address@hidden>
-**   <address@hidden>
-**   <address@hidden>
-**   <address@hidden>
-**
-** In large newsgroups, _many_ megs can be saved by stripping out common text.
-** We assign Article::Part's Message-ID by passing in its real Message-ID and
-** a reference key (which currently is always the owner Article's message_id).
-** The identical chars at the beginning (b) and end (e) of the two are counted.
-** b and e have an upper bound of UCHAR_MAX (255).
-** Article::Part::folded_message_id's first byte holds 'b', the second holds 
'e',
-** and the remainder is a zero-terminated string with the unique middle 
characters.
-*/
-
-void
-Article :: Part :: set_message_id (const Quark& key_mid, const StringView& mid)
-{
-  register size_t b=0, e=0;
-  register const char *k, *m;
-  const StringView& key (key_mid.to_view());
-
-  const size_t bmax = std::min (std::min (key.len, mid.len), 
size_t(UCHAR_MAX));
-  k = &key.front();
-  m = &mid.front();
-  for (; b!=bmax; ++b)
-    if (*k++ != *m++)
-      break;
-  
-  const size_t emax = bmax - b;
-  k = &key.back();
-  m = &mid.back();
-  for (; e!=emax; ++e)
-    if (*k-- != *m--)
-      break;
-
-  const size_t n_kept (mid.len - e - b);
-  char *str = g_new (char, n_kept+3); // b + e + '\0'
-  str[0] = (char) b;
-  str[1] = (char) e;
-  memcpy (str+2, mid.str+b, n_kept);
-  str[n_kept+2] = '\0';
-
-  g_free (packed_message_id);
-  packed_message_id = str;
-
-  // check our work
-  //assert (mid == get_message_id(key));
-}
-
-std::string
-Article :: Part :: get_message_id (const Quark& key_mid) const
-{
-  std::string setme;
-  get_message_id (key_mid, setme);
-  return setme;
-}
-
-void
-Article :: Part :: get_message_id (const Quark& key_mid, std::string& setme) 
const
-{
-  setme.clear ();
-
-  if (packed_message_id)
-  {
-    const StringView key (key_mid.to_view());
-    const char * pch (packed_message_id);
-    const int b ((unsigned char) *pch++);
-    const int e ((unsigned char) *pch++);
-    setme.append (key.str, 0, b);
-    setme.append (pch);
-    setme.append (key.str + key.len - e, e);
-  }
-}
-
-
-namespace
-{
-  char* clone_packed_mid (const char * mid)
-  {
-    // 2 to pick up b and e; 1 to pick up the '\0'
-    return mid ? (char*) g_memdup (mid, 2+strlen(mid+2)+1) : 0;
-  }
-}
-
-void
-Article :: Part :: clear ()
-{
-  bytes = 0;
-  g_free (packed_message_id);
-  packed_message_id = 0;
-}
-
-void
-Article :: Part :: swap (Part& that)
-{
-  std::swap (bytes, that.bytes);
-  std::swap (packed_message_id, that.packed_message_id);
-}
-
-Article :: Part :: Part (const Part& that):
-  packed_message_id (clone_packed_mid (that.packed_message_id)),
-  bytes (that.bytes)
-{
-}
-
-Article :: Part&
-Article :: Part :: operator= (const Part& that)
-{
-  bytes = that.bytes;
-  g_free (packed_message_id);
-  packed_message_id = clone_packed_mid (that.packed_message_id);
-  return *this;
-}
-
-/***
-****
-***/
-
 Article :: mid_sequence_t
 Article :: get_part_mids () const
 {
   mid_sequence_t mids;
-  mids.reserve (parts.size());
-
-  const Quark& key (message_id);
-  foreach_const (parts_t, parts, it) {
-    const Part& p (*it);
-    if (!p.empty())
-      mids.push_back (p.get_message_id(key));
-  }
-
+  for (part_iterator it(pbegin()), end(pend()); it!=end; ++it)
+    mids.push_back (it.mid());
   return mids;
 }
 
Index: pan/data/article.h
===================================================================
--- pan/data/article.h  (revision 278)
+++ pan/data/article.h  (working copy)
@@ -20,10 +20,11 @@
 #ifndef __Article_h__
 #define __Article_h__
 
-#include <vector>
 #include <ctime>
+#include <vector>
 #include <pan/general/sorted-vector.h>
 #include <pan/general/quark.h>
+#include <pan/data/parts.h>
 #include <pan/data/xref.h>
 
 namespace pan
@@ -46,74 +47,50 @@
   class Article
   {
     public:
+      void set_parts (const PartBatch& b) { parts.set_parts(b); }
+      bool add_part (Parts::number_t num, const StringView& mid, 
Parts::bytes_t bytes) { return parts.add_part(num,mid,bytes,message_id); }
+      void set_part_count (Parts::number_t num) { parts.set_part_count(num); }
+      Parts::number_t get_total_part_count () const { return 
parts.get_total_part_count(); }
+      Parts::number_t get_found_part_count () const { return 
parts.get_found_part_count(); }
+      bool get_part_info (Parts::number_t      num,
+                          std::string & mid,
+                          Parts::bytes_t     & bytes) const { return 
parts.get_part_info(num,mid,bytes,message_id); }
 
-      /**
-       * The fields of a single part of a multipart Article.
-       * For other fields, refer to the parent Article object.
-       */
-      class Part
-      {
-        private:
-          char * packed_message_id;
+      typedef Parts::const_iterator part_iterator;
+      part_iterator pbegin() const { return parts.begin(message_id); }
+      part_iterator pend() const { return parts.end(message_id); }
 
-        public:
-          unsigned long bytes;
-
-        public:
-          void set_message_id (const Quark& key, const StringView& mid);
-          std::string get_message_id (const Quark& key) const;
-          void get_message_id (const Quark& key, std::string& setme) const;
-
-          Part(): packed_message_id(0), bytes(0) {}
-          ~Part() { clear(); }
-
-          bool empty () const { return !packed_message_id; }
-          void swap (Part&);
-          void clear ();
-
-          Part (const Part&);
-          Part& operator= (const Part&);
-      };
-
-      typedef std::vector<Part> parts_t;
-
-      Part& get_part (unsigned int part_number);
-      const Part& get_part (unsigned int part_number) const;
-      void set_part_count (unsigned int);
       typedef std::vector<Quark> mid_sequence_t;
       mid_sequence_t get_part_mids () const;
+      enum PartState { SINGLE, INCOMPLETE, COMPLETE };
+      PartState get_part_state () const;
 
     public:
+      bool is_binary;
       Quark message_id;
       Quark author;
       Quark subject;
       static bool has_reply_leader (const StringView&);
 
     public:
-      enum PartState { SINGLE, INCOMPLETE, COMPLETE };
-      PartState get_part_state () const;
       unsigned int get_crosspost_count () const;
       unsigned long get_line_count () const { return lines; }
-      unsigned long get_byte_count () const;
       bool is_line_count_ge (size_t test) const { return lines >= test; }
-      bool is_byte_count_ge (unsigned long) const;
-      unsigned int get_part_count () const { return parts.size(); }
-      bool has_part (unsigned int part_number) const
-        { return part_number!=0 && get_part_count() >= part_number; }
+      unsigned long get_byte_count () const;
+      bool is_byte_count_ge (unsigned long test) const;
 
+      unsigned int lines;
+      int score;
       time_t time_posted;
-      unsigned long lines;
       Xref xref;
-      int score;
-      parts_t parts;
 
     public:
-      Article (): time_posted(0), lines(0), score(0), is_binary(false) {}
-      ~Article () {}
+      Article (): is_binary(false), lines(0), score(0), time_posted(0) {}
       void clear ();
 
-    public:
-      bool is_binary;
+    private:
+      Parts parts;
+
   };
 }
 
Index: pan/data/Makefile.am
===================================================================
--- pan/data/Makefile.am        (revision 278)
+++ pan/data/Makefile.am        (working copy)
@@ -8,6 +8,7 @@
  article.cc \
  article-cache.cc \
  data.cc \
+ parts.cc \
  xref.cc
 
 noinst_HEADERS = \
@@ -15,6 +16,7 @@
  article-cache.h \
  data.h \
  defgroup.h \
+ parts.h \
  server-info.h \
  xref.h 
 
Index: pan/data/parts.cc
===================================================================
--- pan/data/parts.cc   (revision 0)
+++ pan/data/parts.cc   (revision 0)
@@ -0,0 +1,362 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * Pan - A Newsreader for Gtk+
+ * Copyright (C) 2002-2007  Charles Kerr <address@hidden>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <config.h>
+#include <cassert>
+#include <algorithm>
+#include <pan/general/debug.h>
+#include <pan/general/foreach.h>
+#include "article.h"
+
+using namespace pan;
+
+#undef DEBUG
+
+/***
+****
+***/
+
+/* Message-IDs in multipart articles are usually nearly identical, like this:
+**
+**   <address@hidden>
+**   <address@hidden>
+**   <address@hidden>
+**   <address@hidden>
+**   <address@hidden>
+**   <address@hidden>
+**
+** In large newsgroups, _many_ megs can be saved by stripping out common text.
+** We assign Article::Part's Message-ID by passing in its real Message-ID and
+** a reference key (which currently is always the owner Article's message_id).
+** The identical chars at the beginning (b) and end (e) of the two are counted.
+** b and e have an upper bound of UCHAR_MAX (255).
+** Article::Part::folded_message_id's first byte holds 'b', the second holds 
'e',
+** and the remainder is a zero-terminated string with the unique middle 
characters.
+*/
+namespace
+{
+  /**
+   * pack_message_id is broken into two steps so that
+   * the client code can ensure the buffer is big enough
+   * before we write the mid to it.
+   */
+  struct Packer
+  {
+    int size() const { return midlen+3; } // three for for b, e, and '\0'
+    char b, e;
+    const char * mid;
+    size_t midlen;
+    Packer(): b(0), e(0), mid(0), midlen(0) {}
+    void pack (char* buf) const {
+      *buf++ = (char) b;
+      *buf++ = (char) e;
+      memcpy (buf, mid, midlen);
+      buf[midlen] = '\0';
+    }
+  };
+
+  void
+  pack_message_id (Packer            & setme,
+                   const StringView  & mid,
+                   const Quark       & key_mid)
+  {
+    register size_t b=0, e=0;
+    register const char *k, *m;
+    const StringView& key (key_mid.to_view());
+
+    const size_t bmax = std::min (std::min (key.len, mid.len), 
size_t(UCHAR_MAX));
+    k = &key.front();
+    m = &mid.front();
+    for (; b!=bmax; ++b)
+      if (*k++ != *m++)
+        break;
+    
+    const size_t emax = bmax - b;
+    k = &key.back();
+    m = &mid.back();
+    for (; e!=emax; ++e)
+      if (*k-- != *m--)
+        break;
+
+    setme.b = (char) b;
+    setme.e = (char) e;
+    setme.mid = mid.str + b;
+    setme.midlen = mid.len - e - b;
+  }
+
+  size_t
+  unpack_message_id (std::string       & setme,
+                     const StringView  & mid,
+                     const Quark       & key_mid)
+  {
+    setme.clear ();
+
+    if (mid.len >= 2)
+    {
+      const StringView& key (key_mid.to_view());
+      const int b (mid.str[0]);
+      const int e (mid.str[1]);
+      //std::cerr << LINE_ID << " b " << b << " e " << e << std::endl;
+      if (b) setme.append (key.str, 0, b);
+      setme.append (mid.str+2, mid.len-2);
+      if (e) setme.append (key.str + key.len - e, e);
+      //std::cerr << LINE_ID <<" unpacked [" << setme << ']' << std::endl;
+      return mid.len-2 + b + e;
+    }
+
+    return 0;
+  }
+}
+
+/***
+****
+***/
+
+Parts :: Parts ():
+  n_parts_found (0),
+  n_parts_total (0),
+  part_mid_buf_len (0),
+  parts (0),
+  part_mid_buf (0)
+{
+}
+
+void
+Parts :: clear ()
+{
+  delete [] parts;
+  parts = 0;
+
+  delete [] part_mid_buf;
+  part_mid_buf = 0;
+
+  part_mid_buf_len = 0;
+  n_parts_found = 0;
+  n_parts_total = 0;
+}
+
+Parts :: Parts (const Parts& that):
+  n_parts_found (0),
+  n_parts_total (0),
+  part_mid_buf_len (0),
+  parts (0),
+  part_mid_buf (0)
+{
+  *this = that;
+}
+
+Parts&
+Parts :: operator= (const Parts& that)
+{
+  clear ();
+
+  n_parts_found = that.n_parts_found;
+  n_parts_total = that.n_parts_total;
+  part_mid_buf_len = that.part_mid_buf_len;
+  part_mid_buf = new char [part_mid_buf_len];
+  memcpy (part_mid_buf, that.part_mid_buf, part_mid_buf_len);
+
+  Part * part = parts = new Part [n_parts_found];
+  const Part * that_part = that.parts;
+  for (size_t i=0; i<n_parts_found; ++i, ++part, ++that_part) {
+    part->number = that_part->number;
+    part->bytes = that_part->bytes;
+    part->mid_offset = that_part->mid_offset;
+  }
+
+  return *this;
+}
+
+/***
+****
+***/
+
+void
+Parts :: unpack_message_id (std::string  & setme,
+                                   const Part   * part,
+                                   const Quark  & reference_mid) const
+{
+  StringView v;
+  v.str = part_mid_buf + part->mid_offset;
+  v.len = 2 + strlen (v.str+2);
+  ::unpack_message_id (setme, v, reference_mid);
+}
+
+bool
+Parts :: get_part_info (number_t        part_number,
+                        std::string   & setme_message_id,
+                        bytes_t       & setme_byte_count,
+                        const Quark   & reference_mid) const
+{
+  Part findme;
+  findme.number = part_number;
+  Part * p = std::lower_bound (parts, parts+n_parts_found, findme);
+  if ((p == parts+n_parts_found) || (p->number != part_number))
+    return false;
+
+  unpack_message_id (setme_message_id, p, reference_mid);
+  setme_byte_count = p->bytes;
+  return true;
+}
+
+/***
+****
+***/
+
+void
+Parts :: set_parts (const PartBatch& p)
+{
+  clear ();
+
+  n_parts_found = p.n_parts_found;
+  n_parts_total = p.n_parts_total;
+  part_mid_buf_len = p.packed_mids_len;
+
+  char * pch = part_mid_buf = new char [part_mid_buf_len];
+  Part * part = parts = new Part [n_parts_found];
+  PartBatch::parts_t::const_iterator in = p.parts.begin();
+  for (size_t i=0, n=n_parts_found; i<n; ++i, ++in, ++part) {
+    part->number = in->number;
+    part->bytes = in->bytes;
+    part->mid_offset = pch - part_mid_buf;
+    memcpy (pch, in->packed_mid, in->len_used);
+    pch += in->len_used;
+  }
+
+  assert (pch == part_mid_buf + part_mid_buf_len);
+}
+
+bool
+Parts :: add_part (number_t            num,
+                   const StringView  & mid,
+                   bytes_t             bytes,
+                   const Quark       & reference_mid)
+{
+  Part findme;
+  findme.number = num;
+  Part * p = std::lower_bound (parts, parts+n_parts_found, findme);
+  if (p!=parts+n_parts_found && p->number == num) // we have it already
+    return false;
+
+  const int insert_index = (p - parts);
+  Part * buf = new Part [n_parts_found+1];
+  std::copy (parts, parts+insert_index, buf);
+  std::copy (parts+insert_index, parts+n_parts_found, buf+insert_index+1);
+  delete [] parts;
+  parts = buf;
+  buf += insert_index;
+  buf->number = num;
+  buf->bytes = bytes;
+  buf->mid_offset = part_mid_buf_len;
+  ++n_parts_found;
+
+  Packer packer;
+  pack_message_id (packer, mid, reference_mid);
+  const size_t midlen = packer.size ();
+  char * mbuf = new char [part_mid_buf_len + midlen];
+  memcpy (mbuf, part_mid_buf, part_mid_buf_len);
+  packer.pack (mbuf + part_mid_buf_len);
+  delete [] part_mid_buf;
+  part_mid_buf = mbuf;
+  part_mid_buf_len += midlen;
+
+#ifdef DEBUG
+  std::string test_mid;
+  bytes_t test_bytes;
+  assert (get_part_info (num, test_mid, test_bytes, reference_mid));
+  assert (test_bytes == bytes);
+  assert (mid == test_mid);
+#endif
+
+  return true; // yes, we added it
+}
+
+/****
+*****
+****/
+
+void
+PartBatch :: init (const Quark  & mid,
+                   number_t       n_parts_total,
+                   number_t       n_parts_found)
+{
+  this->reference_mid = mid;
+  this->packed_mids_len = 0;
+  this->n_parts_total = n_parts_total;
+  this->n_parts_found = 0; // they haven't been added yet
+
+  if (n_parts_found > parts.size())
+    parts.resize (n_parts_found);
+}
+
+void
+PartBatch :: add_part (number_t            number,
+                       const StringView  & mid,
+                       bytes_t             bytes)
+{
+  if (n_parts_found >= parts.size())
+    parts.resize (n_parts_found+1);
+
+  Part& p = *(&parts.front() + n_parts_found++);
+  p.number = number;
+  p.bytes = bytes;
+
+  Packer packer;
+  pack_message_id (packer, mid, reference_mid);
+  p.len_used = packer.size ();
+  if (p.len_alloced < p.len_used) {
+    delete [] p.packed_mid;
+    p.packed_mid = new char [p.len_used];
+    p.len_alloced = p.len_used;
+  }
+  packer.pack (p.packed_mid);
+  packed_mids_len += p.len_used;
+
+#ifdef DEBUG
+  // check our work
+  std::string tmp;
+  ::unpack_message_id (tmp, StringView(p.packed_mid,p.len_used-1), 
reference_mid);
+  assert (mid == tmp);
+#endif
+
+  if (n_parts_total < n_parts_found)
+      n_parts_total = n_parts_found;
+}
+
+PartBatch :: Part&
+PartBatch :: Part :: operator= (const PartBatch :: Part& that)
+{
+  number =  that.number;
+  bytes =  that.bytes;
+  len_used = len_alloced = that.len_used;
+  delete [] packed_mid;
+  packed_mid = new char [len_used];
+  memcpy (packed_mid, that.packed_mid, len_used);
+  return *this;
+}
+
+PartBatch :: Part :: Part (const PartBatch::Part& that):
+  number (that.number),
+  bytes (that.bytes),
+  len_used (that.len_used),
+  len_alloced (that.len_used),
+  packed_mid (new char [len_used])
+{
+  memcpy (packed_mid, that.packed_mid, len_used);
+}

[Prev in Thread]

Current Thread

[Next in Thread]

[Pan-devel] potential memory improvement, Ken Geis, 2007/05/09
- [Pan-devel] Re: potential memory improvement, Duncan, 2007/05/10
- Re: [Pan-devel] potential memory improvement, Charles Kerr, 2007/05/13
- Re: [Pan-devel] potential memory improvement, kgeis, 2007/05/13
  - Re: [Pan-devel] potential memory improvement, Charles Kerr <=
    - Re: [Pan-devel] potential memory improvement, Ken Geis, 2007/05/14
    - Re: [Pan-devel] potential memory improvement, Charles Kerr, 2007/05/14
    - Re: [Pan-devel] potential memory improvement, Ken Geis, 2007/05/15
    - Re: [Pan-devel] potential memory improvement, Charles Kerr, 2007/05/15

Prev by Date: Re: [Pan-devel] potential memory improvement
Next by Date: Re: [Pan-devel] potential memory improvement
Previous by thread: Re: [Pan-devel] potential memory improvement
Next by thread: Re: [Pan-devel] potential memory improvement
Index(es):
- Date
- Thread