[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Pan-devel] potential memory improvement
From: |
Charles Kerr |
Subject: |
Re: [Pan-devel] potential memory improvement |
Date: |
Sun, 13 May 2007 01:18:31 -0500 |
User-agent: |
Thunderbird 1.5.0.10 (X11/20070302) |
address@hidden wrote:
I'm running on x64.
Attached is a patch I was working on last weekend that makes a serious
dent in memory use on the x64 architecture. The improvements aren't
as significant on 32-bit machines, but it does speed up loading groups
slightly.
This patch is for svn r280.
Charles
Index: pan/gui/gui.cc
===================================================================
--- pan/gui/gui.cc (revision 278)
+++ pan/gui/gui.cc (working copy)
@@ -1389,22 +1389,22 @@
char * date = date_maker.get_date_string (a->time_posted);
// article parts
- std::vector<int> missing_parts;
- int i = 0;
- foreach_const (Article::parts_t, a->parts, it) {
- ++i;
- if (it->empty())
- missing_parts.push_back (i);
- }
+ typedef Parts::number_t number_t;
+ std::set<number_t> missing_parts;
+ const number_t n_parts = a->get_total_part_count ();
+ for (number_t i=1; i<=n_parts; ++i)
+ missing_parts.insert (i);
+ for (Article::part_iterator it(a->pbegin()), e(a->pend()); it!=e; ++it)
+ missing_parts.erase (it.number());
char msg[512];
*msg = '\0';
std::ostringstream s;
- if (i > 1) {
+ if (n_parts > 1) {
if (missing_parts.empty())
- g_snprintf (msg, sizeof(msg), _("This article has all %d parts."), i);
+ g_snprintf (msg, sizeof(msg), _("This article has all %d parts."),
(int)n_parts);
else
- g_snprintf (msg, sizeof(msg), _("This article is missing %d of its %d
parts:"), (int)missing_parts.size(), i);
- foreach_const (std::vector<int>, missing_parts, it)
+ g_snprintf (msg, sizeof(msg), _("This article is missing %d of its %d
parts:"), (int)missing_parts.size(), (int)n_parts);
+ foreach_const (std::set<number_t>, missing_parts, it)
s << ' ' << *it;
}
Index: pan/gui/server-ui.cc
===================================================================
--- pan/gui/server-ui.cc (revision 278)
+++ pan/gui/server-ui.cc (working copy)
@@ -253,7 +253,7 @@
HIG::workarea_add_section_spacer (t, row, 2);
const int DEFAULT_MAX_PER_SERVER (4);
- a = GTK_ADJUSTMENT (gtk_adjustment_new (1.0, 1.0, DEFAULT_MAX_PER_SERVER,
1.0, 1.0, 1.0));
+ a = GTK_ADJUSTMENT (gtk_adjustment_new (1.0, 0.0, DEFAULT_MAX_PER_SERVER,
1.0, 1.0, 1.0));
d->connection_limit_spin = w = gtk_spin_button_new (GTK_ADJUSTMENT(a),
1.0, 0u);
HIG::workarea_add_row (t, &row, _("Connection _Limit:"), w, NULL);
Index: pan/tasks/nzb-test.cc
===================================================================
--- pan/tasks/nzb-test.cc (revision 278)
+++ pan/tasks/nzb-test.cc (working copy)
@@ -70,12 +70,16 @@
const Article a (dynamic_cast<TaskArticle*>(tasks[0])->get_article());
check (a.author == "Joe Bloggs <address@hidden>")
check (a.subject == "Here's your file! abc-mr2a.r01 (1/2)")
- check (a.parts.size() == 2)
+ check (a.get_total_part_count() == 2)
+ check (a.get_found_part_count() == 2)
check (a.message_id == "<address@hidden>")
check (a.time_posted == 1071674882)
check (a.xref.size() == 3)
- check (a.parts[0].bytes == 102394)
- check (a.parts[0].get_message_id(a.message_id) == "<address@hidden>")
+ std::string part_mid;
+ Parts::bytes_t part_bytes;
+ check (a.get_part_info (1u, part_mid, part_bytes))
+ check (part_bytes == 102394)
+ check (part_mid == "<address@hidden>")
Quark group;
unsigned long number;
check (a.xref.find ("cox", group, number))
@@ -84,7 +88,8 @@
check (a.xref.find ("giganews", group, number))
check (group=="alt.binaries.newzbin" || group=="alt.binaries.mojo")
check (number==0)
- check (a.parts[1].bytes == 4501)
+ check (a.get_part_info (2, part_mid, part_bytes))
+ check (part_bytes == 4501)
std::ostringstream out_stream;
NZB :: nzb_to_xml (out_stream, tasks);
Index: pan/tasks/nzb.cc
===================================================================
--- pan/tasks/nzb.cc (revision 278)
+++ pan/tasks/nzb.cc (working copy)
@@ -40,7 +40,6 @@
namespace
{
typedef std::vector<Task*> tasks_t;
- typedef std::map<unsigned int,Article::Part> number_to_part_t;
struct MyContext
{
@@ -48,7 +47,7 @@
std::string text;
std::string path;
Article a;
- number_to_part_t parts;
+ PartBatch parts;
tasks_t tasks;
ArticleCache& cache;
ArticleRead& read;
@@ -66,7 +65,6 @@
groups.clear ();
text.clear ();
path.clear ();
- parts.clear ();
a.clear ();
bytes = 0;
number = 0;
@@ -115,22 +113,19 @@
else if (!strcmp(element_name, "segment") && mc.number &&
!mc.text.empty()) {
const std::string mid ("<" + mc.text + ">");
- if (mc.a.message_id.empty())
- mc.a.message_id = mid;
- Article::Part& part (mc.parts[mc.number]);
- part.bytes = mc.bytes;
- part.set_message_id (mc.a.message_id, mid);
+ if (mc.a.message_id.empty()) {
+ mc.a.message_id = mid;
+ mc.parts.init (mid);
+ }
+ mc.parts.add_part (mc.number, mid, mc.bytes);
}
else if (!strcmp(element_name,"path"))
mc.path = mc.text;
- else if (!mc.parts.empty() && !strcmp (element_name, "file"))
+ else if (!strcmp (element_name, "file"))
{
- // populate mc.a.parts
- mc.a.set_part_count (mc.parts.rbegin()->first);
- foreach (number_to_part_t, mc.parts, it)
- mc.a.get_part(it->first).swap (it->second);
+ mc.a.set_parts (mc.parts);
foreach_const (quarks_t, mc.groups, git) {
quarks_t servers;
@@ -265,17 +260,11 @@
// now for the parts...
out << indent(depth++) << "<segments>\n";
- int part_number (0);
- foreach_const (Article::parts_t, a.parts, pit)
+ for (Article::part_iterator it(a.pbegin()), end(a.pend()); it!=end; ++it)
{
- ++part_number;
+ std::string mid = it.mid ();
- // incomplete multipart...
- if (pit->empty())
- continue;
-
// remove the surrounding < > as per nzb spec
- std::string mid (pit->get_message_id (a.message_id));
if (mid.size()>=2 && mid[0]=='<') {
mid.erase (0, 1);
mid.resize (mid.size()-1);
@@ -283,8 +272,8 @@
// serialize this part
out << indent(depth)
- << "<segment" << " bytes=\"" << pit->bytes << '"'
- << " number=\"" << part_number << '"'
+ << "<segment" << " bytes=\"" << it.bytes() << '"'
+ << " number=\"" << it.number() << '"'
<< ">";
escaped(out, mid);
out << "</segment>\n";
Index: pan/tasks/task-article.cc
===================================================================
--- pan/tasks/task-article.cc (revision 278)
+++ pan/tasks/task-article.cc (working copy)
@@ -66,7 +66,8 @@
Progress::Listener * listener,
SaveMode save_mode,
const Quark & save_path):
- Task (save_path.empty() ? "BODIES" : "SAVE", get_description (article,
!save_path.empty())),
+ Task (save_path.empty() ? "BODIES" : "SAVE",
+ get_description (article, !save_path.empty())),
_save_path (save_path),
_server_rank (server_rank),
_cache (cache),
@@ -96,27 +97,24 @@
}
unsigned long need_bytes(0), all_bytes(0);
- foreach_const (Article::parts_t, article.parts, it)
+ for (Article::part_iterator i(article.pbegin()), e(article.pend()); i!=e;
++i)
{
- all_bytes += it->bytes;
+ all_bytes += i.bytes();
+ const std::string mid (i.mid ());
+ if (cache.contains (mid))
+ continue;
- const std::string mid (it->get_message_id (article.message_id));
- if (!it->empty() && !cache.contains (mid))
- {
- need_bytes += it->bytes;
-
- Needed n;
- n.part = *it;
-
- // if we can keep the article-number from the main xref, do so.
- // otherwise plug in `0' as a null article-number and we'll use
- // `ARTICLE message-id' instead when talking to the server.
- foreach_const (quarks_t, servers, sit)
- foreach_const (quarks_t, groups, git)
- n.xref.insert (*sit, *git, mid==article.message_id.to_string() ?
article.xref.find_number(*sit,*git) : 0);
-
- _needed.push_back (n);
- }
+ need_bytes += i.bytes();
+ Needed n;
+ n.message_id = mid;
+ n.bytes = i.bytes();
+ // if we can keep the article-number from the main xref, do so.
+ // otherwise plug in `0' as a null article-number and we'll use
+ // `ARTICLE message-id' instead when talking to the server.
+ foreach_const (quarks_t, servers, sit)
+ foreach_const (quarks_t, groups, git)
+ n.xref.insert (*sit, *git, mid==article.message_id.to_string() ?
article.xref.find_number(*sit,*git) : 0);
+ _needed.push_back (n);
}
// initialize our progress status...
@@ -180,7 +178,7 @@
{
unsigned long bytes (0);
foreach_const (needed_t, _needed, it) // parts not fetched yet...
- bytes += (it->part.bytes - it->buf.size());
+ bytes += (it->bytes - it->buf.size());
return bytes;
}
@@ -215,7 +213,7 @@
if (number)
nntp->article (group, number, this);
else
- nntp->article (group,
needed->part.get_message_id(_article.message_id).c_str(), this);
+ nntp->article (group, needed->message_id.c_str(), this);
update_work ();
}
}
@@ -263,11 +261,11 @@
if (health == OK) { // if download succeeded, save it in the cache
const StringView view (&it->buf.front(), it->buf.size());
- if (!_cache.add (it->part.get_message_id(_article.message_id), view))
+ if (!_cache.add (it->message_id, view))
health = ERR_LOCAL;
}
- // std::cerr << LINE_ID << ' ' <<
it->part.get_message_id(_article.message_id) << " from " << nntp->_server << ":
health " << health << std::endl;
+ // std::cerr << LINE_ID << ' ' << it->message_id << " from " <<
nntp->_server << ": health " << health << std::endl;
switch (health)
{
@@ -289,7 +287,7 @@
Log :: add_err_va (
_("Article \"%s\" is incomplete -- the news server(s) don't have
part %s"),
_article.subject.c_str(),
- it->part.get_message_id(_article.message_id).c_str());
+ it->message_id.c_str());
_needed.erase (it);
}
break;
Index: pan/tasks/task-article.h
===================================================================
--- pan/tasks/task-article.h (revision 278)
+++ pan/tasks/task-article.h (working copy)
@@ -100,7 +100,8 @@
private:
struct Needed {
- Article::Part part;
+ std::string message_id;
+ unsigned long bytes;
NNTP * nntp;
Xref xref;
typedef std::vector<char> buf_t;
Index: pan/data-impl/xover.cc
===================================================================
--- pan/data-impl/xover.cc (revision 278)
+++ pan/data-impl/xover.cc (working copy)
@@ -271,7 +271,7 @@
const Article* candidate (h->find_article (candidate_mid));
if (candidate
&& (candidate->author == author)
- && ((int)candidate->parts.size() == part_count))
+ && ((int)candidate->get_total_part_count() == part_count))
art_mid = candidate_mid;
}
}
@@ -308,19 +308,15 @@
{
const int number (part_count<2 ? 1 : part_index);
- Article::Part part;
- part.bytes = byte_count;
- part.set_message_id (art_mid, message_id);
- load_part (group, art_mid, number, line_count, part);
+ load_part (group, art_mid,
+ number, message_id,
+ line_count, byte_count);
}
if (!workarea._added_batch.count(art_mid))
workarea._changed_batch.insert(art_mid);
- /**
- *** Maybe flush the batched changes
- **/
-
+ // maybe flush the batched changes
if ((time(0) - workarea._last_flush_time) >= 10)
xover_flush (group);
Index: pan/data-impl/data-impl.h
===================================================================
--- pan/data-impl/data-impl.h (revision 278)
+++ pan/data-impl/data-impl.h (working copy)
@@ -297,7 +297,11 @@
local GroupHeaders. As a side-effect, the value of `part'
after this call is undefined. This is an ugly interface,
but it's fast and only called by one client. */
- void load_part (const Quark& g, const Quark& mid, size_t number, size_t
lines, Article::Part& part);
+ void load_part (const Quark& g, const Quark& mid,
+ int number,
+ const StringView& part_mid,
+ unsigned long lines,
+ unsigned long bytes);
void load_headers (const DataIO&, const Quark& group);
void save_headers (DataIO&, const Quark& group) const;
Index: pan/data-impl/headers.cc
===================================================================
--- pan/data-impl/headers.cc (revision 278)
+++ pan/data-impl/headers.cc (working copy)
@@ -379,20 +379,17 @@
void
DataImpl :: load_part (const Quark & group,
const Quark & mid,
- size_t number,
- size_t lines,
- Article::Part & new_part)
+ int number,
+ const StringView & part_mid,
+ unsigned long lines,
+ unsigned long bytes)
{
GroupHeaders * h = get_group_headers (group);
Article * a (h->find_article (mid));
+ pan_return_if_fail (a != 0);
- pan_return_if_fail (a != NULL);
-
- Article::Part& old_part (a->get_part (number));
- if (old_part.empty()) {
- old_part.swap (new_part);
- a->lines += lines;
- }
+ if (a->add_part (number, part_mid, bytes))
+ a->lines += lines;
}
namespace
@@ -472,6 +469,7 @@
in->getline (line);
//const unsigned long article_qty = view_to_ul (line); /* unused */
const time_t now (time (0));
+ PartBatch part_batch;
for (;;)
{
// look for the beginning of an Article record.
@@ -513,7 +511,7 @@
if (tok.pop_token(server_tok,':') && tok.pop_token(group_tok,':')) {
target_it->server = server_tok;
target_it->group = group_tok.len==1 ?
xref_lookup[(int)*group_tok.str] : Quark(group_tok);
- target_it->number = view_to_ul (tok);
+ target_it->number = atoi (tok.str);
const Server * server (find_server (target_it->server));
if (server && ((!server->article_expiration_age) || (days_old <=
server->article_expiration_age)))
++target_it;
@@ -534,11 +532,10 @@
s.ltrim(); s.pop_token (tok); total_part_count = atoi(tok.str);
s.ltrim(); s.pop_token (tok); found_part_count = atoi(tok.str);
}
- s.ltrim(); if (s.pop_token (tok)) a.lines = view_to_ul (tok); // this
field was added in 0.115
- if (!expired)
- a.set_part_count (total_part_count);
+ s.ltrim(); if (s.pop_token (tok)) a.lines = atoi (tok.str); // this
field was added in 0.115
// found parts...
+ part_batch.init (a.message_id, total_part_count, found_part_count);
for (int i(0), count(found_part_count); i<count; ++i)
{
const bool gotline (in->getline (s));
@@ -548,19 +545,25 @@
StringView tok;
s.ltrim ();
s.pop_token (tok);
- const unsigned long number (view_to_ul (tok));
- if (!a.has_part(number)) { // corrupted entry
+ const int number (atoi (tok.str));
+ if (number > total_part_count) { // corrupted entry
expired = true;
break;
}
- Article::Part& p (a.get_part (number));
+ StringView part_mid;
+ unsigned long part_bytes (0);
s.ltrim ();
- s.pop_token (tok);
- p.set_message_id (a.message_id, (tok.len==1 && *tok.str=='"') ?
a.message_id.to_view() : tok);
- s.pop_token(tok); p.bytes = view_to_ul (tok);
- if (s.pop_token(tok)) a.lines += view_to_ul (tok); // this field
was removed in 0.115
+ s.pop_token (part_mid);
+ if (part_mid.len==1 && *part_mid.str=='"')
+ part_mid = a.message_id.to_view ();
+ s.pop_token(tok); part_bytes = view_to_ul (tok);
+ part_batch.add_part (number, part_mid, part_bytes);
+
+ if (s.pop_token(tok)) a.lines += atoi (tok.str); // this field was
removed in 0.115
}
}
+ if (!expired)
+ a.set_parts (part_batch);
// add the article to the group if it hasn't all expired
if (expired)
@@ -737,7 +740,6 @@
// header section
*out << articles.size() << endl;
std::string references;
- std::string part_mid;
foreach_const (std::vector<Article*>, articles, ait)
{
++article_count;
@@ -764,32 +766,20 @@
// is_binary [total_part_count found_part_count]
*out << (a->is_binary ? 't' : 'f');
- if (a->is_binary) {
- int foundPartCount (0);
- foreach_const (Article::parts_t, a->parts, pit)
- if (!pit->empty())
- ++foundPartCount;
- *out << ' ' << a->get_part_count() << ' ' << foundPartCount;
- }
+ if (a->is_binary)
+ *out << ' ' << a->get_total_part_count() << ' ' <<
a->get_found_part_count();
*out << ' ' << a->lines << '\n';
// one line per foundPartCount (part-index message-id bytes lines)
- int number (0);
- bool first (true);
- foreach_const (Article::parts_t, a->parts, pit) {
- ++number;
- if (!pit->empty()) {
- ++part_count;
- const Article::Part& p (*pit);
- *out << '\t' << number << ' ';
- p.get_message_id (message_id, part_mid);
- if (message_id.to_view() == part_mid)
- *out << '"';
- else
- *out << part_mid;
- *out << ' ' << p.bytes << '\n';
- first = false;
- }
+ for (Article::part_iterator pit(a->pbegin()), end(a->pend()); pit!=end;
++pit) {
+ ++part_count;
+ const std::string& part_mid = pit.mid ();
+ *out << '\t' << pit.number() << ' ';
+ if (message_id.to_view() == part_mid)
+ *out << '"';
+ else
+ *out << part_mid;
+ *out << ' ' << pit.bytes() << '\n';
}
}
Index: pan/data/parts.h
===================================================================
--- pan/data/parts.h (revision 0)
+++ pan/data/parts.h (revision 0)
@@ -0,0 +1,165 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * Pan - A Newsreader for Gtk+
+ * Copyright (C) 2002-2007 Charles Kerr <address@hidden>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __Parts_h__
+#define __Parts_h__
+
+#include <ctime>
+#include <vector>
+#include <stdint.h>
+#include <pan/general/sorted-vector.h>
+#include <pan/general/quark.h>
+#include <pan/data/xref.h>
+
+namespace pan
+{
+ class PartBatch;
+
+ /**
+ * Represents a collection of a multipart post's parts.
+ *
+ * Parts are by far the biggest memory hog in large groups,
+ * so their layout has been tweaked to squeeze out extra bytes.
+ * This makes the code fairly inelegant, so the low-level bits
+ * have been encapsulated in this class.
+ *
+ * @see Article
+ * @see PartBatch
+ * @ingroup data
+ */
+ class Parts
+ {
+ public:
+ typedef uint16_t number_t;
+ typedef uint16_t mid_offset_t;
+ typedef uint32_t bytes_t;
+
+ private:
+ struct Part {
+ mid_offset_t mid_offset;
+ number_t number;
+ bytes_t bytes;
+ bool operator< (const Part& that) const { return number < that.number;
}
+ };
+ number_t n_parts_found;
+ number_t n_parts_total;
+ bytes_t part_mid_buf_len;
+ Part * parts;
+ char * part_mid_buf;
+ void unpack_message_id (std::string & setme,
+ const Part * p,
+ const Quark & reference_mid) const;
+
+ public:
+ class const_iterator {
+ const Quark& reference_mid;
+ const Parts& parts;
+ mutable std::string midbuf;
+ int n;
+ const Part* get_part() const { return parts.parts + n; }
+ public:
+ const_iterator (const Quark& q,
+ const Parts& p,
+ int pos=0): reference_mid(q), parts(p), n(pos) {}
+ bool operator== (const const_iterator& that)
+ const { return &parts==&that.parts && n == that.n; }
+ bool operator!= (const const_iterator& that)
+ const { return !(*this == that); }
+ const_iterator& operator++() { ++n; return *this; }
+ const_iterator operator++(int)
+ { const_iterator tmp=*this; ++*this; return tmp; }
+ bytes_t bytes() const { return get_part()->bytes; }
+ number_t number() const { return get_part()->number; }
+ const std::string& mid() const {
+ parts.unpack_message_id (midbuf, get_part(), reference_mid);
+ return midbuf;
+ }
+ };
+
+ const_iterator begin(const Quark& q) const
+ { return const_iterator(q,*this,0); }
+ const_iterator end(const Quark& q) const
+ { return const_iterator(q,*this,n_parts_found); }
+
+ public:
+ Parts ();
+ ~Parts () { clear(); }
+ Parts (const Parts& that);
+ Parts& operator= (const Parts&);
+
+ public:
+ void set_part_count (number_t num) { n_parts_total = num; }
+ number_t get_total_part_count () const { return n_parts_total; }
+ number_t get_found_part_count () const { return n_parts_found; }
+ void clear ();
+ bool add_part (number_t num,
+ const StringView& mid,
+ bytes_t bytes,
+ const Quark& reference_mid);
+ void set_parts (const PartBatch& parts);
+
+ public:
+ bool get_part_info (number_t num,
+ std::string & mid,
+ bytes_t & bytes,
+ const Quark & reference_mid) const;
+ };
+
+ /**
+ * Batches together parts of a multipart post for
+ * efficient adding to a Parts object.
+ *
+ * @see Parts
+ * @ingroup data
+ */
+ class PartBatch
+ {
+ public:
+ typedef Parts::number_t number_t;
+ typedef Parts::bytes_t bytes_t;
+
+ private:
+
+ friend class Parts;
+ struct Part {
+ number_t number;
+ bytes_t bytes;
+ size_t len_used;
+ size_t len_alloced;
+ char * packed_mid;
+ Part(): number(0), bytes(0),
+ len_used(0), len_alloced(0), packed_mid(0) {}
+ ~Part() { delete [] packed_mid; }
+ Part (const Part&);
+ Part& operator= (const Part&);
+ };
+ Quark reference_mid;
+ number_t n_parts_found;
+ number_t n_parts_total;
+ typedef std::vector<Part> parts_t;
+ parts_t parts;
+ size_t packed_mids_len;
+
+ public:
+ void init (const Quark& mid, number_t n_parts=0, number_t n_found=0);
+ void add_part (number_t num, const StringView& mid, bytes_t bytes);
+ };
+}
+
+#endif
Index: pan/data/article-test.cc
===================================================================
--- pan/data/article-test.cc (revision 278)
+++ pan/data/article-test.cc (working copy)
@@ -8,19 +8,22 @@
#define test_message_id(s) \
in = s; \
- part.set_message_id (key, in); \
- out = part.get_message_id (key); \
- check (in == out)
+ a.add_part (++i, in, 1); \
+ check (a.get_part_info (i, out, bytes)) \
+ check (bytes == 1) \
+ check (out == in)
int
main (void)
{
+ Parts::number_t i = 0;
+ Parts::bytes_t bytes;
Quark key = "<address@hidden>";
std::string in, out;
Article a;
+ a.message_id = key;
a.set_part_count (2);
- Article::Part& part (a.get_part(2));
test_message_id (key.to_string()); // two equal strings
test_message_id ("<address@hidden>"); // extra in the middle...
Index: pan/data/article.cc
===================================================================
--- pan/data/article.cc (revision 278)
+++ pan/data/article.cc (working copy)
@@ -41,16 +41,13 @@
else if (!is_line_count_ge(250) && has_reply_leader(subject.to_view()))
part_state = SINGLE;
- // someone's posted a "000/124" info message
- else if (parts.empty())
- part_state = SINGLE;
-
- // a multipart
- else {
- part_state = COMPLETE;
- for (Article::parts_t::const_iterator it(parts.begin()), end(parts.end());
part_state==COMPLETE && it!=end; ++it)
- if (it->empty())
- part_state = INCOMPLETE;
+ else {
+ const Parts::number_t total = parts.get_total_part_count ();
+ const Parts::number_t found = parts.get_found_part_count ();
+ if (!found) // someone's posted a "000/124" info message
+ part_state = SINGLE;
+ else // a multipart..
+ part_state = total==found ? COMPLETE : INCOMPLETE;
}
return part_state;
@@ -66,25 +63,6 @@
}
bool
-Article :: is_byte_count_ge (unsigned long test) const
-{
- unsigned long bytes (0);
- foreach_const (parts_t, parts, it)
- if (((bytes += it->bytes)) >= test)
- return true;
- return false;
-}
-
-unsigned long
-Article :: get_byte_count () const
-{
- unsigned long bytes (0);
- foreach_const (parts_t, parts, it)
- bytes += it->bytes;
- return bytes;
-}
-
-bool
Article :: has_reply_leader (const StringView& s)
{
return !s.empty()
@@ -95,166 +73,31 @@
&& s.str[3]==' ';
}
-void
-Article :: set_part_count (unsigned int count)
+unsigned long
+Article :: get_byte_count () const
{
- assert (count > 0);
- parts.resize (count);
+ unsigned long bytes = 0;
+ for (part_iterator it(pbegin()), end(pend()); it!=end; ++it)
+ bytes += it.bytes();
+ return bytes;
}
-Article :: Part&
-Article :: get_part (unsigned int number)
+bool
+Article :: is_byte_count_ge (unsigned long test) const
{
- //std::cerr << LINE_ID << " parts.size() " << parts.size() << " number " <<
number << std::endl;
- const unsigned int index (number - 1);
- assert (parts.size() > index);
- return parts[index];
+ unsigned long bytes = 0;
+ for (part_iterator it(pbegin()), end(pend()); it!=end; ++it)
+ if (((bytes += it.bytes())) >= test)
+ return true;
+ return false;
}
-const Article :: Part&
-Article :: get_part (unsigned int number) const
-{
- //std::cerr << LINE_ID << " parts.size() " << parts.size() << " number " <<
number << std::endl;
- const unsigned int index (number - 1);
- assert (parts.size() > index);
- return parts[index];
-}
-
-/* Message-IDs in multipart articles are usually nearly identical, like this:
-**
-** <address@hidden>
-** <address@hidden>
-** <address@hidden>
-** <address@hidden>
-** <address@hidden>
-** <address@hidden>
-**
-** In large newsgroups, _many_ megs can be saved by stripping out common text.
-** We assign Article::Part's Message-ID by passing in its real Message-ID and
-** a reference key (which currently is always the owner Article's message_id).
-** The identical chars at the beginning (b) and end (e) of the two are counted.
-** b and e have an upper bound of UCHAR_MAX (255).
-** Article::Part::folded_message_id's first byte holds 'b', the second holds
'e',
-** and the remainder is a zero-terminated string with the unique middle
characters.
-*/
-
-void
-Article :: Part :: set_message_id (const Quark& key_mid, const StringView& mid)
-{
- register size_t b=0, e=0;
- register const char *k, *m;
- const StringView& key (key_mid.to_view());
-
- const size_t bmax = std::min (std::min (key.len, mid.len),
size_t(UCHAR_MAX));
- k = &key.front();
- m = &mid.front();
- for (; b!=bmax; ++b)
- if (*k++ != *m++)
- break;
-
- const size_t emax = bmax - b;
- k = &key.back();
- m = &mid.back();
- for (; e!=emax; ++e)
- if (*k-- != *m--)
- break;
-
- const size_t n_kept (mid.len - e - b);
- char *str = g_new (char, n_kept+3); // b + e + '\0'
- str[0] = (char) b;
- str[1] = (char) e;
- memcpy (str+2, mid.str+b, n_kept);
- str[n_kept+2] = '\0';
-
- g_free (packed_message_id);
- packed_message_id = str;
-
- // check our work
- //assert (mid == get_message_id(key));
-}
-
-std::string
-Article :: Part :: get_message_id (const Quark& key_mid) const
-{
- std::string setme;
- get_message_id (key_mid, setme);
- return setme;
-}
-
-void
-Article :: Part :: get_message_id (const Quark& key_mid, std::string& setme)
const
-{
- setme.clear ();
-
- if (packed_message_id)
- {
- const StringView key (key_mid.to_view());
- const char * pch (packed_message_id);
- const int b ((unsigned char) *pch++);
- const int e ((unsigned char) *pch++);
- setme.append (key.str, 0, b);
- setme.append (pch);
- setme.append (key.str + key.len - e, e);
- }
-}
-
-
-namespace
-{
- char* clone_packed_mid (const char * mid)
- {
- // 2 to pick up b and e; 1 to pick up the '\0'
- return mid ? (char*) g_memdup (mid, 2+strlen(mid+2)+1) : 0;
- }
-}
-
-void
-Article :: Part :: clear ()
-{
- bytes = 0;
- g_free (packed_message_id);
- packed_message_id = 0;
-}
-
-void
-Article :: Part :: swap (Part& that)
-{
- std::swap (bytes, that.bytes);
- std::swap (packed_message_id, that.packed_message_id);
-}
-
-Article :: Part :: Part (const Part& that):
- packed_message_id (clone_packed_mid (that.packed_message_id)),
- bytes (that.bytes)
-{
-}
-
-Article :: Part&
-Article :: Part :: operator= (const Part& that)
-{
- bytes = that.bytes;
- g_free (packed_message_id);
- packed_message_id = clone_packed_mid (that.packed_message_id);
- return *this;
-}
-
-/***
-****
-***/
-
Article :: mid_sequence_t
Article :: get_part_mids () const
{
mid_sequence_t mids;
- mids.reserve (parts.size());
-
- const Quark& key (message_id);
- foreach_const (parts_t, parts, it) {
- const Part& p (*it);
- if (!p.empty())
- mids.push_back (p.get_message_id(key));
- }
-
+ for (part_iterator it(pbegin()), end(pend()); it!=end; ++it)
+ mids.push_back (it.mid());
return mids;
}
Index: pan/data/article.h
===================================================================
--- pan/data/article.h (revision 278)
+++ pan/data/article.h (working copy)
@@ -20,10 +20,11 @@
#ifndef __Article_h__
#define __Article_h__
-#include <vector>
#include <ctime>
+#include <vector>
#include <pan/general/sorted-vector.h>
#include <pan/general/quark.h>
+#include <pan/data/parts.h>
#include <pan/data/xref.h>
namespace pan
@@ -46,74 +47,50 @@
class Article
{
public:
+ void set_parts (const PartBatch& b) { parts.set_parts(b); }
+ bool add_part (Parts::number_t num, const StringView& mid,
Parts::bytes_t bytes) { return parts.add_part(num,mid,bytes,message_id); }
+ void set_part_count (Parts::number_t num) { parts.set_part_count(num); }
+ Parts::number_t get_total_part_count () const { return
parts.get_total_part_count(); }
+ Parts::number_t get_found_part_count () const { return
parts.get_found_part_count(); }
+ bool get_part_info (Parts::number_t num,
+ std::string & mid,
+ Parts::bytes_t & bytes) const { return
parts.get_part_info(num,mid,bytes,message_id); }
- /**
- * The fields of a single part of a multipart Article.
- * For other fields, refer to the parent Article object.
- */
- class Part
- {
- private:
- char * packed_message_id;
+ typedef Parts::const_iterator part_iterator;
+ part_iterator pbegin() const { return parts.begin(message_id); }
+ part_iterator pend() const { return parts.end(message_id); }
- public:
- unsigned long bytes;
-
- public:
- void set_message_id (const Quark& key, const StringView& mid);
- std::string get_message_id (const Quark& key) const;
- void get_message_id (const Quark& key, std::string& setme) const;
-
- Part(): packed_message_id(0), bytes(0) {}
- ~Part() { clear(); }
-
- bool empty () const { return !packed_message_id; }
- void swap (Part&);
- void clear ();
-
- Part (const Part&);
- Part& operator= (const Part&);
- };
-
- typedef std::vector<Part> parts_t;
-
- Part& get_part (unsigned int part_number);
- const Part& get_part (unsigned int part_number) const;
- void set_part_count (unsigned int);
typedef std::vector<Quark> mid_sequence_t;
mid_sequence_t get_part_mids () const;
+ enum PartState { SINGLE, INCOMPLETE, COMPLETE };
+ PartState get_part_state () const;
public:
+ bool is_binary;
Quark message_id;
Quark author;
Quark subject;
static bool has_reply_leader (const StringView&);
public:
- enum PartState { SINGLE, INCOMPLETE, COMPLETE };
- PartState get_part_state () const;
unsigned int get_crosspost_count () const;
unsigned long get_line_count () const { return lines; }
- unsigned long get_byte_count () const;
bool is_line_count_ge (size_t test) const { return lines >= test; }
- bool is_byte_count_ge (unsigned long) const;
- unsigned int get_part_count () const { return parts.size(); }
- bool has_part (unsigned int part_number) const
- { return part_number!=0 && get_part_count() >= part_number; }
+ unsigned long get_byte_count () const;
+ bool is_byte_count_ge (unsigned long test) const;
+ unsigned int lines;
+ int score;
time_t time_posted;
- unsigned long lines;
Xref xref;
- int score;
- parts_t parts;
public:
- Article (): time_posted(0), lines(0), score(0), is_binary(false) {}
- ~Article () {}
+ Article (): is_binary(false), lines(0), score(0), time_posted(0) {}
void clear ();
- public:
- bool is_binary;
+ private:
+ Parts parts;
+
};
}
Index: pan/data/Makefile.am
===================================================================
--- pan/data/Makefile.am (revision 278)
+++ pan/data/Makefile.am (working copy)
@@ -8,6 +8,7 @@
article.cc \
article-cache.cc \
data.cc \
+ parts.cc \
xref.cc
noinst_HEADERS = \
@@ -15,6 +16,7 @@
article-cache.h \
data.h \
defgroup.h \
+ parts.h \
server-info.h \
xref.h
Index: pan/data/parts.cc
===================================================================
--- pan/data/parts.cc (revision 0)
+++ pan/data/parts.cc (revision 0)
@@ -0,0 +1,362 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/*
+ * Pan - A Newsreader for Gtk+
+ * Copyright (C) 2002-2007 Charles Kerr <address@hidden>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <config.h>
+#include <cassert>
+#include <algorithm>
+#include <pan/general/debug.h>
+#include <pan/general/foreach.h>
+#include "article.h"
+
+using namespace pan;
+
+#undef DEBUG
+
+/***
+****
+***/
+
+/* Message-IDs in multipart articles are usually nearly identical, like this:
+**
+** <address@hidden>
+** <address@hidden>
+** <address@hidden>
+** <address@hidden>
+** <address@hidden>
+** <address@hidden>
+**
+** In large newsgroups, _many_ megs can be saved by stripping out common text.
+** We assign Article::Part's Message-ID by passing in its real Message-ID and
+** a reference key (which currently is always the owner Article's message_id).
+** The identical chars at the beginning (b) and end (e) of the two are counted.
+** b and e have an upper bound of UCHAR_MAX (255).
+** Article::Part::folded_message_id's first byte holds 'b', the second holds
'e',
+** and the remainder is a zero-terminated string with the unique middle
characters.
+*/
+namespace
+{
+ /**
+ * pack_message_id is broken into two steps so that
+ * the client code can ensure the buffer is big enough
+ * before we write the mid to it.
+ */
+ struct Packer
+ {
+ int size() const { return midlen+3; } // three for for b, e, and '\0'
+ char b, e;
+ const char * mid;
+ size_t midlen;
+ Packer(): b(0), e(0), mid(0), midlen(0) {}
+ void pack (char* buf) const {
+ *buf++ = (char) b;
+ *buf++ = (char) e;
+ memcpy (buf, mid, midlen);
+ buf[midlen] = '\0';
+ }
+ };
+
+ void
+ pack_message_id (Packer & setme,
+ const StringView & mid,
+ const Quark & key_mid)
+ {
+ register size_t b=0, e=0;
+ register const char *k, *m;
+ const StringView& key (key_mid.to_view());
+
+ const size_t bmax = std::min (std::min (key.len, mid.len),
size_t(UCHAR_MAX));
+ k = &key.front();
+ m = &mid.front();
+ for (; b!=bmax; ++b)
+ if (*k++ != *m++)
+ break;
+
+ const size_t emax = bmax - b;
+ k = &key.back();
+ m = &mid.back();
+ for (; e!=emax; ++e)
+ if (*k-- != *m--)
+ break;
+
+ setme.b = (char) b;
+ setme.e = (char) e;
+ setme.mid = mid.str + b;
+ setme.midlen = mid.len - e - b;
+ }
+
+ size_t
+ unpack_message_id (std::string & setme,
+ const StringView & mid,
+ const Quark & key_mid)
+ {
+ setme.clear ();
+
+ if (mid.len >= 2)
+ {
+ const StringView& key (key_mid.to_view());
+ const int b (mid.str[0]);
+ const int e (mid.str[1]);
+ //std::cerr << LINE_ID << " b " << b << " e " << e << std::endl;
+ if (b) setme.append (key.str, 0, b);
+ setme.append (mid.str+2, mid.len-2);
+ if (e) setme.append (key.str + key.len - e, e);
+ //std::cerr << LINE_ID <<" unpacked [" << setme << ']' << std::endl;
+ return mid.len-2 + b + e;
+ }
+
+ return 0;
+ }
+}
+
+/***
+****
+***/
+
+Parts :: Parts ():
+ n_parts_found (0),
+ n_parts_total (0),
+ part_mid_buf_len (0),
+ parts (0),
+ part_mid_buf (0)
+{
+}
+
+void
+Parts :: clear ()
+{
+ delete [] parts;
+ parts = 0;
+
+ delete [] part_mid_buf;
+ part_mid_buf = 0;
+
+ part_mid_buf_len = 0;
+ n_parts_found = 0;
+ n_parts_total = 0;
+}
+
+Parts :: Parts (const Parts& that):
+ n_parts_found (0),
+ n_parts_total (0),
+ part_mid_buf_len (0),
+ parts (0),
+ part_mid_buf (0)
+{
+ *this = that;
+}
+
+Parts&
+Parts :: operator= (const Parts& that)
+{
+ clear ();
+
+ n_parts_found = that.n_parts_found;
+ n_parts_total = that.n_parts_total;
+ part_mid_buf_len = that.part_mid_buf_len;
+ part_mid_buf = new char [part_mid_buf_len];
+ memcpy (part_mid_buf, that.part_mid_buf, part_mid_buf_len);
+
+ Part * part = parts = new Part [n_parts_found];
+ const Part * that_part = that.parts;
+ for (size_t i=0; i<n_parts_found; ++i, ++part, ++that_part) {
+ part->number = that_part->number;
+ part->bytes = that_part->bytes;
+ part->mid_offset = that_part->mid_offset;
+ }
+
+ return *this;
+}
+
+/***
+****
+***/
+
+void
+Parts :: unpack_message_id (std::string & setme,
+ const Part * part,
+ const Quark & reference_mid) const
+{
+ StringView v;
+ v.str = part_mid_buf + part->mid_offset;
+ v.len = 2 + strlen (v.str+2);
+ ::unpack_message_id (setme, v, reference_mid);
+}
+
+bool
+Parts :: get_part_info (number_t part_number,
+ std::string & setme_message_id,
+ bytes_t & setme_byte_count,
+ const Quark & reference_mid) const
+{
+ Part findme;
+ findme.number = part_number;
+ Part * p = std::lower_bound (parts, parts+n_parts_found, findme);
+ if ((p == parts+n_parts_found) || (p->number != part_number))
+ return false;
+
+ unpack_message_id (setme_message_id, p, reference_mid);
+ setme_byte_count = p->bytes;
+ return true;
+}
+
+/***
+****
+***/
+
+void
+Parts :: set_parts (const PartBatch& p)
+{
+ clear ();
+
+ n_parts_found = p.n_parts_found;
+ n_parts_total = p.n_parts_total;
+ part_mid_buf_len = p.packed_mids_len;
+
+ char * pch = part_mid_buf = new char [part_mid_buf_len];
+ Part * part = parts = new Part [n_parts_found];
+ PartBatch::parts_t::const_iterator in = p.parts.begin();
+ for (size_t i=0, n=n_parts_found; i<n; ++i, ++in, ++part) {
+ part->number = in->number;
+ part->bytes = in->bytes;
+ part->mid_offset = pch - part_mid_buf;
+ memcpy (pch, in->packed_mid, in->len_used);
+ pch += in->len_used;
+ }
+
+ assert (pch == part_mid_buf + part_mid_buf_len);
+}
+
+bool
+Parts :: add_part (number_t num,
+ const StringView & mid,
+ bytes_t bytes,
+ const Quark & reference_mid)
+{
+ Part findme;
+ findme.number = num;
+ Part * p = std::lower_bound (parts, parts+n_parts_found, findme);
+ if (p!=parts+n_parts_found && p->number == num) // we have it already
+ return false;
+
+ const int insert_index = (p - parts);
+ Part * buf = new Part [n_parts_found+1];
+ std::copy (parts, parts+insert_index, buf);
+ std::copy (parts+insert_index, parts+n_parts_found, buf+insert_index+1);
+ delete [] parts;
+ parts = buf;
+ buf += insert_index;
+ buf->number = num;
+ buf->bytes = bytes;
+ buf->mid_offset = part_mid_buf_len;
+ ++n_parts_found;
+
+ Packer packer;
+ pack_message_id (packer, mid, reference_mid);
+ const size_t midlen = packer.size ();
+ char * mbuf = new char [part_mid_buf_len + midlen];
+ memcpy (mbuf, part_mid_buf, part_mid_buf_len);
+ packer.pack (mbuf + part_mid_buf_len);
+ delete [] part_mid_buf;
+ part_mid_buf = mbuf;
+ part_mid_buf_len += midlen;
+
+#ifdef DEBUG
+ std::string test_mid;
+ bytes_t test_bytes;
+ assert (get_part_info (num, test_mid, test_bytes, reference_mid));
+ assert (test_bytes == bytes);
+ assert (mid == test_mid);
+#endif
+
+ return true; // yes, we added it
+}
+
+/****
+*****
+****/
+
+void
+PartBatch :: init (const Quark & mid,
+ number_t n_parts_total,
+ number_t n_parts_found)
+{
+ this->reference_mid = mid;
+ this->packed_mids_len = 0;
+ this->n_parts_total = n_parts_total;
+ this->n_parts_found = 0; // they haven't been added yet
+
+ if (n_parts_found > parts.size())
+ parts.resize (n_parts_found);
+}
+
+void
+PartBatch :: add_part (number_t number,
+ const StringView & mid,
+ bytes_t bytes)
+{
+ if (n_parts_found >= parts.size())
+ parts.resize (n_parts_found+1);
+
+ Part& p = *(&parts.front() + n_parts_found++);
+ p.number = number;
+ p.bytes = bytes;
+
+ Packer packer;
+ pack_message_id (packer, mid, reference_mid);
+ p.len_used = packer.size ();
+ if (p.len_alloced < p.len_used) {
+ delete [] p.packed_mid;
+ p.packed_mid = new char [p.len_used];
+ p.len_alloced = p.len_used;
+ }
+ packer.pack (p.packed_mid);
+ packed_mids_len += p.len_used;
+
+#ifdef DEBUG
+ // check our work
+ std::string tmp;
+ ::unpack_message_id (tmp, StringView(p.packed_mid,p.len_used-1),
reference_mid);
+ assert (mid == tmp);
+#endif
+
+ if (n_parts_total < n_parts_found)
+ n_parts_total = n_parts_found;
+}
+
+PartBatch :: Part&
+PartBatch :: Part :: operator= (const PartBatch :: Part& that)
+{
+ number = that.number;
+ bytes = that.bytes;
+ len_used = len_alloced = that.len_used;
+ delete [] packed_mid;
+ packed_mid = new char [len_used];
+ memcpy (packed_mid, that.packed_mid, len_used);
+ return *this;
+}
+
+PartBatch :: Part :: Part (const PartBatch::Part& that):
+ number (that.number),
+ bytes (that.bytes),
+ len_used (that.len_used),
+ len_alloced (that.len_used),
+ packed_mid (new char [len_used])
+{
+ memcpy (packed_mid, that.packed_mid, len_used);
+}