lynx-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

lynx-dev [PATCH 2.8.5-dev14] *Really* large tables


From: Ilya Zakharevich
Subject: lynx-dev [PATCH 2.8.5-dev14] *Really* large tables
Date: Mon, 7 Apr 2003 22:40:22 -0700
User-agent: Mutt/1.4i

This patch raises lynx to "actually linear" behaviour when processing tables.
Before the patch the algorithm was linear, but the implementation of malloc()
would make the time quadratic - although with a pretty small constant.
Actually, the old algorithm would lead to quadratic memory requirement with
most malloc()s.

In addition to removing quadratic behaviour, this patch should also
significantly reduce the coefficient at the linear part.

As a test, I provide
 <URL:ilyaz.org/software/tmp/table_2col_bold_it_500000.html.gz>.
This is a simple table with 2 columns, one with bold contents, another with
italic one.  The total number of rows is 500K.  With the patch and an
acceptable malloc(), lynx should use the working set of about 110M to show
the table.  On my system with 128M memory, this leads to only 4M of the
process space swapped.

With Athlon/850, the file loads in about 40 secs.  (Thus it can be viewed
over 56Kb/sec line without any slowdown).  I tried several different possible
modifications, but could not improve the footprint.  You may want to experiment
with CELLS_GROWBY_FACTOR, ROWS_GROWBY_DIVISOR and REUSE_ROWS_AS_CELLS_POOLS if
you are interested.

 [Note that #undef'ing SAVE_TIME_NOT_SPACE would lead to *larger* memory
  consumption (due to realloc()s most probably not able to reallocate in
  place); I think this #ifdef should be removed.]

Enjoy,
Ilya

P.S.  Note that there is the third place where addmem_rowinfo() should be used:
      in Stbl_fakeFinishCellInTable().  But I could not quickly follow the
      logic of this function (I think the logic is mine :-[).  This means
      that tables with <br> and <p> in cells may still show quadratic
      behaviour.

P.P.S. I tried the above URL, but it is not shown with partial-display-while-
       downloading.  I have zlib used by lynx; why it would not gunzip on the
       fly?  (It would download the file, then would open the copy.)

--- ./src/TRSTable.c-ini        Sat Jul  7 17:30:12 2001
+++ ./src/TRSTable.c    Mon Apr  7 22:12:46 2003
@@ -17,11 +17,17 @@
 #ifdef SAVE_TIME_NOT_SPACE
 #define CELLS_GROWBY 16
 #define ROWS_GROWBY 16
-#else
+#define ROWS_GROWBY_DIVISOR 2
+#else  /* This is very silly, and leads to *larger* memory consumption... */
 #define CELLS_GROWBY 2
 #define ROWS_GROWBY 2
+#define ROWS_GROWBY_DIVISOR 10
 #endif
 
+/* Experiments show that 2 is better than 1.5 is better than 1.25 (all by
+   a small margin only)??? */
+#define CELLS_GROWBY_FACTOR 2
+
 #ifdef USE_CURSES_PADS
 #  define MAX_STBL_POS (LYwideLines ? MAX_COLS - 1 : LYcols-1)
 #else
@@ -123,11 +129,21 @@ typedef struct _STable_rowinfo {
        enum ended_state ended; /* if we saw </tr> etc */
        int     content;        /* Whether contains end-of-cell etc */
        int     offset;         /* >=0 after line break in a multiline cell */
-       int     allocated;      /* number of table cells allocated */
+       int     allocated;      /* number of table cells allocated or 0
+                                  if the .cells should not be free()ed */
        STable_cellinfo * cells;
        int     alignment;      /* global align attribute for this row */
 } STable_rowinfo;
 
+struct _STable_chunk;
+typedef struct _STable_chunk {
+    struct _STable_chunk *next;
+    int alloc_cells;
+    int used_cells;
+    STable_cellinfo cells[1];
+} STable_chunk;
+
+
 struct _STable_info {
 #ifdef EXP_NESTED_TABLES
        struct _STable_info *enclosing; /* The table which contain us */
@@ -149,6 +165,8 @@ struct _STable_info {
        short   pending_colgroup_align;
        int     pending_colgroup_next;
        STable_states s;
+       STable_chunk *free_chunks;
+       STable_chunk *used_chunks;
 };
 
 /*
@@ -248,6 +266,7 @@ PUBLIC struct _STable_info * Stbl_startT
        if (nested_tables)
            me->enclosing = 0;
 #endif
+       me->used_chunks = me->free_chunks = NULL;
     }
     return me;
 }
@@ -260,6 +279,95 @@ PRIVATE void free_rowinfo ARGS1(
     }
 }
 
+PRIVATE int addmem_rowinfo ARGS2(
+    STable_info *,     me,
+    int,               incr)
+{
+    int i;
+    int growby = 0;
+    STable_rowinfo *rows, *row;
+
+    while (me->nrows + incr + 1 > me->allocated_rows + growby)
+       growby += ROWS_GROWBY + me->allocated_rows/ROWS_GROWBY_DIVISOR;
+    if (growby) {
+       if (me->allocated_rows == 0 && !me->rows) {
+           rows = typecallocn(STable_rowinfo, growby);
+       } else {
+#if REUSE_ROWS_AS_CELLS_POOLS          /* Turns out to be not beneficial */
+           /* Work in a regime which has a chance to work efficiently
+              even with lousy malloc()s: do not realloc() until we
+              have many (2) free chunks available (possible with very
+              simple structure of each row).  Simultaneously,
+              make it possible to use an effecient realloc() which
+              would grow the region in place - so DO use realloc() if
+              we already have many free chunks to put the cellinfo into.
+            */
+           if ( me->free_chunks && me->free_chunks->next
+                || (me->allocated_rows*sizeof(STable_rowinfo) <
+                   (sizeof(STable_chunk) 
+                    + (CELLS_GROWBY-1)*sizeof(STable_cellinfo)))
+                || 1)
+#endif
+           {
+               rows = realloc(me->rows,
+                              (me->allocated_rows + growby)
+                              * sizeof(STable_rowinfo));
+           }
+#if REUSE_ROWS_AS_CELLS_POOLS
+           else {
+               rows = malloc((me->allocated_rows + growby)
+                             * sizeof(STable_rowinfo));
+               if (rows) {
+                   STable_chunk *p;
+
+                   memcpy(rows, me->rows,
+                          (me->allocated_rows + growby) 
+                          * sizeof(STable_rowinfo));
+                   p = (STable_chunk*)me->rows;
+                   p->alloc_cells =
+                       1 + (me->allocated_rows*sizeof(STable_rowinfo)
+                            - sizeof(STable_chunk))/sizeof(STable_cellinfo);
+                   p->used_cells = 0;
+                   p->next = me->free_chunks;
+                   me->free_chunks = p;
+               }
+           }
+#endif
+           for (i = 0; rows && i < growby; i++) {
+               row = rows + me->allocated_rows + i;
+               if (!me->rowspans2eog.allocated) {
+                   row->allocated = 0;
+                   row->cells = NULL;
+               } else {
+                   row->cells = typecallocn(STable_cellinfo,
+                                            me->rowspans2eog.allocated);
+                   if (row->cells) {
+                       row->allocated = me->rowspans2eog.allocated;
+                       memcpy(row->cells, me->rowspans2eog.cells,
+                              row->allocated * sizeof(STable_cellinfo));
+                   } else {
+                       FREE(rows);
+                       break;
+                   }
+               }
+               row->ncells = 0;
+               row->fixed_line = NO;
+               row->alignment = HT_ALIGN_NONE;
+               row->offset = 0;
+               row->content = 0;
+           }
+       }
+       if (rows) {
+           me->allocated_rows += growby;
+           me->rows = rows;
+       } else {
+           return 0;
+       }
+    }
+    return 1;
+}
+
+
 PUBLIC void Stbl_free ARGS1(
     STable_info *,     me)
 {
@@ -274,6 +382,17 @@ PUBLIC void Stbl_free ARGS1(
     free_rowinfo(&me->rowspans2eog);
     if (me)
        FREE(me->sumcols);
+    if (me) {
+       STable_chunk *this;
+       while ((this = me->free_chunks)) {
+           me->free_chunks = this->next;
+           FREE(this);
+       }
+       while ((this = me->used_chunks)) {
+           me->used_chunks = this->next;
+           FREE(this);
+       }
+    }
     FREE(me);
 }
 
@@ -949,7 +1068,6 @@ PRIVATE int Stbl_reserveCellsInTable ARG
     int,               rowspan)
 {
     STable_rowinfo *rows, *row;
-    int growby;
     int i;
 
     if (me->nrows <= 0)
@@ -969,36 +1087,9 @@ PRIVATE int Stbl_reserveCellsInTable ARG
        Stbl_reserveCellsInRow(&me->rowspans2eog, icell, colspan);
     }
 
-    growby = me->nrows + rowspan - 1 - me->allocated_rows;
-    if (growby > 0) {
-       rows = realloc(me->rows,
-                      (me->allocated_rows + growby)
-                      * sizeof(STable_rowinfo));
-       if (!rows)
-           return 0; /* ignore silently, no free memory, may be recoverable */
-       for (i = 0; i < growby; i++) {
-           row = rows + me->allocated_rows + i;
-           row->allocated = 0;
-           row->offset = 0;
-           row->content = 0;
-           if (!me->rowspans2eog.allocated) {
-               row->cells = NULL;
-           } else {
-               row->cells = typecallocn(STable_cellinfo,
-                                        me->rowspans2eog.allocated);
-               if (row->cells) {
-                   row->allocated = me->rowspans2eog.allocated;
-                   memcpy(row->cells, me->rowspans2eog.cells,
-                          row->allocated * sizeof(STable_cellinfo));
-               }
-           }
-           row->ncells = 0;
-           row->fixed_line = NO;
-           row->alignment = HT_ALIGN_NONE;
-       }
-       me->allocated_rows += growby;
-       me->rows = rows;
-    }
+    if (!addmem_rowinfo(me, rowspan - 1))
+       return 0; /* ignore silently, no free memory, may be recoverable */
+
     for (i = me->nrows;
         i < (rowspan == 0 ? me->allocated_rows : me->nrows + rowspan - 1);
         i++) {
@@ -1057,50 +1148,8 @@ PUBLIC int Stbl_addRowToTable ARGS3(
     s->pending_len = 0;
     s->x_td = -1;
 
-    {
-       int i;
-       int growby = 0;
-       while (me->nrows + 2 > me->allocated_rows + growby)
-           growby += ROWS_GROWBY;
-       if (growby) {
-           if (me->allocated_rows == 0 && !me->rows) {
-               rows = typecallocn(STable_rowinfo, growby);
-           } else {
-               rows = realloc(me->rows,
-                                 (me->allocated_rows + growby)
-                                 * sizeof(STable_rowinfo));
-               for (i = 0; rows && i < growby; i++) {
-                   row = rows + me->allocated_rows + i;
-                   if (!me->rowspans2eog.allocated) {
-                       row->allocated = 0;
-                       row->cells = NULL;
-                   } else {
-                       row->cells = typecallocn(STable_cellinfo,
-                                                me->rowspans2eog.allocated);
-                       if (row->cells) {
-                           row->allocated = me->rowspans2eog.allocated;
-                           memcpy(row->cells, me->rowspans2eog.cells,
-                                  row->allocated * sizeof(STable_cellinfo));
-                       } else {
-                           FREE(rows);
-                           break;
-                       }
-                   }
-                   row->ncells = 0;
-                   row->fixed_line = NO;
-                   row->alignment = HT_ALIGN_NONE;
-                   row->offset = 0;
-                   row->content = 0;
-               }
-           }
-           if (rows) {
-               me->allocated_rows += growby;
-               me->rows = rows;
-           } else {
-               return -1;
-           }
-       }
-    }
+    if (!addmem_rowinfo(me, 1))
+       return -1;
 
     me->rows[me->nrows].Line = lineno;
     if (me->nrows == 0)
@@ -1111,6 +1160,49 @@ PUBLIC int Stbl_addRowToTable ARGS3(
        me->rows[me->nrows].alignment =
            (me->rowgroup_align==HT_ALIGN_NONE) ?
                                  me->alignment : me->rowgroup_align;
+    if (me->nrows >= 2 /* We may use RESERVEDCELL flag of cells in nrows-1 */
+       && me->rows[me->nrows - 2].allocated > me->rows[me->nrows - 2].ncells) {
+       int c = me->rows[me->nrows - 2].ncells;
+       STable_cellinfo *p  = me->rows[me->nrows - 2].cells;
+
+#if 0  /* Leads to no memory savings and quadratic time with EMX malloc */
+       /* Do not need extra cells any more */
+       me->rows[me->nrows - 2].cells = realloc(p,c * sizeof(STable_cellinfo));
+       me->rows[me->nrows - 2].allocated = me->rows[me->nrows - 2].ncells;
+#else
+       if (!me->used_chunks 
+           || ((me->used_chunks->alloc_cells - me->used_chunks->used_cells)
+               < c)) {
+           if (me->free_chunks && (me->free_chunks->alloc_cells >= c)) {
+               STable_chunk *p = me->free_chunks;
+
+               me->free_chunks = p->next;
+               p->next = me->used_chunks;
+               me->used_chunks = p;
+           } else {                    /* Need to get a new guy */
+               STable_chunk *p;
+
+               if (c < CELLS_GROWBY)
+                   c = CELLS_GROWBY;
+               if ( me->used_chunks
+                    && c < me->used_chunks->alloc_cells*CELLS_GROWBY_FACTOR )
+                   c = 2*me->used_chunks->alloc_cells * CELLS_GROWBY_FACTOR;
+               p = malloc(sizeof(STable_chunk) + 
(c-1)*sizeof(STable_cellinfo));
+               p->alloc_cells = c;
+               p->used_cells = 0;
+               p->next = me->used_chunks;
+               me->used_chunks = p;
+           }
+       }
+       memcpy(me->used_chunks->cells + me->used_chunks->used_cells,
+              p, me->rows[me->nrows - 2].ncells * sizeof(STable_cellinfo));
+       me->rows[me->nrows - 2].cells = 
+           me->used_chunks->cells + me->used_chunks->used_cells;
+       me->used_chunks->used_cells += me->rows[me->nrows - 2].ncells;
+       me->rows[me->nrows - 2].allocated = 0; /* Do not FREE() */
+       FREE(p);
+#endif
+    }
     me->nrows++;
     if (me->pending_colgroup_next > me->ncolinfo) {
        me->ncolinfo = me->pending_colgroup_next;

; To UNSUBSCRIBE: Send "unsubscribe lynx-dev" to address@hidden

reply via email to

[Prev in Thread] Current Thread [Next in Thread]