fmt: add a "new split" option (short form n, long form --new-split[=PERCENT]).

With the option enabled, same_para() additionally requires that the input
line just read be longer than PERCENT percent of the average length of the
input lines tallied so far; a shorter line therefore ends the paragraph.
PERCENT must be in the range 1..99 and defaults to 70.  The tallies
(total_line_length, num_lines) are file-scope counters that no hunk below
resets, so the average runs across paragraphs.

Changes relative to the patch as originally posted:
  * the three new file-scope counters are declared static, like
    cur_line_length;
  * fix_avg is defined with a (void) parameter list, matching its prototype;
  * the one-line condition added to same_para is wrapped and commented
    (same operands, same evaluation order);
  * the Texinfo text now states the default of 70, matching the code.

NOTE(review): this file was recovered from a copy whose line breaks had been
collapsed.  Line breaks were rebuilt from the hunk line counts; leading
whitespace in context lines is a reconstruction, so apply with
"patch -p1 -l".  The four Texinfo directive lines of the new entry were
garbled in that copy and are rebuilt after the neighbouring entry for the
tagged-paragraph option; confirm against the original posting.

--- textutils-2.0.12/src/fmt.c.orig	Fri Mar 16 14:50:32 2001
+++ textutils-2.0.12/src/fmt.c	Fri Mar 16 14:43:26 2001
@@ -166,6 +166,7 @@
 static void put_line PARAMS ((WORD *w, int indent));
 static void put_word PARAMS ((WORD *w));
 static void put_space PARAMS ((int space));
+static bool fix_avg PARAMS ((void));
 
 /* The name this program was run with.  */
 const char *program_name;
@@ -192,6 +193,14 @@
    longer than this will each comprise a single word.  */
 static int max_width;
 
+/* If TRUE, attempt to determine new paragraphs based on short final
+   lines */
+static bool new_split;
+
+/* User-supplied percentage of the average line width used to figure
+   lines that should end a paragraph for the new_split option */
+static int percentage = 70;
+
 /* Values derived from the option values.  */
 
 /* The length of prefix minus leading space.  */
@@ -262,6 +271,19 @@
    paragraphs chosen by fmt_paragraph().  */
 static int last_line_length;
 
+/* The number of characters in the current line of input */
+static int cur_line_length;
+
+/* The number of characters in the last line of input */
+static int last_cur_line_length;
+
+/* The number of characters in the input as read so far */
+static int total_line_length = 0;
+
+/* The number of lines of input read */
+static int num_lines = 0;
+
+
 void
 usage (int status)
 {
@@ -277,6 +299,7 @@
 \n\
 Mandatory arguments to long options are mandatory for short options too.\n\
   -c, --crown-margin        preserve indentation of first two lines\n\
+  -n, --new-split[=%WIDTH]  base paragraphs on length of end lines\n\
   -p, --prefix=STRING       combine only lines having STRING as prefix\n\
   -s, --split-only          split long lines, but do not refill\n\
   -t, --tagged-paragraph    indentation of first line different from second\n\
@@ -297,6 +320,7 @@
 static const struct option long_options[] =
 {
   {"crown-margin", no_argument, NULL, 'c'},
+  {"new-split", optional_argument, NULL, 'n'},
   {"prefix", required_argument, NULL, 'p'},
   {"split-only", no_argument, NULL, 's'},
   {"tagged-paragraph", no_argument, NULL, 't'},
@@ -319,7 +343,7 @@
 
   atexit (close_stdout);
 
-  crown = tagged = split = uniform = FALSE;
+  crown = tagged = split = uniform = new_split = FALSE;
   max_width = WIDTH;
   prefix = "";
   prefix_length = prefix_lead_space = prefix_full_length = 0;
@@ -340,7 +364,7 @@
       argc--;
     }
 
-  while ((optchar = getopt_long (argc, argv, "0123456789cstuw:p:",
+  while ((optchar = getopt_long (argc, argv, "0123456789cn::stuw:p:",
                                  long_options, NULL))
          != -1)
     switch (optchar)
@@ -378,6 +402,20 @@
         }
         break;
 
+      case 'n':
+        {
+          long int tmp_long;
+          new_split = TRUE;
+          if (optarg != NULL) {
+            if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
+                || tmp_long <= 0 || tmp_long > 99)
+              error (EXIT_FAILURE, 0, _("invalid percentage: `%s'"),
+                     optarg);
+            percentage = (int) tmp_long;
+          }
+        }
+        break;
+
       case 'p':
         set_prefix (optarg);
         break;
@@ -479,6 +517,9 @@
   last_line_length = 0;
   c = next_char;
 
+  cur_line_length = 0;
+  last_cur_line_length = -1;
+
   /* Scan (and copy) blank lines, and lines not introduced by the prefix.  */
 
   while (c == '\n' || c == EOF
@@ -514,6 +555,10 @@
           other_indent = in_column;
           do
             {                   /* for each line till the end of the para */
+              num_lines++;
+              total_line_length += cur_line_length;
+              last_cur_line_length = cur_line_length;
+              cur_line_length = 0;
               c = get_line (f, c);
             }
           while (same_para (c) && in_column == other_indent);
@@ -528,6 +573,10 @@
           other_indent = in_column;
           do
             {                   /* for each line till the end of the para */
+              num_lines++;
+              total_line_length += cur_line_length;
+              last_cur_line_length = cur_line_length;
+              cur_line_length = 0;
               c = get_line (f, c);
             }
           while (same_para (c) && in_column == other_indent);
@@ -545,7 +594,13 @@
     {
       other_indent = first_indent;
       while (same_para (c) && in_column == other_indent)
-        c = get_line (f, c);
+        {
+          num_lines++;
+          total_line_length += cur_line_length;
+          last_cur_line_length = cur_line_length;
+          cur_line_length = 0;
+          c = get_line (f, c);
+        }
     }
   (word_limit - 1)->period = (word_limit - 1)->final = TRUE;
   next_char = c;
@@ -574,6 +629,7 @@
     {
       putchar (c);
       c = getc (f);
+      cur_line_length++;
     }
   return c;
 }
@@ -587,9 +643,36 @@
 {
   return (next_prefix_indent == prefix_indent
           && in_column >= next_prefix_indent + prefix_full_length
+          /* Under --new-split the line just read must also be longer than
+             PERCENTAGE percent of the average tallied line length.  When it
+             is not, and a line of this paragraph has already been tallied,
+             fix_avg () removes that tally; it always yields FALSE.  */
+          && (new_split
+              ? (total_line_length == 0
+                 || (last_cur_line_length == -1
+                     && num_lines > 0
+                     && (cur_line_length * 100
+                         > (total_line_length / num_lines) * percentage))
+                 || (last_cur_line_length != -1
+                     && num_lines > 0
+                     && ((cur_line_length * 100
+                          > (total_line_length / num_lines) * percentage)
+                         || fix_avg ())))
+              : TRUE)
           && c != '\n' && c != EOF);
 }
 
+/* Remove the most recently tallied input line from the running totals
+   used for the --new-split average.  Always returns FALSE so that it
+   can be called from within the condition in same_para.  */
+static bool
+fix_avg (void)
+{
+  --num_lines;
+  total_line_length -= last_cur_line_length;
+  return FALSE;
+}
+
 /* Read a line from input file F, given first non-blank character C
    after the prefix, and the following indent, and break it into words.
    A word is a maximal non-empty string of non-white characters.  A word
@@ -620,6 +703,7 @@
             flush_paragraph ();
           *wptr++ = c;
           c = getc (f);
+          cur_line_length++;
         }
       while (c != EOF && !ISSPACE (c));
       in_column += word_limit->length = wptr - word_limit->text;
@@ -656,6 +740,7 @@
 
   in_column = 0;
   c = get_space (f, getc (f));
+  cur_line_length++;
   if (prefix_length == 0)
     next_prefix_indent = prefix_lead_space < in_column ?
       prefix_lead_space : in_column;
@@ -668,6 +753,7 @@
             return c;
           in_column++;
           c = getc (f);
+          cur_line_length++;
         }
       c = get_space (f, c);
     }
@@ -692,6 +778,7 @@
       else
         return c;
       c = getc (f);
+      cur_line_length++;
     }
 }
 
--- textutils-2.0.12/doc/textutils.texi.orig	Fri Mar 16 14:55:31 2001
+++ textutils-2.0.12/doc/textutils.texi	Fri Mar 16 14:42:16 2001
@@ -885,6 +885,23 @@
 lines within a paragraph, and align the left margin of each subsequent
 line with that of the second line.
 
+@item -n
+@itemx --new-split
+@opindex -n
+@opindex --new-split
+Determine end of paragraph based on line lengths.  By default, @code{fmt}
+determines where new paragraphs begin based on blank lines.  This option
+adds the ability for @code{fmt} to decide that the end of a paragraph has
+been reached by noticing that its last line is significantly shorter than
+the average line.  The portion of average that a line must be less than in
+order to be determined a final line defaults to 70%, but this can be changed
+by passing an argument to the option in the form of 100 times the fraction.
+That is, 90% would be signified by 90, not 0.9.  This feature only works for
+already formatted text, and can still fail often, but this problem occurs
+often enough that even a rudimentary start can be helpful.  Also, the average
+line length is determined as the file is parsed, so irregularities early in
+the file can have more significant effects than those later on.
+
 @item -t
 @itemx --tagged-paragraph
 @opindex -t