texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

texinfo/tp/Texinfo/Convert Text.pm


From: Patrice Dumas
Subject: texinfo/tp/Texinfo/Convert Text.pm
Date: Thu, 29 Sep 2011 16:23:35 +0000

CVSROOT:        /sources/texinfo
Module name:    texinfo
Changes by:     Patrice Dumas <pertusus>        11/09/29 16:23:35

Modified files:
        tp/Texinfo/Convert: Text.pm 

Log message:
        Try to improve the readability of the eight_bit_accents function.

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Text.pm?cvsroot=texinfo&r1=1.65&r2=1.66

Patches:
Index: Text.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Text.pm,v
retrieving revision 1.65
retrieving revision 1.66
diff -u -b -r1.65 -r1.66
--- Text.pm     29 Sep 2011 15:37:00 -0000      1.65
+++ Text.pm     29 Sep 2011 16:23:34 -0000      1.66
@@ -164,6 +164,9 @@
   my $in_upper_case = shift;
   my ($contents, $stack)
       = Texinfo::Common::find_innermost_accent_contents($current);
+
+  # FIXME shouldn't it be better to format the innermost contents with 
+  # a converter, if present?
   my $options = {};
   $options->{'enabled_encoding'} = $encoding if (defined($encoding));
   $options->{'sc'} = $in_upper_case if (defined($in_upper_case));
@@ -181,21 +184,19 @@
   my $debug;
   #$debug = 1;
 
-  # FIXME shouldn't it be better to format the innermost contents with 
-  # a converter, if present?
-  my ($text, $stack) 
+  my ($unicode_formatted, $stack) 
     = _find_innermost_accent($current, $encoding, $in_upper_case);
+  my $result = $unicode_formatted;
 
   if ($debug) {
-    print STDERR "stack: ".join('|', map {$_->{'cmdname'}} @$stack)."\n";
+    print STDERR "STACK: ".join('|', map {$_->{'cmdname'}} @$stack)."\n";
   }
 
   # accents are formatted and the intermediate results are kept, such
   # that we can return the maximum of multiaccented letters that can be
   # rendered with a given eight bit formatting.  undef is stored when 
   # there is no corresponding unicode anymore.
-  my @results_stack;
-  my $unicode_formatted = $text;
+  my @results_stack = ([$unicode_formatted, undef]);
 
   while (@$stack) {
     if (defined($unicode_formatted)) {
@@ -211,11 +212,13 @@
   if ($debug) {
     print STDERR "PARTIAL_RESULTS_STACK:\n";
     foreach my $partial_result (@results_stack) {
+      my $command = 'TEXT';
+      $command = $partial_result->[1]->{'cmdname'} if ($partial_result->[1]);
       if (defined($partial_result->[0])) {
         print STDERR "   -> ".Encode::encode('utf8', $partial_result->[0])
-                            ."|$partial_result->[1]->{'cmdname'}\n";
+                            ."|$command\n";
       } else {
-        print STDERR "   -> NO UTF8 |$partial_result->[1]->{'cmdname'}\n";
+        print STDERR "   -> NO UTF8 |$command\n";
       }
     }
   }
@@ -223,23 +226,24 @@
   # At this point we have the utf8 encoded results for the accent
   # commands stack, with all the intermediate results.
   # For each one we'll check if it is possible to encode it in the 
-  # current eight bit output encoding table
-  my ($eight_bit, $dummy) 
-     = Texinfo::Convert::Unicode::eight_bit_and_unicode_point($text, 
-                                                            $encoding);
-  my $eight_bit_command_index = -1;
-  foreach my $partial_result (@results_stack) {
-    my $char = $partial_result->[0];
+  # current eight bit output encoding table and, if so set the result
+  # to the character.
+
+  my $eight_bit = '';
+
+  while (@results_stack) {
+    my $char = $results_stack[0]->[0];
     last if (!defined($char));
 
     my ($new_eight_bit, $new_codepoint) 
       = Texinfo::Convert::Unicode::eight_bit_and_unicode_point($char,
                                                               $encoding);
     if ($debug) {
-      my $eight_bit_txt = 'undef';
-      $eight_bit_txt = $eight_bit if (defined($eight_bit));
+      my $command = 'TEXT';
+      $command = $results_stack[0]->[1]->{'cmdname'} 
+        if ($results_stack[0]->[1]);
       print STDERR "" . Encode::encode('utf8', $char) 
-        . " ($partial_result->[1]->{'cmdname'}), new_codepoint: $new_codepoint 8bit: $new_eight_bit old:$eight_bit_txt\n";
+        . " ($command) new_codepoint: $new_codepoint 8bit: $new_eight_bit old: $eight_bit\n";
     }
 
     # no corresponding eight bit character found for a composed character
@@ -262,23 +266,22 @@
     #    of the string is unchanged. This, for example, happens for 
     #    @ubaraccent{a} since there is no composed accent with a and an 
     #    underbar.
-    last if (($new_eight_bit eq $eight_bit)
-       and !($partial_result->[1]->{'cmdname'} eq 'dotless' and $char eq 'i'));
+    last if ($new_eight_bit eq $eight_bit
+             and !($results_stack[0]->[1]->{'cmdname'} eq 'dotless' 
+                   and $char eq 'i'));
+    $result = $results_stack[0]->[0];
     $eight_bit = $new_eight_bit;
-    $eight_bit_command_index++;
+    shift @results_stack;
   }
+
   # handle the remaining accents, that have not been converted to 8bit
   # compatible unicode
-  my $result = $text;
-  $result = $results_stack[$eight_bit_command_index]->[0] 
-    if ($eight_bit_command_index > -1);
-  for (my $remaining_accents = $eight_bit_command_index+1; 
-          $remaining_accents <= $#results_stack; $remaining_accents++) {
+  shift @results_stack if (!defined($results_stack[0]->[1]));
+  while (@results_stack) {
     $result = &$convert_accent($result, 
-                               $results_stack[$remaining_accents]->[1],
+                               $results_stack[0]->[1],
                                $in_upper_case);
-    print STDERR "REMAINING($remaining_accents) "
-       .Encode::encode('utf8', $result)."\n" if ($debug);
+    shift @results_stack;
   }
 
   # An important remark is that the final conversion to 8bit is left to



reply via email to

[Prev in Thread] Current Thread [Next in Thread]