texinfo/tp/Texinfo/Convert Text.pm Unicode.pm

texinfo-commits

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

texinfo/tp/Texinfo/Convert Text.pm Unicode.pm

From:	Patrice Dumas
Subject:	texinfo/tp/Texinfo/Convert Text.pm Unicode.pm
Date:	Thu, 29 Sep 2011 14:36:30 +0000

CVSROOT:        /sources/texinfo
Module name:    texinfo
Changes by:     Patrice Dumas <pertusus>        11/09/29 14:36:30

Modified files:
        tp/Texinfo/Convert: Text.pm Unicode.pm 

Log message:
        Move eight_bit_and_unicode_point to Texinfo::Convert::Unicode.

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Text.pm?cvsroot=texinfo&r1=1.62&r2=1.63
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Unicode.pm?cvsroot=texinfo&r1=1.17&r2=1.18

Patches:
Index: Text.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Text.pm,v
retrieving revision 1.62
retrieving revision 1.63
diff -u -b -r1.62 -r1.63
--- Text.pm     29 Sep 2011 14:22:21 -0000      1.62
+++ Text.pm     29 Sep 2011 14:36:29 -0000      1.63
@@ -139,7 +139,6 @@
 
 my %accent_commands = %Texinfo::Common::accent_commands;
 my %no_brace_commands = %Texinfo::Common::no_brace_commands;
-my %unicode_to_eight_bit = %Texinfo::Convert::Unicode::unicode_to_eight_bit;
 
 our %formatting_misc_commands;
 foreach my $command ('verbatiminclude', 'sp', 'center', 'exdent', 
@@ -172,26 +171,6 @@
           $innermost_accent, $stack);
 }
 
-# return the 8 bit, if it exists, and the unicode codepoint
-sub _eight_bit_and_unicode_point($$)
-{
-  my $char = shift;
-  my $encoding_map_name = shift;
-  my ($eight_bit, $codepoint);
-  if (ord($char) <= 128) { 
-    # 7bit ascii characters, the same in every 8bit encodings
-    $eight_bit = uc(sprintf("%02x",ord($char)));
-    $codepoint = uc(sprintf("%04x",ord($char)));
-  } elsif (ord($char) <= hex(0xFFFF)) {
-    $codepoint = uc(sprintf("%04x",ord($char)));
-    if (exists($unicode_to_eight_bit{$encoding_map_name}->{$codepoint})) {
-     $eight_bit 
-         = $unicode_to_eight_bit{$encoding_map_name}->{$codepoint};
-    }
-  }
-  return ($eight_bit, $codepoint);
-}
-
 sub eight_bit_accents($$$;$)
 {
   my $current = shift;
@@ -252,21 +231,21 @@
     }
   }
 
-  my $encoding_map_name 
-       = $Texinfo::Encoding::eight_bit_encoding_aliases{$encoding};
   # At this point we have the utf8 encoded results for the accent
   # commands stack, with all the intermediate results.
   # For each one we'll check if it is possible to encode it in the 
   # current eight bit output encoding table
   my ($eight_bit, $dummy) 
-     = _eight_bit_and_unicode_point($text, $encoding_map_name);
+     = Texinfo::Convert::Unicode::eight_bit_and_unicode_point($text, 
+                                                            $encoding);
   my $eight_bit_command_index = -1;
   foreach my $partial_result (@results_stack) {
     my $char = $partial_result->[0];
     last if (!defined($char));
 
-    my ($new_eight_bit, $new_codepoint) = _eight_bit_and_unicode_point($char,
-                                                           $encoding_map_name);
+    my ($new_eight_bit, $new_codepoint) 
+      = Texinfo::Convert::Unicode::eight_bit_and_unicode_point($char,
+                                                              $encoding);
     if ($debug) {
       my $eight_bit_txt = 'undef';
       $eight_bit_txt = $eight_bit if (defined($eight_bit));

Index: Unicode.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Unicode.pm,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -b -r1.17 -r1.18
--- Unicode.pm  29 Sep 2011 14:22:21 -0000      1.17
+++ Unicode.pm  29 Sep 2011 14:36:30 -0000      1.18
@@ -610,7 +610,7 @@
   }
 }
 
-our %unicode_to_eight_bit = (
+my %unicode_to_eight_bit = (
    'iso8859_1' => {
       '00A0' => 'A0',
       '00A1' => 'A1',
@@ -1222,6 +1222,32 @@
   return Unicode::Normalize::NFC($text);
 }
 
+# return the 8 bit, if it exists, and the unicode codepoint
+sub eight_bit_and_unicode_point($$)
+{
+  my $char = shift;
+  my $encoding = shift;
+
+  my $encoding_map_name
+   = $Texinfo::Encoding::eight_bit_encoding_aliases{$encoding};
+  my ($eight_bit, $codepoint);
+  if (ord($char) <= 128) {
+    # 7bit ascii characters, the same in every 8bit encodings
+    $eight_bit = uc(sprintf("%02x",ord($char)));
+    $codepoint = uc(sprintf("%04x",ord($char)));
+  } elsif (ord($char) <= hex(0xFFFF)) {
+    $codepoint = uc(sprintf("%04x",ord($char)));
+    if (exists($unicode_to_eight_bit{$encoding_map_name}->{$codepoint})) {
+     $eight_bit
+         = $unicode_to_eight_bit{$encoding_map_name}->{$codepoint};
+    }
+  }
+  return ($eight_bit, $codepoint);
+}
+
+
+# returns the unicode for a command with brace and no arg
+# if it is known that it is present for the encoding
 sub unicode_for_brace_no_arg_command($$) {
   my $command = shift;
   my $encoding = shift;

[Prev in Thread]

Current Thread

[Next in Thread]

texinfo/tp/Texinfo/Convert Text.pm Unicode.pm, Patrice Dumas <=

Prev by Date: texinfo/tp/Texinfo/Convert HTML.pm Text.pm Unic...
Next by Date: texinfo/tp Texinfo/Common.pm Texinfo/Convert/Co...
Previous by thread: texinfo/tp/Texinfo/Convert HTML.pm Text.pm Unic...
Next by thread: texinfo/tp Texinfo/Common.pm Texinfo/Convert/Co...
Index(es):
- Date
- Thread