[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[GNUnet-SVN] r588 - Extractor-docs/WWW GNUnet-docs/WWW GNUnet-docs/WWW/p
From: |
grothoff |
Subject: |
[GNUnet-SVN] r588 - Extractor-docs/WWW GNUnet-docs/WWW GNUnet-docs/WWW/papers doodle-docs/WWW i18nHTML-docs/WWW |
Date: |
Sun, 3 Apr 2005 23:51:26 -0700 (PDT) |
Author: grothoff
Date: 2005-04-03 23:51:09 -0700 (Sun, 03 Apr 2005)
New Revision: 588
Added:
GNUnet-docs/WWW/papers/CameraReady_174.pdf
Modified:
Extractor-docs/WWW/commitMassTranslation.php
Extractor-docs/WWW/commitTranslation.php
Extractor-docs/WWW/editor.php
Extractor-docs/WWW/html_header.php3
Extractor-docs/WWW/i18nhtml.inc
Extractor-docs/WWW/i18nhtml_config.inc
Extractor-docs/WWW/index.php
Extractor-docs/WWW/translate.php
Extractor-docs/WWW/vote.php
GNUnet-docs/WWW/html_header.php3
GNUnet-docs/WWW/i18nhtml.inc
GNUnet-docs/WWW/i18nhtml_config.inc
doodle-docs/WWW/commitMassTranslation.php
doodle-docs/WWW/commitTranslation.php
doodle-docs/WWW/editor.php
doodle-docs/WWW/i18nhtml.inc
doodle-docs/WWW/i18nhtml_config.inc
doodle-docs/WWW/index.php
doodle-docs/WWW/translate.php
doodle-docs/WWW/vote.php
i18nHTML-docs/WWW/commitMassTranslation.php
i18nHTML-docs/WWW/commitTranslation.php
i18nHTML-docs/WWW/editor.php
i18nHTML-docs/WWW/i18nhtml.inc
i18nHTML-docs/WWW/i18nhtml_config.inc
i18nHTML-docs/WWW/index.php
i18nHTML-docs/WWW/start.php
i18nHTML-docs/WWW/status.php
i18nHTML-docs/WWW/translate.php
i18nHTML-docs/WWW/vote.php
Log:
i18nHTML update -- keep fingers crossed
Modified: Extractor-docs/WWW/commitMassTranslation.php
===================================================================
--- Extractor-docs/WWW/commitMassTranslation.php 2005-04-04 06:47:24 UTC
(rev 587)
+++ Extractor-docs/WWW/commitMassTranslation.php 2005-04-04 06:51:09 UTC
(rev 588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -19,22 +19,20 @@
*/
include("i18nhtml.inc");
if (!$connection) {
- echo "<html><head><title>WWW translation: commit</title></head><body>";
echo "Database is down. Cannot edit translations.";
- echo "</body></html>";
die();
}
if ($xlang == "English") {
- echo "<html><head><title>WWW translation: commit</title></head><body>";
W("Translating to English currently not allowed.\n");
- echo "</body></html>";
die();
}
-echo "<html><head><title>WWW translation: commit</title></head><body>";
+echo "<html><head>";
+TITLE("WWW translation: commit");
+echo "</head><body>";
W("Processing translations...");
P();
$done = 0;
-foreach($_GET as $dec=>$val) {
+foreach($_POST as $dec=>$val) {
if ($val == "")
continue;
if ( ($dec == "xlang") || ($dec == "start") )
@@ -48,7 +46,7 @@
$num--;
$row = mysql_fetch_array($result);
if ($dec == bin2hex(md5(urldecode($row["c"])))) {
- $enc = $row["c"];
+ $enc = mysql_real_escape_string($row["c"]);
break;
}
}
@@ -60,22 +58,30 @@
}
$query = "DELETE FROM pending WHERE lang=\"$lang\" AND c=\"$enc\"";
mysql_query($query, $connection);
- //$t = urlencode($val);
- $t = urlencode($val);
- // $t = urlencode(htmlentities($val, ENT_QUOTES, $charset));
+ $t = mysql_real_escape_string(to_unicode($val));
$query = "SELECT ranking FROM map WHERE name=\"$enc\" AND lang=\"$lang\" AND
translation=\"$t\"";
$result = mysql_query($query, $connection);
$num = 0;
if ($result)
$num = mysql_numrows($result);
if ($num == 0) {
- $query = "INSERT INTO map VALUES(\"$enc\", \"$lang\", \"$t\", 1, \"" .
$_SERVER['REMOTE_ADDR'] . "\");";
- mysql_query($query, $connection);
- $done++;
- W("Storing translation for "%s" = "%s".",
- ARRAY(urldecode($enc),
- urldecode($t)));
- BR();
+ $txtCnt = count_chars(urldecode($enc), 1);
+ $tCnt = count_chars($t, 1);
+ if ($txtCnt[ord('%')] != $tCnt[ord('%')]) {
+ W("Commit '%s->%s' failed.", $enc, $t);
+ W("The number of percent signs in source text and translation do not
match.");
+ W("Note that you must preserve all %%s expressions unchanged.");
+ W("Also, a single displayed %% sign must be translated into two (%%%%)
such signs.");
+ P();
+ } else {
+ $query = "INSERT INTO map VALUES(\"$enc\", \"$lang\", \"$t\", 1, \"" .
$_SERVER['REMOTE_ADDR'] . "\");";
+ mysql_query($query, $connection);
+ $done++;
+ W("Storing translation for "%s" = "%s".",
+ ARRAY(urldecode($enc),
+ urldecode($t)));
+ BR();
+ }
}
}
P();
Modified: Extractor-docs/WWW/commitTranslation.php
===================================================================
--- Extractor-docs/WWW/commitTranslation.php 2005-04-04 06:47:24 UTC (rev
587)
+++ Extractor-docs/WWW/commitTranslation.php 2005-04-04 06:51:09 UTC (rev
588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -18,7 +18,7 @@
Boston, MA 02111-1307, USA.
*/
include("i18nhtml.inc");
-$text = $_REQUEST['text'];
+$text = mysql_real_escape_string($_REQUEST['text']);
$translation = $_REQUEST['translation'];
$back = $_REQUEST['back'];
if (!$connection) {
@@ -30,12 +30,7 @@
die();
}
-// note: $text is already urlencoded (by submitting via form) and html
compatible
-// ensure translation is stored in encoded form and html compatible
-// if (get_magic_quotes_gpc()) $translation = stripslashes($translation);
-
-// $t = urlencode(htmlentities($translation, ENT_QUOTES, $charset));
-$t = urlencode($translation);
+$t = mysql_real_escape_string(to_unicode($translation));
// check for identical translation
$query = "SELECT ranking FROM map WHERE name=\"$text\" AND lang=\"$lang\" AND
translation=\"$t\"";
$result = mysql_query($query, $connection);
@@ -44,22 +39,38 @@
$num = mysql_numrows($result);
}
if ($num > 0) {
- echo "<html><body>";
+ echo "<html><head>";
+ TITLE("Translation exists.");
+ echo "</head><body>";
W("Translation exists.");
extlink($back, "Back...");
generateFooter();
echo "</body></html>";
} else {
- // if (!get_magic_quotes_gpc()) $t = addslashes($t); // ensure escaped
before adding to DB
- $query = "INSERT INTO map VALUES(\"$text\", \"$lang\", \"$t\", 1, \"" .
$_SERVER['REMOTE_ADDR'] . "\");";
- $result = mysql_query($query, $connection);
- if ($result) {
- header("Location: " . $back); /* Redirect browser */
+ $txtCnt = count_chars(urldecode($text), 1);
+ $tCnt = count_chars($t, 1);
+ if ($txtCnt[ord('%')] != $tCnt[ord('%')]) {
+ echo "<html><head>";
+ TITLE("Commit failed.");
+ echo "</head><body>";
+ W("Commit failed.");
+ W("The number of percent signs in source text and translation do not
match.");
+ W("Note that you must preserve all %%s expressions unchanged.");
+ W("Also, a single displayed %% sign must be translated into two (%%%%)
such signs.");
+ echo "</body></html>";
} else {
- echo "<html><body>";
- W("Commit ('%s') failed: ", $query);
- echo mysql_error();
- echo "</body></html>";
+ $query = "INSERT INTO map VALUES(\"$text\", \"$lang\", \"$t\", 1, \"" .
$_SERVER['REMOTE_ADDR'] . "\");";
+ $result = mysql_query($query, $connection);
+ if ($result) {
+ header("Location: " . $back); /* Redirect browser */
+ } else {
+ echo "<html><head>";
+ TITLE("Commit failed.");
+ echo "</head><body>";
+ W("Commit ('%s') failed: ", $query);
+ echo mysql_error();
+ echo "</body></html>";
+ }
}
}
?>
\ No newline at end of file
Modified: Extractor-docs/WWW/editor.php
===================================================================
--- Extractor-docs/WWW/editor.php 2005-04-04 06:47:24 UTC (rev 587)
+++ Extractor-docs/WWW/editor.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -65,7 +65,7 @@
ARRAY($start, $end));
P();
- echo "<form action=\"" . $i18nHTMLbase . "commitMassTranslation.php\">";
+ echo "<form method=\"POST\" action=\"" . $i18nHTMLbase .
"commitMassTranslation.php\">";
echo "<input type=hidden name=\"xlang\" value=\"$xlang\">";
$endp = $end + 1;
echo "<input type=hidden name=\"start\" value=\"$endp\">";
Modified: Extractor-docs/WWW/html_header.php3
===================================================================
--- Extractor-docs/WWW/html_header.php3 2005-04-04 06:47:24 UTC (rev 587)
+++ Extractor-docs/WWW/html_header.php3 2005-04-04 06:51:09 UTC (rev 588)
@@ -3,9 +3,9 @@
DOCTYPE("HTML", "Transitional");
echo "<html><head>\n";
if ($title) {
- echo "<title>";
- TRANSLATE($title);
- echo "</title>";
+ TITLE($title);
+ } else {
+ TITLE("libextractor");
}
if ($description) {
echo "<meta name=\"description\" content=\"";
@@ -14,7 +14,7 @@
}
if ($author) {
echo "<meta name=\"author\" content=\"$author\">\n";
- echo "<meta name=\"rights\" content=\"(C) 2002,2003,2004 by $author\">\n";
+ echo "<meta name=\"rights\" content=\"(C) 2002,2003,2004,2005 by
$author\">\n";
}
if ($date)
echo "<meta name=\"date\" content=\"$date\">\n";
Modified: Extractor-docs/WWW/i18nhtml.inc
===================================================================
--- Extractor-docs/WWW/i18nhtml.inc 2005-04-04 06:47:24 UTC (rev 587)
+++ Extractor-docs/WWW/i18nhtml.inc 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff and other contributing authors.
+ (C) 2003, 2004, 2005 Christian Grothoff and other contributing authors.
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -27,6 +27,7 @@
// $editor can be set to create a translation
// tag even if a translation is already available.
// there is currently no security.
+ //
// An "_" is used for functions that return the
// translated string instead of printing it directly.
// These functions are used for "%s" printing with W().
@@ -39,6 +40,8 @@
// obtain user db specific configuration parameters
include("i18nhtml_config.inc");
+header("Content-type: text/html; charset=utf-8");
+
// establish default connection to database server
$connection = @mysql_connect($i18nHTMLsqlServer,
$i18nHTMLsqlUser,
@@ -119,6 +122,7 @@
if ($xlang)
$lang = $xlang;
$lang = ucfirst(strtolower($lang));
+$lang = mysql_real_escape_string($lang);
$editor = $_REQUEST['editor'];
@@ -241,6 +245,464 @@
}
}
+
+/**
+ * restore UTF-8 from HTML Unicode entities
+ *
+ * This function is triggered by the YACS handler during page
+ * rendering. It is aiming to transcode HTML Unicode entities
+ * (eg, &#8364;) back to actual UTF-8 encoding (eg, €).
+ *
+ * The input is first passed through utf8_encode(), which treats every
+ * byte as one ISO-8859-1 character.
+ * NOTE(review): text that already holds multi-byte UTF-8 would be encoded
+ * a second time by that step -- confirm what callers actually pass.
+ *
+ * Decimal entities up to U+10FFFF are converted. Surrogate code points
+ * (U+D800 to U+DFFF) and values above U+10FFFF have no valid UTF-8 form
+ * and are left in the text as entities, exactly as they were written.
+ *
+ * @param string a string with a mix of UTF-8 and of HTML Unicode entities
+ * @return an UTF-8 string
+ */
+function from_unicode($text) {
+  // translate extended ISO8859-1 chars, if any
+  $text = utf8_encode($text);
+
+  // translate Unicode entities; with PREG_SPLIT_DELIM_CAPTURE the pieces
+  // alternate between plain text (even positions) and the captured
+  // decimal code point of an entity (odd positions)
+  $areas = preg_split('/&#(\d+?);/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+  $text = '';
+  foreach ($areas as $index => $area) {
+
+    // plain text around the entities
+    if ($index % 2 == 0) {
+      $text .= $area;
+      continue;
+    }
+
+    // get the integer value
+    $unicode = intval($area);
+
+    // one byte
+    if ($unicode < 0x80) {
+      $text .= chr($unicode);
+
+    // two bytes (110xxxxx 10xxxxxx)
+    } elseif ($unicode < 0x800) {
+      $text .= chr(0xC0 | ($unicode >> 6));
+      $text .= chr(0x80 | ($unicode & 0x3F));
+
+    // surrogates cannot be encoded in UTF-8, keep the entity as it is
+    } elseif ($unicode >= 0xD800 && $unicode <= 0xDFFF) {
+      $text .= '&#' . $area . ';';
+
+    // three bytes (1110xxxx 10xxxxxx 10xxxxxx)
+    } elseif ($unicode < 0x10000) {
+      $text .= chr(0xE0 | ($unicode >> 12));
+      $text .= chr(0x80 | (($unicode >> 6) & 0x3F));
+      $text .= chr(0x80 | ($unicode & 0x3F));
+
+    // four bytes (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } elseif ($unicode < 0x110000) {
+      $text .= chr(0xF0 | ($unicode >> 18));
+      $text .= chr(0x80 | (($unicode >> 12) & 0x3F));
+      $text .= chr(0x80 | (($unicode >> 6) & 0x3F));
+      $text .= chr(0x80 | ($unicode & 0x3F));
+
+    // beyond the Unicode range, keep it as it is...
+    } else {
+      $text .= '&#' . $area . ';';
+    }
+  }
+
+  // the updated string
+  return $text;
+}
+
+
+/**
+ * transcode unicode entities to/from HTML entities
+ *
+ * Converts between the numeric form (eg, &#233;) and the named form
+ * (eg, &eacute;) of the character entities listed in the table below.
+ * Text that contains none of the listed entities is returned unchanged.
+ *
+ * For example, w.bloggar posts pages using HTML entities.
+ * If you have to modify these pages using web forms, you would like to get UTF-8 instead.
+ *
+ * @link http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple Character Entity Chart
+ *
+ * @param string the string to be transcoded
+ * @param boolean TRUE to transcode to Unicode, FALSE to transcode to HTML
+ * @return a transcoded string
+ */
+function transcode($input, $to_unicode=TRUE) {
+
+  // initialize tables only once
+  static $html_entities, $unicode_entities;
+  if(!is_array($html_entities)) {
+
+    // numeric (Unicode) entity => named (HTML) entity, in numerical order
+    $codes = array(
+      '&#160;' => '&nbsp;', // non-breaking space
+      '&#161;' => '&iexcl;', // inverted exclamation mark
+      '&#162;' => '&cent;', // cent sign
+      '&#163;' => '&pound;', // pound sign
+      '&#164;' => '&curren;', // currency sign
+      '&#165;' => '&yen;', // yen sign
+      '&#166;' => '&brvbar;', // broken bar
+      '&#167;' => '&sect;', // section sign
+      '&#168;' => '&uml;', // diaeresis
+      '&#169;' => '&copy;', // copyright sign
+      '&#170;' => '&ordf;', // feminine ordinal indicator
+      '&#171;' => '&laquo;', // left-pointing double angle quotation mark
+      '&#172;' => '&not;', // not sign
+      '&#173;' => '&shy;', // soft hyphen
+      '&#174;' => '&reg;', // registered sign
+      '&#175;' => '&macr;', // macron
+      '&#176;' => '&deg;', // degree sign
+      '&#177;' => '&plusmn;', // plus-minus sign
+      '&#178;' => '&sup2;', // superscript two
+      '&#179;' => '&sup3;', // superscript three
+      '&#180;' => '&acute;', // acute accent
+      '&#181;' => '&micro;', // micro sign
+      '&#182;' => '&para;', // pilcrow sign
+      '&#183;' => '&middot;', // middle dot
+      '&#184;' => '&cedil;', // cedilla
+      '&#185;' => '&sup1;', // superscript one
+      '&#186;' => '&ordm;', // masculine ordinal indicator
+      '&#187;' => '&raquo;', // right-pointing double angle quotation mark
+      '&#188;' => '&frac14;', // vulgar fraction one quarter
+      '&#189;' => '&frac12;', // vulgar fraction one half
+      '&#190;' => '&frac34;', // vulgar fraction three quarters
+      '&#191;' => '&iquest;', // inverted question mark
+      '&#192;' => '&Agrave;', // latin capital letter A with grave
+      '&#193;' => '&Aacute;', // latin capital letter A with acute
+      '&#194;' => '&Acirc;', // latin capital letter A with circumflex
+      '&#195;' => '&Atilde;', // latin capital letter A with tilde
+      '&#196;' => '&Auml;', // latin capital letter A with diaeresis
+      '&#197;' => '&Aring;', // latin capital letter A with ring above
+      '&#198;' => '&AElig;', // latin capital letter AE
+      '&#199;' => '&Ccedil;', // latin capital letter C with cedilla
+      '&#200;' => '&Egrave;', // latin capital letter E with grave
+      '&#201;' => '&Eacute;', // latin capital letter E with acute
+      '&#202;' => '&Ecirc;', // latin capital letter E with circumflex
+      '&#203;' => '&Euml;', // latin capital letter E with diaeresis
+      '&#204;' => '&Igrave;', // latin capital letter I with grave
+      '&#205;' => '&Iacute;', // latin capital letter I with acute
+      '&#206;' => '&Icirc;', // latin capital letter I with circumflex
+      '&#207;' => '&Iuml;', // latin capital letter I with diaeresis
+      '&#208;' => '&ETH;', // latin capital letter ETH
+      '&#209;' => '&Ntilde;', // latin capital letter N with tilde
+      '&#210;' => '&Ograve;', // latin capital letter O with grave
+      '&#211;' => '&Oacute;', // latin capital letter O with acute
+      '&#212;' => '&Ocirc;', // latin capital letter O with circumflex
+      '&#213;' => '&Otilde;', // latin capital letter O with tilde
+      '&#214;' => '&Ouml;', // latin capital letter O with diaeresis
+      '&#215;' => '&times;', // multiplication sign
+      '&#216;' => '&Oslash;', // latin capital letter O with stroke
+      '&#217;' => '&Ugrave;', // latin capital letter U with grave
+      '&#218;' => '&Uacute;', // latin capital letter U with acute
+      '&#219;' => '&Ucirc;', // latin capital letter U with circumflex
+      '&#220;' => '&Uuml;', // latin capital letter U with diaeresis
+      '&#221;' => '&Yacute;', // latin capital letter Y with acute
+      '&#222;' => '&THORN;', // latin capital letter THORN
+      '&#223;' => '&szlig;', // latin small letter sharp s
+      '&#224;' => '&agrave;', // latin small letter a with grave
+      '&#225;' => '&aacute;', // latin small letter a with acute
+      '&#226;' => '&acirc;', // latin small letter a with circumflex
+      '&#227;' => '&atilde;', // latin small letter a with tilde
+      '&#228;' => '&auml;', // latin small letter a with diaeresis
+      '&#229;' => '&aring;', // latin small letter a with ring above
+      '&#230;' => '&aelig;', // latin small letter ae
+      '&#231;' => '&ccedil;', // latin small letter c with cedilla
+      '&#232;' => '&egrave;', // latin small letter e with grave
+      '&#233;' => '&eacute;', // latin small letter e with acute
+      '&#234;' => '&ecirc;', // latin small letter e with circumflex
+      '&#235;' => '&euml;', // latin small letter e with diaeresis
+      '&#236;' => '&igrave;', // latin small letter i with grave
+      '&#237;' => '&iacute;', // latin small letter i with acute
+      '&#238;' => '&icirc;', // latin small letter i with circumflex
+      '&#239;' => '&iuml;', // latin small letter i with diaeresis
+      '&#240;' => '&eth;', // latin small letter eth
+      '&#241;' => '&ntilde;', // latin small letter n with tilde
+      '&#242;' => '&ograve;', // latin small letter o with grave
+      '&#243;' => '&oacute;', // latin small letter o with acute
+      '&#244;' => '&ocirc;', // latin small letter o with circumflex
+      '&#245;' => '&otilde;', // latin small letter o with tilde
+      '&#246;' => '&ouml;', // latin small letter o with diaeresis
+      '&#247;' => '&divide;', // division sign
+      '&#248;' => '&oslash;', // latin small letter o with stroke
+      '&#249;' => '&ugrave;', // latin small letter u with grave
+      '&#250;' => '&uacute;', // latin small letter u with acute
+      '&#251;' => '&ucirc;', // latin small letter u with circumflex
+      '&#252;' => '&uuml;', // latin small letter u with diaeresis
+      '&#253;' => '&yacute;', // latin small letter y with acute
+      '&#254;' => '&thorn;', // latin small letter thorn
+      '&#255;' => '&yuml;', // latin small letter y with diaeresis
+      '&#338;' => '&OElig;', // latin capital ligature OE
+      '&#339;' => '&oelig;', // latin small ligature oe
+      '&#352;' => '&Scaron;', // latin capital letter S with caron
+      '&#353;' => '&scaron;', // latin small letter s with caron
+      '&#376;' => '&Yuml;', // latin capital letter Y with diaeresis
+      '&#402;' => '&fnof;', // latin small f with hook
+      '&#710;' => '&circ;', // modifier letter circumflex accent
+      '&#732;' => '&tilde;', // small tilde
+      '&#913;' => '&Alpha;', // greek capital letter alpha
+      '&#914;' => '&Beta;', // greek capital letter beta
+      '&#915;' => '&Gamma;', // greek capital letter gamma
+      '&#916;' => '&Delta;', // greek capital letter delta
+      '&#917;' => '&Epsilon;', // greek capital letter epsilon
+      '&#918;' => '&Zeta;', // greek capital letter zeta
+      '&#919;' => '&Eta;', // greek capital letter eta
+      '&#920;' => '&Theta;', // greek capital letter theta
+      '&#921;' => '&Iota;', // greek capital letter iota
+      '&#922;' => '&Kappa;', // greek capital letter kappa
+      '&#923;' => '&Lambda;', // greek capital letter lambda
+      '&#924;' => '&Mu;', // greek capital letter mu
+      '&#925;' => '&Nu;', // greek capital letter nu
+      '&#926;' => '&Xi;', // greek capital letter xi
+      '&#927;' => '&Omicron;', // greek capital letter omicron
+      '&#928;' => '&Pi;', // greek capital letter pi
+      '&#929;' => '&Rho;', // greek capital letter rho
+      '&#931;' => '&Sigma;', // greek capital letter sigma
+      '&#932;' => '&Tau;', // greek capital letter tau
+      '&#933;' => '&Upsilon;', // greek capital letter upsilon
+      '&#934;' => '&Phi;', // greek capital letter phi
+      '&#935;' => '&Chi;', // greek capital letter chi
+      '&#936;' => '&Psi;', // greek capital letter psi
+      '&#937;' => '&Omega;', // greek capital letter omega
+      '&#945;' => '&alpha;', // greek small letter alpha
+      '&#946;' => '&beta;', // greek small letter beta
+      '&#947;' => '&gamma;', // greek small letter gamma
+      '&#948;' => '&delta;', // greek small letter delta
+      '&#949;' => '&epsilon;', // greek small letter epsilon
+      '&#950;' => '&zeta;', // greek small letter zeta
+      '&#951;' => '&eta;', // greek small letter eta
+      '&#952;' => '&theta;', // greek small letter theta
+      '&#953;' => '&iota;', // greek small letter iota
+      '&#954;' => '&kappa;', // greek small letter kappa
+      '&#955;' => '&lambda;', // greek small letter lambda
+      '&#956;' => '&mu;', // greek small letter mu
+      '&#957;' => '&nu;', // greek small letter nu
+      '&#958;' => '&xi;', // greek small letter xi
+      '&#959;' => '&omicron;', // greek small letter omicron
+      '&#960;' => '&pi;', // greek small letter pi
+      '&#961;' => '&rho;', // greek small letter rho
+      '&#962;' => '&sigmaf;', // greek small letter final sigma
+      '&#963;' => '&sigma;', // greek small letter sigma
+      '&#964;' => '&tau;', // greek small letter tau
+      '&#965;' => '&upsilon;', // greek small letter upsilon
+      '&#966;' => '&phi;', // greek small letter phi
+      '&#967;' => '&chi;', // greek small letter chi
+      '&#968;' => '&psi;', // greek small letter psi
+      '&#969;' => '&omega;', // greek small letter omega
+      '&#977;' => '&thetasym;', // greek small letter theta symbol
+      '&#978;' => '&upsih;', // greek upsilon with hook symbol
+      '&#982;' => '&piv;', // greek pi symbol
+      '&#8194;' => '&ensp;', // en space
+      '&#8195;' => '&emsp;', // em space
+      '&#8201;' => '&thinsp;', // thin space
+      '&#8204;' => '&zwnj;', // zero width non-joiner
+      '&#8205;' => '&zwj;', // zero width joiner
+      '&#8206;' => '&lrm;', // left-to-right mark
+      '&#8207;' => '&rlm;', // right-to-left mark
+      '&#8211;' => '&ndash;', // en dash
+      '&#8212;' => '&mdash;', // em dash
+      '&#8216;' => '&lsquo;', // left single quotation mark
+      '&#8217;' => '&rsquo;', // right single quotation mark
+      '&#8218;' => '&sbquo;', // single low-9 quotation mark
+      '&#8220;' => '&ldquo;', // left double quotation mark
+      '&#8221;' => '&rdquo;', // right double quotation mark
+      '&#8222;' => '&bdquo;', // double low-9 quotation mark
+      '&#8224;' => '&dagger;', // dagger
+      '&#8225;' => '&Dagger;', // double dagger
+      '&#8226;' => '&bull;', // bullet
+      '&#8230;' => '&hellip;', // horizontal ellipsis
+      '&#8240;' => '&permil;', // per mille sign
+      '&#8242;' => '&prime;', // prime, minutes
+      '&#8243;' => '&Prime;', // double prime
+      '&#8249;' => '&lsaquo;', // single left-pointing angle quotation mark
+      '&#8250;' => '&rsaquo;', // single right-pointing angle quotation mark
+      '&#8254;' => '&oline;', // overline
+      '&#8260;' => '&frasl;', // fraction slash
+      '&#8364;' => '&euro;', // euro sign
+      '&#8465;' => '&image;', // blackletter capital I
+      '&#8472;' => '&weierp;', // script capital P
+      '&#8476;' => '&real;', // blackletter capital R
+      '&#8482;' => '&trade;', // trade mark sign
+      '&#8501;' => '&alefsym;', // alef symbol
+      '&#8592;' => '&larr;', // leftwards arrow
+      '&#8593;' => '&uarr;', // upwards arrow
+      '&#8594;' => '&rarr;', // rightwards arrow
+      '&#8595;' => '&darr;', // downwards arrow
+      '&#8596;' => '&harr;', // left right arrow
+      '&#8629;' => '&crarr;', // downwards arrow with corner leftwards
+      '&#8656;' => '&lArr;', // leftwards double arrow
+      '&#8657;' => '&uArr;', // upwards double arrow
+      '&#8658;' => '&rArr;', // rightwards double arrow
+      '&#8659;' => '&dArr;', // downwards double arrow
+      '&#8660;' => '&hArr;', // left right double arrow
+      '&#8704;' => '&forall;', // for all
+      '&#8706;' => '&part;', // partial differential
+      '&#8707;' => '&exist;', // there exists
+      '&#8709;' => '&empty;', // empty set
+      '&#8711;' => '&nabla;', // nabla
+      '&#8712;' => '&isin;', // element of
+      '&#8713;' => '&notin;', // not an element of
+      '&#8715;' => '&ni;', // contains as member
+      '&#8719;' => '&prod;', // n-ary product
+      '&#8721;' => '&sum;', // n-ary summation
+      '&#8722;' => '&minus;', // minus sign
+      '&#8727;' => '&lowast;', // asterisk operator
+      '&#8730;' => '&radic;', // square root
+      '&#8733;' => '&prop;', // proportional to
+      '&#8734;' => '&infin;', // infinity
+      '&#8736;' => '&ang;', // angle
+      '&#8743;' => '&and;', // logical and
+      '&#8744;' => '&or;', // logical or
+      '&#8745;' => '&cap;', // intersection
+      '&#8746;' => '&cup;', // union
+      '&#8747;' => '&int;', // integral
+      '&#8756;' => '&there4;', // therefore
+      '&#8764;' => '&sim;', // tilde operator
+      '&#8773;' => '&cong;', // approximately equal to
+      '&#8776;' => '&asymp;', // almost equal to
+      '&#8800;' => '&ne;', // not equal to
+      '&#8801;' => '&equiv;', // identical to
+      '&#8804;' => '&le;', // less-than or equal to
+      '&#8805;' => '&ge;', // greater-than or equal to
+      '&#8834;' => '&sub;', // subset of
+      '&#8835;' => '&sup;', // superset of
+      '&#8836;' => '&nsub;', // not a subset of
+      '&#8838;' => '&sube;', // subset of or equal to
+      '&#8839;' => '&supe;', // superset of or equal to
+      '&#8853;' => '&oplus;', // circled plus
+      '&#8855;' => '&otimes;', // circled times
+      '&#8869;' => '&perp;', // up tack
+      '&#8901;' => '&sdot;', // dot operator
+      '&#8968;' => '&lceil;', // left ceiling
+      '&#8969;' => '&rceil;', // right ceiling
+      '&#8970;' => '&lfloor;', // left floor
+      '&#8971;' => '&rfloor;', // right floor
+      '&#9001;' => '&lang;', // left-pointing angle bracket
+      '&#9002;' => '&rang;', // right-pointing angle bracket
+      '&#9674;' => '&loz;', // lozenge
+      '&#9824;' => '&spades;', // black spade suit
+      '&#9827;' => '&clubs;', // black club suit
+      '&#9829;' => '&hearts;', // black heart suit
+      '&#9830;' => '&diams;' // black diamond suit
+    );
+
+    // split entities into two parallel lists for use in str_replace()
+    $unicode_entities = array();
+    $html_entities = array();
+    foreach($codes as $unicode_entity => $html_entity) {
+      $unicode_entities[] = $unicode_entity;
+      $html_entities[] = $html_entity;
+    }
+  }
+
+  // transcode HTML entities to Unicode
+  if($to_unicode)
+    return str_replace($html_entities, $unicode_entities, $input);
+
+  // transcode Unicode entities to HTML entities
+  else
+    return str_replace($unicode_entities, $html_entities, $input);
+}
+
+
+
+
+/**
+ * transcode multi-byte characters to HTML representations for Unicode
+ *
+ * This function is aiming to preserve Unicode characters through storage in a ISO-8859-1 compliant system.
+ *
+ * Every multi-byte UTF-8 character is transformed to its equivalent HTML numerical entity (eg, &#4568;)
+ * that may be handled safely by PHP and by MySQL.
+ *
+ * Of course, this solution does not allow for full-text search in the database and therefore, is not a
+ * definitive solution to internationalization issues.
+ * It does enable, however, practical use of Unicode to build pages in foreign languages.
+ *
+ * Also, this function transforms HTML entities into their equivalent Unicode entities.
+ * For example, w.bloggar posts pages using HTML entities.
+ * If you have to modify these pages using web forms, you would like to get UTF-8 instead.
+ *
+ * The sequence length is taken from the lead byte alone; the following bytes
+ * are not checked to be continuation bytes. Sequences that decode to zero
+ * (eg, C0 80) are stripped, and a sequence cut short by the end of the
+ * string is dropped.
+ *
+ * @link http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple Character Entity Chart
+ *
+ * @param string the original UTF-8 string
+ * @return a string acceptable in an ISO-8859-1 storage system (ie., PHP4 + MySQL 3)
+ */
+function to_unicode($input) {
+
+  // transcode HTML entities to Unicode entities
+  $input = transcode($input);
+
+  // scan the whole string
+  $output = '';
+  $index = 0;
+  $length = strlen($input);
+  while($index < $length) {
+
+    // look at one char
+    $char = ord($input[$index]);
+
+    // one byte (0xxxxxxx) is copied as it is
+    if($char < 0x80) {
+      $output .= $input[$index];
+      $index += 1;
+      continue;
+    }
+
+    // the lead byte tells the length of the sequence and carries the
+    // highest bits of the value
+
+    // two bytes (110xxxxx 10xxxxxx); bytes 0x80 to 0xBF also land here
+    if($char < 0xE0) {
+      $size = 2;
+      $value = $char % 0x20;
+
+    // three bytes (1110xxxx 10xxxxxx 10xxxxxx) example: euro sign = \xE2\x82\xAC -> &#8364;
+    } elseif($char < 0xF0) {
+      $size = 3;
+      $value = $char % 0x10;
+
+    // four bytes (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } elseif($char < 0xF8) {
+      $size = 4;
+      $value = $char % 0x08;
+
+    // five bytes (111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } elseif($char < 0xFC) {
+      $size = 5;
+      $value = $char % 0x04;
+
+    // six bytes (1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } else {
+      $size = 6;
+      $value = $char % 0x02;
+    }
+
+    // the string ends in the middle of the sequence: nothing left to decode
+    if($index + $size > $length)
+      break;
+
+    // every following byte contributes its six low bits, up to and
+    // including the last byte of the sequence
+    for($offset = 1; $offset < $size; $offset++)
+      $value = ($value * 0x40) + (ord($input[$index + $offset]) % 0x40);
+
+    // strip weird sequences (eg, C0 80 -> NUL)
+    if($value)
+      $output .= '&#' . $value . ';';
+    $index += $size;
+  }
+
+  // return the translated string
+  return $output;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
// returns either the translated string
// or the original string. Assumes we
// are passed the original string as occurs
@@ -264,8 +726,7 @@
if ($a == "")
return $a;
- // $a = htmlentities($a, ENT_QUOTES, $charset);
- $u = urlencode($a);
+ $u = mysql_real_escape_string(urlencode($a));
if (!$connection) {
// database not available, just print English
@@ -319,7 +780,7 @@
return fix($a); // just return English string
} else { // translation available
$row = mysql_fetch_array($result);
- return fix(urldecode($row["translation"]));
+ return $row["translation"];
}
}
@@ -361,7 +822,16 @@
}
function TITLE($a,$b="") {
- echo "<title>" . W_($a,$b) . "</title>\n";
+ global $lang;
+ global $languagecodes;
+ echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"
>";
+ echo "<title>" . TRANSLATE_($a,$b) . "</title>\n";
+ if (isset($languagecodes[$lang])) {
+ echo "<meta name=\"content-language\" content=\"" .
+ $languagecodes[$lang] . "\">";
+ echo "<meta name=\"language\" content=\"" .
+ $languagecodes[$lang] . "\">";
+ }
}
Modified: Extractor-docs/WWW/i18nhtml_config.inc
===================================================================
--- Extractor-docs/WWW/i18nhtml_config.inc 2005-04-04 06:47:24 UTC (rev
587)
+++ Extractor-docs/WWW/i18nhtml_config.inc 2005-04-04 06:51:09 UTC (rev
588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -24,12 +24,18 @@
$i18nHTMLrecordMode = 2; // 1: only missing, 2: everything, 0: disable
$i18nHTMLsqlServer = "localhost";
-$i18nHTMLsqlUser = "GNUnetWWW";
-$i18nHTMLsqlPass = "garlic";
+$i18nHTMLsqlUser = "i18nHTML";
+$i18nHTMLsqlPass = "pass";
-$i18nHTMLsqlDB = "translations"; // default is "translation"
+$i18nHTMLsqlDB = "translation"; // default is "translation"
$i18nHTMLbase = ""; // base directory prepended to i18nHTML php pages used in
links
$i18nHTMLmarker = "*"; // default value if never changed
+// Note that if you enable debug, the scripts may print
+// warnings even if everything is ok!
+$i18nHTMLdebug = 0; // 0 = no, 1 = yes
+// for selectively cloning a DB
+// $i18nHTMLclone = "/tmp/cloneFile.sql";
+
?>
Modified: Extractor-docs/WWW/index.php
===================================================================
--- Extractor-docs/WWW/index.php 2005-04-04 06:47:24 UTC (rev 587)
+++ Extractor-docs/WWW/index.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,108 +1,68 @@
<?php
-$title="libextractor - a simple library for keyword extraction";
-$description="a simple library for keyword extraction";
-$email="address@hidden";
-$keywords="keyword, extraction, mp3, html, pdf, images, jpeg, gif, ps, mime,
real, qt, asf, mpeg, avi, riff, tiff, summary, summaries, kbps, format,
mime-type, zip, elf, doc, ppt, xls, sha-1, md5, open office, sxw, dvi, id3,
id3v2, id3v2.3, id3v2.4, thumbnails";
-$author="Vids Samanta and Christian Grothoff";
-$page="home";
-include("html_header.php3");
-
-ANCHOR("about");
-H2("About libextractor");
-
-IMG("extractor_logo.png", "libextractor", "right", "136", "94", "0");
+include("i18nhtml.inc");
+DOCTYPE("HTML", "Transitional");
+echo "<html><head>\n";
+TITLE("i18nHTML - enabling collaborative webpage translation");
+echo "<meta name=\"description\" content=\"";
+TRANSLATE("i18nHTML is a collection of PHP scripts that allow visitors of a
webpage to help translating it.");
+echo "\">";
+?>
+<meta name="author" content="Christian Grothoff">
+<meta name="keywords"
content="i18n,HTML,PHP,translation,languages,mysql,database,internationalization,www,free,GNU,GPL">
+<meta name="robots" content="index,follow">
+<meta name="revisit-after" content="28 days">
+<meta name="publisher" content="Christian Grothoff">
+<meta name="date" content="2005-01-03">
+<meta name="rights" content="(C) 2004,2005 by Christian Grothoff">
+<meta http-equiv="expires" content="43200">
+<meta http-equiv="content-type" content="text/html">
+</head>
+<body>
+<?php
+generateLanguageBar();
+H1("i18nHTML");
+H2("About");
+W("i18nHTML is a collection of PHP files that can be used to write webpages
that visitors can translate into their respective native languages.");
+W("i18nHTML uses a database to match sentences from the webpage against
translations.");
+W("i18nHTML defines a set of PHP functions that generate either the translated
HTML sentences or the original (typically English) text with decorations that
allow users to provide translations.");
+W("i18nHTML requires the internationalized webpages to be written using the
provided PHP functions but does not constrain the page design in any way.");
+W("Webpages internationalized with i18nHTML can be updated without loosing
existing translations for sentences that were not changed.");
+W("Note that it is important that you use the i18nHTML <tt>TITLE</tt> command
in your documents in order to ensure that the character set and other meta-data
is set properly.");
P();
-W("libextractor is a library used to extract meta-data from files of arbitrary
type.");
-W("It is designed to use helper-libraries to perform the actual extraction,
and to be trivially extendable by linking against external extractors for
additional file types.");
-W("libextractor is part of the %s.",
- extlink_("http://www.gnu.org/", "GNU project"));
-//W("Our official GNU website can be found at %s.",
-//
extlink_("http://www.gnu.org/software/libextractor/","http://www.gnu.org/software/libextractor/"));
-W("libextractor can be downloaded from this site or the %s.",
- extlink_("http://www.gnu.org/prep/ftp.html","GNU mirrors"));
-
+H2("Download");
+W("You can find the latest version %s.",
+ extlink_("https://gnunet.org/i18nHTML/download/", "here"));
+W("The latest CVS version can be obtained using");
+PRE("$ svn checkout https://gnunet.org/svn/i18nHTML/");
+P();
+W("If you want to be notified about updates, subscribe to %s",
+ extlink_("http://freshmeat.net/projects/i18nHTML/", "i18nHTML on
freshmeat"));
P();
-W("The goal is to provide developers of file-sharing networks or WWW-indexing
bots with a universal library to obtain simple keywords to match against
queries.");
-W("libextractor contains a shell-command "extract" that, similar to
the well-known "file" command, can extract meta-data from a file an
print the results to stdout.");
-P();
-W("Currently, libextractor supports the following formats:");
-include("plugins_list");
-BR();
-W("Also, various additional MIME types are detected.");
-P();
-W("libextractor is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option) any
later version.");
-ANCHOR("news");
-H2("Recent News");
-P();
-echo "<dl>";
-DTDD("Thu Feb 24 01:23:31 EST 2005 | libextractor v0.4.2 released.",
- "This release fixes some bugs in the ID3, PDF, PNG and REAL extractors.
The REAL extractor now also handles the new Helix formats. libextractor can
now also be used to extract thumbnails from images (using ImageMagick).");
-DTDD("Wed Jan 26 19:51:44 EST 2005 | libextractor v0.4.1 released.",
- "This release fixes a security issue (inherited from xpdf). It also
extracts more meta-data from files of TAR or QuickTime format.");
-DTDD("Sat Dec 25 21:42:26 CET 2004 | libextractor v0.4.0 released.",
- "This release improves support for character sets (plugins are now
expected to convert to UTF-8). It also improves support for mp3 (adding
genres) and png (handling of compressed comments).");
-DTDD("Sat Nov 13 13:23:23 EST 2004 | libextractor v0.3.11 released.",
- "This release fixes bugs in the dvi, man, ID3v2.3, ole2 and pdf
extractors.");
-DTDD("Sun Oct 18 13:23:35 EST 2004 | libextractor v0.3.10 released.",
- "This release adds support for ID3v2.3 and ID3v2.4. It fixes bugs in the
tar, man, deb, mp3 and ole2 extractors.");
-DTDD("Sat Oct 17 18:12:11 EST 2004 | libextractor v0.3.9 released.",
- "This release adds support for the man, tar (including tar.gz) and deb
formats. It fixes bugs in the id3v2 and jpeg extractors. The size of jpeg
images is now also extracted. This version adds support for 64-bit file
sizes.");
-DTDD("Sat Oct 02 20:00:04 EST 2004 | libextractor v0.3.8 released.",
- "This release adds support for dvi (from TeX). The plugins are now installed
in a separate plugin directory. libextractor now works under OS X (10.3).");
-DTDD("Fri Sep 23 23:30:33 EST 2004 | libextractor v0.3.7 released.",
- "This release adds support for StarOffice formats, ID3v2 tags and the
Ripe160MD hash function. It also improves the performance of the HTML and ZIP
extractors.");
-DTDD("Fri Sep 10 20:10:38 EST 2004 | libextractor v0.3.6 released.",
- "This release adds support for OpenOffice formats, hash functions (md5,
sha-1) and fixes some build problems.");
-DTDD("Mon Aug 30 23:18:49 IST 2004 | libextractor v0.3.5 released.",
- "This release adds support for OLE2 (WinWord, PowerPoint, Excel formats)
and fixes various minor bugs. For OLE2 support you will have to have glib 2.0
installed (yes, that is glib from GTK/Gnome, not glibc!).");
-DTDD("Thu Aug 26 20:27:24 IST 2004 | Bugtracking using Mantis enabled.",
- "You can now report and view bug-reports about libextractor on %s.",
- extlink_("https://gnunet.org/mantis/","Mantis"));
-DTDD("Wed Aug 25 19:02:07 IST 2004 | libextractor v0.3.4 released.",
- "This release fixes a minor linking error (<tt>-lm</tt> for
<tt>floor</tt>), improves performance and adds support for GNU gettext
(internationalization).");
-DTDD("Wed May 31 19:22:07 EST 2004 | libextractor v0.3.3 released.",
- "This release fixes various minor bugs (segmentation faults and
non-termination of mpeg and riff extractors for malformed files) and adds
support for WAV files.");
-DTDD("Wed May 31 19:22:07 EST 2004 | libextractor v0.3.2 released.",
- "This release fixes various minor bugs (plugins misbehaving for malformed
files) and improves portability to Cygwin/MinGW.");
-echo "</dl>";
-P();
-W("%s",
- intlink_("oldnews","Older news archive"));
-ANCHOR("links");
-H2("Links");
P();
-W("Related work:");
-echo "<ul>";
-LILI("http://www.wotsit.org","File format database");
-LILI("http://getid3.sf.net/","getid3, similar project for PHP");
-LILI("download/php/",
- "PHP wrapper for libextractor (mirrored, not written by us, see README)");
-LILI("http://dublincore.org/documents/dcmi-terms/","Meta-data categorization
standard");
-LILI("http://hul.harvard.edu/jhove/","JHOVE, Harvard Object Validation
Environment");
-echo "</ul>";
-W("Projects that use libextractor:");
-echo "<ul>";
-LILI("http://witme.sourceforge.net/libferris.web/","libferris, a virtual file
system");
-LILI("http://evidence.sf.net/","Evidence, enlightened file manager");
-LILI("http://gnunet.org/","GNUnet, secure P2P file sharing");
-LILI("http://gnunet.org/doodle/","doodle, index your disk");
-echo "</ul>";
+if ( ($xlang) && ($xlang != "English")) {
+ H2("Mass translation");
+ W("The mass-translation page for translating many sentences at once is %s.",
+ intlink_("editor.php", "here"));
+ W("Note that the sentence database is shared with the %s, %s and %s
projects.",
+ ARRAY(extlink_("http://gnunet.org/", "GNUnet"),
+ extlink_("http://gnunet.org/doodle/", "doodle"),
+ extlink_("http://gnunet.org/libextractor/", "libExtractor")));
+ }
-ANCHOR("contact");
-H2("Contact");
+H2("Bugtrack");
+W("i18nHTML uses Mantis for bugtracking.");
+W("Visit %s to report bugs.",
+ extlink_("https://gnunet.org/mantis/","https://gnunet.org/mantis/"));
+W("You need to sign up for a reporter account.");
+W("Please make sure you report bugs under <strong>I18nHTML</strong> and not
under any of the other projects.");
P();
-W("libextractor is developed by %s and %s.",
- ARRAY(extlink_("http://grothoff.org/christian/",
- "Christian Grothoff"),
- extlink_("http://compilers.cs.purdue.edu/~vids/",
- "Vids Samanta")));
-W("For questions about libextractor send email to %s.",
- extlink_("mailto:address@hidden",
- "address@hidden"));
+W("If you dislike Mantis and need to report a bug contact %s via e-mail (good
luck getting by the spam-filter).",
+ extlink_("mailto:address@hidden","address@hidden"));
-include("html_footer.php3");
+HR();
+generateFooter();
+echo "</body></html>\n";
?>
-
Modified: Extractor-docs/WWW/translate.php
===================================================================
--- Extractor-docs/WWW/translate.php 2005-04-04 06:47:24 UTC (rev 587)
+++ Extractor-docs/WWW/translate.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -27,9 +27,7 @@
DOCTYPE("HTML", "Transitional");
echo "<html><head>\n";
-echo "<title>";
-TRANSLATE("WWW translation");
-echo "</title>";
+TITLE("WWW translation");
echo "<meta name=\"description\" content=\"";
TRANSLATE("Help translating this webpage.");
echo "\">";
@@ -45,7 +43,7 @@
W("Destination language: ");
W($lang);
P();
-echo "<form action=\"" . $i18nHTMLbase . "commitTranslation.php\">\n";
+echo "<form method=\"POST\" action=\"" . $i18nHTMLbase .
"commitTranslation.php\">\n";
echo "<input type=hidden name=\"text\" value=\"" . urlencode($text) . "\">\n";
echo "<input type=hidden name=\"xlang\" value=\"$xlang\">\n";
echo "<input type=hidden name=\"back\" value=\"$back\">\n";
@@ -93,9 +91,9 @@
printf("<tr><td>%s</td><td><a href=\"" . $i18nHTMLbase .
"vote.php?xlang=%s&text=%s&translation=%s\">%s</a></td></tr>\n",
W_($row["lang"]),
urlencode($row["lang"]),
- urlencode($text),
- $translation,
- urldecode($translation));
+ $u,
+ urlencode(from_unicode($translation)),
+ fix(from_unicode($translation)));
}
echo "</table>";
@@ -121,4 +119,4 @@
generateFooter();
echo "</body></html>";
-?>
\ No newline at end of file
+?>
Modified: Extractor-docs/WWW/vote.php
===================================================================
--- Extractor-docs/WWW/vote.php 2005-04-04 06:47:24 UTC (rev 587)
+++ Extractor-docs/WWW/vote.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -21,17 +21,18 @@
// For sentences with multiple translations, the one with the most
// votes is displayed.
include("i18nhtml.inc");
-echo "<html><head><title>";
-W("WWW translation: vote");
-echo "</title></head><body>";
+echo "<html><head>";
+echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" >";
+TITLE("WWW translation: vote");
+echo "</head><body>";
if (!$connection) {
echo "Database is down. Cannot edit translations.";
die();
}
$text = $_REQUEST['text'];
$translation = $_REQUEST['translation'];
-$u = urlencode($text);
-$t = urlencode($translation);
+$u = mysql_real_escape_string($text);
+$t = mysql_real_escape_string(to_unicode($translation));
echo "text = " . $text . "<br>\n";
echo "translation = " . $translation . "<br>\n";
Modified: GNUnet-docs/WWW/html_header.php3
===================================================================
--- GNUnet-docs/WWW/html_header.php3 2005-04-04 06:47:24 UTC (rev 587)
+++ GNUnet-docs/WWW/html_header.php3 2005-04-04 06:51:09 UTC (rev 588)
@@ -3,9 +3,9 @@
echo "<html><head>\n";
$haveNBO = 0;
if ($title) {
- echo "<title>";
- TRANSLATE($title);
- echo "</title>";
+ TITLE($title);
+ } else {
+ TITLE("GNUnet");
}
if ($description) {
echo "<meta name=\"description\" content=\"";
Modified: GNUnet-docs/WWW/i18nhtml.inc
===================================================================
--- GNUnet-docs/WWW/i18nhtml.inc 2005-04-04 06:47:24 UTC (rev 587)
+++ GNUnet-docs/WWW/i18nhtml.inc 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff and other contributing authors.
+ (C) 2003, 2004, 2005 Christian Grothoff and other contributing authors.
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -27,6 +27,7 @@
// $editor can be set to create a translation
// tag even if a translation is already available.
// there is currently no security.
+ //
// An "_" is used for functions that return the
// translated string instead of printing it directly.
// These functions are used for "%s" printing with W().
@@ -39,6 +40,8 @@
// obtain user db specific configuration parameters
include("i18nhtml_config.inc");
+header("Content-type: text/html; charset=utf-8");
+
// establish default connection to database server
$connection = @mysql_connect($i18nHTMLsqlServer,
$i18nHTMLsqlUser,
@@ -119,6 +122,7 @@
if ($xlang)
$lang = $xlang;
$lang = ucfirst(strtolower($lang));
+$lang = mysql_real_escape_string($lang);
$editor = $_REQUEST['editor'];
@@ -241,6 +245,464 @@
}
}
+
+/**
+ * restore UTF-8 from HTML Unicode entities
+ *
+ * This function is triggered by the YACS handler during page
+ * rendering. It is aiming to transcode HTML Unicode entities
+ * (eg, &#8364;) back to actual UTF-8 encoding (eg, €).
+ *
+ * @param string a string with a mix of UTF-8 and of HTML Unicode entities
+ * @return an UTF-8 string
+ */
+function from_unicode($text) {
+ // translate extended ISO8859-1 chars, if any
+ $text = utf8_encode($text);
+
+ // translate Unicode entities
+ $areas = preg_split('/&#(\d+?);/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+ $text = '';
+ $index = 0;
+ foreach($areas as $area) {
+ switch($index%2) {
+ case 0: // before entity
+ $text .= $area;
+ break;
+ case 1: // the entity itself
+
+ // get the integer value
+ $unicode = intval($area);
+
+ // one byte
+ if($unicode < 0x80) {
+
+ $text .= chr($unicode);
+
+ // two bytes
+ } elseif($unicode < 0x800) {
+
+ $text .= chr( 0xC0 + ( ( $unicode - ( $unicode % 0x40 ) ) / 0x40 ) );
+ $text .= chr( 0x80 + ( $unicode % 0x40 ) );
+
+ // three bytes
+ } elseif($unicode < 0x10000) {
+
+ $text .= chr( 0xE0 + ( ( $unicode - ( $unicode % 0x1000 ) ) / 0x1000 )
);
+ $text .= chr( 0x80 + ( ( ( $unicode % 0x1000 ) - ( $unicode % 0x40 ) )
/ 0x40 ) );
+ $text .= chr( 0x80 + ( $unicode % 0x40 ) );
+
+ // more bytes, keep it as it is...
+ } else
+ $text .= '&#'.$unicode.';';
+
+ break;
+ }
+ $index++;
+ }
+
+ // the updated string
+ return $text;
+}
+
+
+/**
+ * transcode unicode entities to/from HTML entities
+ *
+ * When transcoding to Unicode, this function transforms HTML entities into
+ * their equivalent Unicode entities.
+ * For example, w.bloggar posts pages using HTML entities.
+ * If you have to modify these pages using web forms, you would like to get
+ * UTF-8 instead.
+ *
+ * @link http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple Character Entity Chart
+ *
+ * @param string the string to be transcoded
+ * @param boolean TRUE to transcode to Unicode, FALSE to transcode to HTML
+ * @return a transcoded string
+ */
+function transcode($input, $to_unicode=TRUE) {
+
+ // initialize tables only once
+ static $html_entities, $unicode_entities;
+ if(!is_array($html_entities)) {
+
+
+ // numerical order
+ $codes = array(
+ ' ' => ' ', // non-breaking space
+ '¡' => '¡', // inverted exclamation mark
+ '¢' => '¢', // cent sign
+ '£' => '£', // pound sign
+ '¤' => '¤', // currency sign
+ '¥' => '¥', // yen sign
+ '¦' => '¦', // broken bar
+ '§' => '§', // section sign
+ '¨' => '¨', // diaeresis
+ '©' => '©', // copyright sign
+ 'ª' => 'ª', // feminine ordinal indicator
+ '«' => '«', // left-pointing double angle
quotation mark
+ '¬' => '¬', // not sign
+ '­' => '­', // soft hyphen
+ '®' => '®', // registered sign
+ '¯' => '¯', // macron
+ '°' => '°', // degree sign
+ '±' => '±', // plus-minus sign
+ '²' => '²', // superscript two
+ '³' => '³', // superscript three
+ '´' => '´', // acute accent
+ 'µ' => 'µ', // micro sign
+ '¶' => '¶', // pilcrow sign
+ '·' => '·', // middle dot
+ '¸' => '¸', // cedilla
+ '¹' => '¹', // superscript one
+ 'º' => 'º', // masculine ordinal indicator
+ '»' => '»', // right-pointing double angle
quotation mark
+ '¼' => '¼', // vulgar fraction one quarter
+ '½' => '½', // vulgar fraction one half
+ '¾' => '¾', // vulgar fraction three
quarters
+ '¿' => '¿', // inverted question mark
+ 'À' => 'À', // latin capital letter A with
grave
+ 'Á' => 'Á', // latin capital letter A with
acute
+ 'Â' => 'Â', // latin capital letter A with
circumflex
+ 'Ã' => 'Ã', // latin capital letter A with
tilde
+ 'Ä' => 'Ä', // latin capital letter A with
diaeresis
+ 'Å' => 'Å', // latin capital letter A with
ring above
+ 'Æ' => 'Æ', // latin capital letter AE
+ 'Ç' => 'Ç', // latin capital letter C with
cedilla
+ 'È' => 'È', // latin capital letter E with
grave
+ 'É' => 'É', // latin capital letter E with
acute
+ 'Ê' => 'Ê', // latin capital letter E with
circumflex
+ 'Ë' => 'Ë', // latin capital letter E with
diaeresis
+ 'Ì' => 'Ì', // latin capital letter I with
grave
+ 'Í' => 'Í', // latin capital letter I with
acute
+ 'Î' => 'Î', // latin capital letter I with
circumflex
+ 'Ï' => 'Ï', // latin capital letter I with
diaeresis
+ 'Ð' => 'Ð', // latin capital letter
ETH
+ 'Ñ' => 'Ñ', // latin capital letter N with
tilde
+ 'Ò' => 'Ò', // latin capital letter O with
grave
+ 'Ó' => 'Ó', // latin capital letter O with
acute
+ 'Ô' => 'Ô', // latin capital letter O with
circumflex
+ 'Õ' => 'Õ', // latin capital letter O with
tilde
+ 'Ö' => 'Ö', // latin capital letter O with
diaeresis
+ '×' => '×', // multiplication sign
+ 'Ø' => 'Ø', // latin capital letter O with
stroke
+ 'Ù' => 'Ù', // latin capital letter U with
grave
+ 'Ú' => 'Ú', // latin capital letter U with
acute
+ 'Û' => 'Û', // latin capital letter U with
circumflex
+ 'Ü' => 'Ü', // latin capital letter U with
diaeresis
+ 'Ý' => 'Ý', // latin capital letter Y with
acute
+ 'Þ' => 'Þ', // latin capital letter THORN
+ 'ß' => 'ß', // latin small letter sharp s
+ 'à' => 'à', // latin small letter a with
grave
+ 'á' => 'á', // latin small letter a with
acute
+ 'â' => 'â', // latin small letter a with
circumflex
+ 'ã' => 'ã', // latin small letter a with
tilde
+ 'ä' => 'ä', // latin small letter a with
diaeresis
+ 'å' => 'å', // latin small letter a with
ring above
+ 'æ' => 'æ', // latin small letter ae
+ 'ç' => 'ç', // latin small letter c with
cedilla
+ 'è' => 'è', // latin small letter e with
grave
+ 'é' => 'é', // latin small letter e with
acute
+ 'ê' => 'ê', // latin small letter e with
circumflex
+ 'ë' => 'ë', // latin small letter e with
diaeresis
+ 'ì' => 'ì', // latin small letter i with
grave
+ 'í' => 'í', // latin small letter i with
acute
+ 'î' => 'î', // latin small letter i with
circumflex
+ 'ï' => 'ï', // latin small letter i with
diaeresis
+ 'ð' => 'ð', // latin small letter
eth
+ 'ñ' => 'ñ', // latin small letter n with
tilde
+ 'ò' => 'ò', // latin small letter o with
grave
+ 'ó' => 'ó', // latin small letter o with
acute
+ 'ô' => 'ô', // latin small letter o with
circumflex
+ 'õ' => 'õ', // latin small letter o with
tilde
+ 'ö' => 'ö', // latin small letter o with
diaeresis
+ '÷' => '÷', // division sign
+ 'ø' => 'ø', // latin small letter o with
stroke
+ 'ù' => 'ù', // latin small letter u with
grave
+ 'ú' => 'ú', // latin small letter u with
acute
+ 'û' => 'û', // latin small letter u with
circumflex
+ 'ü' => 'ü', // latin small letter u with
diaeresis
+ 'ý' => 'ý', // latin small letter y with
acute
+ 'þ' => 'þ', // latin small letter thorn
+ 'ÿ' => 'ÿ', //
+ 'Œ' => 'Œ', // latin capital ligature OE
+ 'œ' => 'œ', // latin small ligature oe
+ 'Š' => 'Š', // latin capital letter S with
caron
+ 'š' => 'š', // latin small letter s with
caron
+ 'Ÿ' => 'Ÿ', // latin capital letter Y with
diaeresis
+ 'ƒ' => 'ƒ' , // latin small f with hook
+ 'ˆ' => 'ˆ', // modifier letter circumflex
accent
+ '˜' => '˜', // small tilde
+ 'Α' => 'Α', // greek capital letter alpha
+ 'Β' => 'Β', // greek capital letter beta
+ 'Γ' => 'Γ', // greek capital letter gamma
+ 'Δ' => 'Δ', // greek capital letter delta
+ 'Ε' => 'Ε', // greek capital letter epsilon
+ 'Ζ' => 'Ζ', // greek capital letter zeta
+ 'Η' => 'Η', // greek capital letter
eta
+ 'Θ' => 'Θ', // greek capital letter theta
+ 'Ι' => 'Ι', // greek capital letter iota
+ 'Κ' => 'Κ', // greek capital letter kappa
+ 'Λ' => 'Λ', // greek capital letter lambda
+ 'Μ' => 'Μ', // greek capital letter
mu
+ 'Ν' => 'Ν', // greek capital letter
nu
+ 'Ξ' => 'Ξ', // greek capital letter
xi
+ 'Ο' => 'Ο', // greek capital letter omicron
+ 'Π' => 'Π', // greek capital letter
pi
+ 'Ρ' => 'Ρ', // greek capital letter
rho
+ 'Σ' => 'Σ', // greek capital letter sigma
+ 'Τ' => 'Τ', // greek capital letter
tau
+ 'Υ' => 'Υ', // greek capital letter upsilon
+ 'Φ' => 'Φ', // greek capital letter
phi
+ 'Χ' => 'Χ', // greek capital letter
chi
+ 'Ψ' => 'Ψ', // greek capital letter
psi
+ 'Ω' => 'Ω', // greek capital letter omega
+ 'α' => 'α', // greek small letter alpha
+ 'β' => 'β', // greek small letter beta
+ 'γ' => 'γ', // greek small letter gamma
+ 'δ' => 'δ', // greek small letter delta
+ 'ε' => 'ε', // greek small letter epsilon
+ 'ζ' => 'ζ', // greek small letter zeta
+ 'η' => 'η', // greek small letter
eta
+ 'θ' => 'θ', // greek small letter theta
+ 'ι' => 'ι', // greek small letter iota
+ 'κ' => 'κ', // greek small letter kappa
+ 'λ' => 'λ', // greek small letter lambda
+ 'μ' => 'μ', // greek small letter mu
+ 'ν' => 'ν', // greek small letter nu
+ 'ξ' => 'ξ', // greek small letter xi
+ 'ο' => 'ο', // greek small letter omicron
+ 'π' => 'π', // greek small letter pi
+ 'ρ' => 'ρ', // greek small letter
rho
+ 'ς' => 'ς', // greek small letter final
sigma
+ 'σ' => 'σ', // greek small letter sigma
+ 'τ' => 'τ', // greek small letter
tau
+ 'υ' => 'υ', // greek small letter upsilon
+ 'φ' => 'φ', // greek small letter
phi
+ 'χ' => 'χ', // greek small letter
chi
+ 'ψ' => 'ψ', // greek small letter
psi
+ 'ω' => 'ω', // greek small letter omega
+ 'ϑ' => 'ϑ', // greek small letter
theta symbol
+ 'ϒ' => 'ϒ', // greek upsilon with hook
symbol
+ 'ϖ' => 'ϖ', // greek pi symbol
+ ' ' => ' ', // en space
+ ' ' => ' ', // em space
+ ' ' => ' ', // thin space
+ '‌' => '‌', // zero width non-joiner
+ '‍' => '‍', // zero width joiner
+ '‎' => '‎', // left-to-right mark
+ '‏' => '‏', // right-to-left mark
+ '–' => '–', // en dash
+ '—' => '—', // em dash
+ '‘' => '‘', // left single quotation mark
+ '’' => '’', // right single quotation mark
+ '‚' => '‚', // single low-9 quotation mark
+ '“' => '“', // left double quotation mark
+ '”' => '”', // right double quotation mark
+ '„' => '„', // double low-9 quotation mark
+ '†' => '†', // dagger
+ '‡' => '‡', // double dagger
+ '•' => '•', // bullet
+ '…' => '…', // horizontal ellipsis
+ '‰' => '‰', // per mille sign
+ '′' => '′', // primeminutes
+ '″' => '″', // double prime
+ '‹' => '‹', // single left-pointing angle
quotation mark
+ '›' => '›', // single right-pointing angle
quotation mark
+ '‾' => '‾', // overline
+ '⁄' => '⁄', // fraction slash
+ '€' => '€', // euro sign
+ 'ℑ' => 'ℑ', // blackletter capital I
+ '℘' => '℘', // script capital P
+ 'ℜ' => 'ℜ', // blackletter capital R
+ '™' => '™', // trade mark sign
+ 'ℵ' => 'ℵ', // alef symbol
+ '←' => '←', // leftwards arrow
+ '↑' => '↑', // upwards arrow
+ '→' => '→', // rightwards arrow
+ '↓' => '↓', // downwards arrow
+ '↔' => '↔', // left right arrow
+ '↵' => '↵', // downwards arrow with corner
leftwards
+ '⇐' => '⇐', // leftwards double arrow
+ '⇑' => '⇑', // upwards double arrow
+ '⇒' => '⇒', // rightwards double arrow
+ '⇓' => '⇓', // downwards double arrow
+ '⇔' => '⇔', // left right double arrow
+ '∀' => '∀', // for all
+ '∂' => '∂', // partial differential
+ '∃' => '∃', // there exists
+ '∅' => '∅', // empty set
+ '∇' => '∇', // nabla
+ '∈' => '∈', // element of
+ '∉' => '∉', // not an element of
+ '∋' => '∋', // contains as member
+ '∏' => '∏', // n-ary product
+ '∑' => '∑', // n-ary sumation
+ '−' => '−', // minus sign
+ '∗' => '∗', // asterisk operator
+ '√' => '√', // square root
+ '∝' => '∝', // proportional to
+ '∞' => '∞', // infinity
+ '∠' => '∠', // angle
+ '∧' => '∧', // logical and
+ '∨' => '∨', // logical or
+ '∩' => '∩', // intersection
+ '∪' => '∪', // union
+ '∫' => '∫', // integral
+ '∴' => '∴', // therefore
+ '∼' => '∼', // tilde operator
+ '≅' => '≅', // approximately equal to
+ '≈' => '≈', // almost equal to
+ '≠' => '≠', // not equal to
+ '≡' => '≡', // identical to
+ '≤' => '≤', // less-than or equal to
+ '≥' => '≥', // greater-than or
equal to
+ '⊂' => '⊂', // subset of
+ '⊃' => '⊃', // superset of
+ '⊄' => '⊄', // not a subset of
+ '⊆' => '⊆', // subset of or equal to
+ '⊇' => '⊇', // superset of or equal to
+ '⊕' => '⊕', // circled plus
+ '⊗' => '⊗', // circled times
+ '⊥' => '⊥', // up tack
+ '⋅' => '⋅', // dot operator
+ '⌈' => '⌈', // left ceiling
+ '⌉' => '⌉', // right ceiling
+ '⌊' => '⌊', // left floor
+ '⌋' => '⌋', // right floor
+ '〈' => '⟨', // left-pointing angle bracket
+ '〉' => '⟩', // right-pointing angle bracket
+ '◊' => '◊', // lozenge
+ '♠' => '♠', // black spade suit
+ '♣' => '♣', // black club suit
+ '♥' => '♥', // black heart suit
+ '♦' => '♦' // black diam suit
+ );
+
+ // split entities for use in str_replace()
+ foreach($codes as $unicode_entity => $html_entity) {
+ $unicode_entities[] = $unicode_entity;
+ $html_entities[] = $html_entity;
+ }
+ }
+
+ // transcode HTML entities to Unicode
+ if($to_unicode)
+ return str_replace($html_entities, $unicode_entities, $input);
+
+ // transcode Unicode entities to HTML entities
+ else
+ return str_replace($unicode_entities, $html_entities, $input);
+}
+
+
+
+
+/**
+ * transcode multi-byte characters to HTML representations for Unicode
+ *
+ * This function is aiming to preserve Unicode characters through storage in
+ * a ISO-8859-1 compliant system.
+ *
+ * Every multi-byte UTF-8 character is transformed to its equivalent HTML
+ * numerical entity (eg, &#4568;) that may be handled safely by PHP and by
+ * MySQL.
+ *
+ * Of course, this solution does not allow for full-text search in the
+ * database and therefore, is not a definitive solution to
+ * internationalization issues.
+ * It does enable, however, practical use of Unicode to build pages in
+ * foreign languages.
+ *
+ * Also, this function transforms HTML entities into their equivalent
+ * Unicode entities (the input is first passed through transcode()).
+ * For example, w.bloggar posts pages using HTML entities.
+ * If you have to modify these pages using web forms, you would like to get
+ * UTF-8 instead.
+ *
+ * Single-byte characters (below 0x80) are copied unchanged. For every
+ * multi-byte sequence the lead byte selects the sequence length (2 to 6
+ * bytes); the payload bits of the lead byte and of the following bytes are
+ * combined into one integer, which is emitted as "&#<value>;". A sequence
+ * that decodes to 0 is dropped.
+ *
+ * @link http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple Character Entity Chart
+ *
+ * @param string the original UTF-8 string
+ * @return a string acceptable in an ISO-8859-1 storage system (ie., PHP4 + MySQL 3)
+ */
+function to_unicode($input) {
+
+  // transcode HTML entities to Unicode entities
+  $input = transcode($input);
+
+  // scan the whole string
+  $output = '';
+  $index = 0;
+  while($index < strlen($input)) {
+
+    // look at one char
+    $char = ord($input[$index]);
+
+    // one byte (0xxxxxxx)
+    if($char < 0x80) {
+
+      // some chars may be undefined
+      $output .= chr($char);
+      $index += 1;
+
+    // two bytes (110xxxxx 10xxxxxx)
+    // NOTE(review): a stray continuation byte (0x80-0xBF) in lead position
+    // also lands in this branch, because only the upper bound is tested.
+    } elseif($char < 0xE0) {
+
+      // strip weird sequences (eg, C0 80 -> NUL): a value of 0 is falsy,
+      // so nothing is appended for it
+      if($value = (($char % 0x20) * 0x40)
+          + (ord($input[$index + 1]) % 0x40))
+        $output .= '&#' . $value . ';';
+      $index += 2;
+
+    // three bytes (1110xxxx 10xxxxxx 10xxxxxx)
+    // example: euro sign = \xE2\x82\xAC -> &#8364;
+    } elseif($char < 0xF0) {
+
+      // strip weird sequences
+      if($value = (($char % 0x10) * 0x1000)
+          + ((ord($input[$index + 1]) % 0x40) * 0x40)
+          + (ord($input[$index + 2]) % 0x40))
+        $output .= '&#' . $value . ';';
+      $index += 3;
+
+    // four bytes (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } elseif($char < 0xF8) {
+
+      // strip weird sequences
+      if($value = (($char % 0x08) * 0x40000)
+          + ((ord($input[$index + 1]) % 0x40) * 0x1000)
+          + ((ord($input[$index + 2]) % 0x40) * 0x40)
+          + (ord($input[$index + 3]) % 0x40))
+        $output .= '&#' . $value . ';';
+      $index += 4;
+
+    // five bytes (111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } elseif($char < 0xFC) {
+
+      // strip weird sequences
+      if($value = (($char % 0x04) * 0x1000000)
+          + ((ord($input[$index + 1]) % 0x40) * 0x40000)
+          + ((ord($input[$index + 2]) % 0x40) * 0x1000)
+          + ((ord($input[$index + 3]) % 0x40) * 0x40)
+          + (ord($input[$index + 4]) % 0x40))
+        $output .= '&#' . $value . ';';
+      $index += 5;
+
+    // six bytes (1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } else {
+
+      // strip weird sequences
+      // the last term reads the sixth byte, at offset +5; offset +4 is
+      // already consumed by the term before it
+      if($value = (($char % 0x02) * 0x40000000)
+          + ((ord($input[$index + 1]) % 0x40) * 0x1000000)
+          + ((ord($input[$index + 2]) % 0x40) * 0x40000)
+          + ((ord($input[$index + 3]) % 0x40) * 0x1000)
+          + ((ord($input[$index + 4]) % 0x40) * 0x40)
+          + (ord($input[$index + 5]) % 0x40))
+        $output .= '&#' . $value . ';';
+      $index += 6;
+    }
+
+  }
+
+  // return the translated string
+  return $output;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
// returns either the translated string
// or the original string. Assumes we
// are passed the original string as occurs
@@ -264,8 +726,7 @@
if ($a == "")
return $a;
- // $a = htmlentities($a, ENT_QUOTES, $charset);
- $u = urlencode($a);
+ $u = mysql_real_escape_string(urlencode($a));
if (!$connection) {
// database not available, just print English
@@ -319,7 +780,7 @@
return fix($a); // just return English string
} else { // translation available
$row = mysql_fetch_array($result);
- return fix(urldecode($row["translation"]));
+ return $row["translation"];
}
}
@@ -361,7 +822,16 @@
}
function TITLE($a,$b="") {
- echo "<title>" . W_($a,$b) . "</title>\n";
+ global $lang;
+ global $languagecodes;
+ echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"
>";
+ echo "<title>" . TRANSLATE_($a,$b) . "</title>\n";
+ if (isset($languagecodes[$lang])) {
+ echo "<meta name=\"content-language\" content=\"" .
+ $languagecodes[$lang] . "\">";
+ echo "<meta name=\"language\" content=\"" .
+ $languagecodes[$lang] . "\">";
+ }
}
Modified: GNUnet-docs/WWW/i18nhtml_config.inc
===================================================================
--- GNUnet-docs/WWW/i18nhtml_config.inc 2005-04-04 06:47:24 UTC (rev 587)
+++ GNUnet-docs/WWW/i18nhtml_config.inc 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -24,12 +24,18 @@
$i18nHTMLrecordMode = 2; // 1: only missing, 2: everything, 0: disable
$i18nHTMLsqlServer = "localhost";
-$i18nHTMLsqlUser = "GNUnetWWW";
-$i18nHTMLsqlPass = "garlic";
+$i18nHTMLsqlUser = "i18nHTML";
+$i18nHTMLsqlPass = "pass";
-$i18nHTMLsqlDB = "translations"; // default is "translation"
+$i18nHTMLsqlDB = "translation"; // default is "translation"
$i18nHTMLbase = ""; // base directory prepended to i18nHTML php pages used in
links
$i18nHTMLmarker = "*"; // default value if never changed
+// Note that if you enable debug, the scripts may print
+// warnings even if everything is ok!
+$i18nHTMLdebug = 0; // 0 = no, 1 = yes
+// for selectively cloning a DB
+// $i18nHTMLclone = "/tmp/cloneFile.sql";
+
?>
Added: GNUnet-docs/WWW/papers/CameraReady_174.pdf
===================================================================
(Binary files differ)
Property changes on: GNUnet-docs/WWW/papers/CameraReady_174.pdf
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Modified: doodle-docs/WWW/commitMassTranslation.php
===================================================================
--- doodle-docs/WWW/commitMassTranslation.php 2005-04-04 06:47:24 UTC (rev
587)
+++ doodle-docs/WWW/commitMassTranslation.php 2005-04-04 06:51:09 UTC (rev
588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -19,22 +19,20 @@
*/
include("i18nhtml.inc");
if (!$connection) {
- echo "<html><head><title>WWW translation: commit</title></head><body>";
echo "Database is down. Cannot edit translations.";
- echo "</body></html>";
die();
}
if ($xlang == "English") {
- echo "<html><head><title>WWW translation: commit</title></head><body>";
W("Translating to English currently not allowed.\n");
- echo "</body></html>";
die();
}
-echo "<html><head><title>WWW translation: commit</title></head><body>";
+echo "<html><head>";
+TITLE("WWW translation: commit");
+echo "</head><body>";
W("Processing translations...");
P();
$done = 0;
-foreach($_GET as $dec=>$val) {
+foreach($_POST as $dec=>$val) {
if ($val == "")
continue;
if ( ($dec == "xlang") || ($dec == "start") )
@@ -48,7 +46,7 @@
$num--;
$row = mysql_fetch_array($result);
if ($dec == bin2hex(md5(urldecode($row["c"])))) {
- $enc = $row["c"];
+ $enc = mysql_real_escape_string($row["c"]);
break;
}
}
@@ -60,22 +58,30 @@
}
$query = "DELETE FROM pending WHERE lang=\"$lang\" AND c=\"$enc\"";
mysql_query($query, $connection);
- //$t = urlencode($val);
- $t = urlencode($val);
- // $t = urlencode(htmlentities($val, ENT_QUOTES, $charset));
+ $t = mysql_real_escape_string(to_unicode($val));
$query = "SELECT ranking FROM map WHERE name=\"$enc\" AND lang=\"$lang\" AND
translation=\"$t\"";
$result = mysql_query($query, $connection);
$num = 0;
if ($result)
$num = mysql_numrows($result);
if ($num == 0) {
- $query = "INSERT INTO map VALUES(\"$enc\", \"$lang\", \"$t\", 1, \"" .
$_SERVER['REMOTE_ADDR'] . "\");";
- mysql_query($query, $connection);
- $done++;
- W("Storing translation for &quot;%s&quot; = &quot;%s&quot;.",
- ARRAY(urldecode($enc),
- urldecode($t)));
- BR();
+ $txtCnt = count_chars(urldecode($enc), 1);
+ $tCnt = count_chars($t, 1);
+ if ($txtCnt[ord('%')] != $tCnt[ord('%')]) {
+ W("Commit '%s->%s' failed.", $enc, $t);
+ W("The number of percent signs in source text and translation do not
match.");
+ W("Note that you must preserve all %%s expressions unchanged.");
+ W("Also, a single displayed %% sign must be translated into two (%%%%)
such signs.");
+ P();
+ } else {
+ $query = "INSERT INTO map VALUES(\"$enc\", \"$lang\", \"$t\", 1, \"" .
$_SERVER['REMOTE_ADDR'] . "\");";
+ mysql_query($query, $connection);
+ $done++;
+ W("Storing translation for &quot;%s&quot; = &quot;%s&quot;.",
+ ARRAY(urldecode($enc),
+ urldecode($t)));
+ BR();
+ }
}
}
P();
Modified: doodle-docs/WWW/commitTranslation.php
===================================================================
--- doodle-docs/WWW/commitTranslation.php 2005-04-04 06:47:24 UTC (rev
587)
+++ doodle-docs/WWW/commitTranslation.php 2005-04-04 06:51:09 UTC (rev
588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -18,7 +18,7 @@
Boston, MA 02111-1307, USA.
*/
include("i18nhtml.inc");
-$text = $_REQUEST['text'];
+$text = mysql_real_escape_string($_REQUEST['text']);
$translation = $_REQUEST['translation'];
$back = $_REQUEST['back'];
if (!$connection) {
@@ -30,12 +30,7 @@
die();
}
-// note: $text is already urlencoded (by submitting via form) and html
compatible
-// ensure translation is stored in encoded form and html compatible
-// if (get_magic_quotes_gpc()) $translation = stripslashes($translation);
-
-// $t = urlencode(htmlentities($translation, ENT_QUOTES, $charset));
-$t = urlencode($translation);
+$t = mysql_real_escape_string(to_unicode($translation));
// check for identical translation
$query = "SELECT ranking FROM map WHERE name=\"$text\" AND lang=\"$lang\" AND
translation=\"$t\"";
$result = mysql_query($query, $connection);
@@ -44,22 +39,38 @@
$num = mysql_numrows($result);
}
if ($num > 0) {
- echo "<html><body>";
+ echo "<html><head>";
+ TITLE("Translation exists.");
+ echo "</head><body>";
W("Translation exists.");
extlink($back, "Back...");
generateFooter();
echo "</body></html>";
} else {
- // if (!get_magic_quotes_gpc()) $t = addslashes($t); // ensure escaped
before adding to DB
- $query = "INSERT INTO map VALUES(\"$text\", \"$lang\", \"$t\", 1, \"" .
$_SERVER['REMOTE_ADDR'] . "\");";
- $result = mysql_query($query, $connection);
- if ($result) {
- header("Location: " . $back); /* Redirect browser */
+ $txtCnt = count_chars(urldecode($text), 1);
+ $tCnt = count_chars($t, 1);
+ if ($txtCnt[ord('%')] != $tCnt[ord('%')]) {
+ echo "<html><head>";
+ TITLE("Commit failed.");
+ echo "</head><body>";
+ W("Commit failed.");
+ W("The number of percent signs in source text and translation do not
match.");
+ W("Note that you must preserve all %%s expressions unchanged.");
+ W("Also, a single displayed %% sign must be translated into two (%%%%)
such signs.");
+ echo "</body></html>";
} else {
- echo "<html><body>";
- W("Commit ('%s') failed: ", $query);
- echo mysql_error();
- echo "</body></html>";
+ $query = "INSERT INTO map VALUES(\"$text\", \"$lang\", \"$t\", 1, \"" .
$_SERVER['REMOTE_ADDR'] . "\");";
+ $result = mysql_query($query, $connection);
+ if ($result) {
+ header("Location: " . $back); /* Redirect browser */
+ } else {
+ echo "<html><head>";
+ TITLE("Commit failed.");
+ echo "</head><body>";
+ W("Commit ('%s') failed: ", $query);
+ echo mysql_error();
+ echo "</body></html>";
+ }
}
}
?>
\ No newline at end of file
Modified: doodle-docs/WWW/editor.php
===================================================================
--- doodle-docs/WWW/editor.php 2005-04-04 06:47:24 UTC (rev 587)
+++ doodle-docs/WWW/editor.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -65,7 +65,7 @@
ARRAY($start, $end));
P();
- echo "<form action=\"" . $i18nHTMLbase . "commitMassTranslation.php\">";
+ echo "<form method=\"POST\" action=\"" . $i18nHTMLbase .
"commitMassTranslation.php\">";
echo "<input type=hidden name=\"xlang\" value=\"$xlang\">";
$endp = $end + 1;
echo "<input type=hidden name=\"start\" value=\"$endp\">";
Modified: doodle-docs/WWW/i18nhtml.inc
===================================================================
--- doodle-docs/WWW/i18nhtml.inc 2005-04-04 06:47:24 UTC (rev 587)
+++ doodle-docs/WWW/i18nhtml.inc 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff and other contributing authors.
+ (C) 2003, 2004, 2005 Christian Grothoff and other contributing authors.
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -27,6 +27,7 @@
// $editor can be set to create a translation
// tag even if a translation is already available.
// there is currently no security.
+ //
// An "_" is used for functions that return the
// translated string instead of printing it directly.
// These functions are used for "%s" printing with W().
@@ -39,6 +40,8 @@
// obtain user db specific configuration parameters
include("i18nhtml_config.inc");
+header("Content-type: text/html; charset=utf-8");
+
// establish default connection to database server
$connection = @mysql_connect($i18nHTMLsqlServer,
$i18nHTMLsqlUser,
@@ -119,6 +122,7 @@
if ($xlang)
$lang = $xlang;
$lang = ucfirst(strtolower($lang));
+$lang = mysql_real_escape_string($lang);
$editor = $_REQUEST['editor'];
@@ -241,6 +245,464 @@
}
}
+
+/**
+ * restore UTF-8 from HTML Unicode entities
+ *
+ * This function is triggered by the YACS handler during page
+ * rendering. It is aiming to transcode HTML Unicode entities
+ * (eg, &#8364;) back to actual UTF-8 encoding (eg, €).
+ *
+ * Processing steps:
+ * - the whole input is first passed through utf8_encode();
+ * - the result is split on decimal numeric entities (&#NNN;); hexadecimal
+ *   entities (&#xHH;) are not matched by the pattern and stay untouched;
+ * - code points below 0x10000 are replaced by their 1, 2 or 3 byte UTF-8
+ *   sequence; larger code points are written back as a decimal entity.
+ *
+ * NOTE(review): utf8_encode() assumes ISO-8859-1 input, so text that already
+ * holds multi-byte UTF-8 sequences would presumably be encoded a second
+ * time -- confirm what the callers actually pass in.
+ *
+ * @param string a string with a mix of UTF-8 and of HTML Unicode entities
+ * @return an UTF-8 string
+ */
+function from_unicode($text) {
+ // translate extended ISO8859-1 chars, if any (utf8_encode expects ISO-8859-1 input)
+ $text = utf8_encode($text);
+
+ // translate Unicode entities: split so that captured digits land at odd positions
+ $areas = preg_split('/&#(\d+?);/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+ $text = '';
+ $index = 0;
+ foreach($areas as $area) {
+ switch($index%2) {
+ case 0: // even chunk: literal text between entities, copied as is
+ $text .= $area;
+ break;
+ case 1: // odd chunk: the captured decimal digits of one entity
+
+ // get the integer value
+ $unicode = intval($area);
+
+ // one byte: code points below 0x80 map to a single byte
+ if($unicode < 0x80) {
+
+ $text .= chr($unicode);
+
+ // two bytes: 110xxxxx 10xxxxxx
+ } elseif($unicode < 0x800) {
+
+ $text .= chr( 0xC0 + ( ( $unicode - ( $unicode % 0x40 ) ) / 0x40 ) );
+ $text .= chr( 0x80 + ( $unicode % 0x40 ) );
+
+ // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+ } elseif($unicode < 0x10000) {
+
+ $text .= chr( 0xE0 + ( ( $unicode - ( $unicode % 0x1000 ) ) / 0x1000 )
);
+ $text .= chr( 0x80 + ( ( ( $unicode % 0x1000 ) - ( $unicode % 0x40 ) )
/ 0x40 ) );
+ $text .= chr( 0x80 + ( $unicode % 0x40 ) );
+
+ // more bytes, keep it as it is (re-emitted as a decimal entity)
+ } else
+ $text .= '&#'.$unicode.';';
+
+ break;
+ }
+ $index++;
+ }
+
+ // the updated string
+ return $text;
+}
+
+
+/**
+ * transcode unicode entities to/from HTML entities
+ *
+ * Converts between HTML named character entities (eg, &eacute;) and the
+ * equivalent decimal numeric entities (eg, &#233;) by plain string
+ * replacement. Text that contains none of the listed entities is returned
+ * unchanged.
+ *
+ * Also, this function transforms HTML entities into their equivalent Unicode
+ * entities.
+ * For example, w.bloggar posts pages using HTML entities.
+ * If you have to modify these pages using web forms, you would like to get
+ * UTF-8 instead.
+ *
+ * @link http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple Character Entity Chart
+ *
+ * @param string the string to be transcoded
+ * @param boolean TRUE to transcode to Unicode, FALSE to transcode to HTML
+ * @return a transcoded string
+ */
+function transcode($input, $to_unicode=TRUE) {
+
+  // initialize tables only once; both lists are kept across calls
+  static $html_entities, $unicode_entities;
+  if(!is_array($html_entities)) {
+
+    // numeric entity => named entity, in numerical order
+    $codes = array(
+      '&#160;' => '&nbsp;',     // non-breaking space
+      '&#161;' => '&iexcl;',    // inverted exclamation mark
+      '&#162;' => '&cent;',     // cent sign
+      '&#163;' => '&pound;',    // pound sign
+      '&#164;' => '&curren;',   // currency sign
+      '&#165;' => '&yen;',      // yen sign
+      '&#166;' => '&brvbar;',   // broken bar
+      '&#167;' => '&sect;',     // section sign
+      '&#168;' => '&uml;',      // diaeresis
+      '&#169;' => '&copy;',     // copyright sign
+      '&#170;' => '&ordf;',     // feminine ordinal indicator
+      '&#171;' => '&laquo;',    // left-pointing double angle quotation mark
+      '&#172;' => '&not;',      // not sign
+      '&#173;' => '&shy;',      // soft hyphen
+      '&#174;' => '&reg;',      // registered sign
+      '&#175;' => '&macr;',     // macron
+      '&#176;' => '&deg;',      // degree sign
+      '&#177;' => '&plusmn;',   // plus-minus sign
+      '&#178;' => '&sup2;',     // superscript two
+      '&#179;' => '&sup3;',     // superscript three
+      '&#180;' => '&acute;',    // acute accent
+      '&#181;' => '&micro;',    // micro sign
+      '&#182;' => '&para;',     // pilcrow sign
+      '&#183;' => '&middot;',   // middle dot
+      '&#184;' => '&cedil;',    // cedilla
+      '&#185;' => '&sup1;',     // superscript one
+      '&#186;' => '&ordm;',     // masculine ordinal indicator
+      '&#187;' => '&raquo;',    // right-pointing double angle quotation mark
+      '&#188;' => '&frac14;',   // vulgar fraction one quarter
+      '&#189;' => '&frac12;',   // vulgar fraction one half
+      '&#190;' => '&frac34;',   // vulgar fraction three quarters
+      '&#191;' => '&iquest;',   // inverted question mark
+      '&#192;' => '&Agrave;',   // latin capital letter A with grave
+      '&#193;' => '&Aacute;',   // latin capital letter A with acute
+      '&#194;' => '&Acirc;',    // latin capital letter A with circumflex
+      '&#195;' => '&Atilde;',   // latin capital letter A with tilde
+      '&#196;' => '&Auml;',     // latin capital letter A with diaeresis
+      '&#197;' => '&Aring;',    // latin capital letter A with ring above
+      '&#198;' => '&AElig;',    // latin capital letter AE
+      '&#199;' => '&Ccedil;',   // latin capital letter C with cedilla
+      '&#200;' => '&Egrave;',   // latin capital letter E with grave
+      '&#201;' => '&Eacute;',   // latin capital letter E with acute
+      '&#202;' => '&Ecirc;',    // latin capital letter E with circumflex
+      '&#203;' => '&Euml;',     // latin capital letter E with diaeresis
+      '&#204;' => '&Igrave;',   // latin capital letter I with grave
+      '&#205;' => '&Iacute;',   // latin capital letter I with acute
+      '&#206;' => '&Icirc;',    // latin capital letter I with circumflex
+      '&#207;' => '&Iuml;',     // latin capital letter I with diaeresis
+      '&#208;' => '&ETH;',      // latin capital letter ETH
+      '&#209;' => '&Ntilde;',   // latin capital letter N with tilde
+      '&#210;' => '&Ograve;',   // latin capital letter O with grave
+      '&#211;' => '&Oacute;',   // latin capital letter O with acute
+      '&#212;' => '&Ocirc;',    // latin capital letter O with circumflex
+      '&#213;' => '&Otilde;',   // latin capital letter O with tilde
+      '&#214;' => '&Ouml;',     // latin capital letter O with diaeresis
+      '&#215;' => '&times;',    // multiplication sign
+      '&#216;' => '&Oslash;',   // latin capital letter O with stroke
+      '&#217;' => '&Ugrave;',   // latin capital letter U with grave
+      '&#218;' => '&Uacute;',   // latin capital letter U with acute
+      '&#219;' => '&Ucirc;',    // latin capital letter U with circumflex
+      '&#220;' => '&Uuml;',     // latin capital letter U with diaeresis
+      '&#221;' => '&Yacute;',   // latin capital letter Y with acute
+      '&#222;' => '&THORN;',    // latin capital letter THORN
+      '&#223;' => '&szlig;',    // latin small letter sharp s
+      '&#224;' => '&agrave;',   // latin small letter a with grave
+      '&#225;' => '&aacute;',   // latin small letter a with acute
+      '&#226;' => '&acirc;',    // latin small letter a with circumflex
+      '&#227;' => '&atilde;',   // latin small letter a with tilde
+      '&#228;' => '&auml;',     // latin small letter a with diaeresis
+      '&#229;' => '&aring;',    // latin small letter a with ring above
+      '&#230;' => '&aelig;',    // latin small letter ae
+      '&#231;' => '&ccedil;',   // latin small letter c with cedilla
+      '&#232;' => '&egrave;',   // latin small letter e with grave
+      '&#233;' => '&eacute;',   // latin small letter e with acute
+      '&#234;' => '&ecirc;',    // latin small letter e with circumflex
+      '&#235;' => '&euml;',     // latin small letter e with diaeresis
+      '&#236;' => '&igrave;',   // latin small letter i with grave
+      '&#237;' => '&iacute;',   // latin small letter i with acute
+      '&#238;' => '&icirc;',    // latin small letter i with circumflex
+      '&#239;' => '&iuml;',     // latin small letter i with diaeresis
+      '&#240;' => '&eth;',      // latin small letter eth
+      '&#241;' => '&ntilde;',   // latin small letter n with tilde
+      '&#242;' => '&ograve;',   // latin small letter o with grave
+      '&#243;' => '&oacute;',   // latin small letter o with acute
+      '&#244;' => '&ocirc;',    // latin small letter o with circumflex
+      '&#245;' => '&otilde;',   // latin small letter o with tilde
+      '&#246;' => '&ouml;',     // latin small letter o with diaeresis
+      '&#247;' => '&divide;',   // division sign
+      '&#248;' => '&oslash;',   // latin small letter o with stroke
+      '&#249;' => '&ugrave;',   // latin small letter u with grave
+      '&#250;' => '&uacute;',   // latin small letter u with acute
+      '&#251;' => '&ucirc;',    // latin small letter u with circumflex
+      '&#252;' => '&uuml;',     // latin small letter u with diaeresis
+      '&#253;' => '&yacute;',   // latin small letter y with acute
+      '&#254;' => '&thorn;',    // latin small letter thorn
+      '&#255;' => '&yuml;',     // latin small letter y with diaeresis
+      '&#338;' => '&OElig;',    // latin capital ligature OE
+      '&#339;' => '&oelig;',    // latin small ligature oe
+      '&#352;' => '&Scaron;',   // latin capital letter S with caron
+      '&#353;' => '&scaron;',   // latin small letter s with caron
+      '&#376;' => '&Yuml;',     // latin capital letter Y with diaeresis
+      '&#402;' => '&fnof;',     // latin small f with hook
+      '&#710;' => '&circ;',     // modifier letter circumflex accent
+      '&#732;' => '&tilde;',    // small tilde
+      '&#913;' => '&Alpha;',    // greek capital letter alpha
+      '&#914;' => '&Beta;',     // greek capital letter beta
+      '&#915;' => '&Gamma;',    // greek capital letter gamma
+      '&#916;' => '&Delta;',    // greek capital letter delta
+      '&#917;' => '&Epsilon;',  // greek capital letter epsilon
+      '&#918;' => '&Zeta;',     // greek capital letter zeta
+      '&#919;' => '&Eta;',      // greek capital letter eta
+      '&#920;' => '&Theta;',    // greek capital letter theta
+      '&#921;' => '&Iota;',     // greek capital letter iota
+      '&#922;' => '&Kappa;',    // greek capital letter kappa
+      '&#923;' => '&Lambda;',   // greek capital letter lambda
+      '&#924;' => '&Mu;',       // greek capital letter mu
+      '&#925;' => '&Nu;',       // greek capital letter nu
+      '&#926;' => '&Xi;',       // greek capital letter xi
+      '&#927;' => '&Omicron;',  // greek capital letter omicron
+      '&#928;' => '&Pi;',       // greek capital letter pi
+      '&#929;' => '&Rho;',      // greek capital letter rho
+      '&#931;' => '&Sigma;',    // greek capital letter sigma
+      '&#932;' => '&Tau;',      // greek capital letter tau
+      '&#933;' => '&Upsilon;',  // greek capital letter upsilon
+      '&#934;' => '&Phi;',      // greek capital letter phi
+      '&#935;' => '&Chi;',      // greek capital letter chi
+      '&#936;' => '&Psi;',      // greek capital letter psi
+      '&#937;' => '&Omega;',    // greek capital letter omega
+      '&#945;' => '&alpha;',    // greek small letter alpha
+      '&#946;' => '&beta;',     // greek small letter beta
+      '&#947;' => '&gamma;',    // greek small letter gamma
+      '&#948;' => '&delta;',    // greek small letter delta
+      '&#949;' => '&epsilon;',  // greek small letter epsilon
+      '&#950;' => '&zeta;',     // greek small letter zeta
+      '&#951;' => '&eta;',      // greek small letter eta
+      '&#952;' => '&theta;',    // greek small letter theta
+      '&#953;' => '&iota;',     // greek small letter iota
+      '&#954;' => '&kappa;',    // greek small letter kappa
+      '&#955;' => '&lambda;',   // greek small letter lambda
+      '&#956;' => '&mu;',       // greek small letter mu
+      '&#957;' => '&nu;',       // greek small letter nu
+      '&#958;' => '&xi;',       // greek small letter xi
+      '&#959;' => '&omicron;',  // greek small letter omicron
+      '&#960;' => '&pi;',       // greek small letter pi
+      '&#961;' => '&rho;',      // greek small letter rho
+      '&#962;' => '&sigmaf;',   // greek small letter final sigma
+      '&#963;' => '&sigma;',    // greek small letter sigma
+      '&#964;' => '&tau;',      // greek small letter tau
+      '&#965;' => '&upsilon;',  // greek small letter upsilon
+      '&#966;' => '&phi;',      // greek small letter phi
+      '&#967;' => '&chi;',      // greek small letter chi
+      '&#968;' => '&psi;',      // greek small letter psi
+      '&#969;' => '&omega;',    // greek small letter omega
+      '&#977;' => '&thetasym;', // greek small letter theta symbol
+      '&#978;' => '&upsih;',    // greek upsilon with hook symbol
+      '&#982;' => '&piv;',      // greek pi symbol
+      '&#8194;' => '&ensp;',    // en space
+      '&#8195;' => '&emsp;',    // em space
+      '&#8201;' => '&thinsp;',  // thin space
+      '&#8204;' => '&zwnj;',    // zero width non-joiner
+      '&#8205;' => '&zwj;',     // zero width joiner
+      '&#8206;' => '&lrm;',     // left-to-right mark
+      '&#8207;' => '&rlm;',     // right-to-left mark
+      '&#8211;' => '&ndash;',   // en dash
+      '&#8212;' => '&mdash;',   // em dash
+      '&#8216;' => '&lsquo;',   // left single quotation mark
+      '&#8217;' => '&rsquo;',   // right single quotation mark
+      '&#8218;' => '&sbquo;',   // single low-9 quotation mark
+      '&#8220;' => '&ldquo;',   // left double quotation mark
+      '&#8221;' => '&rdquo;',   // right double quotation mark
+      '&#8222;' => '&bdquo;',   // double low-9 quotation mark
+      '&#8224;' => '&dagger;',  // dagger
+      '&#8225;' => '&Dagger;',  // double dagger
+      '&#8226;' => '&bull;',    // bullet
+      '&#8230;' => '&hellip;',  // horizontal ellipsis
+      '&#8240;' => '&permil;',  // per mille sign
+      '&#8242;' => '&prime;',   // prime (minutes)
+      '&#8243;' => '&Prime;',   // double prime
+      '&#8249;' => '&lsaquo;',  // single left-pointing angle quotation mark
+      '&#8250;' => '&rsaquo;',  // single right-pointing angle quotation mark
+      '&#8254;' => '&oline;',   // overline
+      '&#8260;' => '&frasl;',   // fraction slash
+      '&#8364;' => '&euro;',    // euro sign
+      '&#8465;' => '&image;',   // blackletter capital I
+      '&#8472;' => '&weierp;',  // script capital P
+      '&#8476;' => '&real;',    // blackletter capital R
+      '&#8482;' => '&trade;',   // trade mark sign
+      '&#8501;' => '&alefsym;', // alef symbol
+      '&#8592;' => '&larr;',    // leftwards arrow
+      '&#8593;' => '&uarr;',    // upwards arrow
+      '&#8594;' => '&rarr;',    // rightwards arrow
+      '&#8595;' => '&darr;',    // downwards arrow
+      '&#8596;' => '&harr;',    // left right arrow
+      '&#8629;' => '&crarr;',   // downwards arrow with corner leftwards
+      '&#8656;' => '&lArr;',    // leftwards double arrow
+      '&#8657;' => '&uArr;',    // upwards double arrow
+      '&#8658;' => '&rArr;',    // rightwards double arrow
+      '&#8659;' => '&dArr;',    // downwards double arrow
+      '&#8660;' => '&hArr;',    // left right double arrow
+      '&#8704;' => '&forall;',  // for all
+      '&#8706;' => '&part;',    // partial differential
+      '&#8707;' => '&exist;',   // there exists
+      '&#8709;' => '&empty;',   // empty set
+      '&#8711;' => '&nabla;',   // nabla
+      '&#8712;' => '&isin;',    // element of
+      '&#8713;' => '&notin;',   // not an element of
+      '&#8715;' => '&ni;',      // contains as member
+      '&#8719;' => '&prod;',    // n-ary product
+      '&#8721;' => '&sum;',     // n-ary summation
+      '&#8722;' => '&minus;',   // minus sign
+      '&#8727;' => '&lowast;',  // asterisk operator
+      '&#8730;' => '&radic;',   // square root
+      '&#8733;' => '&prop;',    // proportional to
+      '&#8734;' => '&infin;',   // infinity
+      '&#8736;' => '&ang;',     // angle
+      '&#8743;' => '&and;',     // logical and
+      '&#8744;' => '&or;',      // logical or
+      '&#8745;' => '&cap;',     // intersection
+      '&#8746;' => '&cup;',     // union
+      '&#8747;' => '&int;',     // integral
+      '&#8756;' => '&there4;',  // therefore
+      '&#8764;' => '&sim;',     // tilde operator
+      '&#8773;' => '&cong;',    // approximately equal to
+      '&#8776;' => '&asymp;',   // almost equal to
+      '&#8800;' => '&ne;',      // not equal to
+      '&#8801;' => '&equiv;',   // identical to
+      '&#8804;' => '&le;',      // less-than or equal to
+      '&#8805;' => '&ge;',      // greater-than or equal to
+      '&#8834;' => '&sub;',     // subset of
+      '&#8835;' => '&sup;',     // superset of
+      '&#8836;' => '&nsub;',    // not a subset of
+      '&#8838;' => '&sube;',    // subset of or equal to
+      '&#8839;' => '&supe;',    // superset of or equal to
+      '&#8853;' => '&oplus;',   // circled plus
+      '&#8855;' => '&otimes;',  // circled times
+      '&#8869;' => '&perp;',    // up tack
+      '&#8901;' => '&sdot;',    // dot operator
+      '&#8968;' => '&lceil;',   // left ceiling
+      '&#8969;' => '&rceil;',   // right ceiling
+      '&#8970;' => '&lfloor;',  // left floor
+      '&#8971;' => '&rfloor;',  // right floor
+      '&#9001;' => '&lang;',    // left-pointing angle bracket
+      '&#9002;' => '&rang;',    // right-pointing angle bracket
+      '&#9674;' => '&loz;',     // lozenge
+      '&#9824;' => '&spades;',  // black spade suit
+      '&#9827;' => '&clubs;',   // black club suit
+      '&#9829;' => '&hearts;',  // black heart suit
+      '&#9830;' => '&diams;'    // black diamond suit
+    );
+
+    // split entities into two parallel lists for use in str_replace()
+    $unicode_entities = array_keys($codes);
+    $html_entities = array_values($codes);
+  }
+
+  // transcode HTML entities to Unicode
+  if($to_unicode)
+    return str_replace($html_entities, $unicode_entities, $input);
+
+  // transcode Unicode entities to HTML entities
+  else
+    return str_replace($unicode_entities, $html_entities, $input);
+}
+
+
+
+
+/**
+ * transcode multi-byte characters to HTML representations for Unicode
+ *
+ * This function is aiming to preserve Unicode characters through storage in
+ * an ISO-8859-1 compliant system.
+ *
+ * Every multi-byte UTF-8 character is transformed to its equivalent HTML
+ * numerical entity (eg, &#4568;) that may be handled safely by PHP and by
+ * MySQL. Single-byte characters (below 0x80) are copied unchanged.
+ *
+ * Of course, this solution does not allow for full-text search in the
+ * database and therefore, is not a definitive solution to
+ * internationalization issues.
+ * It does enable, however, practical use of Unicode to build pages in foreign
+ * languages.
+ *
+ * Also, this function transforms HTML entities into their equivalent Unicode
+ * entities, by passing the input through transcode() first.
+ * For example, w.bloggar posts pages using HTML entities.
+ * If you have to modify these pages using web forms, you would like to get
+ * UTF-8 instead.
+ *
+ * Limitations visible in the code: the lead byte alone selects the sequence
+ * length and continuation bytes are not validated, so a stray byte in the
+ * range 0x80-0xBF is handled as the start of a two-byte sequence, and a
+ * sequence truncated at the end of the string reads past the last byte.
+ * Sequences whose decoded value is zero (eg, C0 80) are dropped.
+ *
+ * @link http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple Character Entity Chart
+ *
+ * @param string the original UTF-8 string
+ * @return a string acceptable in an ISO-8859-1 storage system (ie., PHP4 + MySQL 3)
+ */
+function to_unicode($input) {
+
+  // transcode HTML entities to Unicode entities
+  $input = transcode($input);
+
+  // scan the whole string
+  $output = '';
+  $index = 0;
+  while($index < strlen($input)) {
+
+    // look at one char
+    $char = ord($input[$index]);
+
+    // one byte (0xxxxxxx)
+    if($char < 0x80) {
+
+      // some chars may be undefined
+      $output .= chr($char);
+      $index += 1;
+
+    // two bytes (110xxxxx 10xxxxxx)
+    } elseif($char < 0xE0) {
+
+      // strip weird sequences (eg, C0 80 -> NUL)
+      if($value = (($char % 0x20) * 0x40)
+          + (ord($input[$index + 1]) % 0x40))
+        $output .= '&#' . $value . ';';
+      $index += 2;
+
+    // three bytes (1110xxxx 10xxxxxx 10xxxxxx)
+    // example: euro sign = \xE2\x82\xAC -> &#8364;
+    } elseif($char < 0xF0) {
+
+      // strip weird sequences
+      if($value = (($char % 0x10) * 0x1000)
+          + ((ord($input[$index + 1]) % 0x40) * 0x40)
+          + (ord($input[$index + 2]) % 0x40))
+        $output .= '&#' . $value . ';';
+      $index += 3;
+
+    // four bytes (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } elseif($char < 0xF8) {
+
+      // strip weird sequences
+      if($value = (($char % 0x08) * 0x40000)
+          + ((ord($input[$index + 1]) % 0x40) * 0x1000)
+          + ((ord($input[$index + 2]) % 0x40) * 0x40)
+          + (ord($input[$index + 3]) % 0x40))
+        $output .= '&#' . $value . ';';
+      $index += 4;
+
+    // five bytes (111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } elseif($char < 0xFC) {
+
+      // strip weird sequences
+      if($value = (($char % 0x04) * 0x1000000)
+          + ((ord($input[$index + 1]) % 0x40) * 0x40000)
+          + ((ord($input[$index + 2]) % 0x40) * 0x1000)
+          + ((ord($input[$index + 3]) % 0x40) * 0x40)
+          + (ord($input[$index + 4]) % 0x40))
+        $output .= '&#' . $value . ';';
+      $index += 5;
+
+    // six bytes (1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } else {
+
+      // strip weird sequences; the low six bits come from the sixth byte
+      // (offset 5), each earlier continuation byte is weighted by its position
+      if($value = (($char % 0x02) * 0x40000000)
+          + ((ord($input[$index + 1]) % 0x40) * 0x1000000)
+          + ((ord($input[$index + 2]) % 0x40) * 0x40000)
+          + ((ord($input[$index + 3]) % 0x40) * 0x1000)
+          + ((ord($input[$index + 4]) % 0x40) * 0x40)
+          + (ord($input[$index + 5]) % 0x40))
+        $output .= '&#' . $value . ';';
+      $index += 6;
+    }
+
+  }
+
+  // return the translated string
+  return $output;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
// returns either the translated string
// or the original string. Assumes we
// are passed the original string as occurs
@@ -264,8 +726,7 @@
if ($a == "")
return $a;
- // $a = htmlentities($a, ENT_QUOTES, $charset);
- $u = urlencode($a);
+ $u = mysql_real_escape_string(urlencode($a));
if (!$connection) {
// database not available, just print English
@@ -319,7 +780,7 @@
return fix($a); // just return English string
} else { // translation available
$row = mysql_fetch_array($result);
- return fix(urldecode($row["translation"]));
+ return $row["translation"];
}
}
@@ -361,7 +822,16 @@
}
function TITLE($a,$b="") {
- echo "<title>" . W_($a,$b) . "</title>\n";
+ global $lang;
+ global $languagecodes;
+ echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"
>";
+ echo "<title>" . TRANSLATE_($a,$b) . "</title>\n";
+ if (isset($languagecodes[$lang])) {
+ echo "<meta name=\"content-language\" content=\"" .
+ $languagecodes[$lang] . "\">";
+ echo "<meta name=\"language\" content=\"" .
+ $languagecodes[$lang] . "\">";
+ }
}
Modified: doodle-docs/WWW/i18nhtml_config.inc
===================================================================
--- doodle-docs/WWW/i18nhtml_config.inc 2005-04-04 06:47:24 UTC (rev 587)
+++ doodle-docs/WWW/i18nhtml_config.inc 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -24,12 +24,18 @@
$i18nHTMLrecordMode = 2; // 1: only missing, 2: everything, 0: disable
$i18nHTMLsqlServer = "localhost";
-$i18nHTMLsqlUser = "GNUnetWWW";
-$i18nHTMLsqlPass = "garlic";
+$i18nHTMLsqlUser = "i18nHTML";
+$i18nHTMLsqlPass = "pass";
-$i18nHTMLsqlDB = "translations"; // default is "translation"
+$i18nHTMLsqlDB = "translation"; // default is "translation"
$i18nHTMLbase = ""; // base directory prepended to i18nHTML php pages used in
links
$i18nHTMLmarker = "*"; // default value if never changed
+// Note that if you enable debug, the scripts may print
+// warnings even if everything is ok!
+$i18nHTMLdebug = 0; // 0 = no, 1 = yes
+// for selectively cloning a DB
+// $i18nHTMLclone = "/tmp/cloneFile.sql";
+
?>
Modified: doodle-docs/WWW/index.php
===================================================================
--- doodle-docs/WWW/index.php 2005-04-04 06:47:24 UTC (rev 587)
+++ doodle-docs/WWW/index.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -2,198 +2,67 @@
include("i18nhtml.inc");
DOCTYPE("HTML", "Transitional");
echo "<html><head>\n";
-if ($title) {
- echo "<title>";
- TRANSLATE("Doodle: find information on your computer");
- echo "</title>";
- }
-if ($description) {
- echo "<meta name=\"description\" content=\"";
- TRANSLATE("A tool to index and search the files on your disk.");
- echo "\">";
- }
-echo "<meta name=\"content-language\" content=\"" .
- $languagecodes[$lang] . "\">";
-echo "<meta name=\"language\" content=\"" .
- $languagecodes[$lang] . "\">";
+TITLE("i18nHTML - enabling collaborative webpage translation");
+echo "<meta name=\"description\" content=\"";
+TRANSLATE("i18nHTML is a collection of PHP scripts that allow visitors of a
webpage to help translating it.");
+echo "\">";
?>
<meta name="author" content="Christian Grothoff">
-<meta name="keywords"
content="doodle,desktop,search,suffix,tree,meta,data,libextractor,keywords,fam,index,database,free,Linux,GNU,GPL">
+<meta name="keywords"
content="i18n,HTML,PHP,translation,languages,mysql,database,internationalization,www,free,GNU,GPL">
<meta name="robots" content="index,follow">
<meta name="revisit-after" content="28 days">
<meta name="publisher" content="Christian Grothoff">
-<meta name="date" content="2004-12-31">
-<meta name="rights" content="(C) 2004 by Christian Grothoff>";
+<meta name="date" content="2005-01-03">
+<meta name="rights" content="(C) 2004,2005 by Christian Grothoff>";
<meta http-equiv="expires" content="43200">
<meta http-equiv="content-type" content="text/html">
</head>
<body>
<?php
generateLanguageBar();
-
-H1("Doodle");
-ANCHOR("about");
+H1("i18nHTML");
H2("About");
-
-W("Doodle is a tool to quickly search the documents on a computer.");
-W("Doodle builds an index using meta-data contained in the documents and
allows fast searches on the resulting database.");
-W("Doodle uses %s to support obtaining meta-data from various file-formats.",
- extlink_("/libextractor/","libextractor"));
-W("The database used by doodle is a %s, resulting in fast lookups.",
- extlink_("http://www.nist.gov/dads/HTML/suffixtree.html","suffix tree"));
-W("Doodle supports approximate searches.");
-BR();
-W("Features that Doodle does not have at the moment include:");
-echo "<ul>";
-LI("A web interface");
-LI("Ordering of search results");
-LI("Spidering (indexing the Internet or websites)");
-echo "</ul>";
-W("If you need these features, have a look at the %s section.",
- extlink_("#links", "links"));
-
+W("i18nHTML is a collection of PHP files that can be used to write webpages
that visitors can translate into their respective native languages.");
+W("i18nHTML uses a database to match sentences from the webpage against
translations.");
+W("i18nHTML defines a set of PHP functions that generate either the translated
HTML sentences or the original (typically English) text with decorations that
allow users to provide translations.");
+W("i18nHTML requires the internationalized webpages to be written using the
provided PHP functions but does not constrain the page design in any way.");
+W("Webpages internationalized with i18nHTML can be updated without loosing
existing translations for sentences that were not changed.");
+W("Note that it is important that you use the i18nHTML <tt>TITLE</tt> command
in your documents in order to ensure that the character set and other meta-data
is set properly.");
P();
-W("Doodle is licensed under the %s.",
- extlink_("http://www.gnu.org/licenses/gpl.html","GNU GPL"));
-W("Indexing large volumes can take several hundred MBs of memory (depending on
the amount of meta-data found).");
-W("Searching should nevertheless require almost no memory.");
-W("Using the latest version of libextractor is recommended.");
-W("Doodle has so far only been tested under %s and %s %s %s.",
- ARRAY(extlink_("http://www.debian.org/", "Debian"),
- extlink_("http://www.redhat.com/", "RedHat"),
- extlink_("http://www.gnu.org/", "GNU"),
- extlink_("http://www.linux.org/", "Linux")));
-W("Doodle is expected it to work under any platform supported by %s.",
- extlink_("/libextractor/", "libextractor"));
-P();
-
-ANCHOR("download");
H2("Download");
-W("You can find the current release %s.",
- extlink_("download/doodle-0.6.2.tar.gz", "here"));
-W("Man-pages for %s, %s and %s are also on-line.",
- ARRAY(extlink_("man/doodle.html", "doodle"),
- extlink_("man/doodled.html", "doodled"),
- extlink_("man/libdoodle", "libdoodle")));
-
-W("The latest version can be obtained using");
-PRE("$ svn checkout https://gnunet.org/svn/doodle/");
+W("You can find the latest version %s.",
+ extlink_("https://gnunet.org/i18nHTML/download/", "here"));
+W("The latest CVS version can be obtained using");
+PRE("$ svn checkout https://gnunet.org/svn/i18nHTML/");
P();
W("If you want to be notified about updates, subscribe to %s",
- extlink_("http://freshmeat.net/projects/doodle/", "doodle on freshmeat"));
+ extlink_("http://freshmeat.net/projects/i18nHTML/", "i18nHTML on
freshmeat"));
P();
-W("Debian packages provided by Daniel Baumann can be found %s.",
- extlink_("http://packages.debian.org/doodle", "here"));
-W("RedHat/Fedora RPM packages provided by Dag Wieers can be found %s.",
- extlink_("http://dag.wieers.com/packages/doodle/","here"));
-P();
-ANCHOR("using");
-H2("Using doodle");
-W("First the doodle database needs to be created.");
-W("The simplest way to create the database is to run doodle with the
<tt>-b</tt> option on the directories that are to be indexed.");
-W("For example:");
-PRE("$ doodle -b $HOME");
-W("This will create the doodle database under <tt>~/.doodle</tt>.");
-BR();
-W("After creating the doodle database, you can search it.");
-W("For example:");
-PRE("$ doodle keyword");
-BR();
-H3("Keeping the database up-to-date");
-W("If you want to keep your doodle database up-to-date, you can either
periodically re-run doodle with the <tt>-b</tt> option, or you can use doodled,
the doodle daemon.");
-W("doodled uses %s to notice whenever a file is changed and instantly updates
the doodle database.",
- "fam");
-W("In order to use doodled, you must have famd running.");
-W("If famd is running, you can start doodled by passing the same arguments
that you would pass to doodle to construct the database, but without the
<tt>-b</tt> option:");
-PRE("$ doodled $HOME");
-W("You can also use doodled to construct the initial database.");
-W("While doodled is updating the database, any doodle search will block until
the update is complete.");
-W("Note that while you may want to index your entire disk (i.e., <tt>doodle -b
/</tt>), it is typically not a great idea to have doodled monitor your entire
system for changes -- especially since <tt>/usr</tt> is unlikely to change
frequently.");
-W("You can address this issue by first indexing <tt>/</tt> and then using
doodled to monitor only directories that change frequently:");
-PRE("$ doodle -b /\n" .
- "$ doodled $HOME");
-W("This way, your entire system will be in the index, and your home directory
will be always up-to-date.");
-P();
-H3("Full-text search");
-W("You can achieve a (limited) form of full-text search with doodle.");
-W("For that, the dictionary-based plaintext extractors from %s are used.",
- extlink_("/libextractor/","libextractor"));
-W("In order to use them, you need to pass the option <tt>-b LANG</tt> to
doodle.");
-W("LANG is a two letter language code that selects the dictionary.");
-W("Available languages at the moment are en, es, fr, it and no.");
-W("Words and sentences that are available in the respective dictionaries for
these languages will then be added to the index.");
-W("While libextractor attempts to avoid full-text extraction for certain kown
binary formats, it may still find words in non-text files.");
-W("Running with this option will dramatically increase the size of the index
and the time it takes to build the index.");
-W("Note that if you change the options used to build a database will not (!)
result in doodle re-indexing files that were processed with other options
previously.");
-W("The only way to force doodle to re-index files with different options is to
either touch the files (change modification timestamp) or to delete the old
database and start from scratch.");
-P();
-H3("Hints of the system administrator");
-W("If you are the system administrator, you might want to run doodle on the
entire system periodically (cron job) and have doodled monitor the home
directories in the background.");
-W("In that case, it is suggested to have the doodle database be group-readable
for a group doodle.");
-W("Set the permissions for the doodle binary to SGID to allow users to poll
the database.");
-W("Doodle will ensure that information about files not accessible to the user
are not leaked by checking if files found in the database are accessible to the
user.");
-W("doodled has to run as root since otherwise it would be impossible to index
the personal files of all users.");
-W("If that it too risky, doodled will still work, but only index the files
readable to the user that runs doodled.");
-
P();
-H3("Using different options for different directories");
-W("You can build a database from multiple doodle runs over distinct sets of
files with different options.");
-W("For example, the following can make sense:");
-PRE("$ doodle -B en -b /usr/share/doc # full-text index over documentation\n" .
- "$ doodle -b /usr /opt /bin /lib # normal index over other system files\n"
.
- "$ doodled -B en /home # monitor /home, with full-text support\n");
-W("A simple doodle search will then find files in all listed directories.");
-W("You can also build multiple disjoint databases and search all of them in
one run (see %s for option <tt>-d</tt>).",
- extlink_("man/doodle.html", "doodle"));
+if ( ($xlang) && ($xlang != "English")) {
+ H2("Mass translation");
+ W("The mass-translation page for translating many sentences at once is %s.",
+ intlink_("editor.php", "here"));
+ W("Note that the sentence database is shared with the %s, %s and %s
projects.",
+ ARRAY(extlink_("http://gnunet.org/", "GNUnet"),
+ extlink_("http://gnunet.org/doodle/", "doodle"),
+ extlink_("http://gnunet.org/libextractor/", "libExtractor")));
+ }
-P();
-ANCHOR("mantis");
H2("Bugtrack");
-W("Doodle uses Mantis for bugtracking.");
+W("i18nHTML uses Mantis for bugtracking.");
W("Visit %s to report bugs.",
extlink_("https://gnunet.org/mantis/","https://gnunet.org/mantis/"));
W("You need to sign up for a reporter account.");
-W("Please make sure you report bugs under <strong>Doodle</strong> and not
under any of the other projects.");
+W("Please make sure you report bugs under <strong>I18nHTML</strong> and not
under any of the other projects.");
P();
-W("If you dislike Mantis and need to report a bug contact %s via e-mail.",
+W("If you dislike Mantis and need to report a bug contact %s via e-mail (good
luck getting by the spam-filter).",
extlink_("mailto:address@hidden","address@hidden"));
-
-ANCHOR("links");
-H2("Links");
-echo "<ul>";
-LI(extlink_("http://members.cox.net/sinzui/medusa/",
- "Medusa, similar project for Gnome"));
-LI(extlink_("http://swish-e.org/",
- "Swish-e, indexing tool with focus on the WWW"));
-LI(extlink_("http://www.gnome.org/~seth/storage/",
- "GNOME Storage"));
-LI(extlink_("http://www.gnome.org/projects/beagle/",
- "Beagle"));
-LI(extlink_("http://sourceforge.net/projects/rlocate/",
- "rlocate, version of locate that is always up-to-date"));
-LI(extlink_("http://www.htdig.org/",
- "HTDig"));
-LI(extlink_("http://jakarta.apache.org/lucene/",
- "Lucene"));
-LI(extlink_("http://homepage.mac.com/pauljlucas/software/swish/",
- "Swish++"));
-LI(extlink_("http://webglimpse.net/",
- "Glimpse"));
-LI(extlink_("http://evidence.sf.net/",
- "Evidence, file-manager with support for doodle"));
-LI(extlink_("http://www.linux-magazin.de/Artikel/ausgabe/2004/09/bgw/bgw.html",
- "Article about doodle (and other things) in the German
Linux-Magazin"));
-echo "</ul>";
HR();
-echo "<address><a href=\"mailto:address@hidden">Christian
Grothoff</a></address>";
-PRE("Copyright (C) 2004 Christian Grothoff.\n" .
- "Verbatim copying and distribution of this entire article\n" .
- "is permitted in any medium, provided this notice is preserved.");
-BR();
generateFooter();
echo "</body></html>\n";
?>
Modified: doodle-docs/WWW/translate.php
===================================================================
--- doodle-docs/WWW/translate.php 2005-04-04 06:47:24 UTC (rev 587)
+++ doodle-docs/WWW/translate.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -27,9 +27,7 @@
DOCTYPE("HTML", "Transitional");
echo "<html><head>\n";
-echo "<title>";
-TRANSLATE("WWW translation");
-echo "</title>";
+TITLE("WWW translation");
echo "<meta name=\"description\" content=\"";
TRANSLATE("Help translating this webpage.");
echo "\">";
@@ -45,7 +43,7 @@
W("Destination language: ");
W($lang);
P();
-echo "<form action=\"" . $i18nHTMLbase . "commitTranslation.php\">\n";
+echo "<form method=\"POST\" action=\"" . $i18nHTMLbase .
"commitTranslation.php\">\n";
echo "<input type=hidden name=\"text\" value=\"" . urlencode($text) . "\">\n";
echo "<input type=hidden name=\"xlang\" value=\"$xlang\">\n";
echo "<input type=hidden name=\"back\" value=\"$back\">\n";
@@ -93,9 +91,9 @@
printf("<tr><td>%s</td><td><a href=\"" . $i18nHTMLbase .
"vote.php?xlang=%s&text=%s&translation=%s\">%s</a></td></tr>\n",
W_($row["lang"]),
urlencode($row["lang"]),
- urlencode($text),
- $translation,
- urldecode($translation));
+ $u,
+ urlencode(from_unicode($translation)),
+ fix(from_unicode($translation)));
}
echo "</table>";
@@ -121,4 +119,4 @@
generateFooter();
echo "</body></html>";
-?>
\ No newline at end of file
+?>
Modified: doodle-docs/WWW/vote.php
===================================================================
--- doodle-docs/WWW/vote.php 2005-04-04 06:47:24 UTC (rev 587)
+++ doodle-docs/WWW/vote.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -21,17 +21,18 @@
// For sentences with multiple translations, the one with the most
// votes is displayed.
include("i18nhtml.inc");
-echo "<html><head><title>";
-W("WWW translation: vote");
-echo "</title></head><body>";
+echo "<html><head>";
+echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" >";
+TITLE("WWW translation: vote");
+echo "</head><body>";
if (!$connection) {
echo "Database is down. Cannot edit translations.";
die();
}
$text = $_REQUEST['text'];
$translation = $_REQUEST['translation'];
-$u = urlencode($text);
-$t = urlencode($translation);
+$u = mysql_real_escape_string($text);
+$t = mysql_real_escape_string(to_unicode($translation));
echo "text = " . $text . "<br>\n";
echo "translation = " . $translation . "<br>\n";
Modified: i18nHTML-docs/WWW/commitMassTranslation.php
===================================================================
--- i18nHTML-docs/WWW/commitMassTranslation.php 2005-04-04 06:47:24 UTC (rev
587)
+++ i18nHTML-docs/WWW/commitMassTranslation.php 2005-04-04 06:51:09 UTC (rev
588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -26,11 +26,13 @@
W("Translating to English currently not allowed.\n");
die();
}
-echo "<html><head><title>WWW translation: commit</title></head><body>";
+echo "<html><head>";
+TITLE("WWW translation: commit");
+echo "</head><body>";
W("Processing translations...");
P();
$done = 0;
-foreach($_GET as $dec=>$val) {
+foreach($_POST as $dec=>$val) {
if ($val == "")
continue;
if ( ($dec == "xlang") || ($dec == "start") )
@@ -44,7 +46,7 @@
$num--;
$row = mysql_fetch_array($result);
if ($dec == bin2hex(md5(urldecode($row["c"])))) {
- $enc = $row["c"];
+ $enc = mysql_real_escape_string($row["c"]);
break;
}
}
@@ -56,21 +58,30 @@
}
$query = "DELETE FROM pending WHERE lang=\"$lang\" AND c=\"$enc\"";
mysql_query($query, $connection);
- $t = urlencode($val);
- // $t = urlencode(htmlentities($val, ENT_QUOTES, $charset));
+ $t = mysql_real_escape_string(to_unicode($val));
$query = "SELECT ranking FROM map WHERE name=\"$enc\" AND lang=\"$lang\" AND
translation=\"$t\"";
$result = mysql_query($query, $connection);
$num = 0;
if ($result)
$num = mysql_numrows($result);
if ($num == 0) {
- $query = "INSERT INTO map VALUES(\"$enc\", \"$lang\", \"$t\", 1, \"" .
$_SERVER['REMOTE_ADDR'] . "\");";
- mysql_query($query, $connection);
- $done++;
- W("Storing translation for &quot;%s&quot; = &quot;%s&quot;.",
- ARRAY(urldecode($enc),
- urldecode($t)));
- BR();
+ $txtCnt = count_chars(urldecode($enc), 1);
+ $tCnt = count_chars($t, 1);
+ if ($txtCnt[ord('%')] != $tCnt[ord('%')]) {
+ W("Commit '%s->%s' failed.", $enc, $t);
+ W("The number of percent signs in source text and translation do not
match.");
+ W("Note that you must preserve all %%s expressions unchanged.");
+ W("Also, a single displayed %% sign must be translated into two (%%%%)
such signs.");
+ P();
+ } else {
+ $query = "INSERT INTO map VALUES(\"$enc\", \"$lang\", \"$t\", 1, \"" .
$_SERVER['REMOTE_ADDR'] . "\");";
+ mysql_query($query, $connection);
+ $done++;
+ W("Storing translation for &quot;%s&quot; = &quot;%s&quot;.",
+ ARRAY(urldecode($enc),
+ urldecode($t)));
+ BR();
+ }
}
}
P();
Modified: i18nHTML-docs/WWW/commitTranslation.php
===================================================================
--- i18nHTML-docs/WWW/commitTranslation.php 2005-04-04 06:47:24 UTC (rev
587)
+++ i18nHTML-docs/WWW/commitTranslation.php 2005-04-04 06:51:09 UTC (rev
588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -18,7 +18,7 @@
Boston, MA 02111-1307, USA.
*/
include("i18nhtml.inc");
-$text = $_REQUEST['text'];
+$text = mysql_real_escape_string($_REQUEST['text']);
$translation = $_REQUEST['translation'];
$back = $_REQUEST['back'];
if (!$connection) {
@@ -30,12 +30,7 @@
die();
}
-// note: $text is already urlencoded (by submitting via form) and html
compatible
-// ensure translation is stored in encoded form and html compatible
-// if (get_magic_quotes_gpc()) $translation = stripslashes($translation);
-
-// $t = urlencode(htmlentities($translation, ENT_QUOTES, $charset));
-$t = urlencode($translation);
+$t = mysql_real_escape_string(to_unicode($translation));
// check for identical translation
$query = "SELECT ranking FROM map WHERE name=\"$text\" AND lang=\"$lang\" AND
translation=\"$t\"";
$result = mysql_query($query, $connection);
@@ -44,22 +39,38 @@
$num = mysql_numrows($result);
}
if ($num > 0) {
- echo "<html><body>";
+ echo "<html><head>";
+ TITLE("Translation exists.");
+ echo "</head><body>";
W("Translation exists.");
extlink($back, "Back...");
generateFooter();
echo "</body></html>";
} else {
- // if (!get_magic_quotes_gpc()) $t = addslashes($t); // ensure escaped
before adding to DB
- $query = "INSERT INTO map VALUES(\"$text\", \"$lang\", \"$t\", 1, \"" .
$_SERVER['REMOTE_ADDR'] . "\");";
- $result = mysql_query($query, $connection);
- if ($result) {
- header("Location: " . $back); /* Redirect browser */
+ $txtCnt = count_chars(urldecode($text), 1);
+ $tCnt = count_chars($t, 1);
+ if ($txtCnt[ord('%')] != $tCnt[ord('%')]) {
+ echo "<html><head>";
+ TITLE("Commit failed.");
+ echo "</head><body>";
+ W("Commit failed.");
+ W("The number of percent signs in source text and translation do not
match.");
+ W("Note that you must preserve all %%s expressions unchanged.");
+ W("Also, a single displayed %% sign must be translated into two (%%%%)
such signs.");
+ echo "</body></html>";
} else {
- echo "<html><body>";
- W("Commit ('%s') failed: ", $query);
- echo mysql_error();
- echo "</body></html>";
+ $query = "INSERT INTO map VALUES(\"$text\", \"$lang\", \"$t\", 1, \"" .
$_SERVER['REMOTE_ADDR'] . "\");";
+ $result = mysql_query($query, $connection);
+ if ($result) {
+ header("Location: " . $back); /* Redirect browser */
+ } else {
+ echo "<html><head>";
+ TITLE("Commit failed.");
+ echo "</head><body>";
+ W("Commit ('%s') failed: ", $query);
+ echo mysql_error();
+ echo "</body></html>";
+ }
}
}
?>
\ No newline at end of file
Modified: i18nHTML-docs/WWW/editor.php
===================================================================
--- i18nHTML-docs/WWW/editor.php 2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/editor.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -65,7 +65,7 @@
ARRAY($start, $end));
P();
- echo "<form action=\"" . $i18nHTMLbase . "commitMassTranslation.php\">";
+ echo "<form method=\"POST\" action=\"" . $i18nHTMLbase .
"commitMassTranslation.php\">";
echo "<input type=hidden name=\"xlang\" value=\"$xlang\">";
$endp = $end + 1;
echo "<input type=hidden name=\"start\" value=\"$endp\">";
Modified: i18nHTML-docs/WWW/i18nhtml.inc
===================================================================
--- i18nHTML-docs/WWW/i18nhtml.inc 2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/i18nhtml.inc 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff and other contributing authors.
+ (C) 2003, 2004, 2005 Christian Grothoff and other contributing authors.
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -27,6 +27,7 @@
// $editor can be set to create a translation
// tag even if a translation is already available.
// there is currently no security.
+ //
// An "_" is used for functions that return the
// translated string instead of printing it directly.
// These functions are used for "%s" printing with W().
@@ -39,6 +40,8 @@
// obtain user db specific configuration parameters
include("i18nhtml_config.inc");
+header("Content-type: text/html; charset=utf-8");
+
// establish default connection to database server
$connection = @mysql_connect($i18nHTMLsqlServer,
$i18nHTMLsqlUser,
@@ -119,6 +122,7 @@
if ($xlang)
$lang = $xlang;
$lang = ucfirst(strtolower($lang));
+$lang = mysql_real_escape_string($lang);
$editor = $_REQUEST['editor'];
@@ -241,6 +245,464 @@
}
}
+
+/**
+ * restore UTF-8 from HTML Unicode entities
+ *
+ * This function is triggered by the YACS handler during page
+ * rendering. It is aiming to transcode HTML Unicode entities
+ * (eg, &#8364;) back to actual UTF-8 encoding (eg, €).
+ *
+ * @param string a string with a mix of UTF-8 and of HTML Unicode entities
+ * @return an UTF-8 string
+ */
+function from_unicode($text) {
+ // translate extended ISO8859-1 chars, if any
+ $text = utf8_encode($text);
+
+ // translate Unicode entities
+ $areas = preg_split('/&#(\d+?);/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+ $text = '';
+ $index = 0;
+ foreach($areas as $area) {
+ switch($index%2) {
+ case 0: // before entity
+ $text .= $area;
+ break;
+ case 1: // the entity itself
+
+ // get the integer value
+ $unicode = intval($area);
+
+ // one byte
+ if($unicode < 0x80) {
+
+ $text .= chr($unicode);
+
+ // two bytes
+ } elseif($unicode < 0x800) {
+
+ $text .= chr( 0xC0 + ( ( $unicode - ( $unicode % 0x40 ) ) / 0x40 ) );
+ $text .= chr( 0x80 + ( $unicode % 0x40 ) );
+
+ // three bytes
+ } elseif($unicode < 0x10000) {
+
+ $text .= chr( 0xE0 + ( ( $unicode - ( $unicode % 0x1000 ) ) / 0x1000 )
);
+ $text .= chr( 0x80 + ( ( ( $unicode % 0x1000 ) - ( $unicode % 0x40 ) )
/ 0x40 ) );
+ $text .= chr( 0x80 + ( $unicode % 0x40 ) );
+
+ // more bytes, keep it as it is...
+ } else
+ $text .= '&#'.$unicode.';';
+
+ break;
+ }
+ $index++;
+ }
+
+ // the updated string
+ return $text;
+}
+
+
+/**
+ * transcode unicode entities to/from HTML entities
+ *
+ * Also, this function transforms HTML entities into their equivalent Unicode
entities.
+ * For example, w.bloggar posts pages using HTML entities.
+ * If you have to modify these pages using web forms, you would like to get
UTF-8 instead.
+ *
+ * @link
http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple
Character Entity Chart
+ *
+ * @param string the string to be transcoded
+ * @param boolean TRUE to transcode to Unicode, FALSE to transcode to HTML
+ * @return a transcoded string
+ */
+function transcode($input, $to_unicode=TRUE) {
+
+ // initialize tables only once
+ static $html_entities, $unicode_entities;
+ if(!is_array($html_entities)) {
+
+
+ // numerical order
+ $codes = array(
+ ' ' => ' ', // non-breaking space
+ '¡' => '¡', // inverted exclamation mark
+ '¢' => '¢', // cent sign
+ '£' => '£', // pound sign
+ '¤' => '¤', // currency sign
+ '¥' => '¥', // yen sign
+ '¦' => '¦', // broken bar
+ '§' => '§', // section sign
+ '¨' => '¨', // diaeresis
+ '©' => '©', // copyright sign
+ 'ª' => 'ª', // feminine ordinal indicator
+ '«' => '«', // left-pointing double angle
quotation mark
+ '¬' => '¬', // not sign
+ '­' => '­', // soft hyphen
+ '®' => '®', // registered sign
+ '¯' => '¯', // macron
+ '°' => '°', // degree sign
+ '±' => '±', // plus-minus sign
+ '²' => '²', // superscript two
+ '³' => '³', // superscript three
+ '´' => '´', // acute accent
+ 'µ' => 'µ', // micro sign
+ '¶' => '¶', // pilcrow sign
+ '·' => '·', // middle dot
+ '¸' => '¸', // cedilla
+ '¹' => '¹', // superscript one
+ 'º' => 'º', // masculine ordinal indicator
+ '»' => '»', // right-pointing double angle
quotation mark
+ '¼' => '¼', // vulgar fraction one quarter
+ '½' => '½', // vulgar fraction one half
+ '¾' => '¾', // vulgar fraction three
quarters
+ '¿' => '¿', // inverted question mark
+ 'À' => 'À', // latin capital letter A with
grave
+ 'Á' => 'Á', // latin capital letter A with
acute
+ 'Â' => 'Â', // latin capital letter A with
circumflex
+ 'Ã' => 'Ã', // latin capital letter A with
tilde
+ 'Ä' => 'Ä', // latin capital letter A with
diaeresis
+ 'Å' => 'Å', // latin capital letter A with
ring above
+ 'Æ' => 'Æ', // latin capital letter AE
+ 'Ç' => 'Ç', // latin capital letter C with
cedilla
+ 'È' => 'È', // latin capital letter E with
grave
+ 'É' => 'É', // latin capital letter E with
acute
+ 'Ê' => 'Ê', // latin capital letter E with
circumflex
+ 'Ë' => 'Ë', // latin capital letter E with
diaeresis
+ 'Ì' => 'Ì', // latin capital letter I with
grave
+ 'Í' => 'Í', // latin capital letter I with
acute
+ 'Î' => 'Î', // latin capital letter I with
circumflex
+ 'Ï' => 'Ï', // latin capital letter I with
diaeresis
+ 'Ð' => 'Ð', // latin capital letter
ETH
+ 'Ñ' => 'Ñ', // latin capital letter N with
tilde
+ 'Ò' => 'Ò', // latin capital letter O with
grave
+ 'Ó' => 'Ó', // latin capital letter O with
acute
+ 'Ô' => 'Ô', // latin capital letter O with
circumflex
+ 'Õ' => 'Õ', // latin capital letter O with
tilde
+ 'Ö' => 'Ö', // latin capital letter O with
diaeresis
+ '×' => '×', // multiplication sign
+ 'Ø' => 'Ø', // latin capital letter O with
stroke
+ 'Ù' => 'Ù', // latin capital letter U with
grave
+ 'Ú' => 'Ú', // latin capital letter U with
acute
+ 'Û' => 'Û', // latin capital letter U with
circumflex
+ 'Ü' => 'Ü', // latin capital letter U with
diaeresis
+ 'Ý' => 'Ý', // latin capital letter Y with
acute
+ 'Þ' => 'Þ', // latin capital letter THORN
+ 'ß' => 'ß', // latin small letter sharp s
+ 'à' => 'à', // latin small letter a with
grave
+ 'á' => 'á', // latin small letter a with
acute
+ 'â' => 'â', // latin small letter a with
circumflex
+ 'ã' => 'ã', // latin small letter a with
tilde
+ 'ä' => 'ä', // latin small letter a with
diaeresis
+ 'å' => 'å', // latin small letter a with
ring above
+ 'æ' => 'æ', // latin small letter ae
+ 'ç' => 'ç', // latin small letter c with
cedilla
+ 'è' => 'è', // latin small letter e with
grave
+ 'é' => 'é', // latin small letter e with
acute
+ 'ê' => 'ê', // latin small letter e with
circumflex
+ 'ë' => 'ë', // latin small letter e with
diaeresis
+ 'ì' => 'ì', // latin small letter i with
grave
+ 'í' => 'í', // latin small letter i with
acute
+ 'î' => 'î', // latin small letter i with
circumflex
+ 'ï' => 'ï', // latin small letter i with
diaeresis
+ 'ð' => 'ð', // latin small letter
eth
+ 'ñ' => 'ñ', // latin small letter n with
tilde
+ 'ò' => 'ò', // latin small letter o with
grave
+ 'ó' => 'ó', // latin small letter o with
acute
+ 'ô' => 'ô', // latin small letter o with
circumflex
+ 'õ' => 'õ', // latin small letter o with
tilde
+ 'ö' => 'ö', // latin small letter o with
diaeresis
+ '÷' => '÷', // division sign
+ 'ø' => 'ø', // latin small letter o with
stroke
+ 'ù' => 'ù', // latin small letter u with
grave
+ 'ú' => 'ú', // latin small letter u with
acute
+ 'û' => 'û', // latin small letter u with
circumflex
+ 'ü' => 'ü', // latin small letter u with
diaeresis
+ 'ý' => 'ý', // latin small letter y with
acute
+ 'þ' => 'þ', // latin small letter thorn
+ 'ÿ' => 'ÿ', //
+ 'Œ' => 'Œ', // latin capital ligature OE
+ 'œ' => 'œ', // latin small ligature oe
+ 'Š' => 'Š', // latin capital letter S with
caron
+ 'š' => 'š', // latin small letter s with
caron
+ 'Ÿ' => 'Ÿ', // latin capital letter Y with
diaeresis
+ 'ƒ' => 'ƒ' , // latin small f with hook
+ 'ˆ' => 'ˆ', // modifier letter circumflex
accent
+ '˜' => '˜', // small tilde
+ 'Α' => 'Α', // greek capital letter alpha
+ 'Β' => 'Β', // greek capital letter beta
+ 'Γ' => 'Γ', // greek capital letter gamma
+ 'Δ' => 'Δ', // greek capital letter delta
+ 'Ε' => 'Ε', // greek capital letter epsilon
+ 'Ζ' => 'Ζ', // greek capital letter zeta
+ 'Η' => 'Η', // greek capital letter
eta
+ 'Θ' => 'Θ', // greek capital letter theta
+ 'Ι' => 'Ι', // greek capital letter iota
+ 'Κ' => 'Κ', // greek capital letter kappa
+ 'Λ' => 'Λ', // greek capital letter lambda
+ 'Μ' => 'Μ', // greek capital letter
mu
+ 'Ν' => 'Ν', // greek capital letter
nu
+ 'Ξ' => 'Ξ', // greek capital letter
xi
+ 'Ο' => 'Ο', // greek capital letter omicron
+ 'Π' => 'Π', // greek capital letter
pi
+ 'Ρ' => 'Ρ', // greek capital letter
rho
+ 'Σ' => 'Σ', // greek capital letter sigma
+ 'Τ' => 'Τ', // greek capital letter
tau
+ 'Υ' => 'Υ', // greek capital letter upsilon
+ 'Φ' => 'Φ', // greek capital letter
phi
+ 'Χ' => 'Χ', // greek capital letter
chi
+ 'Ψ' => 'Ψ', // greek capital letter
psi
+ 'Ω' => 'Ω', // greek capital letter omega
+ 'α' => 'α', // greek small letter alpha
+ 'β' => 'β', // greek small letter beta
+ 'γ' => 'γ', // greek small letter gamma
+ 'δ' => 'δ', // greek small letter delta
+ 'ε' => 'ε', // greek small letter epsilon
+ 'ζ' => 'ζ', // greek small letter zeta
+ 'η' => 'η', // greek small letter
eta
+ 'θ' => 'θ', // greek small letter theta
+ 'ι' => 'ι', // greek small letter iota
+ 'κ' => 'κ', // greek small letter kappa
+ 'λ' => 'λ', // greek small letter lambda
+ 'μ' => 'μ', // greek small letter mu
+ 'ν' => 'ν', // greek small letter nu
+ 'ξ' => 'ξ', // greek small letter xi
+ 'ο' => 'ο', // greek small letter omicron
+ 'π' => 'π', // greek small letter pi
+ 'ρ' => 'ρ', // greek small letter
rho
+ 'ς' => 'ς', // greek small letter final
sigma
+ 'σ' => 'σ', // greek small letter sigma
+ 'τ' => 'τ', // greek small letter
tau
+ 'υ' => 'υ', // greek small letter upsilon
+ 'φ' => 'φ', // greek small letter
phi
+ 'χ' => 'χ', // greek small letter
chi
+ 'ψ' => 'ψ', // greek small letter
psi
+ 'ω' => 'ω', // greek small letter omega
+ 'ϑ' => 'ϑ', // greek small letter
theta symbol
+ 'ϒ' => 'ϒ', // greek upsilon with hook
symbol
+ 'ϖ' => 'ϖ', // greek pi symbol
+ ' ' => ' ', // en space
+ ' ' => ' ', // em space
+ ' ' => ' ', // thin space
+ '‌' => '‌', // zero width non-joiner
+ '‍' => '‍', // zero width joiner
+ '‎' => '‎', // left-to-right mark
+ '‏' => '‏', // right-to-left mark
+ '–' => '–', // en dash
+ '—' => '—', // em dash
+ '‘' => '‘', // left single quotation mark
+ '’' => '’', // right single quotation mark
+ '‚' => '‚', // single low-9 quotation mark
+ '“' => '“', // left double quotation mark
+ '”' => '”', // right double quotation mark
+ '„' => '„', // double low-9 quotation mark
+ '†' => '†', // dagger
+ '‡' => '‡', // double dagger
+ '•' => '•', // bullet
+ '…' => '…', // horizontal ellipsis
+ '‰' => '‰', // per mille sign
+ '′' => '′', // prime = minutes = feet
+ '″' => '″', // double prime
+ '‹' => '‹', // single left-pointing angle quotation mark
+ '›' => '›', // single right-pointing angle quotation mark
+ '‾' => '‾', // overline
+ '⁄' => '⁄', // fraction slash
+ '€' => '€', // euro sign
+ 'ℑ' => 'ℑ', // blackletter capital I
+ '℘' => '℘', // script capital P
+ 'ℜ' => 'ℜ', // blackletter capital R
+ '™' => '™', // trade mark sign
+ 'ℵ' => 'ℵ', // alef symbol
+ '←' => '←', // leftwards arrow
+ '↑' => '↑', // upwards arrow
+ '→' => '→', // rightwards arrow
+ '↓' => '↓', // downwards arrow
+ '↔' => '↔', // left right arrow
+ '↵' => '↵', // downwards arrow with corner leftwards
+ '⇐' => '⇐', // leftwards double arrow
+ '⇑' => '⇑', // upwards double arrow
+ '⇒' => '⇒', // rightwards double arrow
+ '⇓' => '⇓', // downwards double arrow
+ '⇔' => '⇔', // left right double arrow
+ '∀' => '∀', // for all
+ '∂' => '∂', // partial differential
+ '∃' => '∃', // there exists
+ '∅' => '∅', // empty set
+ '∇' => '∇', // nabla
+ '∈' => '∈', // element of
+ '∉' => '∉', // not an element of
+ '∋' => '∋', // contains as member
+ '∏' => '∏', // n-ary product
+ '∑' => '∑', // n-ary summation
+ '−' => '−', // minus sign
+ '∗' => '∗', // asterisk operator
+ '√' => '√', // square root
+ '∝' => '∝', // proportional to
+ '∞' => '∞', // infinity
+ '∠' => '∠', // angle
+ '∧' => '∧', // logical and
+ '∨' => '∨', // logical or
+ '∩' => '∩', // intersection
+ '∪' => '∪', // union
+ '∫' => '∫', // integral
+ '∴' => '∴', // therefore
+ '∼' => '∼', // tilde operator
+ '≅' => '≅', // approximately equal to
+ '≈' => '≈', // almost equal to
+ '≠' => '≠', // not equal to
+ '≡' => '≡', // identical to
+ '≤' => '≤', // less-than or equal to
+ '≥' => '≥', // greater-than or equal to
+ '⊂' => '⊂', // subset of
+ '⊃' => '⊃', // superset of
+ '⊄' => '⊄', // not a subset of
+ '⊆' => '⊆', // subset of or equal to
+ '⊇' => '⊇', // superset of or equal to
+ '⊕' => '⊕', // circled plus
+ '⊗' => '⊗', // circled times
+ '⊥' => '⊥', // up tack
+ '⋅' => '⋅', // dot operator
+ '⌈' => '⌈', // left ceiling
+ '⌉' => '⌉', // right ceiling
+ '⌊' => '⌊', // left floor
+ '⌋' => '⌋', // right floor
+ '〈' => '⟨', // left-pointing angle bracket
+ '〉' => '⟩', // right-pointing angle bracket
+ '◊' => '◊', // lozenge
+ '♠' => '♠', // black spade suit
+ '♣' => '♣', // black club suit
+ '♥' => '♥', // black heart suit
+ '♦' => '♦' // black diam suit
+ );
+
+ // split entities for use in str_replace()
+ foreach($codes as $unicode_entity => $html_entity) {
+ $unicode_entities[] = $unicode_entity;
+ $html_entities[] = $html_entity;
+ }
+ }
+
+ // transcode HTML entities to Unicode
+ if($to_unicode)
+ return str_replace($html_entities, $unicode_entities, $input);
+
+ // transcode Unicode entities to HTML entities
+ else
+ return str_replace($unicode_entities, $html_entities, $input);
+}
+
+
+
+
+/**
+ * transcode multi-byte characters to HTML representations for Unicode
+ *
+ * This function is aiming to preserve Unicode characters through storage in
+ * an ISO-8859-1 compliant system.
+ *
+ * Every multi-byte UTF-8 character is transformed to its equivalent HTML
+ * numerical entity (eg, &#4568;) that may be handled safely by PHP and by
+ * MySQL.
+ *
+ * Of course, this solution does not allow for full-text search in the
+ * database and therefore, is not a definitive solution to
+ * internationalization issues.
+ * It does enable, however, practical use of Unicode to build pages in
+ * foreign languages.
+ *
+ * Also, this function transforms HTML entities into their equivalent Unicode
+ * entities, by passing the input through transcode() before scanning it.
+ * For example, w.bloggar posts pages using HTML entities.
+ * If you have to modify these pages using web forms, you would like to get
+ * UTF-8 instead.
+ *
+ * Decoding notes:
+ * - single bytes below 0x80 are copied unchanged;
+ * - the lead byte alone selects the sequence size (2 to 6 bytes); neither
+ *   lead nor continuation bytes are validated, so a stray byte in the range
+ *   0x80-0xBF is handled like a two-byte lead;
+ * - a sequence that decodes to zero (eg, C0 80) is dropped from the output;
+ * - when the string ends in the middle of a sequence, the missing bytes
+ *   count as zero instead of being read past the end of the input.
+ *
+ * @link http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple Character Entity Chart
+ *
+ * @param string the original UTF-8 string
+ * @return a string acceptable in an ISO-8859-1 storage system (ie., PHP4 + MySQL 3)
+ */
+function to_unicode($input) {
+
+    // transcode HTML entities to Unicode entities
+    $input = transcode($input);
+
+    // measure once, since the string does not change while scanning
+    $length = strlen($input);
+
+    // scan the whole string
+    $output = '';
+    $index = 0;
+    while($index < $length) {
+
+        // look at one char
+        $lead = ord($input[$index]);
+
+        // one byte (0xxxxxxx)
+        if($lead < 0x80) {
+
+            // some chars may be undefined
+            $output .= chr($lead);
+            $index += 1;
+            continue;
+        }
+
+        // the lead byte gives the sequence size and the first payload bits
+
+        // two bytes (110xxxxx 10xxxxxx)
+        if($lead < 0xE0) {
+            $size = 2;
+            $value = $lead % 0x20;
+
+        // three bytes (1110xxxx 10xxxxxx 10xxxxxx) example: euro sign = \xE2\x82\xAC -> &#8364;
+        } elseif($lead < 0xF0) {
+            $size = 3;
+            $value = $lead % 0x10;
+
+        // four bytes (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+        } elseif($lead < 0xF8) {
+            $size = 4;
+            $value = $lead % 0x08;
+
+        // five bytes (111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+        } elseif($lead < 0xFC) {
+            $size = 5;
+            $value = $lead % 0x04;
+
+        // six bytes (1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+        } else {
+            $size = 6;
+            $value = $lead % 0x02;
+        }
+
+        // every following byte contributes six more bits (10xxxxxx); walking
+        // the offsets 1..size-1 guarantees that each byte is used exactly
+        // once, including the sixth byte of a six-byte sequence
+        for($offset = 1; $offset < $size; $offset++) {
+            $position = $index + $offset;
+
+            // a sequence cut short by the end of the string is padded with zero
+            $byte = ($position < $length) ? ord($input[$position]) : 0;
+            $value = ($value * 0x40) + ($byte % 0x40);
+        }
+
+        // strip weird sequences (eg, C0 80 -> NUL)
+        if($value)
+            $output .= '&#' . $value . ';';
+        $index += $size;
+    }
+
+    // return the translated string
+    return $output;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
// returns either the translated string
// or the original string. Assumes we
// are passed the original string as occurs
@@ -264,8 +726,7 @@
if ($a == "")
return $a;
- // $a = htmlentities($a, ENT_QUOTES, $charset);
- $u = urlencode($a);
+ $u = mysql_real_escape_string(urlencode($a));
if (!$connection) {
// database not available, just print English
@@ -319,7 +780,7 @@
return fix($a); // just return English string
} else { // translation available
$row = mysql_fetch_array($result);
- return fix(urldecode($row["translation"]));
+ return $row["translation"];
}
}
@@ -361,7 +822,16 @@
}
function TITLE($a,$b="") {
- echo "<title>" . W_($a,$b) . "</title>\n";
+ global $lang;
+ global $languagecodes;
+ echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"
>";
+ echo "<title>" . TRANSLATE_($a,$b) . "</title>\n";
+ if (isset($languagecodes[$lang])) {
+ echo "<meta name=\"content-language\" content=\"" .
+ $languagecodes[$lang] . "\">";
+ echo "<meta name=\"language\" content=\"" .
+ $languagecodes[$lang] . "\">";
+ }
}
Modified: i18nHTML-docs/WWW/i18nhtml_config.inc
===================================================================
--- i18nHTML-docs/WWW/i18nhtml_config.inc 2005-04-04 06:47:24 UTC (rev
587)
+++ i18nHTML-docs/WWW/i18nhtml_config.inc 2005-04-04 06:51:09 UTC (rev
588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -24,12 +24,18 @@
$i18nHTMLrecordMode = 2; // 1: only missing, 2: everything, 0: disable
$i18nHTMLsqlServer = "localhost";
-$i18nHTMLsqlUser = "GNUnetWWW";
-$i18nHTMLsqlPass = "garlic";
+$i18nHTMLsqlUser = "i18nHTML";
+$i18nHTMLsqlPass = "pass";
-$i18nHTMLsqlDB = "translations"; // default is "translation"
+$i18nHTMLsqlDB = "translation"; // default is "translation"
$i18nHTMLbase = ""; // base directory prepended to i18nHTML php pages used in
links
$i18nHTMLmarker = "*"; // default value if never changed
+// Note that if you enable debug, the scripts may print
+// warnings even if everything is ok!
+$i18nHTMLdebug = 0; // 0 = no, 1 = yes
+// for selectively cloning a DB
+// $i18nHTMLclone = "/tmp/cloneFile.sql";
+
?>
Modified: i18nHTML-docs/WWW/index.php
===================================================================
--- i18nHTML-docs/WWW/index.php 2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/index.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -2,17 +2,10 @@
include("i18nhtml.inc");
DOCTYPE("HTML", "Transitional");
echo "<html><head>\n";
-echo "<title>";
-TRANSLATE("i18nHTML - enabling collaborative webpage translation");
-echo "</title>";
+TITLE("i18nHTML - enabling collaborative webpage translation");
echo "<meta name=\"description\" content=\"";
TRANSLATE("i18nHTML is a collection of PHP scripts that allow visitors of a
webpage to help translating it.");
echo "\">";
-
-echo "<meta name=\"content-language\" content=\"" .
- $languagecodes[$lang] . "\">";
-echo "<meta name=\"language\" content=\"" .
- $languagecodes[$lang] . "\">";
?>
<meta name="author" content="Christian Grothoff">
<meta name="keywords"
content="i18n,HTML,PHP,translation,languages,mysql,database,internationalization,www,free,GNU,GPL">
@@ -34,11 +27,12 @@
W("i18nHTML defines a set of PHP functions that generate either the translated
HTML sentences or the original (typically English) text with decorations that
allow users to provide translations.");
W("i18nHTML requires the internationalized webpages to be written using the
provided PHP functions but does not constrain the page design in any way.");
W("Webpages internationalized with i18nHTML can be updated without loosing
existing translations for sentences that were not changed.");
+W("Note that it is important that you use the i18nHTML <tt>TITLE</tt> command
in your documents in order to ensure that the character set and other meta-data
is set properly.");
P();
H2("Download");
W("You can find the latest version %s.",
- extlink_("download/i18nHTML-0.0.2.tar.gz", "here"));
-W("The latest version can be obtained using");
+ extlink_("https://gnunet.org/i18nHTML/download/", "here"));
+W("The latest CVS version can be obtained using");
PRE("$ svn checkout https://gnunet.org/svn/i18nHTML/");
P();
W("If you want to be notified about updates, subscribe to %s",
@@ -54,18 +48,18 @@
intlink_("editor.php", "here"));
W("Note that the sentence database is shared with the %s, %s and %s
projects.",
ARRAY(extlink_("http://gnunet.org/", "GNUnet"),
- extlink_("/doodle/", "doodle"),
- extlink_("/libextractor/", "libExtractor")));
+ extlink_("http://gnunet.org/doodle/", "doodle"),
+ extlink_("http://gnunet.org/libextractor/", "libExtractor")));
}
H2("Bugtrack");
-W("I18nHTML uses Mantis for bugtracking.");
+W("i18nHTML uses Mantis for bugtracking.");
W("Visit %s to report bugs.",
extlink_("https://gnunet.org/mantis/","https://gnunet.org/mantis/"));
W("You need to sign up for a reporter account.");
W("Please make sure you report bugs under <strong>I18nHTML</strong> and not
under any of the other projects.");
P();
-W("If you dislike Mantis and need to report a bug contact %s via e-mail.",
+W("If you dislike Mantis and need to report a bug contact %s via e-mail (good
luck getting by the spam-filter).",
extlink_("mailto:address@hidden","address@hidden"));
HR();
Modified: i18nHTML-docs/WWW/start.php
===================================================================
--- i18nHTML-docs/WWW/start.php 2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/start.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -2,9 +2,7 @@
include("i18nhtml.inc");
DOCTYPE("HTML", "Transitional");
echo "<html><head>\n";
-echo "<title>";
-TRANSLATE("Documentation");
-echo "</title>";
+TITLE("Documentation");
echo "</head><body>";
generateLanguageBar();
H2("Documentation");
Modified: i18nHTML-docs/WWW/status.php
===================================================================
--- i18nHTML-docs/WWW/status.php 2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/status.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -30,9 +30,7 @@
DOCTYPE("HTML", "Transitional");
echo "<html><head>\n";
-echo "<title>";
-TRANSLATE("Translation: status");
-echo "</title>";
+TITLE("Translation: status");
echo "</head><body>";
W("Language setting is %s.",
$lang);
Modified: i18nHTML-docs/WWW/translate.php
===================================================================
--- i18nHTML-docs/WWW/translate.php 2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/translate.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -27,9 +27,7 @@
DOCTYPE("HTML", "Transitional");
echo "<html><head>\n";
-echo "<title>";
-TRANSLATE("WWW translation");
-echo "</title>";
+TITLE("WWW translation");
echo "<meta name=\"description\" content=\"";
TRANSLATE("Help translating this webpage.");
echo "\">";
@@ -45,7 +43,7 @@
W("Destination language: ");
W($lang);
P();
-echo "<form action=\"" . $i18nHTMLbase . "commitTranslation.php\">\n";
+echo "<form method=\"POST\" action=\"" . $i18nHTMLbase .
"commitTranslation.php\">\n";
echo "<input type=hidden name=\"text\" value=\"" . urlencode($text) . "\">\n";
echo "<input type=hidden name=\"xlang\" value=\"$xlang\">\n";
echo "<input type=hidden name=\"back\" value=\"$back\">\n";
@@ -93,9 +91,9 @@
printf("<tr><td>%s</td><td><a href=\"" . $i18nHTMLbase .
"vote.php?xlang=%s&text=%s&translation=%s\">%s</a></td></tr>\n",
W_($row["lang"]),
urlencode($row["lang"]),
- urlencode($text),
- $translation,
- urldecode($translation));
+ $u,
+ urlencode(from_unicode($translation)),
+ fix(from_unicode($translation)));
}
echo "</table>";
@@ -121,4 +119,4 @@
generateFooter();
echo "</body></html>";
-?>
\ No newline at end of file
+?>
Modified: i18nHTML-docs/WWW/vote.php
===================================================================
--- i18nHTML-docs/WWW/vote.php 2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/vote.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
<?php
/*
- (C) 2003, 2004 Christian Grothoff
+ (C) 2003, 2004, 2005 Christian Grothoff
This code is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
@@ -21,17 +21,18 @@
// For sentences with multiple translations, the one with the most
// votes is displayed.
include("i18nhtml.inc");
-echo "<html><head><title>";
-W("WWW translation: vote");
-echo "</title></head><body>";
+echo "<html><head>";
+echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" >";
+TITLE("WWW translation: vote");
+echo "</head><body>";
if (!$connection) {
echo "Database is down. Cannot edit translations.";
die();
}
$text = $_REQUEST['text'];
$translation = $_REQUEST['translation'];
-$u = urlencode($text);
-$t = urlencode($translation);
+$u = mysql_real_escape_string($text);
+$t = mysql_real_escape_string(to_unicode($translation));
echo "text = " . $text . "<br>\n";
echo "translation = " . $translation . "<br>\n";
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [GNUnet-SVN] r588 - Extractor-docs/WWW GNUnet-docs/WWW GNUnet-docs/WWW/papers doodle-docs/WWW i18nHTML-docs/WWW,
grothoff <=