[Eliot-dev] eliot/dic compdic.cpp encoding.cpp encoding.h h... [cppdic]

eliot-dev
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Eliot-dev] eliot/dic compdic.cpp encoding.cpp encoding.h h... [cppdic]

From:	eliot-dev
Subject:	[Eliot-dev] eliot/dic compdic.cpp encoding.cpp encoding.h h... [cppdic]
Date:	Mon, 10 Dec 2007 11:56:39 +0000
CVSROOT:        /cvsroot/eliot
Module name:    eliot
Branch:         cppdic
Changes by:     Olivier Teulière <ipkiss>      07/12/10 11:56:39

Modified files:
        dic            : compdic.cpp encoding.cpp encoding.h header.cpp 

Log message:
         - Win32 version of the various conversion functions. It should fix 
most problems on Windows when using a non-ascii dictionary (to be confirmed)
         - Simplified the prototype of one of the readFromUTF8 functions

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/compdic.cpp?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.14&r2=1.1.2.15
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/encoding.cpp?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.9&r2=1.1.2.10
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/encoding.h?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.4&r2=1.1.2.5
http://cvs.savannah.gnu.org/viewcvs/eliot/dic/header.cpp?cvsroot=eliot&only_with_tag=cppdic&r1=1.1.2.15&r2=1.1.2.16

Patches:
Index: compdic.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/compdic.cpp,v
retrieving revision 1.1.2.14
retrieving revision 1.1.2.15
diff -u -b -r1.1.2.14 -r1.1.2.15
--- compdic.cpp 9 Dec 2007 16:29:54 -0000       1.1.2.14
+++ compdic.cpp 10 Dec 2007 11:56:38 -0000      1.1.2.15
@@ -151,8 +151,7 @@
         char buff[MAX_SIZE];
         strncpy(buff, tokens[0].c_str(), MAX_SIZE);
 
-        wstring letter;
-        readFromUTF8(letter, buff, tokens[0].size(), "readLetters");
+        wstring letter = readFromUTF8(buff, tokens[0].size(), "readLetters");
 
         if (letter.size() != 1)
         {

Index: encoding.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/encoding.cpp,v
retrieving revision 1.1.2.9
retrieving revision 1.1.2.10
diff -u -b -r1.1.2.9 -r1.1.2.10
--- encoding.cpp        9 Dec 2007 16:29:55 -0000       1.1.2.9
+++ encoding.cpp        10 Dec 2007 11:56:39 -0000      1.1.2.10
@@ -36,12 +36,37 @@
 #include <string.h>
 #include <iconv.h>
 
+#ifdef WIN32
+#include <windows.h>
+#endif
+
 #include "encoding.h"
 #include "dic_exception.h"
 
 using namespace std;
 
 
+#ifdef WIN32
+// Utility function to get the last system error as a string
+static string GetWin32Error()
+{
+    char *lpMsgBuf;
+    DWORD dw = GetLastError();
+    cerr << dw << endl;
+    FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                  FORMAT_MESSAGE_FROM_SYSTEM |
+                  FORMAT_MESSAGE_IGNORE_INSERTS,
+                  NULL, dw,
+                  MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+                  (LPTSTR) &lpMsgBuf,
+                  0, NULL);
+    string msg = lpMsgBuf;
+    LocalFree(lpMsgBuf);
+    return msg;
+}
+#endif
+
+
 #if !HAVE_WCWIDTH
 // wcwidth replacement (for win32 in particular)
 // Inspired from the gnulib package, without some of the refinements
@@ -89,6 +114,10 @@
 #define _MAX_SIZE_FOR_STACK_ 30
 wstring convertToWc(const string& iStr)
 {
+#ifdef WIN32
+    // XXX: Assume the input is in UTF-8
+    return readFromUTF8(iStr.c_str(), iStr.size(), "convertToWc");
+#else
     // Get the needed length (we _can't_ use string::size())
     size_t len = mbstowcs(NULL, iStr.c_str(), 0);
     if (len == (size_t)-1)
@@ -110,11 +139,19 @@
         delete[] tmp;
         return res;
     }
+#endif
 }
 
 
 string convertToMb(const wstring& iWStr)
 {
+#ifdef WIN32
+    const unsigned int size = iWStr.size() * 4;
+    char buf[size];
+    // XXX: Assume the output is in UTF-8
+    int nb = writeInUTF8(iWStr, buf, size, "convertToMb");
+    return string(buf, nb);
+#else
     // Get the needed length (we _can't_ use wstring::size())
     size_t len = wcstombs(NULL, iWStr.c_str(), 0);
     if (len == (size_t)-1)
@@ -136,12 +173,16 @@
         delete[] tmp;
         return res;
     }
+#endif
 }
 #undef _MAX_SIZE_FOR_STACK_
 
 
 string convertToMb(wchar_t iWChar)
 {
+#ifdef WIN32
+    return convertToMb(wstring(1, iWChar));
+#else
     char res[MB_CUR_MAX + 1];
     int len = wctomb(res, iWChar);
     if (len == -1)
@@ -149,6 +190,7 @@
     res[len] = '\0';
 
     return res;
+#endif
 }
 
 
@@ -189,6 +231,17 @@
                           const char *iBuffer, unsigned int iBufSize,
                           const string &iContext)
 {
+#ifdef WIN32
+    int res = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, iBuffer,
+                                  iBufSize, oString, iWideSize);
+    if (res == 0)
+    {
+        // Retrieve the system error message for the last-error code
+        throw DicException("readFromUTF8: MultiByteToWideChar failed (" +
+                           iContext + "): " + GetWin32Error());
+    }
+    return res;
+#else
     iconv_t handle = iconv_open("WCHAR_T", "UTF-8");
     if (handle == (iconv_t)(-1))
         throw DicException("readFromUTF8: iconv_open failed");
@@ -207,11 +260,12 @@
                            iContext + "): " + string(strerror(errno)));
     }
     return iWideSize - outChars / sizeof(wchar_t);
+#endif
 }
 
 
-void readFromUTF8(wstring &oString, const char *iBuffer,
-                  unsigned int iBufSize, const string &iContext)
+wstring readFromUTF8(const char *iBuffer, unsigned int iBufSize,
+                     const string &iContext)
 {
     // Temporary buffer for output
     // We will have at most as many characters as in the UTF-8 string
@@ -228,25 +282,39 @@
         throw;
     }
     // Copy the string
-    oString = wstring(wideBuf, number);
+    wstring res(wideBuf, number);
     delete[] wideBuf;
+    return res;
 }
 
 
-unsigned int writeInUTF8(const wstring &iString, char *oBuffer,
+unsigned int writeInUTF8(const wstring &iWString, char *oBuffer,
                          unsigned int iBufSize, const string &iContext)
 {
+#ifdef WIN32
+    int res = WideCharToMultiByte(CP_UTF8, 0, iWString.c_str(), iWString.size(),
+                                  oBuffer, iBufSize, NULL, NULL);
+    if (res == 0)
+    {
+        DWORD dw = GetLastError();
+        cerr << dw << endl;
+        // Retrieve the system error message for the last-error code
+        throw DicException("writeInUTF8: WideCharToMultiByte failed (" +
+                           iContext + "): " + GetWin32Error());
+    }
+    return res;
+#else
     iconv_t handle = iconv_open("UTF-8", "WCHAR_T");
     if (handle == (iconv_t)(-1))
         throw DicException("writeInUTF8: iconv_open failed");
-    size_t length = iString.size();
+    size_t length = iWString.size();
     size_t inChars = sizeof(wchar_t) * length;
     size_t outChars = iBufSize;
     // Use the ICONV_CONST trick because the declaration of iconv()
     // differs depending on the implementations...
     // FIXME: bonus ugliness for doing 2 casts at once, and accessing string
     // internals...
-    ICONV_CONST char *in = (ICONV_CONST char*)(&iString[0]);
+    ICONV_CONST char *in = (ICONV_CONST char*)(&iWString[0]);
     char *out = oBuffer;
     size_t res = iconv(handle, &in, &inChars, &out, &outChars);
     iconv_close(handle);
@@ -258,5 +326,6 @@
     }
     // Return the number of written bytes
     return iBufSize - outChars;
+#endif
 }
 

Index: encoding.h
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/encoding.h,v
retrieving revision 1.1.2.4
retrieving revision 1.1.2.5
diff -u -b -r1.1.2.4 -r1.1.2.5
--- encoding.h  9 Dec 2007 16:29:55 -0000       1.1.2.4
+++ encoding.h  10 Dec 2007 11:56:39 -0000      1.1.2.5
@@ -77,23 +77,23 @@
  * Same as the other readFromUTF8 function, dealing with a wstring
  * instead of a wchar_t*. Note that it performs an additional copy
  * of the output string...
- * @param oString: where to write the converted string
  * @param iBuffer: UTF-8 string to convert
  * @param iBufSize: available size in iBuffer
  * @param iContext: free text used in case of exception
+ * @return: the converted wide string
  */
-void readFromUTF8(wstring &oString, const char *iBuffer,
-                  unsigned int iBufSize, const string &iContext);
+wstring readFromUTF8(const char *iBuffer, unsigned int iBufSize,
+                     const string &iContext);
 
 /**
  * Utility function to convert a wstring into an UTF-8 char* buffer
- * @param iString: the wide string to encode
+ * @param iWString: the wide string to encode
  * @param oBuffer: where to write the encoded string
  * @param iBufSize: available size in oBuffer
  * @param iContext: free text used in case of exception
  * @return: number of bytes actually written
  */
-unsigned int writeInUTF8(const wstring &iString, char *oBuffer,
+unsigned int writeInUTF8(const wstring &iWString, char *oBuffer,
                          unsigned int iBufSize, const string &iContext);
 
 #endif

Index: header.cpp
===================================================================
RCS file: /cvsroot/eliot/eliot/dic/Attic/header.cpp,v
retrieving revision 1.1.2.15
retrieving revision 1.1.2.16
diff -u -b -r1.1.2.15 -r1.1.2.16
--- header.cpp  9 Dec 2007 16:29:55 -0000       1.1.2.15
+++ header.cpp  10 Dec 2007 11:56:39 -0000      1.1.2.16
@@ -381,16 +381,16 @@
         else
             throw DicException("Header::read: unrecognized algorithm type");
 
-        readFromUTF8(m_userHost, aHeaderExt.userHost,
-                     aHeaderExt.userHostSize, "user and host information");
+        m_userHost = readFromUTF8(aHeaderExt.userHost, aHeaderExt.userHostSize,
+                                  "user and host information");
 
         // Convert the dictionary letters from UTF-8 to wchar_t*
-        readFromUTF8(m_dicName, aHeaderExt.dicName,
-                     aHeaderExt.dicNameSize, "dictionary name");
+        m_dicName = readFromUTF8(aHeaderExt.dicName, aHeaderExt.dicNameSize,
+                                 "dictionary name");
 
         // Convert the dictionary letters from UTF-8 to wchar_t*
-        readFromUTF8(m_letters, aHeaderExt.letters,
-                     aHeaderExt.lettersSize, "dictionary letters");
+        m_letters = readFromUTF8(aHeaderExt.letters, aHeaderExt.lettersSize,
+                                 "dictionary letters");
         // Safety check: correct number of letters?
         if (m_letters.size() != aHeaderExt.nbLetters)
         {
@@ -500,9 +500,9 @@
     {
         printf(_("compressed on: Unknown date (old format)\n"));
     }
-    printf(_("compressed using a binary compiled by: %ls\n"), m_userHost.c_str());
+    printf(_("compressed using a binary compiled by: %s\n"), convertToMb(m_userHost).c_str());
     printf(_("dictionary type: %s\n"), m_type == kDAWG ? "DAWG" : "GADDAG");
-    printf(_("letters: %ls\n"), m_letters.c_str());
+    printf(_("letters: %s\n"), convertToMb(m_letters).c_str());
     printf(_("number of letters: %d\n"), m_letters.size());
     printf(_("number of words: %d\n"), m_nbWords);
     printf(_("header size: %u bytes\n"), sizeof(Dict_header_old) +
[Prev in Thread]
Current Thread
[Next in Thread]
[Eliot-dev] eliot/dic compdic.cpp encoding.cpp encoding.h h... [cppdic], eliot-dev <=
Prev by Date: [Eliot-dev] eliot dic/compdic.cpp dic/encoding.cpp dic/enco... [cppdic]
Next by Date: [Eliot-dev] eliot configure.in dic/compdic.cpp dic/dic.cpp ... [cppdic]
Previous by thread: [Eliot-dev] eliot dic/compdic.cpp dic/encoding.cpp dic/enco... [cppdic]
Next by thread: [Eliot-dev] eliot TODO po/POTFILES.in po/eliot.pot po/fr.po... [cppdic]
Index(es):
- Date
- Thread