[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Pingus-CVS] r3982 - trunk/pingus/src
From: |
grumbel at BerliOS |
Subject: |
[Pingus-CVS] r3982 - trunk/pingus/src |
Date: |
Thu, 26 Feb 2009 22:01:14 +0100 |
Author: grumbel
Date: 2009-02-26 22:01:13 +0100 (Thu, 26 Feb 2009)
New Revision: 3982
Added:
trunk/pingus/src/utf8.cpp
trunk/pingus/src/utf8.hpp
Removed:
trunk/pingus/src/utf8_iterator.cpp
trunk/pingus/src/utf8_iterator.hpp
Modified:
trunk/pingus/src/font.cpp
trunk/pingus/src/story_screen.cpp
trunk/pingus/src/string_format.cpp
Log:
Renamed utf8_iterator to utf8
Modified: trunk/pingus/src/font.cpp
===================================================================
--- trunk/pingus/src/font.cpp 2009-02-26 19:45:49 UTC (rev 3981)
+++ trunk/pingus/src/font.cpp 2009-02-26 21:01:13 UTC (rev 3982)
@@ -22,7 +22,7 @@
#include "font.hpp"
#include "surface.hpp"
#include "line_iterator.hpp"
-#include "utf8_iterator.hpp"
+#include "utf8.hpp"
#include "font_description.hpp"
#include "display/framebuffer.hpp"
#include "display/display.hpp"
Modified: trunk/pingus/src/story_screen.cpp
===================================================================
--- trunk/pingus/src/story_screen.cpp 2009-02-26 19:45:49 UTC (rev 3981)
+++ trunk/pingus/src/story_screen.cpp 2009-02-26 21:01:13 UTC (rev 3982)
@@ -33,7 +33,7 @@
#include "stat_manager.hpp"
#include "credits.hpp"
#include "display/display.hpp"
-#include "utf8_iterator.hpp"
+#include "utf8.hpp"
#include "sound/sound.hpp"
class StoryScreenComponent : public GUI::Component
Modified: trunk/pingus/src/string_format.cpp
===================================================================
--- trunk/pingus/src/string_format.cpp 2009-02-26 19:45:49 UTC (rev 3981)
+++ trunk/pingus/src/string_format.cpp 2009-02-26 21:01:13 UTC (rev 3982)
@@ -17,7 +17,7 @@
#include <sstream>
#include "font.hpp"
#include "string_format.hpp"
-#include "utf8_iterator.hpp"
+#include "utf8.hpp"
std::string
StringFormat::normalize(std::string text)
Copied: trunk/pingus/src/utf8.cpp (from rev 3976,
trunk/pingus/src/utf8_iterator.cpp)
===================================================================
--- trunk/pingus/src/utf8_iterator.cpp 2009-02-26 14:54:37 UTC (rev 3976)
+++ trunk/pingus/src/utf8.cpp 2009-02-26 21:01:13 UTC (rev 3982)
@@ -0,0 +1,277 @@
+// Pingus - A free Lemmings clone
+// Copyright (C) 2008 Matthias Braun <address@hidden>,
+// Ingo Ruhnke <address@hidden>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#include <iostream>
+#include <stdexcept>
+// The header was renamed together with this file; utf8_iterator.hpp no
+// longer exists after this revision.
+#include "utf8.hpp"
+
+/** Replacement character (U+FFFD) stored as the current character
+ *  whenever an invalid UTF-8 sequence is encountered */
+static const uint32_t INVALID_UTF8_SEQUENCE = 0xFFFD;
+
+/**
+ * Reports whether a linebreak is allowed after \a unicode: true for
+ * the ASCII space and for every code point at or above U+3400.
+ */
+bool
+UTF8::is_linebreak_character(uint32_t unicode)
+{
+  const bool is_space          = (unicode == ' ');
+  const bool is_high_codepoint = (unicode >= 0x3400);
+
+  return is_space || is_high_codepoint;
+}
+
+/**
+ * Counts the characters of \a str by counting every byte that is not
+ * a continuation byte (bit pattern 10xx_xxxx).
+ */
+std::string::size_type
+UTF8::length(const std::string& str)
+{
+  // Sequences are deliberately not validated: a malformed sequence
+  // still counts as a character.
+  std::string::size_type count = 0;
+
+  for(std::string::size_type k = 0; k < str.size(); ++k)
+  {
+    const unsigned char byte = static_cast<unsigned char>(str[k]);
+
+    // 0xc0 == 1100_0000, 0x80 == 1000_0000
+    const bool is_continuation = ((byte & 0xc0) == 0x80);
+    if (!is_continuation)
+    {
+      ++count;
+    }
+  }
+
+  return count;
+}
+
+/**
+ * Returns the bytes of the string \a first iterates over, starting at
+ * the index of \a first and ending just before the index of \a last.
+ */
+std::string
+UTF8::substr(const iterator& first, const iterator& last)
+{
+  const std::string::size_type start      = first.get_index();
+  const std::string::size_type byte_count = last.get_index() - start;
+
+  return first.get_string().substr(start, byte_count);
+}
+
+/**
+ * Returns \a n characters of \a text, skipping the first \a pos
+ * characters. Both counts are in characters, not bytes.
+ *
+ * NOTE(review): advance() is given no end iterator, so \a pos + \a n
+ * presumably must not exceed the number of characters in \a text --
+ * confirm against callers.
+ */
+std::string
+UTF8::substr(const std::string& text, std::string::size_type pos, std::string::size_type n)
+{
+  const std::string::const_iterator first = UTF8::advance(text.begin(), pos);
+
+  return std::string(first, UTF8::advance(first, n));
+}
+
+/**
+ * Moves \a it forward by \a n characters and returns the result. The
+ * width of each character is taken from its first byte alone.
+ */
+std::string::const_iterator
+UTF8::advance(std::string::const_iterator it, std::string::size_type n)
+{
+  for(; n > 0; --n)
+  {
+    // FIXME: Doesn't check if UTF8 sequence is valid
+    const unsigned char lead = *it;
+    std::string::difference_type step = 1;
+
+    if (lead >= 0x80)
+    {
+      if (lead >= 0xf0)      // 1111_xxxx
+      {
+        step = 4;
+      }
+      else if (lead >= 0xe0) // 1110_xxxx
+      {
+        step = 3;
+      }
+      else if (lead >= 0xc0) // 110x_xxxx
+      {
+        step = 2;
+      }
+      else
+      {
+        // 10xx_xxxx: a continuation byte where a lead byte was
+        // expected, report it and skip one byte
+        std::cout << "UTF8: malformed UTF-8 sequence: " << (int)lead << std::endl;
+      }
+    }
+
+    it += step;
+  }
+
+  return it;
+}
+/**
+ * Tells whether \a c has the bit pattern 10xx.xxxx, i.e. whether it
+ * is the 2nd, 3rd or 4th byte of a multibyte utf8 string.
+ */
+bool
+UTF8::has_multibyte_mark(unsigned char c)
+{
+  const unsigned char top_two_bits = c & 0xC0;
+
+  return top_two_bits == 0x80;
+}
+
+/**
+ * Decodes the character at the very start of \a text; the position
+ * after it is discarded.
+ */
+uint32_t
+UTF8::decode_utf8(const std::string& text)
+{
+  size_t start = 0;
+  const uint32_t first_char = decode_utf8(text, start);
+
+  return first_char;
+}
+
+/**
+ * gets unicode character at byte position @a p of UTF-8 encoded @a
+ * text, then advances @a p to the next character.
+ *
+ * When decoding fails @a p is left unchanged, since every throw
+ * happens before @a p is advanced.
+ *
+ * @throws std::runtime_error if decoding fails; a sequence cut off by
+ *         the end of @a text is reported as std::range_error, which
+ *         derives from std::runtime_error.
+ * See unicode standard section 3.10 table 3-5 and 3-6 for details.
+ */
+uint32_t
+UTF8::decode_utf8(const std::string& text, size_t& p)
+{
+  uint32_t c1 = (unsigned char) text[p+0];
+
+  if (has_multibyte_mark(c1))
+  {
+    // a continuation byte can not start a sequence
+    throw std::runtime_error("Malformed utf-8 sequence");
+  }
+  else if ((c1 & 0200) == 0000)
+  {
+    // 0xxx.xxxx: 1 byte sequence
+    p+=1;
+
+    return c1;
+  }
+  else if ((c1 & 0340) == 0300)
+  {
+    // 110x.xxxx: 2 byte sequence
+    if(p+1 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
+    uint32_t c2 = (unsigned char) text[p+1];
+    if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
+    p+=2;
+
+    return (c1 & 0037) << 6 | (c2 & 0077);
+  }
+  else if ((c1 & 0360) == 0340)
+  {
+    // 1110.xxxx: 3 byte sequence
+    if(p+2 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
+    uint32_t c2 = (unsigned char) text[p+1];
+    uint32_t c3 = (unsigned char) text[p+2];
+    if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
+    if (!has_multibyte_mark(c3)) throw std::runtime_error("Malformed utf-8 sequence");
+    p+=3;
+
+    return (c1 & 0017) << 12 | (c2 & 0077) << 6 | (c3 & 0077);
+  }
+  else if ((c1 & 0370) == 0360)
+  {
+    // 1111.0xxx: 4 byte sequence
+    if(p+3 >= text.size()) throw std::range_error("Malformed utf-8 sequence");
+    uint32_t c2 = (unsigned char) text[p+1];
+    uint32_t c3 = (unsigned char) text[p+2];
+    // the fourth byte lives at p+3; p+4 would skip it and is not
+    // covered by the bounds check above
+    uint32_t c4 = (unsigned char) text[p+3];
+    if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8 sequence");
+    if (!has_multibyte_mark(c3)) throw std::runtime_error("Malformed utf-8 sequence");
+    if (!has_multibyte_mark(c4)) throw std::runtime_error("Malformed utf-8 sequence");
+    p+=4;
+
+    return (c1 & 0007) << 18 | (c2 & 0077) << 12 | (c3 & 0077) << 6 | (c4 & 0077);
+  }
+  else
+  {
+    // 1111.1xxx: not a valid first byte
+    throw std::runtime_error("Malformed utf-8 sequence");
+  }
+}
+
+// FIXME: Get rid of exceptions in this code
+/**
+ * Creates an iterator positioned on the first character of \a text_.
+ * If that character can not be decoded, the current character
+ * becomes INVALID_UTF8_SEQUENCE and the offending byte is skipped.
+ */
+UTF8::iterator::iterator(const std::string& text_)
+  : text(&text_),
+    pos(0),
+    idx(0)
+{
+  try
+  {
+    chr = decode_utf8(*text, pos);
+  }
+  catch (const std::exception&)
+  {
+    // decode_utf8() leaves pos untouched when it throws, so pos still
+    // names the offending byte. Only that byte is printed; reading a
+    // whole uint32_t from there could run past the end of the buffer.
+    std::cout << "Malformed utf-8 sequence beginning with "
+              << static_cast<unsigned int>(static_cast<unsigned char>((*text)[pos]))
+              << " found " << std::endl;
+    chr = INVALID_UTF8_SEQUENCE;
+    // step over the bad byte like operator++() does, otherwise the
+    // next increment decodes the very same byte a second time
+    ++pos;
+  }
+}
+
+/**
+ * Creates an iterator positioned on the character that starts at
+ * \a it, which is measured as an offset from the beginning of
+ * \a text_. If that character can not be decoded, the current
+ * character becomes INVALID_UTF8_SEQUENCE and the offending byte is
+ * skipped.
+ */
+UTF8::iterator::iterator(const std::string& text_, const std::string::iterator it)
+  : text(&text_),
+    pos(it - text->begin()),
+    idx(pos)
+{
+  try
+  {
+    chr = decode_utf8(*text, pos);
+  }
+  catch (const std::exception&)
+  {
+    // decode_utf8() leaves pos untouched when it throws, so pos still
+    // names the offending byte. Only that byte is printed; reading a
+    // whole uint32_t from there could run past the end of the buffer.
+    std::cout << "Malformed utf-8 sequence beginning with "
+              << static_cast<unsigned int>(static_cast<unsigned char>((*text)[pos]))
+              << " found " << std::endl;
+    chr = INVALID_UTF8_SEQUENCE;
+    // step over the bad byte like operator++() does, otherwise the
+    // next increment decodes the very same byte a second time
+    ++pos;
+  }
+}
+
+/**
+ * Returns true once the position of the next character has moved
+ * beyond the size of the string.
+ *
+ * NOTE(review): this presumably relies on the terminating NUL at
+ * index size() being decoded as a final one byte character, which
+ * pushes pos to size() + 1 -- confirm.
+ */
+bool
+UTF8::iterator::done() const
+{
+  const std::string::size_type text_size = text->size();
+
+  return text_size < pos;
+}
+
+/**
+ * Returns a copy of this iterator incremented \a n times; this
+ * iterator itself is not changed. A value of \a n that is zero or
+ * negative yields an unchanged copy.
+ */
+UTF8::iterator
+UTF8::iterator::operator+(int n)
+{
+  UTF8::iterator result(*this);
+
+  int remaining = n;
+  while (remaining > 0)
+  {
+    ++result;
+    --remaining;
+  }
+
+  return result;
+}
+
+/**
+ * Moves on to the next character: idx takes over the old pos, then
+ * the character found there is decoded into chr and pos advances
+ * behind it. If decoding fails, chr becomes INVALID_UTF8_SEQUENCE and
+ * a single byte is skipped.
+ */
+UTF8::iterator&
+UTF8::iterator::operator++()
+{
+  try
+  {
+    idx = pos;
+    chr = decode_utf8(*text, pos);
+  }
+  catch (const std::exception&)
+  {
+    // decode_utf8() leaves pos untouched when it throws, so pos still
+    // names the offending byte. Only that byte is printed; reading a
+    // whole uint32_t from there could run past the end of the buffer.
+    std::cout << "Malformed utf-8 sequence beginning with "
+              << static_cast<unsigned int>(static_cast<unsigned char>((*text)[pos]))
+              << " found " << std::endl;
+    chr = INVALID_UTF8_SEQUENCE;
+    ++pos;
+  }
+
+  return *this;
+}
+
+/**
+ * Returns the current Unicode character, as stored by the last
+ * decoding step.
+ */
+uint32_t
+UTF8::iterator::operator*() const
+{
+  return this->chr;
+}
+
+#ifdef __TEST__
+/**
+ * Small manual test driver, only compiled when __TEST__ is defined.
+ * Prints the byte length and the UTF-8 character length of the single
+ * command line argument, followed by the substring made of its second
+ * character.
+ */
+int main(int argc, char** argv)
+{
+  if (argc != 2)
+  {
+    std::cout << "Usage: " << argv[0] << " TEXT" << std::endl;
+    return 0;
+  }
+
+  const std::string input(argv[1]);
+
+  std::cout << "ASCII: " << input.length() << std::endl;
+  std::cout << "UTF8: " << UTF8::length(input) << std::endl;
+
+  const std::string res = UTF8::substr(input, 1, 1);
+  std::cout << "substr: " << res.length() << " " << res << std::endl;
+
+  return 0;
+}
+#endif
+
+/* EOF */
Property changes on: trunk/pingus/src/utf8.cpp
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:mergeinfo
+
Name: svn:eol-style
+ native
Copied: trunk/pingus/src/utf8.hpp (from rev 3976,
trunk/pingus/src/utf8_iterator.hpp)
===================================================================
--- trunk/pingus/src/utf8_iterator.hpp 2009-02-26 14:54:37 UTC (rev 3976)
+++ trunk/pingus/src/utf8.hpp 2009-02-26 21:01:13 UTC (rev 3982)
@@ -0,0 +1,90 @@
+// Pingus - A free Lemmings clone
+// Copyright (C) 2008 Matthias Braun <address@hidden>,
+// Ingo Ruhnke <address@hidden>
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef HEADER_PINGUS_UTF8_HPP
+#define HEADER_PINGUS_UTF8_HPP
+
+#include <stdint.h>
+#include <string>
+
+/**
+ * Collection of static helper functions for UTF-8 encoded
+ * std::string, together with an iterator that walks over such a
+ * string one Unicode character at a time.
+ */
+class UTF8
+{
+public:
+ /**
+ * Walks over a UTF-8 encoded string character by character. A
+ * character that can not be decoded is reported as U+FFFD.
+ */
+ class iterator
+ {
+ private:
+ /** The string that is iterated over, not owned by the iterator */
+ const std::string* text;
+
+ /** Position of the next Unicode character after \a chr */
+ std::string::size_type pos;
+
+ /** Position of \a chr */
+ std::string::size_type idx;
+
+ /** Current Unicode character */
+ uint32_t chr;
+
+ public:
+ /** Create a UTF8 iterator, note that \a text is stored as
+ pointer, thus it must remain valid for the lifetime of the
+ iterator. */
+ iterator(const std::string& text);
+ /** Same as above, but starting at the byte that \a it points to
+ instead of at the beginning of \a text. */
+ iterator(const std::string& text, std::string::iterator it);
+
+ /** Returns true once the position of the next character is
+ greater than the size of the string. */
+ bool done() const;
+ /** Decodes the next character and makes it the current one. */
+ iterator& operator++();
+ /** Returns a copy of this iterator that was incremented \a n
+ times. */
+ iterator operator+(int n);
+ /** Returns the current Unicode character. */
+ uint32_t operator*() const;
+
+ /** Returns the byte position of the current character. */
+ std::string::size_type get_index() const { return idx; }
+ /** Returns the string that is iterated over. */
+ const std::string& get_string() const { return *text; }
+ };
+
+ /**
+ * Returns the number of characters in a UTF-8 string
+ */
+ static std::string::size_type length(const std::string& str);
+
+ /**
+ * Returns the bytes between the index of \a first and the index of
+ * \a last, taken from the string that \a first iterates over
+ */
+ static std::string substr(const iterator& first, const iterator& last);
+ /**
+ * Returns \a n characters of \a text, starting at character \a pos;
+ * both values count characters, not bytes
+ */
+ static std::string substr(const std::string& text, std::string::size_type
pos, std::string::size_type n);
+ /**
+ * Returns \a it moved forward by \a n characters. No end iterator is
+ * passed in, so the range is not checked here.
+ */
+ static std::string::const_iterator advance(std::string::const_iterator it,
std::string::size_type n = 1);
+
+ /**
+ * return true if a linebreak is allowed after this character
+ */
+ static bool is_linebreak_character(uint32_t unicode);
+
+ /**
+ * returns true if this byte matches a bitmask of 10xx.xxxx, i.e. it is the
+ * 2nd, 3rd or 4th byte of a multibyte utf8 string
+ */
+ static bool has_multibyte_mark(unsigned char c);
+
+ /**
+ * gets unicode character at byte position @a p of UTF-8 encoded @a
+ * text, then advances @a p to the next character.
+ *
+ * @throws std::runtime_error if decoding fails.
+ * See unicode standard section 3.10 table 3-5 and 3-6 for details.
+ */
+ static uint32_t decode_utf8(const std::string& text, size_t& p);
+
+ /**
+ * gets the first unicode character of UTF-8 encoded @a text
+ */
+ static uint32_t decode_utf8(const std::string& text);
+};
+
+#endif
+
+/* EOF */
Property changes on: trunk/pingus/src/utf8.hpp
___________________________________________________________________
Name: svn:keywords
+ Id
Name: svn:mergeinfo
+
Name: svn:eol-style
+ native
Deleted: trunk/pingus/src/utf8_iterator.cpp
===================================================================
--- trunk/pingus/src/utf8_iterator.cpp 2009-02-26 19:45:49 UTC (rev 3981)
+++ trunk/pingus/src/utf8_iterator.cpp 2009-02-26 21:01:13 UTC (rev 3982)
@@ -1,277 +0,0 @@
-// Pingus - A free Lemmings clone
-// Copyright (C) 2008 Matthias Braun <address@hidden>,
-// Ingo Ruhnke <address@hidden>
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-#include <iostream>
-#include <stdexcept>
-#include "utf8_iterator.hpp"
-
-/** Replacement character for invalid UTF-8 sequences */
-static const uint32_t INVALID_UTF8_SEQUENCE = 0xFFFD;
-
-bool
-UTF8::is_linebreak_character(uint32_t unicode)
-{
- if (unicode == ' ' || unicode >= 0x3400)
- {
- return true;
- }
- else
- {
- return false;
- }
-}
-
-std::string::size_type
-UTF8::length(const std::string& str)
-{
- // Not checking for valid UTF-8 sequences should be ok, since
- // incorrect ones are a character too.
- std::string::size_type len = 0;
- for(std::string::const_iterator i = str.begin(); i != str.end(); ++i)
- {
- unsigned char c = *i;
- if (((c & 0xc0) == 0xc0) || (c < 0x80)) // 0xc0 == 1100_000
- {
- len += 1;
- }
- }
-
- return len;
-}
-
-std::string
-UTF8::substr(const iterator& first, const iterator& last)
-{
- return first.get_string().substr(first.get_index(),
- last.get_index() - first.get_index());
-}
-
-std::string
-UTF8::substr(const std::string& text, std::string::size_type pos,
std::string::size_type n)
-{
- std::string::const_iterator beg_it = UTF8::advance(text.begin(), pos);
- std::string::const_iterator end_it = UTF8::advance(beg_it, n);
-
- return std::string(beg_it, end_it);
-}
-
-std::string::const_iterator
-UTF8::advance(std::string::const_iterator it, std::string::size_type n)
-{
- for(std::string::size_type i = 0; i < n; ++i)
- {
- // FIXME: Doesn't check if UTF8 sequence is valid
- unsigned char c = *it;
-
- if (c < 0x80)
- {
- it += 1;
- }
- else if ((c & 0xf0) == 0xf0)
- {
- it += 4;
- }
- else if ((c & 0xe0) == 0xe0)
- {
- it += 3;
- }
- else if ((c & 0xc0) == 0xc0)
- {
- it += 2;
- }
- else
- {
- std::cout << "UTF8: malformed UTF-8 sequence: " << (int)c <<
std::endl;
- it += 1;
- }
- }
-
- return it;
-}
-/**
- * returns true if this byte matches a bitmask of 10xx.xxxx, i.e. it is the
2nd, 3rd or 4th byte of a multibyte utf8 string
- */
-bool
-UTF8::has_multibyte_mark(unsigned char c)
-{
- return ((c & 0300) == 0200);
-}
-
-uint32_t
-UTF8::decode_utf8(const std::string& text)
-{
- size_t p = 0;
- return decode_utf8(text, p);
-}
-
-/**
- * gets unicode character at byte position @a p of UTF-8 encoded @a
- * text, then advances @a p to the next character.
- *
- * @throws std::runtime_error if decoding fails.
- * See unicode standard section 3.10 table 3-5 and 3-6 for details.
- */
-uint32_t
-UTF8::decode_utf8(const std::string& text, size_t& p)
-{
- uint32_t c1 = (unsigned char) text[p+0];
-
- if (has_multibyte_mark(c1))
- {
- throw std::runtime_error("Malformed utf-8 sequence");
- }
- else if ((c1 & 0200) == 0000)
- {
- // 0xxx.xxxx: 1 byte sequence
- p+=1;
-
- return c1;
- }
- else if ((c1 & 0340) == 0300)
- {
- // 110x.xxxx: 2 byte sequence
- if(p+1 >= text.size()) throw std::range_error("Malformed utf-8
sequence");
- uint32_t c2 = (unsigned char) text[p+1];
- if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8
sequence");
- p+=2;
-
- return (c1 & 0037) << 6 | (c2 & 0077);
- }
- else if ((c1 & 0360) == 0340)
- {
- // 1110.xxxx: 3 byte sequence
- if(p+2 >= text.size()) throw std::range_error("Malformed utf-8
sequence");
- uint32_t c2 = (unsigned char) text[p+1];
- uint32_t c3 = (unsigned char) text[p+2];
- if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8
sequence");
- if (!has_multibyte_mark(c3)) throw std::runtime_error("Malformed utf-8
sequence");
- p+=3;
-
- return (c1 & 0017) << 12 | (c2 & 0077) << 6 | (c3 & 0077);
- }
- else if ((c1 & 0370) == 0360)
- {
- // 1111.0xxx: 4 byte sequence
- if(p+3 >= text.size()) throw std::range_error("Malformed utf-8
sequence");
- uint32_t c2 = (unsigned char) text[p+1];
- uint32_t c3 = (unsigned char) text[p+2];
- uint32_t c4 = (unsigned char) text[p+4];
- if (!has_multibyte_mark(c2)) throw std::runtime_error("Malformed utf-8
sequence");
- if (!has_multibyte_mark(c3)) throw std::runtime_error("Malformed utf-8
sequence");
- if (!has_multibyte_mark(c4)) throw std::runtime_error("Malformed utf-8
sequence");
- p+=4;
-
- return (c1 & 0007) << 18 | (c2 & 0077) << 12 | (c3 & 0077) << 6 | (c4 &
0077);
- }
- else
- {
- throw std::runtime_error("Malformed utf-8 sequence");
- }
-}
-
-// FIXME: Get rid of exceptions in this code
-UTF8::iterator::iterator(const std::string& text_)
- : text(&text_),
- pos(0),
- idx(0)
-{
- try
- {
- chr = decode_utf8(*text, pos);
- }
- catch (std::exception)
- {
- std::cout << "Malformed utf-8 sequence beginning with " <<
*((uint32_t*)(text->c_str() + pos)) << " found " << std::endl;
- chr = INVALID_UTF8_SEQUENCE;
- }
-}
-
-UTF8::iterator::iterator(const std::string& text_, const std::string::iterator
it)
- : text(&text_),
- pos(it - text->begin()),
- idx(pos)
-{
- try
- {
- chr = decode_utf8(*text, pos);
- }
- catch (std::exception)
- {
- std::cout << "Malformed utf-8 sequence beginning with " <<
*((uint32_t*)(text->c_str() + pos)) << " found " << std::endl;
- chr = INVALID_UTF8_SEQUENCE;
- }
-}
-
-bool
-UTF8::iterator::done() const
-{
- return pos > text->size();
-}
-
-UTF8::iterator
-UTF8::iterator::operator+(int n)
-{
- UTF8::iterator it = *this;
- for(int i = 0; i < n; ++i)
- ++it;
- return it;
-}
-
-UTF8::iterator&
-UTF8::iterator::operator++()
-{
- try
- {
- idx = pos;
- chr = decode_utf8(*text, pos);
- }
- catch (std::exception)
- {
- std::cout << "Malformed utf-8 sequence beginning with " <<
*((uint32_t*)(text->c_str() + pos)) << " found " << std::endl;
- chr = INVALID_UTF8_SEQUENCE;
- ++pos;
- }
-
- return *this;
-}
-
-uint32_t
-UTF8::iterator::operator*() const
-{
- return chr;
-}
-
-#ifdef __TEST__
-int main(int argc, char** argv)
-{
- if (argc != 2)
- {
- std::cout << "Usage: " << argv[0] << " TEXT" << std::endl;
- }
- else
- {
- std::cout << "ASCII: " << std::string(argv[1]).length() << std::endl;
- std::cout << "UTF8: " << UTF8::length(argv[1]) << std::endl;
-
- std::string res = UTF8::substr(argv[1], 1, 1);
- std::cout << "substr: " << res.length() << " " << res << std::endl;
- }
- return 0;
-}
-#endif
-
-/* EOF */
Deleted: trunk/pingus/src/utf8_iterator.hpp
===================================================================
--- trunk/pingus/src/utf8_iterator.hpp 2009-02-26 19:45:49 UTC (rev 3981)
+++ trunk/pingus/src/utf8_iterator.hpp 2009-02-26 21:01:13 UTC (rev 3982)
@@ -1,90 +0,0 @@
-// Pingus - A free Lemmings clone
-// Copyright (C) 2008 Matthias Braun <address@hidden>,
-// Ingo Ruhnke <address@hidden>
-//
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, either version 3 of the License, or
-// (at your option) any later version.
-//
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-#ifndef HEADER_PINGUS_UTF8_ITERATOR_HPP
-#define HEADER_PINGUS_UTF8_ITERATOR_HPP
-
-#include <stdint.h>
-#include <string>
-
-class UTF8
-{
-public:
- class iterator
- {
- private:
- const std::string* text;
-
- /** Position of the next Unicode character after \a chr */
- std::string::size_type pos;
-
- /** Position of \a chr */
- std::string::size_type idx;
-
- /** Current Unicode character */
- uint32_t chr;
-
- public:
- /** Create a UTF8 iterator, note that \a text is stored as
- pointer, thus it must remain valid for the lifetime of the
- iterator. */
- iterator(const std::string& text);
- iterator(const std::string& text, std::string::iterator it);
-
- bool done() const;
- iterator& operator++();
- iterator operator+(int n);
- uint32_t operator*() const;
-
- std::string::size_type get_index() const { return idx; }
- const std::string& get_string() const { return *text; }
- };
-
- /**
- * Returns the number of characters in a UTF-8 string
- */
- static std::string::size_type length(const std::string& str);
-
- static std::string substr(const iterator& first, const iterator& last);
- static std::string substr(const std::string& text, std::string::size_type
pos, std::string::size_type n);
- static std::string::const_iterator advance(std::string::const_iterator it,
std::string::size_type n = 1);
-
- /**
- * return true if a linebreak is allowed after this character
- */
- static bool is_linebreak_character(uint32_t unicode);
-
- /**
- * returns true if this byte matches a bitmask of 10xx.xxxx, i.e. it is the
2nd, 3rd or 4th byte of a multibyte utf8 string
- */
- static bool has_multibyte_mark(unsigned char c);
-
- /**
- * gets unicode character at byte position @a p of UTF-8 encoded @a
- * text, then advances @a p to the next character.
- *
- * @throws std::runtime_error if decoding fails.
- * See unicode standard section 3.10 table 3-5 and 3-6 for details.
- */
- static uint32_t decode_utf8(const std::string& text, size_t& p);
-
- static uint32_t decode_utf8(const std::string& text);
-};
-
-#endif
-
-/* EOF */
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Pingus-CVS] r3982 - trunk/pingus/src,
grumbel at BerliOS <=