From dc2773944154b305c893b7459829bde21c5a6182 Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka
Date: Fri, 5 Aug 2016 08:28:20 +0900
Subject: [PATCH 1/2] sed: cache results of mbrtowc for speed

* sed/mbcs.c (mbrtowc_cache, mbrlen_cache): New vars.
(initialize_mbcs): Initialize the cache.
* sed/sed.h: Include limits.h
(MBRTOWC, MBRLEN): Use the cache.
---
 sed/mbcs.c | 14 ++++++++++++++
 sed/sed.h  | 11 ++++++++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/sed/mbcs.c b/sed/mbcs.c
index bce39fa..8105ecd 100644
--- a/sed/mbcs.c
+++ b/sed/mbcs.c
@@ -24,6 +24,9 @@
 int mb_cur_max;
 bool is_utf8;
 
+size_t mbrlen_cache[UCHAR_MAX + 1];
+wint_t mbrtowc_cache[UCHAR_MAX + 1];
+
 /* Return non-zero if CH is part of a valid multibyte sequence:
    Either incomplete yet valid sequence (in case of a leading byte),
    or the last byte of a valid multibyte sequence.
@@ -73,4 +76,15 @@ initialize_mbcs (void)
   is_utf8 = (strcmp (codeset_name, "UTF-8") == 0);
 
   mb_cur_max = MB_CUR_MAX;
+
+  for (int i = CHAR_MIN; i <= CHAR_MAX; ++i)
+    {
+      char c = i;
+      unsigned char uc = i;
+      mbstate_t mbs = { 0 };
+      wchar_t wc;
+      size_t len = mbrtowc (&wc, &c, 1, &mbs);
+      mbrlen_cache[uc] = len ? len : 1;
+      mbrtowc_cache[uc] = len == 1 ? wc : WEOF;
+    }
 }
diff --git a/sed/sed.h b/sed/sed.h
index bbddd25..3716bcb 100644
--- a/sed/sed.h
+++ b/sed/sed.h
@@ -19,6 +19,7 @@
 #include "basicdefs.h"
 #include "regex.h"
 #include