test string :: apple\353\200\200A\364\220\200\200XYZ\366\230\237\277JR\355\271\272z\335\255\232F\300\210Q\343\207W 0 17 36 command is :: gprintf "${str1}" | gawk -e 'function hexencode(str,chr) { for(chr in b2hex) { if (chr!~/[[:alnum:]%\]/) { gsub(chr,b2hex[chr],str) } }; return str } function octencode(str,chr) { gsub(/\/,b2oct["\\"],str); gsub(/[0-7]/,"\06&",str); for(chr in b2oct) { if(chr!~/[0-7\]/) { gsub(chr,b2oct[chr],str) str } }; return str } BEGIN { offset=-4^4;for(x=0;x<256;x++) { byte=sprintf("%c",x+offset);b2hex[byte]=sprintf("\x%.2X",x);b2oct[byte]=sprintf("\%03o",x) }; spc1="/\^[]";spc2="~!@#%&_-{}:;\42\47\140 <>,$.|()*+=?"; for(x=length(spc1);x;x--) { byte=substr(spc1,x,1); b2hex[("\"(byte))]=b2hex[byte]; b2oct[("\"(byte))]=b2oct[byte]; delete b2hex[byte]; delete b2oct[byte] }; for(x=length(spc2);x;x--) { byte=substr(spc2,x,1); b2hex[("["(byte)"]")]=b2hex[byte]; b2oct[("["(byte)"]")]=b2oct[byte]; delete b2hex[byte]; delete b2oct[byte] } } function printtables() { PROCINFO["sorted_in"]="@val_num_asc";cnt=4; for(x in b2oct) { printf(" %-4s:%s:%s |%s",(x~/[ -\176]/) ? x : "[.]",b2hex[x],b2oct[x],--cnt?"":ORS); if(!cnt) { cnt=4 } } } { printf("%cinput :: |%s|%c%c non-ALNUM-hex :: %s%c%cfull-octal :: %s%c%c", 10, $0, 10, 10, hexencode($0), 10, 10, octencode($0), 10, 10); print "byte count via match($0,/$/)-1 :: " , match($0,/$/)-1; print "gsub(/./,\"&\") :: " , gsub(/./,"&"); match($0,/.*/); print "match($0,/.*/) :: ",RSTART, RLENGTH; print "length() :: ",length($0); print "split to array using empty-RE :: ", nx=split($0, arr, //); print ORS; print "($0~/^.+$/) :: " ($0~/^.+$/); print ORS; print "match($0,/.?$/) :: ",match($0,/.?$/); print ORS; for(x=1;x<=nx;x++) { printf("array cell # [ %2d ] <| %-6s | %16s | %16s |> ", x, xa = arr[x], hexencode(xa), octencode(xa)); xa=""} } END { printtables() } ' 2>&1 | gcat -n ; echo; uname -a; echo; locale; echo; gawk -V; echo 1 2 input :: |apple뀀A????XYZ????JR???zݭ?F??Q?W| 3 4 non-ALNUM-hex :: apple\xEB\x80\x80A\xF4\x90\x80\x80XYZ\xF6\x98\x9F\xBFJR\xED\xB9\xBAz\xDD\xAD\x9AF\xC0\x88Q\xE3\x87W 5 6 full-octal :: \141\160\160\154\145\353\200\200\101\364\220\200\200\130\131\132\366\230\237\277\112\122\355\271\272\172\335\255\232\106\300\210\121\343\207\127 7 8 byte count via match($0,/$/)-1 :: 36 9 gsub(/./,"&") :: 20 10 gawk: cmd. line:1: (FILENAME=- FNR=1) warning: Invalid multibyte data detected. There may be a mismatch between your data and your locale 11 match($0,/.*/) :: 1 25 12 length() :: 33 13 split to array using empty-RE :: 33 14 15 16 ($0~/^.+$/) :: 0 17 18 19 match($0,/.?$/) :: 33 20 21 22 array cell # [ 1 ] <| a | a | \141 |> 23 array cell # [ 2 ] <| p | p | \160 |> 24 array cell # [ 3 ] <| p | p | \160 |> 25 array cell # [ 4 ] <| l | l | \154 |> 26 array cell # [ 5 ] <| e | e | \145 |> 27 array cell # [ 6 ] <| 뀀 | \xEB\x80\x80 | \353\200\200 |> 28 array cell # [ 7 ] <| A | A | \101 |> 29 array cell # [ 8 ] <| ? | \xF4 | \364 |> 30 array cell # [ 9 ] <| ? | \x90 | \220 |> 31 array cell # [ 10 ] <| ? | \x80 | \200 |> 32 array cell # [ 11 ] <| ? | \x80 | \200 |> 33 array cell # [ 12 ] <| X | X | \130 |> 34 array cell # [ 13 ] <| Y | Y | \131 |> 35 array cell # [ 14 ] <| Z | Z | \132 |> 36 array cell # [ 15 ] <| ? | \xF6 | \366 |> 37 array cell # [ 16 ] <| ? | \x98 | \230 |> 38 array cell # [ 17 ] <| ? | \x9F | \237 |> 39 array cell # [ 18 ] <| ? | \xBF | \277 |> 40 array cell # [ 19 ] <| J | J | \112 |> 41 array cell # [ 20 ] <| R | R | \122 |> 42 array cell # [ 21 ] <| ? | \xED | \355 |> 43 array cell # [ 22 ] <| ? | \xB9 | \271 |> 44 array cell # [ 23 ] <| ? | \xBA | \272 |> 45 array cell # [ 24 ] <| z | z | \172 |> 46 array cell # [ 25 ] <| ݭ | \xDD\xAD | \335\255 |> 47 array cell # [ 26 ] <| ? | \x9A | \232 |> 48 array cell # [ 27 ] <| F | F | \106 |> 49 array cell # [ 28 ] <| ? | \xC0 | \300 |> 50 array cell # [ 29 ] <| ? | \x88 | \210 |> 51 array cell # [ 30 ] <| Q | Q | \121 |> 52 array cell # [ 31 ] <| ? | \xE3 | \343 |> 53 array cell # [ 32 ] <| ? | \x87 | \207 |> 54 array cell # [ 33 ] <| W | W | \127 |> 55 [.] :\x00:\000 | [.] :\x01:\001 | [.] :\x02:\002 | [.] :\x03:\003 | 56 [.] :\x04:\004 | [.] :\x05:\005 | [.] :\x06:\006 | [.] :\x07:\007 | 57 [.] :\x08:\010 | [.] :\x09:\011 | [.] :\x0A:\012 | [.] :\x0B:\013 | 58 [.] :\x0C:\014 | [.] :\x0D:\015 | [.] :\x0E:\016 | [.] :\x0F:\017 | 59 [.] :\x10:\020 | [.] :\x11:\021 | [.] :\x12:\022 | [.] :\x13:\023 | 60 [.] :\x14:\024 | [.] :\x15:\025 | [.] :\x16:\026 | [.] :\x17:\027 | 61 [.] :\x18:\030 | [.] :\x19:\031 | [.] :\x1A:\032 | [.] :\x1B:\033 | 62 [.] :\x1C:\034 | [.] :\x1D:\035 | [.] :\x1E:\036 | [.] :\x1F:\037 | 63 [ ] :\x20:\040 | [!] :\x21:\041 | ["] :\x22:\042 | [#] :\x23:\043 | 64 [$] :\x24:\044 | [%] :\x25:\045 | [&] :\x26:\046 | ['] :\x27:\047 | 65 [(] :\x28:\050 | [)] :\x29:\051 | [*] :\x2A:\052 | [+] :\x2B:\053 | 66 [,] :\x2C:\054 | [-] :\x2D:\055 | [.] :\x2E:\056 | \/ :\x2F:\057 | 67 0 :\x30:\060 | 1 :\x31:\061 | 2 :\x32:\062 | 3 :\x33:\063 | 68 4 :\x34:\064 | 5 :\x35:\065 | 6 :\x36:\066 | 7 :\x37:\067 | 69 8 :\x38:\070 | 9 :\x39:\071 | [:] :\x3A:\072 | [;] :\x3B:\073 | 70 [<] :\x3C:\074 | [=] :\x3D:\075 | [>] :\x3E:\076 | [?] :\x3F:\077 | 71 [@] :\x40:\100 | A :\x41:\101 | B :\x42:\102 | C :\x43:\103 | 72 D :\x44:\104 | E :\x45:\105 | F :\x46:\106 | G :\x47:\107 | 73 H :\x48:\110 | I :\x49:\111 | J :\x4A:\112 | K :\x4B:\113 | 74 L :\x4C:\114 | M :\x4D:\115 | N :\x4E:\116 | O :\x4F:\117 | 75 P :\x50:\120 | Q :\x51:\121 | R :\x52:\122 | S :\x53:\123 | 76 T :\x54:\124 | U :\x55:\125 | V :\x56:\126 | W :\x57:\127 | 77 X :\x58:\130 | Y :\x59:\131 | Z :\x5A:\132 | \[ :\x5B:\133 | 78 \\ :\x5C:\134 | \] :\x5D:\135 | \^ :\x5E:\136 | [_] :\x5F:\137 | 79 [`] :\x60:\140 | a :\x61:\141 | b :\x62:\142 | c :\x63:\143 | 80 d :\x64:\144 | e :\x65:\145 | f :\x66:\146 | g :\x67:\147 | 81 h :\x68:\150 | i :\x69:\151 | j :\x6A:\152 | k :\x6B:\153 | 82 l :\x6C:\154 | m :\x6D:\155 | n :\x6E:\156 | o :\x6F:\157 | 83 p :\x70:\160 | q :\x71:\161 | r :\x72:\162 | s :\x73:\163 | 84 t :\x74:\164 | u :\x75:\165 | v :\x76:\166 | w :\x77:\167 | 85 x :\x78:\170 | y :\x79:\171 | z :\x7A:\172 | [{] :\x7B:\173 | 86 [|] :\x7C:\174 | [}] :\x7D:\175 | [~] :\x7E:\176 | [.] :\x7F:\177 | 87 [.] :\x80:\200 | [.] :\x81:\201 | [.] :\x82:\202 | [.] :\x83:\203 | 88 [.] :\x84:\204 | [.] :\x85:\205 | [.] :\x86:\206 | [.] :\x87:\207 | 89 [.] :\x88:\210 | [.] :\x89:\211 | [.] :\x8A:\212 | [.] :\x8B:\213 | 90 [.] :\x8C:\214 | [.] :\x8D:\215 | [.] :\x8E:\216 | [.] :\x8F:\217 | 91 [.] :\x90:\220 | [.] :\x91:\221 | [.] :\x92:\222 | [.] :\x93:\223 | 92 [.] :\x94:\224 | [.] :\x95:\225 | [.] :\x96:\226 | [.] :\x97:\227 | 93 [.] :\x98:\230 | [.] :\x99:\231 | [.] :\x9A:\232 | [.] :\x9B:\233 | 94 [.] :\x9C:\234 | [.] :\x9D:\235 | [.] :\x9E:\236 | [.] :\x9F:\237 | 95 [.] :\xA0:\240 | [.] :\xA1:\241 | [.] :\xA2:\242 | [.] :\xA3:\243 | 96 [.] :\xA4:\244 | [.] :\xA5:\245 | [.] :\xA6:\246 | [.] :\xA7:\247 | 97 [.] :\xA8:\250 | [.] :\xA9:\251 | [.] :\xAA:\252 | [.] :\xAB:\253 | 98 [.] :\xAC:\254 | [.] :\xAD:\255 | [.] :\xAE:\256 | [.] :\xAF:\257 | 99 [.] :\xB0:\260 | [.] :\xB1:\261 | [.] :\xB2:\262 | [.] :\xB3:\263 | 100 [.] :\xB4:\264 | [.] :\xB5:\265 | [.] :\xB6:\266 | [.] :\xB7:\267 | 101 [.] :\xB8:\270 | [.] :\xB9:\271 | [.] :\xBA:\272 | [.] :\xBB:\273 | 102 [.] :\xBC:\274 | [.] :\xBD:\275 | [.] :\xBE:\276 | [.] :\xBF:\277 | 103 [.] :\xC0:\300 | [.] :\xC1:\301 | [.] :\xC2:\302 | [.] :\xC3:\303 | 104 [.] :\xC4:\304 | [.] :\xC5:\305 | [.] :\xC6:\306 | [.] :\xC7:\307 | 105 [.] :\xC8:\310 | [.] :\xC9:\311 | [.] :\xCA:\312 | [.] :\xCB:\313 | 106 [.] :\xCC:\314 | [.] :\xCD:\315 | [.] :\xCE:\316 | [.] :\xCF:\317 | 107 [.] :\xD0:\320 | [.] :\xD1:\321 | [.] :\xD2:\322 | [.] :\xD3:\323 | 108 [.] :\xD4:\324 | [.] :\xD5:\325 | [.] :\xD6:\326 | [.] :\xD7:\327 | 109 [.] :\xD8:\330 | [.] :\xD9:\331 | [.] :\xDA:\332 | [.] :\xDB:\333 | 110 [.] :\xDC:\334 | [.] :\xDD:\335 | [.] :\xDE:\336 | [.] :\xDF:\337 | 111 [.] :\xE0:\340 | [.] :\xE1:\341 | [.] :\xE2:\342 | [.] :\xE3:\343 | 112 [.] :\xE4:\344 | [.] :\xE5:\345 | [.] :\xE6:\346 | [.] :\xE7:\347 | 113 [.] :\xE8:\350 | [.] :\xE9:\351 | [.] :\xEA:\352 | [.] :\xEB:\353 | 114 [.] :\xEC:\354 | [.] :\xED:\355 | [.] :\xEE:\356 | [.] :\xEF:\357 | 115 [.] :\xF0:\360 | [.] :\xF1:\361 | [.] :\xF2:\362 | [.] :\xF3:\363 | 116 [.] :\xF4:\364 | [.] :\xF5:\365 | [.] :\xF6:\366 | [.] :\xF7:\367 | 117 [.] :\xF8:\370 | [.] :\xF9:\371 | [.] :\xFA:\372 | [.] :\xFB:\373 | 118 [.] :\xFC:\374 | [.] :\xFD:\375 | [.] :\xFE:\376 | [.] :\xFF:\377 | Darwin JCK-MBP18-Retina-13.local 20.6.0 Darwin Kernel Version 20.6.0: Mon Aug 30 06:12:21 PDT 2021; root:xnu-7195.141.6~3/RELEASE_X86_64 x86_64 LANG="en_US.UTF-8" LC_COLLATE="en_US.UTF-8" LC_CTYPE="en_US.UTF-8" LC_MESSAGES="en_US.UTF-8" LC_MONETARY="en_US.UTF-8" LC_NUMERIC="en_US.UTF-8" LC_TIME="en_US.UTF-8" LC_ALL= GNU Awk 5.1.1, API: 3.1 (GNU MPFR 4.1.0, GNU MP 6.2.1) Copyright (C) 1989, 1991-2021 Free Software Foundation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/. %