diff -u nano-patch/files.c nano-utf8/files.c --- nano-patch/files.c Wed Mar 5 01:53:20 2003 +++ nano-utf8/files.c Wed Mar 5 21:40:27 2003 @@ -1926,7 +1926,7 @@ curs_set(0); for(match = 0; match < num_matches; match++) { - char *disp; + wchar_t *disp; wmove(edit, editline, (longest_name + 2) * (match % columns)); if (match % columns == 0 && editline == editwinrows - 1 @@ -1934,8 +1934,8 @@ waddstr(edit, _("(more)")); break; } - disp = display_string(matches[match], 0, longest_name); - waddstr(edit, disp); + disp = display_string(matches[match], 0, longest_name, FALSE); + waddwstr(edit, disp); free(disp); if ((match + 1) % columns == 0) editline++; @@ -2104,7 +2104,7 @@ filelist = browser_init(path, &longest, &numents, dir); assert(filelist != NULL); - foo = charealloc(foo, longest + 8); + foo = charealloc(foo, 8); /* Sort the list by directory first, then alphabetically */ qsort(filelist, numents, sizeof(char *), diralphasort); @@ -2332,40 +2332,44 @@ wmove(edit, 0, 0); for (; j < numents && editline <= editwinrows - 1; j++) { - char *expan_file = display_string(tail(filelist[j]), 0, longest); + wchar_t *expan_file = display_string(tail(filelist[j]), 0, longest, FALSE); - snprintf(foo, longest + 1, "%s%s", expan_file, hblank); + /* Highlight the currently selected file/dir */ + if (j == selected) + wattron(edit, A_REVERSE); + + mvwaddnstr(edit, editline, col, hblank, longest); + mvwaddwstr(edit, editline, col, expan_file); free(expan_file); + col += longest; filecols++; - /* Put file info in the string also. We don't want to report - file size for links, so we use lstat. Also, stat and - lstat return an error if, for example, the user deleted - the file while the file browser was open. In that case, - we report "--" as the file information. */ + /* Show file info also. We don't want to report file size for + * links, so we use lstat. Also, stat and lstat return an + * error if, for example, the user deleted the file while the + * file browser was open. 
In that case, we report "--" as the + * file information. foo holds only 8 bytes, so the translated and numeric texts below are written with a bounded snprintf(). */ if (lstat(filelist[j], &st) == -1 || S_ISLNK(st.st_mode)) { /* Aha! It's a symlink! Now, is it a dir? If so, * mark it as such */ if (stat(filelist[j], &st) == 0 && S_ISDIR(st.st_mode)) - strncpy(foo + longest - 5, _("(dir)"), 5); + snprintf(foo, 8, "%s", _("(dir)")); else - strcpy(foo + longest - 2, "--"); + strcpy(foo, "--"); } else if (S_ISDIR(st.st_mode)) - strncpy(foo + longest - 5, _("(dir)"), 5); + snprintf(foo, 8, "%s", _("(dir)")); else if (st.st_size < (1 << 10)) /* less than 1 K */ - sprintf(foo + longest - 7, "%4d B", (int) st.st_size); + snprintf(foo, 8, "%4d B", (int) st.st_size); else if (st.st_size < (1 << 20)) /* less than 1 meg */ - sprintf(foo + longest - 7, "%4d KB", (int) st.st_size >> 10); + snprintf(foo, 8, "%4d KB", (int) (st.st_size >> 10)); else if (st.st_size < (1 << 30)) /* less than 1 gig */ - sprintf(foo + longest - 7, "%4d MB", (int) st.st_size >> 20); + snprintf(foo, 8, "%4d MB", (int) (st.st_size >> 20)); else - sprintf(foo + longest - 7, "%4d GB", (int) st.st_size >> 30); + snprintf(foo, 8, "%4d GB", (int) (st.st_size >> 30)); + + mvwaddnstr(edit, editline, col - strlen(foo), foo, 7); - /* Highlight the currently selected file/dir */ - if (j == selected) - wattron(edit, A_REVERSE); - waddstr(edit, foo); if (j == selected) wattroff(edit, A_REVERSE); diff -u nano-patch/move.c nano-utf8/move.c --- nano-patch/move.c Thu Feb 13 20:23:03 2003 +++ nano-utf8/move.c Wed Mar 5 16:15:57 2003 @@ -148,7 +148,7 @@ void do_left(void) { if (current_x > 0) - current_x--; + current_x = move_left(current->data, current_x); else if (current != fileage) { do_up(); current_x = strlen(current->data); @@ -163,7 +163,7 @@ assert(current_x <= strlen(current->data)); if (current->data[current_x] != '\0') - current_x++; + current_x += parse_char(current->data + current_x, NULL, NULL); else if (current->next != NULL) { do_down(); current_x = 0; diff -u nano-patch/nano.c nano-utf8/nano.c --- nano-patch/nano.c Wed Mar 5 22:14:08 
2003 +++ nano-utf8/nano.c Thu Mar 6 11:48:47 2003 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "proto.h" #include "nano.h" @@ -993,9 +994,11 @@ #endif /* !DISABLE_MOUSE */ /* The user typed a printable character; add it to the edit buffer. */ -void do_char(char ch) +void do_char(wchar_t ch) { size_t current_len = strlen(current->data); + char *mbch; + int len; #if !defined(DISABLE_WRAPPING) || defined(ENABLE_COLOR) int refresh = FALSE; /* Do we have to run edit_refresh(), or can we get away with @@ -1009,19 +1012,25 @@ if (filebot == current) new_magicline(); + /* Apparently MB_CUR_MAX is not a #defined constant. Yuck. */ + mbch = charalloc(MB_CUR_MAX); + len = wctomb(mbch, ch); + assert(len >= 1); + /* more dangerousness fun =) */ - current->data = charealloc(current->data, current_len + 2); + current->data = charealloc(current->data, current_len + len + 1); assert(current_x <= current_len); - charmove(&current->data[current_x + 1], &current->data[current_x], + charmove(&current->data[current_x + len], &current->data[current_x], current_len - current_x + 1); - current->data[current_x] = ch; - totsize++; + charmove(current->data + current_x, mbch, len); + free(mbch); + totsize += len; set_modified(); #ifndef NANO_SMALL /* note that current_x has not yet been incremented */ if (current == mark_beginbuf && current_x < mark_beginx) - mark_beginx++; + mark_beginx += len; #endif do_right(); @@ -1058,18 +1067,21 @@ if (current->data[current_x] != '\0') { size_t linelen = strlen(current->data + current_x); + int charsize = parse_char(current->data + current_x, NULL, NULL); assert(current_x < strlen(current->data)); + assert(charsize >= 1); /* Let's get dangerous */ - charmove(&current->data[current_x], &current->data[current_x + 1], - linelen); + charmove(&current->data[current_x], + &current->data[current_x + charsize], linelen - charsize + 1); - null_at(&current->data, linelen + current_x - 1); + null_at(&current->data, current_x + linelen - charsize); #ifndef NANO_SMALL if (current_x < mark_beginx && 
mark_beginbuf == current) - mark_beginx--; + mark_beginx -= charsize; #endif + totsize -= charsize; } else if (current != filebot && (current->next != filebot || current->data[0] == '\0')) { /* We can delete the line before filebot only if it is blank: it @@ -1092,11 +1104,11 @@ delete_node(foo); renumber(current); totlines--; + totsize--; wrap_reset(); } else return; - totsize--; set_modified(); edit_refresh(); } @@ -1464,9 +1476,11 @@ /* We find the first whole-word occurrence of word. */ while (findnextstr(TRUE, TRUE, fileage, 0, word)) if (is_whole_word(current_x, current->data, word)) { + wchar_t *foo = display_string(word, 0, COLS, FALSE); + edit_refresh(); - do_replace_highlight(TRUE, word); + do_replace_highlight(TRUE, foo); /* allow replace word to be corrected */ i = statusq(FALSE, spell_list, word, @@ -1475,7 +1489,8 @@ #endif _("Edit a replacement")); - do_replace_highlight(FALSE, word); + do_replace_highlight(FALSE, foo); + free(foo); if (i != -1 && strcmp(word, answer)) { search_last_line = FALSE; @@ -1979,14 +1994,14 @@ /* Is it possible to break line at or before goal? */ int breakable(const char *line, int goal) { - for(; *line != '\0' && goal >= 0; line++) { + while (*line != '\0' && goal >= 0) { + size_t pos = 0; + if (*line == ' ' || *line == '\t') return TRUE; - if (is_cntrl_char(*line)) - goal -= 2; - else - goal--; + line += parse_char(line, NULL, &pos); + goal -= pos; } /* If goal is not negative, the whole line (one word) was short * enough. */ @@ -2009,11 +2024,18 @@ /* Current index in line */ assert(line != NULL); - for(; *line != '\0' && goal >= 0; line++, cur_loc++) { + while (*line != '\0' && goal >= 0) { + size_t pos = 0; + int n; + if (*line == ' ') space_loc = cur_loc; assert(*line != '\t'); - goal -= is_cntrl_char(*line) ? 2 : 1; + + n = parse_char(line, NULL, &pos); + goal -= pos; + line += n; + cur_loc += n; } if (goal >= 0) /* In fact, the whole line displays shorter than goal. 
*/ @@ -2093,10 +2115,10 @@ filestruct *mark_beginbuf_save = mark_beginbuf; int mark_beginx_save = mark_beginx; #endif - + wint_t kbinput; + int fcnkey; size_t indent_len; /* generic indentation length */ filestruct *line; /* generic line of text */ - size_t i; /* generic loop variable */ #ifdef HAVE_REGEX_H regex_t qreg; /* qreg is the compiled quotation regexp. @@ -2378,18 +2400,23 @@ /* Now get a keystroke and see if it's unjustify; if not, unget the * keystroke and return. */ - i = wgetch(edit); + fcnkey = wget_wch(edit, &kbinput); + fcnkey = (fcnkey == KEY_CODE_YES || kbinput < 256); #ifndef DISABLE_MOUSE /* If it was a mouse click, parse it with do_mouse() and it might * become the unjustify key. Else give it back to the input stream. */ - if (i == KEY_MOUSE) { + if (fcnkey && kbinput == KEY_MOUSE) { do_mouse(); - i = wgetch(edit); + fcnkey = wget_wch(edit, &kbinput); + fcnkey = (fcnkey == KEY_CODE_YES || kbinput < 256); } #endif - if (i != NANO_UNJUSTIFY_KEY) { - ungetch(i); + if (kbinput != NANO_UNJUSTIFY_KEY) { + if (fcnkey) + ungetch(kbinput); + else + unget_wch(kbinput); /* Did we back up anything at all? */ if (cutbuffer != cutbuffer_save) free_filestruct(cutbuffer); @@ -3110,7 +3137,10 @@ while (TRUE) { int keyhandled = FALSE; /* Have we handled the keystroke yet? */ - int kbinput; /* Input from keyboard */ + wint_t kbinput; /* Input from keyboard */ + int fcnkey; + /* FALSE for regular char, TRUE if kbinput might be a Control + * or Meta key, or a command key like Home or F1. */ if (ISSET(CONSTUPDATE)) do_cursorpos(TRUE); @@ -3125,25 +3155,34 @@ raw(); #endif - kbinput = wgetch(edit); + fcnkey = wget_wch(edit, &kbinput); + if (fcnkey == ERR) + continue; + fcnkey = (fcnkey == KEY_CODE_YES || kbinput < 256); + /* If !fcnkey, then kbinput definitely represents a literal key, + * that should be sent to do_char. Otherwise it might be a + * control key that represents some other function. 
*/ + #ifdef DEBUG fprintf(stderr, "%d, %c (%d)\n", __LINE__, (char) kbinput, (int) kbinput); + fprintf(stderr, "fcnkey = %d\n", fcnkey); #endif if (kbinput == 27) { /* Grab Alt-key stuff first */ - kbinput = wgetch(edit); + wget_wch(edit, &kbinput); + fcnkey = TRUE; #ifdef DEBUG fprintf(stderr, "%d, %c (%d)\n", __LINE__, (char) kbinput, (int) kbinput); #endif switch (kbinput) { /* Alt-O, suddenly very important ;) */ case 'O': - kbinput = wgetch(edit); + wget_wch(edit, &kbinput); #ifdef DEBUG fprintf(stderr, "%d, %c (%d)\n", __LINE__, (char) kbinput, (int) kbinput); #endif /* Shift or Ctrl + Arrows are Alt-O-[2,5,6]-[A,B,C,D] on some terms */ if (kbinput == '2' || kbinput == '5' || kbinput == '6') - kbinput = wgetch(edit); + wget_wch(edit, &kbinput); if ((kbinput <= 'D' && kbinput >= 'A') || (kbinput <= 'd' && kbinput >= 'a')) kbinput = abcd(kbinput); @@ -3164,19 +3203,19 @@ keyhandled = TRUE; break; case '[': - kbinput = wgetch(edit); + wget_wch(edit, &kbinput); #ifdef DEBUG fprintf(stderr, "%d, %c (%d)\n", __LINE__, (char) kbinput, (int) kbinput); #endif switch (kbinput) { case '1': /* Alt-[-1-[0-5,7-9] = F1-F8 in X at least */ - kbinput = wgetch(edit); + wget_wch(edit, &kbinput); if (kbinput >= '1' && kbinput <= '5') { kbinput = KEY_F(kbinput - 48); - wgetch(edit); + wget_wch(edit, NULL); } else if (kbinput >= '7' && kbinput <= '9') { kbinput = KEY_F(kbinput - 49); - wgetch(edit); + wget_wch(edit, NULL); } else if (kbinput == '~') kbinput = KEY_HOME; #ifdef DEBUG @@ -3186,23 +3225,23 @@ #endif break; case '2': /* Alt-[-2-[0,1,3,4] = F9-F12 in many terms */ - kbinput = wgetch(edit); + wget_wch(edit, &kbinput); switch (kbinput) { case '0': kbinput = KEY_F(9); - wgetch(edit); + wget_wch(edit, NULL); break; case '1': kbinput = KEY_F(10); - wgetch(edit); + wget_wch(edit, NULL); break; case '3': kbinput = KEY_F(11); - wgetch(edit); + wget_wch(edit, NULL); break; case '4': kbinput = KEY_F(12); - wgetch(edit); + wget_wch(edit, NULL); break; case '~': kbinput = 
NANO_INSERTFILE_KEY; @@ -3210,22 +3249,22 @@ break; default: fprintf(stderr, "%d, Alt-[-2-%c! (%d)\n", __LINE__, - char) kbinput, (int) kbinput); + (char) kbinput, (int) kbinput); break; #endif } break; case '3': /* Alt-[-3 = Delete? */ kbinput = NANO_DELETE_KEY; - wgetch(edit); + wget_wch(edit, NULL); break; case '4': /* Alt-[-4 = End? */ kbinput = NANO_END_KEY; - wgetch(edit); + wget_wch(edit, NULL); break; case '5': /* Alt-[-5 = Page Up */ kbinput = KEY_PPAGE; - wgetch(edit); + wget_wch(edit, NULL); break; case 'V': /* Alt-[-V = Page Up in Hurd Console */ case 'I': /* Alt-[-I = Page Up - FreeBSD Console */ @@ -3233,7 +3272,7 @@ break; case '6': /* Alt-[-6 = Page Down */ kbinput = KEY_NPAGE; - wgetch(edit); + wget_wch(edit, NULL); break; case 'U': /* Alt-[-U = Page Down in Hurd Console */ case 'G': /* Alt-[-G = Page Down - FreeBSD Console */ @@ -3241,11 +3280,11 @@ break; case '7': kbinput = KEY_HOME; - wgetch(edit); + wget_wch(edit, NULL); break; case '8': kbinput = KEY_END; - wgetch(edit); + wget_wch(edit, NULL); break; case '9': /* Alt-[-9 = Delete in Hurd Console */ kbinput = KEY_DC; @@ -3255,7 +3294,7 @@ kbinput = NANO_INSERTFILE_KEY; break; case '[': /* Alt-[-[-[A-E], F1-F5 in Linux console */ - kbinput = wgetch(edit); + wget_wch(edit, &kbinput); if (kbinput >= 'A' && kbinput <= 'E') kbinput = KEY_F(kbinput - 64); break; @@ -3328,8 +3367,10 @@ break; } } - /* Hack, make insert key do something useful, like insert file */ - else if (kbinput == KEY_IC) + /* Hack, make insert key do something useful, like insert file. + * Since 0513 == KEY_IC is also a Unicode value, interpret it as + * the insert key only if ncurses tells us to. 
*/ + else if (fcnkey && kbinput == KEY_IC) kbinput = NANO_INSERTFILE_KEY; /* If modify_control_seq is set, we received an Alt-Alt @@ -3348,7 +3389,7 @@ /* Look through the main shortcut list to see if we've hit a shortcut key */ - if (!keyhandled) + if (fcnkey && !keyhandled) for (s = main_list; s != NULL; s = s->next) if (kbinput == s->val || (s->misc1 != 0 && kbinput == s->misc1) || @@ -3385,7 +3426,7 @@ /* Catch ^Z by hand when triggered also 407 == ^Z in Linux console when keypad() is used? */ - if (kbinput == 26 || kbinput == 407) { + if (kbinput == 26 || (fcnkey && kbinput == 407)) { if (ISSET(SUSPEND)) do_suspend(0); keyhandled = TRUE; @@ -3410,7 +3451,9 @@ case 544: case 545: /* Right alt key */ #endif - break; + if (fcnkey) + break; + /* else fall through */ default: #ifdef DEBUG fprintf(stderr, "%d, %c (%d)!\n", __LINE__, (char) kbinput, (int) kbinput); diff -u nano-patch/nano.h nano-utf8/nano.h --- nano-patch/nano.h Mon Feb 24 15:10:28 2003 +++ nano-utf8/nano.h Mon Mar 3 11:21:21 2003 @@ -36,6 +36,9 @@ #define charalloc(howmuch) (char *)nmalloc((howmuch) * sizeof(char)) #define charealloc(ptr, howmuch) (char *)nrealloc(ptr, (howmuch) * sizeof(char)) #define charmove(dest, src, n) memmove(dest, src, (n) * sizeof(char)) +#define wcharalloc(howmuch) (wchar_t *)nmalloc((howmuch) * sizeof(wchar_t)) +#define wcharealloc(ptr, howmuch) (wchar_t *)nrealloc(ptr, (howmuch) * sizeof(wchar_t)) +#define wcharmove(dest, src, n) memmove(dest, src, (n) * sizeof(wchar_t)) #ifndef DISABLE_WRAPPING #define wrap_reset() UNSET(SAMELINEWRAP) #else diff -u nano-patch/proto.h nano-utf8/proto.h --- nano-patch/proto.h Thu Mar 6 18:55:06 2003 +++ nano-utf8/proto.h Thu Mar 6 18:00:38 2003 @@ -302,7 +302,7 @@ #ifndef DISABLE_MOUSE void do_mouse(void); #endif -void do_char(char ch); +void do_char(wchar_t ch); void do_backspace(void); void do_delete(void); void do_tab(void); @@ -432,12 +432,16 @@ #endif /* Public functions in utils.c */ +unsigned char control_rep(unsigned char foo); 
void skip_tilde(void); +size_t move_left(const char *str, size_t pos); +int parse_char(const char *str, wchar_t *chr, size_t *col); const char *gethomedir(void); int parse_int(const char *str, int *val); -int is_cntrl_char(int c) __attribute__((const)); +int wis_cntrl_char(wchar_t c) __attribute__((const)); void align(char **strp); void null_at(char **data, size_t index); +void wnull_at(wchar_t **data, size_t index); void unsunder(char *str, size_t true_len); void sunder(char *str); #ifndef NANO_SMALL @@ -455,22 +459,25 @@ void *nmalloc(size_t howmuch) __attribute__((malloc)); void *nrealloc(void *ptr, size_t howmuch); char *mallocstrcpy(char *dest, const char *src) __attribute__((malloc)); +wchar_t *wmallocstrcpy(wchar_t *dest, const wchar_t *src) __attribute__((malloc)); char *mallocstrassn(char *dest, char *src); void new_magicline(void); /* Public functions in winio.c */ void do_first_line(void); void do_last_line(void); -size_t xplustabs(void) __attribute__((pure)); -size_t actual_x(const char *str, size_t xplus) __attribute__((pure)); -size_t strnlenpt(const char *buf, size_t size) __attribute__((pure)); -size_t strlenpt(const char *buf) __attribute__((pure)); +size_t xplustabs(void); +size_t actual_x(const char *str, size_t xplus); +size_t wactual_x(const wchar_t *str, size_t xplus); +size_t strnlenpt(const char *buf, size_t size); +size_t strlenpt(const char *buf); void blank_bottombars(void); void blank_edit(void); void blank_statusbar(void); void blank_statusbar_refresh(void); void check_statblank(void); -char *display_string(const char *buf, size_t start_col, int len) __attribute__((malloc)); +size_t display_string_len(const char *buf, size_t start_col, size_t end_col); +wchar_t *display_string(const char *buf, size_t start_col, int len, int dollars) __attribute__((malloc)); void nanoget_repaint(const char *buf, const char *inputbuf, size_t x); int nanogetstr(int allowtabs, const char *buf, const char *def, #ifndef NANO_SMALL @@ -487,7 +494,7 @@ void 
onekey(const char *keystroke, const char *desc, int len); size_t get_page_start(size_t column) __attribute__((pure)); void reset_cursor(void); -void edit_add(const filestruct *fileptr, const char *converted, +void edit_add(const filestruct *fileptr, const wchar_t *converted, int yval, size_t start #ifndef NANO_SMALL , size_t virt_mark_beginx, size_t virt_cur_x @@ -523,7 +530,7 @@ #define do_help nano_disabled_msg #endif int keypad_on(WINDOW *win, int newval); -void do_replace_highlight(int highlight_flag, const char *word); +void do_replace_highlight(int highlight_flag, const wchar_t *word); #ifdef DEBUG void dump_buffer(const filestruct *inptr); void dump_buffer_reverse(void); diff -u nano-patch/search.c nano-utf8/search.c --- nano-patch/search.c Wed Mar 5 01:55:22 2003 +++ nano-utf8/search.c Thu Mar 6 18:21:42 2003 @@ -68,7 +68,7 @@ void not_found_msg(const char *str) { - if (strlen(str) <= COLS / 2) + if (strlenpt(str) <= COLS / 2) statusbar(_("\"%s\" not found"), str); else statusbar(_("\"%.*s...\" not found"), COLS / 2, str); @@ -129,11 +129,11 @@ #endif if (last_search[0] != '\0') { - char *disp = display_string(last_search, 0, COLS / 3); + wchar_t *disp = display_string(last_search, 0, COLS / 3, FALSE); - buf = charalloc(COLS / 3 + 7); + buf = charalloc(wcstombs(NULL, disp, 0) + 7); /* We use COLS / 3 here because we need to see more on the line */ - sprintf(buf, " [%s%s]", disp, + sprintf(buf, " [%ls%s]", disp, strlenpt(last_search) > COLS / 3 ? "..." 
: ""); free(disp); } else { @@ -528,11 +528,11 @@ numreplaced = 0; if (!replaceall) { - char *exp_word; + wchar_t *exp_word; size_t xpt = xplustabs(); exp_word = display_string(current->data, xpt, - strnlenpt(current->data, match_len + current_x) - xpt); + strnlenpt(current->data, match_len + current_x) - xpt, FALSE); curs_set(0); do_replace_highlight(TRUE, exp_word); diff -u nano-patch/utils.c nano-utf8/utils.c --- nano-patch/utils.c Thu Mar 6 18:55:09 2003 +++ nano-utf8/utils.c Thu Mar 6 18:05:50 2003 @@ -27,12 +27,25 @@ #include #include #include +#include #include #include #include #include "proto.h" #include "nano.h" +/* foo is a control character. It displays as ^@, ^?, or ^[ch] where ch + * is foo + 64. We return that character. */ +unsigned char control_rep(unsigned char foo) +{ + if (foo == 127) + return '?'; + else if (foo == '\n') + return '@'; + else + return foo + 64; +} + void skip_tilde(void) { nodelay(edit, TRUE); @@ -40,6 +53,58 @@ nodelay(edit, FALSE); } +/* Return the index in str of the beginning of the character before the + * one at pos. */ +size_t move_left(const char *str, size_t pos) +{ + size_t ret = pos; + + /* Using UTF-8, it is possible to decrement pos and then resync. + * There is no library function to do this. Here is the naive, O(pos) + * way. */ + + assert(str != NULL && pos <= strlen(str)); + while (TRUE) { + int len; + + len = parse_char(str + pos - ret, NULL, NULL); + if (ret <= len) + break; + ret -= len; + } + return pos - ret; +} + +/* Parse a multi-byte character from str. Return the number of bytes + * used. If chr != NULL, store the wide character. If col != NULL, store + * the new display width in col. In case *str == '\t', we expect col to + * have the current display width. */ +int parse_char(const char *str, wchar_t *chr, size_t *col) +{ + wchar_t foo; + int n; + + assert(str != NULL); + n = mbtowc(&foo, str, MB_CUR_MAX); + if (n <= 0) { + /* str is not valid UTF-8. Interpret this byte literally. 
+ * This is not a good way to handle this error. */ + foo = (unsigned char) *str; + n = 1; + } + if (chr != NULL) + *chr = foo; + if (col != NULL) { + if (foo == '\t') + *col += tabsize - *col % tabsize; + else if (wis_cntrl_char(foo)) + *col += 2; + else + *col += wcwidth(foo); + } + return n; +} + /* * Return the user's home directory. We use $HOME, and if that fails try * getpwuid(). */ @@ -76,10 +141,9 @@ return 0; } -int is_cntrl_char(int c) +int wis_cntrl_char(wchar_t c) { - return (-128 <= c && c < -96) || (0 <= c && c < 32) || - (127 <= c && c < 160); + return (0 <= c && c < 32) || (127 <= c && c < 160); } /* Fix the memory allocation for a string. */ @@ -98,6 +162,14 @@ (*data)[index] = '\0'; } +/* Null a string at a certain index and align it. */ +void wnull_at(wchar_t **data, size_t index) +{ + assert(data != NULL); + *data = wcharealloc(*data, index + 1); + (*data)[index] = '\0'; +} + /* For non-null-terminated lines. A line, by definition, shouldn't * normally have newlines in it, so encode its nulls as newlines. */ void unsunder(char *str, size_t true_len) @@ -277,6 +349,22 @@ return dest; } + +/* Copy one malloc()ed string to another pointer. 
Should be used as: + * dest = mallocstrcpy(dest, src); */ +wchar_t *wmallocstrcpy(wchar_t *dest, const wchar_t *src) +{ + if (src == NULL) + src = L""; + + if (src != dest) + free(dest); + + dest = wcharalloc(wcslen(src) + 1); + wcscpy(dest, src); + + return dest; +} /* Usage: answer = mallocstrassn(answer, real_dir_from_tilde(answer)); */ char *mallocstrassn(char *dest, char *src) diff -u nano-patch/winio.c nano-utf8/winio.c --- nano-patch/winio.c Thu Mar 6 18:59:37 2003 +++ nano-utf8/winio.c Thu Mar 6 19:00:02 2003 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "proto.h" #include "nano.h" @@ -71,19 +72,40 @@ assert(str != NULL); - for (; length < xplus && *str != '\0'; i++, str++) { + while (*str != '\0') { + int n; + + n = parse_char(str, NULL, &length); + if (length > xplus) + break; + i += n; + str += n; + } + + return i; +} + +size_t wactual_x(const wchar_t *str, size_t xplus) +{ + size_t i = 0; + /* the position in str, returned */ + size_t length = 0; + /* the screen display width to str[i] */ + + assert(str != NULL); + + while (*str != '\0') { if (*str == '\t') length += tabsize - length % tabsize; - else if (is_cntrl_char((int) *str)) + else if (wis_cntrl_char(*str)) length += 2; else - length++; + length += wcwidth(*str); + if (length > xplus) + break; + i++; + str++; } - assert(length == strnlenpt(str - i, i)); - assert(i <= strlen(str - i)); - - if (length > xplus) - i--; return i; } @@ -94,14 +116,17 @@ { size_t length = 0; + if (size == 0) + return 0; assert(buf != NULL); - for (; *buf != '\0' && size != 0; size--, buf++) { - if (*buf == '\t') - length += tabsize - (length % tabsize); - else if (is_cntrl_char((int) *buf)) - length += 2; - else - length++; + while (*buf != '\0') { + int n; + + n = parse_char(buf, NULL, &length); + buf += n; + if (size <= n) + break; + size -= n; } return length; } @@ -149,64 +174,142 @@ } } +/* buf is a multi-byte string to be displayed. We need to expand Tabs and + * control characters. 
How many wide characters do we need to send to + * ncurses? start_col is the column of *buf (usually 0). We display to + * end_col - 1. */ +size_t display_string_len(const char *buf, size_t start_col, size_t end_col) +{ + size_t retval = 0; + + assert(buf != NULL); + /* Throughout the loop, we maintain the fact that *buf displays at + * column start_col. */ + while (start_col <= end_col && *buf != '\0') { + wchar_t foo; + /* The current wide character. */ + int n; + /* How many bytes wide is this character? */ + size_t old_col = start_col; + + n = parse_char(buf, &foo, &start_col); + if (foo == '\t') + retval += start_col - old_col; + else if (wis_cntrl_char(foo)) + retval += 2; + else + /* The point of this whole function is that wcwidth might be + * zero, but we still have to send the character to ncurses. */ + retval++; + buf += n; + } + return retval; +} + /* Convert buf into a string that can be displayed on screen. The caller - * wants to display buf starting with column start_col, and extending for - * at most len columns. start_col is zero-based. len is one-based (so len - * == 0 means you get "" returned. The returned string is dynamically - * allocated, and should be freed. */ -char *display_string(const char *buf, size_t start_col, int len) + * wants to display buf starting with "column", and extending for at most + * len columns. column is zero-based. len is one-based (so len == 0 + * means you get "" returned. The returned string is dynamically + * allocated, and should be freed. If dollars is TRUE the caller might + * put $ at the beginning or end of the line if it is long. */ +wchar_t *display_string(const char *buf, size_t column, int len, int dollars) { size_t start_index; /* Index in buf of first character shown in return value. */ - size_t column; - /* Screen column start_index corresponds to. */ - size_t end_index; - /* Index in buf of last character shown in return value. */ + size_t start_col; + /* Screen column start_index corresponds to. 
zero-based. */ size_t alloc_len; /* The length of memory allocated for converted. */ - char *converted; + wchar_t *converted; /* The string we return. */ - size_t index; + size_t conv_index; /* Current position in converted. */ - if (len == 0) - return mallocstrcpy(NULL, ""); + /* When displaying multicolumn characters, such as the Chinese + * character 童 (in UTF-8), ncurses refuses to show only half of the + * character at the end of the line. Also, when writing $ at the + * beginning of the line, ncurses *shifts the whole line left* if the + * character in the first column is two columns wide. These are both + * annoying "features" that we have to work around. */ + + /* Make room for the $ at the end of the line. */ + if (dollars && strlenpt(buf) > column + len) + len--; + + if (len <= 0) + return wmallocstrcpy(NULL, L""); + + start_index = actual_x(buf, column); + if (buf[start_index] == '\t') + start_col = column; + else + start_col = strnlenpt(buf, start_index); + /* Fix this: used two passes already. */ + assert(start_col <= column); + + alloc_len = display_string_len(buf + start_index, start_col, + column + len) + 2; + converted = wcharalloc(alloc_len + 1); + + conv_index = 0; + if (column > start_col || (column > 0 && dollars && + buf[start_index] != '\t')) { + int n; + wchar_t foo; + + /* We don't display all of buf[start_index] since it starts to the + * left of the screen. 
*/ + n = parse_char(buf + start_index, &foo, NULL); + if (wis_cntrl_char(foo)) { + if (column > start_col) { + converted[0] = control_rep(foo); + conv_index = 1; + start_index += n; + } + } else if (wcwidth(foo) > 1) { + converted[0] = ' '; + conv_index = 1; + if (dollars && column == start_col) { + converted[1] = ' '; + conv_index = 2; + } + start_index += n; + } + } - start_index = actual_x(buf, start_col); - column = strnlenpt(buf, start_index); - assert(column <= start_col); - end_index = actual_x(buf, start_col + len - 1); - alloc_len = strnlenpt(buf, end_index + 1) - column; - if (len > alloc_len + column - start_col) - len = alloc_len + column - start_col; - converted = charalloc(alloc_len + 1); - buf += start_index; - index = 0; + /* We maintain the condition that buf[start_index] displays at column + * start_col. + * + * The <= here is important (vs just <), so that accented characters + * at the end of the line are displayed correctly. */ + while (start_col <= column + len && buf[start_index] != '\0') { + wchar_t foo; + int n; - for (; index < alloc_len; buf++) { - if (*buf == '\t') + n = parse_char(buf + start_index, &foo, NULL); + if (foo == '\t') do { - converted[index++] = ' '; - } while ((column + index) % tabsize); - else if (is_cntrl_char(*buf)) { - converted[index++] = '^'; - if (*buf == 127) - converted[index++] = '?'; - else if (*buf == '\n') - /* Treat newlines (ASCII 10's) embedded in a line as encoded - * nulls (ASCII 0's); the line in question should be run - * through unsunder() before reaching here */ - converted[index++] = '@'; - else - converted[index++] = *buf + 64; - } else - converted[index++] = *buf; + converted[conv_index++] = ' '; + start_col++; + } while (start_col % tabsize != 0); + else if (wis_cntrl_char(foo)) { + converted[conv_index++] = '^'; + converted[conv_index++] = control_rep(foo); + start_col += 2; + } else { + converted[conv_index++] = foo; + start_col += wcwidth(foo); + } + start_index += n; } - assert(len <= 
alloc_len + column - start_col); - charmove(converted, converted + start_col - column, len); - null_at(&converted, len); + assert(conv_index <= alloc_len); + converted[conv_index] = '\0'; + conv_index = wactual_x(converted, len); + wnull_at(&converted, conv_index); + assert(0 <= wcswidth(converted, -1)); + assert(wcswidth(converted, -1) <= len); - return charealloc(converted, len + 1); + return converted; } /* Repaint the statusbar when getting a character in nanogetstr(). buf @@ -217,7 +320,7 @@ void nanoget_repaint(const char *buf, const char *inputbuf, size_t x) { size_t x_real = strnlenpt(inputbuf, x); - int wid = COLS - strlen(buf) - 2; + int wid = COLS - strlenpt(buf) - 2; assert(0 <= x && x <= strlen(inputbuf)); @@ -230,11 +333,10 @@ waddch(bottomwin, x_real < wid ? ' ' : '$'); if (COLS > 2) { size_t page_start = x_real - x_real % wid; - char *expanded = display_string(inputbuf, page_start, wid); + wchar_t *expanded = display_string(inputbuf, page_start, wid, FALSE); assert(wid > 0); - assert(strlen(expanded) <= wid); - waddstr(bottomwin, expanded); + waddwstr(bottomwin, expanded); free(expanded); wmove(bottomwin, 0, COLS - wid + x_real - page_start); } else @@ -254,7 +356,14 @@ #endif ) { - int kbinput; + wint_t kbinput; + int fcnkey; + /* FALSE means kbinput is a literal character. TRUE means it + * might be a function key. */ + int charlen; + /* Number of bytes kbinput expands to as a multi-byte char. */ + char *mbch; + /* The multi-byte character. 
*/ static size_t x; /* the cursor position in 'answer' */ size_t xend; @@ -295,7 +404,13 @@ input */ wrefresh(edit); - while ((kbinput = wgetch(bottomwin)) != 13) { + while (TRUE) { + fcnkey = wget_wch(bottomwin, &kbinput); + if (fcnkey == ERR) + continue; + fcnkey = (fcnkey == KEY_CODE_YES || kbinput < 256); + if (kbinput == 13) + break; #ifdef DEBUG fprintf(stderr, "kbinput = '%c' (%d)\n", (char) kbinput, (int) kbinput); #endif @@ -320,12 +435,15 @@ tabbed = FALSE; #endif + /* !fcnkey means kbinput is definitely a literal key. */ + if (!fcnkey) + goto literal_tab; switch (kbinput) { /* Stuff we want to equate with , ASCII 13 */ case 343: /* I wish C had labelled break like Java. */ goto got_enter; /* Enter on iris-ansi $TERM, sometimes */ - /* Stuff we want to ignore */ + /* Stuff we want to ignore */ #ifdef PDCURSES case 541: case 542: @@ -353,13 +471,13 @@ case NANO_BACK_KEY: do_leftkey: if (x > 0) - x--; + x = move_left(answer, x); break; case KEY_RIGHT: case NANO_FORWARD_KEY: do_rightkey: if (x < xend) - x++; + x += parse_char(answer + x, NULL, NULL); break; case NANO_CONTROL_K: case NANO_CONTROL_U: @@ -373,14 +491,16 @@ if (x == 0) break; - x--; + x = move_left(answer, x); /* Fall through. 
*/ case KEY_DC: case NANO_CONTROL_D: do_deletekey: if (x < xend) { - charmove(answer + x, answer + x + 1, xend - x); - xend--; + int n = parse_char(answer + x, NULL, NULL); + + charmove(answer + x, answer + x + n, xend - x - n + 1); + xend -= n; } break; case NANO_CONTROL_I: @@ -501,11 +621,14 @@ if (kbinput < 32) break; literal_tab: - answer = charealloc(answer, xend + 2); - charmove(answer + x + 1, answer + x, xend - x + 1); - xend++; - answer[x] = kbinput; - x++; + mbch = charalloc(MB_CUR_MAX); + charlen = wctomb(mbch, kbinput); + assert(charlen >= 1); + answer = charealloc(answer, xend + charlen + 1); + charmove(answer + x + charlen, answer + x, xend - x + 1); + xend += charlen; + charmove(answer + x, mbch, charlen); + x += charlen; } /* switch (kbinput) */ #ifndef NANO_SMALL last_kbinput = kbinput; @@ -543,10 +666,8 @@ /* Modified, View, or "". Tells the state of this buffer. */ int statelen; /* strlen(state) */ - char *exppath = NULL; + wchar_t *exppath = NULL; /* The file name, expanded for display. */ - size_t explen = 0; - /* strlen(exppath) */ int newbuffer = FALSE; /* Do we say "New Buffer"? */ int modlen = 0; @@ -611,16 +732,25 @@ size_t lenpt = strlenpt(path); size_t start_col; - if (lenpt > space) - start_col = actual_x(path, lenpt - space); - else - start_col = 0; - exppath = display_string(path, start_col, space); dots = (lenpt > space); - explen = strlen(exppath); + if (dots) { + start_col = lenpt - space + 3; + space -= 3; + } else + start_col = 0; + + exppath = display_string(path, start_col, space, FALSE); } if (!dots) { + size_t explen; + /* The number of columns path takes up. */ + + if (!newbuffer) + explen = wcswidth(exppath, -1); + else + explen = 0; + /* There is room for the whole filename, so we center it. If * state is "", we pretend state is "Modified", so the file name * won't move when the buffer gets modified. 
*/ @@ -630,20 +760,18 @@ if (!newbuffer) { assert(strlen(prefix) + 1 == prefixlen); waddch(topwin, ' '); - waddstr(topwin, exppath); + waddwstr(topwin, exppath); } } else { /* We will say something like "File: ...ename". */ waddnstr(topwin, prefix, prefixlen); - if (space <= 0 || newbuffer) + if (space <= -3 || newbuffer) goto the_end; waddch(topwin, ' '); - waddnstr(topwin, "...", space); - if (space <= 3) + waddnstr(topwin, "...", space + 3); + if (space <= 0) goto the_end; - space -= 3; - assert(explen = space + 3); - waddnstr(topwin, exppath + 3, space); + waddwstr(topwin, exppath); } the_end: @@ -776,7 +904,7 @@ * * virt_mark_beginx is the position in converted of where the mark * starts, and similarly for virt_cur_x. */ -void edit_add(const filestruct *fileptr, const char *converted, +void edit_add(const filestruct *fileptr, const wchar_t *converted, int yval, size_t start #ifndef NANO_SMALL , size_t virt_mark_beginx, size_t virt_cur_x @@ -784,11 +912,11 @@ ) { assert(fileptr != NULL && converted != NULL); - assert(strlen(converted) <= COLS); + assert(wcswidth(converted, -1) <= COLS); /* Just paint the string in any case (we'll add color or reverse on just the text that needs it */ - mvwaddstr(edit, yval, 0, converted); + mvwaddwstr(edit, yval, 0, converted); #ifdef ENABLE_COLOR if (colorstrings != NULL && !ISSET(NO_COLOR_SYNTAX)) { @@ -809,6 +937,8 @@ int paintlen; /* number of chars to paint on this line. There are COLS * characters on a whole line. */ + int conv_index; + /* Index in converted where we paint. 
*/ regmatch_t startmatch; /* match position for start_regexp*/ regmatch_t endmatch; /* match position for end_regexp*/ @@ -847,17 +977,18 @@ if (startmatch.rm_so <= startpos) x_start = 0; else - x_start = strnlenpt(fileptr->data, startmatch.rm_so) - - start; - paintlen = strnlenpt(fileptr->data, startmatch.rm_eo) - - start - x_start; - if (paintlen > COLS - x_start) - paintlen = COLS - x_start; - - assert(0 <= x_start && 0 < paintlen && - x_start + paintlen <= COLS); - mvwaddnstr(edit, yval, x_start, - converted + x_start, paintlen); + x_start = strnlenpt(fileptr->data, + startmatch.rm_so) - start; + + conv_index = wactual_x(converted, x_start); + + paintlen = wactual_x(converted + conv_index, + strnlenpt(fileptr->data + startmatch.rm_so, + startmatch.rm_eo - startmatch.rm_so)); + + assert(0 <= x_start && 0 <= paintlen); + mvwaddnwstr(edit, yval, x_start, + converted + conv_index, paintlen); } k = startmatch.rm_eo; } @@ -935,17 +1066,10 @@ goto step_two; /* Now paint the start of fileptr. */ - paintlen = end_line != fileptr ? COLS : - strnlenpt(fileptr->data, endmatch.rm_eo) - start; - if (paintlen > COLS) - paintlen = COLS; - - assert(0 < paintlen && paintlen <= COLS); - mvwaddnstr(edit, yval, 0, converted, paintlen); - - /* We have already painted the whole line. */ - if (paintlen == COLS) - goto skip_step_two; + paintlen = end_line != fileptr ? -1 : + wactual_x(converted, + strnlenpt(fileptr->data, endmatch.rm_eo) - start); + mvwaddnwstr(edit, yval, 0, converted, paintlen); step_two: /* Second step, we look for starts on this line. */ @@ -967,6 +1091,7 @@ else x_start = strnlenpt(fileptr->data, startmatch.rm_so) - start; + conv_index = wactual_x(converted, x_start); if (regexec(tmpcolor->end, fileptr->data + startmatch.rm_eo, 1, &endmatch, startmatch.rm_eo == 0 ? 0 : REG_NOTBOL) == 0) { @@ -979,15 +1104,12 @@ zero characters long? 
*/ if (endmatch.rm_eo > startpos && endmatch.rm_eo > startmatch.rm_so) { - paintlen = strnlenpt(fileptr->data, endmatch.rm_eo) - - start - x_start; - if (x_start + paintlen > COLS) - paintlen = COLS - x_start; - - assert(0 <= x_start && 0 < paintlen && - x_start + paintlen <= COLS); - mvwaddnstr(edit, yval, x_start, - converted + x_start, paintlen); + paintlen = wactual_x(converted + conv_index, + strnlenpt(fileptr->data + startmatch.rm_so, + endmatch.rm_eo - startmatch.rm_so)); + assert(0 <= x_start && x_start < COLS); + mvwaddnwstr(edit, yval, x_start, + converted + conv_index, paintlen); } } else if (!searched_later_lines) { searched_later_lines = 1; @@ -1000,9 +1122,8 @@ end_line = end_line->next; if (end_line != NULL) { assert(0 <= x_start && x_start < COLS); - mvwaddnstr(edit, yval, x_start, - converted + x_start, - COLS - x_start); + mvwaddnwstr(edit, yval, x_start, + converted + conv_index, -1); /* We painted to the end of the line, so * don't bother checking any more starts. */ break; @@ -1012,7 +1133,6 @@ } /* while start_col < endpos */ } /* if (tmp_color->end != NULL) */ - skip_step_two: wattroff(edit, A_BOLD); wattroff(edit, COLOR_PAIR(tmpcolor->pairnum)); } /* for tmpcolor in colorstrings */ @@ -1029,6 +1149,8 @@ int x_start; /* Starting column for mvwaddnstr. Zero-based. */ + size_t conv_index; + /* Index in converted where we paint. */ int paintlen; /* number of chars to paint on this line. There are COLS * characters on a whole line. 
*/ @@ -1065,11 +1187,11 @@ } assert(x_start >= 0); - if (x_start + paintlen > COLS) - paintlen = COLS - x_start; + conv_index = wactual_x(converted, x_start); + paintlen = wactual_x(converted + conv_index, paintlen); if (paintlen > 0) { wattron(edit, A_REVERSE); - mvwaddnstr(edit, yval, x_start, converted + x_start, + mvwaddnwstr(edit, yval, x_start, converted + conv_index, paintlen); wattroff(edit, A_REVERSE); } @@ -1103,7 +1225,7 @@ int virt_cur_x; int virt_mark_beginx; #endif - char *converted; + wchar_t *converted; /* fileptr->data converted to have tabs and control characters * expanded. */ size_t page_start; @@ -1138,7 +1260,7 @@ /* Expand the line, replacing Tab by spaces, and control characters * by their display form. */ - converted = display_string(fileptr->data, page_start, COLS); + converted = display_string(fileptr->data, page_start, COLS, TRUE); /* Now, paint the line */ edit_add(fileptr, converted, line, page_start @@ -1426,24 +1548,24 @@ if (COLS >= 4) { char *bar; - char *foo; + wchar_t *foo; int start_x = 0; size_t foo_len; - bar = charalloc(COLS - 3); - vsnprintf(bar, COLS - 3, msg, ap); + bar = charalloc((COLS - 3) * MB_CUR_MAX); + vsnprintf(bar, (COLS - 3) * MB_CUR_MAX, msg, ap); va_end(ap); - foo = display_string(bar, 0, COLS - 4); + foo = display_string(bar, 0, COLS - 4, FALSE); free(bar); - foo_len = strlen(foo); + foo_len = wcswidth(foo, -1); start_x = (COLS - foo_len - 4) / 2; wmove(bottomwin, 0, start_x); wattron(bottomwin, A_REVERSE); waddstr(bottomwin, "[ "); - waddstr(bottomwin, foo); + waddwstr(bottomwin, foo); free(foo); waddstr(bottomwin, " ]"); @@ -1732,25 +1854,25 @@ /* Highlight the current word being replaced or spell checked. We expect * word to have tabs and control characters expanded. 
*/ -void do_replace_highlight(int highlight_flag, const char *word) +void do_replace_highlight(int highlight_flag, const wchar_t *word) { - int y = xplustabs(); - size_t word_len = strlen(word); + size_t y = xplustabs(); + size_t word_len = wcswidth(word, -1); y = get_page_start(y) + COLS - y; - /* Now y is the number of characters we can display on this line. */ + /* Now y is the number of columns we can display on this line. */ + assert(y > 0); + if (word_len > y) + y--; reset_cursor(); if (highlight_flag) wattron(edit, A_REVERSE); - waddnstr(edit, word, y - 1); - + waddnwstr(edit, word, wactual_x(word, y)); if (word_len > y) waddch(edit, '$'); - else if (word_len == y) - waddch(edit, word[word_len - 1]); if (highlight_flag) wattroff(edit, A_REVERSE);