qemacs-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemacs-commit] qemacs buffer.c unihex.c tests/TestPage.txt


From: Charlie Gordon
Subject: [Qemacs-commit] qemacs buffer.c unihex.c tests/TestPage.txt
Date: Sat, 04 Jan 2014 15:54:49 +0000

CVSROOT:        /sources/qemacs
Module name:    qemacs
Changes by:     Charlie Gordon <chqrlie>        14/01/04 15:54:49

Modified files:
        .              : buffer.c unihex.c 
        tests          : TestPage.txt 

Log message:
        improve Unicode hex display mode
        
        * filter control characters and non-BMP code points
        * new mode specific modeline
        * do not space out single width glyphs (not very readable)
        * eb_get_char_offset: add validation on offset argument
        * eb_get_char_offset: align offset argument on character boundary

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/qemacs/buffer.c?cvsroot=qemacs&r1=1.47&r2=1.48
http://cvs.savannah.gnu.org/viewcvs/qemacs/unihex.c?cvsroot=qemacs&r1=1.16&r2=1.17
http://cvs.savannah.gnu.org/viewcvs/qemacs/tests/TestPage.txt?cvsroot=qemacs&r1=1.1.1.1&r2=1.2

Patches:
Index: buffer.c
===================================================================
RCS file: /sources/qemacs/qemacs/buffer.c,v
retrieving revision 1.47
retrieving revision 1.48
diff -u -b -r1.47 -r1.48
--- buffer.c    23 Dec 2013 23:26:43 -0000      1.47
+++ buffer.c    4 Jan 2014 15:54:48 -0000       1.48
@@ -2,7 +2,7 @@
  * Buffer handling for QEmacs
  *
  * Copyright (c) 2000 Fabrice Bellard.
- * Copyright (c) 2002-2013 Charlie Gordon.
+ * Copyright (c) 2002-2014 Charlie Gordon.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -1092,9 +1092,24 @@
     int pos;
     Page *p, *p_end;
 
+    if (offset < 0)
+        offset = 0;
+
     if (!b->charset->variable_size) {
+        /* offset is round down to character boundary */
         pos = min(offset, b->total_size) / b->charset->char_size;
     } else {
+        if (b->charset == &charset_utf8) {
+            /* Round offset down to character boundary */
+            u8 buf[1];
+            while (offset > 0 && eb_read(b, offset, buf, 1) == 1 &&
+                   (buf[0] & 0xC0) == 0x80) {
+                /* backtrack over trailing bytes */
+                offset--;
+            }
+        } else {
+            /* CG: XXX: offset rounding to character boundary is undefined */
+        }
         pos = 0;
         p = b->page_table;
         p_end = p + b->nb_pages;

Index: unihex.c
===================================================================
RCS file: /sources/qemacs/qemacs/unihex.c,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -b -r1.16 -r1.17
--- unihex.c    22 Apr 2008 09:04:20 -0000      1.16
+++ unihex.c    4 Jan 2014 15:54:48 -0000       1.17
@@ -1,8 +1,8 @@
 /*
  * Unicode Hexadecimal mode for QEmacs.
  *
- * Copyright (c) 2000, 2001 Fabrice Bellard.
- * Copyright (c) 2002-2008 Charlie Gordon.
+ * Copyright (c) 2000-2001 Fabrice Bellard.
+ * Copyright (c) 2002-2014 Charlie Gordon.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -38,16 +38,13 @@
     return 0;
 }
 
-static int to_disp(int c)
+static int unihex_to_disp(int c)
 {
-#if 1
-    /* Do not allow characters in range 160-255 to show as graphics */
-    if ((c & 127) < ' ' || c == 127)
-        c = '.';
-#else
-    if (c < ' ' || c >= 127)
+    /* Do not allow characters in range 127-160 to show as graphics
+     * nor characters beyond the BMP plane
+     */
+    if (c < ' ' || c == 127 || (c >= 128 && c < 160) || c > 0xFFFF)
         c = '.';
-#endif
     return c;
 }
 
@@ -55,6 +52,7 @@
 {
     int pos;
 
+    /* CG: beware: offset may fall inside a character */
     pos = eb_get_char_offset(s->b, offset);
     pos = align(pos, s->disp_width);
     return eb_goto_char(s->b, pos);
@@ -62,12 +60,10 @@
 
 static int unihex_display(EditState *s, DisplayState *ds, int offset)
 {
-    int j, len, ateof;
+    int j, len, ateof, disp_width;
     int offset1, offset2, charpos;
     unsigned int b;
-    /* CG: array size is incorrect, should be smaller and should clip
-     * disp_width too.
-     */
+    /* CG: array size is incorrect, should be smaller */
     unsigned int buf[LINE_MAX_SIZE];
     unsigned int pos[LINE_MAX_SIZE];
 
@@ -75,28 +71,34 @@
 
     ds->style = QE_STYLE_COMMENT;
     charpos = eb_get_char_offset(s->b, offset);
-    display_printf(ds, -1, -1, "%08x %08x ", charpos, offset);
+    display_printf(ds, -1, -1, "%08x ", charpos);
+    //display_printf(ds, -1, -1, "%08x %08x ", charpos, offset);
 
+    disp_width = min(LINE_MAX_SIZE - 1, s->disp_width);
     ateof = 0;
     len = 0;
-    for (j = 0; j < s->disp_width; j++) {
-        if (offset < s->b->total_size) {
+    for (j = 0; j < disp_width && offset < s->b->total_size; j++) {
             pos[len] = offset;
             buf[len] = eb_nextc(s->b, offset, &offset);
             len++;
         }
-    }
     pos[len] = offset;
 
     ds->style = QE_STYLE_FUNCTION;
 
-    for (j = 0; j < s->disp_width; j++) {
+    for (j = 0; j < disp_width; j++) {
         display_char(ds, -1, -1, ' ');
         offset1 = pos[j];
         offset2 = pos[j + 1];
         if (j < len) {
+            if (buf[j] < 0x10000) {
             display_printhex(ds, offset1, offset2, buf[j], 4);
         } else {
+                ds->cur_hex_mode = 1;
+                display_printf(ds, offset1, offset2, "%x", buf[j]);
+                ds->cur_hex_mode = 0;
+            }
+        } else {
             if (!ateof) {
                 ateof = 1;
                 offset2 = offset1 + 1;
@@ -117,13 +119,12 @@
     display_char(ds, -1, -1, ' ');
 
     ateof = 0;
-    for (j = 0; j < s->disp_width; j++) {
+    for (j = 0; j < disp_width; j++) {
         offset1 = pos[j];
         offset2 = pos[j + 1];
         if (j < len) {
             b = buf[j];
-            /* CG: should handle double width glyphs */
-            b = to_disp(b);
+            b = unihex_to_disp(b);
         } else {
             b = ' ';
             if (!ateof) {
@@ -134,10 +135,15 @@
             }
         }
         display_char(ds, offset1, offset2, b);
+#if 0
+        /* CG: spacing out single width glyphs is less readable */
+        if (unicode_glyph_tty_width(b) == 1)
+            display_char(ds, -1, -1, ' ');
+#endif
     }
     display_eol(ds, -1, -1);
 
-    if (len >= s->disp_width)
+    if (len >= disp_width)
         return offset;
     else
         return -1;
@@ -184,8 +190,24 @@
     s->offset = eb_goto_char(s->b, pos);
 }
 
+static int unihex_mode_line(EditState *s, char *buf, int buf_size)
+{
+    int percent, pos, cpos;
+
+    cpos = eb_get_char_offset(s->b, s->offset);
+
+    pos = basic_mode_line(s, buf, buf_size, '-');
+    pos += snprintf(buf + pos, buf_size - pos, "0x%x--0x%x--%s",
+                    cpos, s->offset, s->b->charset->name);
+    percent = 0;
+    if (s->b->total_size > 0)
+        percent = (s->offset * 100) / s->b->total_size;
+    pos += snprintf(buf + pos, buf_size - pos, "--%d%%", percent);
+    return pos;
+}
+
 static ModeDef unihex_mode = {
-    "unihex",
+    .name = "unihex",
     .instance_size = 0,
     .mode_probe = NULL,
     .mode_init = unihex_mode_init,
@@ -200,6 +222,7 @@
     .scroll_up_down = text_scroll_up_down,
     .write_char = hex_write_char,
     .mouse_goto = text_mouse_goto,
+    .get_mode_line = unihex_mode_line,
 };
 
 

Index: tests/TestPage.txt
===================================================================
RCS file: /sources/qemacs/qemacs/tests/TestPage.txt,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -b -r1.1.1.1 -r1.2
--- tests/TestPage.txt  29 May 2004 10:19:30 -0000      1.1.1.1
+++ tests/TestPage.txt  4 Jan 2014 15:54:48 -0000       1.2
@@ -35,3 +35,10 @@
 ************ French - Français ****************
 Juste un petit exemple pour dire que les français aussi
 ont à cœur de pouvoir utiliser tous leurs caractères ! :)
+************ Non BMP1 characters ****************
+𐀀𐀁𐀂𐀃🿿
+𐀀𠀀񀀀򀀀󿿿
+ô€€€õ€€€ö€€€÷€€€÷¿¿¿
+øˆ€€€ø€€€ø €€€ø¿¿¿¿
+ù€€€€ú€€€€ü„€€€€üˆ€€€€
+ü€€€€ü €€€€ý€€€€€ý¿¿¿¿¿



reply via email to

[Prev in Thread] Current Thread [Next in Thread]