Unicode character support in screen tab names

Explorer09 · Explorer09 · commit 2ca232d724ba · 2025-03-22T03:58:46.000+08:00
diff --git a/Action.c b/Action.c
@@ -408,13 +408,14 @@ Htop_Reaction Action_setScreenTab(State* st, int x) {
          return 0;
       }
       const char* tab = settings->screens[i]->heading;
-      int len = strlen(tab);
-      if (x < s + len + 2) {
+      const char* ptr = tab;
+      int width = String_mbswidth(&ptr, SIZE_MAX, INT_MAX);
+      if (x < s + width + 2) {
          settings->ssIndex = i;
          setActiveScreen(settings, st, i);
          return HTOP_UPDATE_PANELHDR | HTOP_REFRESH | HTOP_REDRAW_BAR;
       }
-      s += len + 2 + SCREEN_TAB_COLUMN_GAP;
+      s += width + 2 + SCREEN_TAB_COLUMN_GAP;
    }
    return 0;
 }
diff --git a/ScreenManager.c b/ScreenManager.c
@@ -166,11 +166,11 @@ static inline bool drawTab(const int* y, int* x, int l, const char* name, bool c
    (*x)++;
    if (*x >= l)
       return false;
-   int nameLen = strlen(name);
-   int n = MINIMUM(l - *x, nameLen);
+   const char* ptr = name;
+   int nameWidth = String_mbswidth(&ptr, SIZE_MAX, l - *x);
    attrset(CRT_colors[cur ? SCREENS_CUR_TEXT : SCREENS_OTH_TEXT]);
-   mvaddnstr(*y, *x, name, n);
-   *x += n;
+   mvaddnstr(*y, *x, name, (int)(ptr - name));
+   *x += nameWidth;
    if (*x >= l)
       return false;
    attrset(CRT_colors[cur ? SCREENS_CUR_BORDER : SCREENS_OTH_BORDER]);
diff --git a/XUtils.c b/XUtils.c
@@ -10,8 +10,10 @@ in the source distribution for its full text.
 #include "XUtils.h"
 
 #include <assert.h>
+#include <ctype.h> // IWYU pragma: keep
 #include <errno.h>
 #include <fcntl.h>
+#include <limits.h> // IWYU pragma: keep
 #include <math.h>
 #include <stdarg.h>
 #include <stdint.h>
@@ -224,6 +226,257 @@ size_t String_safeStrncpy(char* restrict dest, const char* restrict src, size_t
    return i;
 }
 
+#ifdef HAVE_LIBNCURSESW
+/*
+ * EncodeWChar callback: converts "wc" to its multibyte form and appends it
+ * to the output described by "ps", advancing ps->pos by the encoded length.
+ *
+ * When ps->buf is NULL nothing is stored; only ps->pos is advanced, which
+ * lets a caller measure the required buffer size with a first pass.
+ *
+ * Truncation is not supported: if the character cannot be encoded, or it
+ * does not fit in the remaining space of ps->buf, the function calls fail().
+ */
+static void String_encodeWChar(WCharEncoderState* ps, wchar_t wc) {
+   assert(!ps->buf || ps->pos < ps->size);
+
+   // Always encode into a scratch buffer first. wcrtomb() has no size
+   // argument, so writing directly into ps->buf would overrun it before the
+   // remaining-space check below gets a chance to run.
+   char tempBuf[MB_LEN_MAX];
+   size_t len = wcrtomb(tempBuf, wc, &ps->mbState);
+   assert(len > 0);
+   if (len == (size_t)-1) {
+      assert(len != (size_t)-1);
+      fail();
+   }
+
+   if (ps->buf) {
+      // Test "pos >= size" separately: with assertions compiled out the
+      // unsigned subtraction would otherwise wrap around and hide the error.
+      if (ps->pos >= ps->size || len > ps->size - ps->pos) {
+         assert(ps->pos < ps->size && len <= ps->size - ps->pos);
+         fail();
+      }
+      memcpy((char*)ps->buf + ps->pos, tempBuf, len);
+   }
+
+   ps->pos += len;
+}
+#else
+/*
+ * EncodeWChar callback for builds without wide-character curses: appends
+ * the single byte "c" to ps->buf (when it is not NULL) and advances ps->pos
+ * by one. Calls fail() if the buffer is already full.
+ */
+static void String_encodeWChar(WCharEncoderState* ps, int c) {
+   assert(!ps->buf || ps->pos < ps->size);
+
+   char* buf = ps->buf;
+   if (buf) {
+      // Same policy as the wide-character variant: never write past the
+      // buffer, even when assertions are disabled.
+      if (ps->pos >= ps->size) {
+         fail();
+      }
+      buf[ps->pos] = (char)c;
+   }
+
+   ps->pos += 1;
+}
+#endif
+
+/*
+ * Reads at most "maxLen" bytes from "src" (stopping early at a NUL) and
+ * hands a printable version of the text, one character at a time, to
+ * "encodeWChar" together with "ps". The last character passed to the
+ * callback is always 0 (the terminator).
+ *
+ * Characters that are not printable, and bytes that cannot be decoded, are
+ * substituted with a replacement character (U+FFFD when CRT_utf8 is set,
+ * '?' otherwise). A run of consecutive substitutions produces only one
+ * replacement character in the output.
+ */
+void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar) {
+   assert(src || maxLen == 0);
+
+   size_t pos = 0;
+   // True when the previously emitted character was a replacement
+   bool wasReplaced = false;
+
+#ifdef HAVE_LIBNCURSESW
+   const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?';
+   wchar_t ch;
+
+   mbstate_t decState;
+   memset(&decState, 0, sizeof(decState));
+#else
+   const char replacementChar = '?';
+   char ch;
+#endif
+
+   do {
+      size_t len = 0;
+      bool shouldReplace = false;
+      // Stays 0 when the input is exhausted, which emits the terminator and
+      // ends the loop.
+      ch = 0;
+
+      if (pos < maxLen) {
+         // Read the next character from the byte sequence
+#ifdef HAVE_LIBNCURSESW
+         // Decode with a working copy of the state; it is committed back to
+         // "decState" only when mbrtowc() succeeds.
+         mbstate_t newState;
+         memcpy(&newState, &decState, sizeof(newState));
+         len = mbrtowc(&ch, &src[pos], maxLen - pos, &newState);
+
+         assert(len != 0 || ch == 0);
+         switch (len) {
+         case (size_t)-2:
+            // Incomplete sequence at the end of the input: consume all the
+            // remaining bytes and replace them.
+            errno = EILSEQ;
+            shouldReplace = true;
+            len = maxLen - pos;
+            break;
+
+         case (size_t)-1:
+            // Invalid sequence: skip one byte and replace it.
+            shouldReplace = true;
+            len = 1;
+            break;
+
+         default:
+            memcpy(&decState, &newState, sizeof(decState));
+         }
+#else
+         len = 1;
+         ch = src[pos];
+#endif
+      }
+
+      pos += len;
+
+      // Filter unprintable characters
+      if (!shouldReplace && ch != 0) {
+#ifdef HAVE_LIBNCURSESW
+         shouldReplace = !iswprint(ch);
+#else
+         shouldReplace = !isprint((unsigned char)ch);
+#endif
+      }
+
+      if (shouldReplace) {
+         ch = replacementChar;
+         if (wasReplaced) {
+            // Collapse the run: skip emitting a second replacement. "ch" is
+            // nonzero here, so the loop condition keeps iterating.
+            continue;
+         }
+      }
+      wasReplaced = shouldReplace;
+
+      encodeWChar(ps, ch);
+   } while (ch != 0);
+}
+
+/*
+ * Returns a newly allocated, NUL-terminated copy of the first "maxLen"
+ * bytes of "str" as produced by EncodePrintableString(). The buffer comes
+ * from xMalloc().
+ */
+char* String_makePrintable(const char* str, size_t maxLen) {
+   // Pass 1: no output buffer, so only the required byte count is gathered.
+   WCharEncoderState measure;
+   memset(&measure, 0, sizeof(measure));
+   EncodePrintableString(&measure, str, maxLen, String_encodeWChar);
+
+   const size_t needed = measure.pos;
+   assert(needed > 0);
+
+   // Pass 2: encode again from a fresh state, this time into the buffer.
+   char* out = xMalloc(needed);
+
+   WCharEncoderState writer;
+   memset(&writer, 0, sizeof(writer));
+   writer.buf = out;
+   writer.size = needed;
+   EncodePrintableString(&writer, str, maxLen, String_encodeWChar);
+   assert(writer.pos == needed);
+
+   return out;
+}
+
+/*
+ * Decodes the next character of the string tracked by "ps".
+ *
+ * On success the character is stored in ps->ch, ps->str is advanced past
+ * it, ps->maxLen is reduced accordingly and true is returned. Decoding the
+ * terminating NUL also returns true (with ps->ch == 0) and sets ps->str to
+ * NULL, so that any further call returns false.
+ *
+ * Returns false when there is nothing left to decode (ps->str is NULL or
+ * ps->maxLen is 0), when the input ends in an incomplete sequence (the
+ * remaining bytes are consumed), or when an invalid sequence is found. In
+ * the last case ps->ch is set to WEOF and every later call returns false
+ * until the caller resets the state.
+ *
+ * The state must be zero-initialized before the first call.
+ */
+bool String_decodeNextWChar(MBStringDecoderState* ps) {
+   if (!ps->str || ps->maxLen == 0) {
+      return false;
+   }
+
+   // If the previous call of this function encounters an invalid sequence,
+   // do not continue (because the "mbState" object for mbrtowc() is
+   // undefined). The caller is supposed to reset the state.
+#ifdef HAVE_LIBNCURSESW
+   bool isStateDefined = ps->ch != WEOF;
+#else
+   bool isStateDefined = ps->ch != EOF;
+#endif
+   if (!isStateDefined) {
+      return false;
+   }
+
+#ifdef HAVE_LIBNCURSESW
+   wchar_t wc;
+   size_t len = mbrtowc(&wc, ps->str, ps->maxLen, &ps->mbState);
+   switch (len) {
+   case (size_t)-1:
+      // Invalid sequence
+      ps->ch = WEOF;
+      return false;
+
+   case (size_t)-2:
+      // Incomplete sequence
+      ps->str += ps->maxLen;
+      ps->maxLen = 0;
+      return false;
+
+   case 0:
+      assert(wc == 0);
+
+      ps->str = NULL;
+      ps->maxLen = 0;
+      ps->ch = wc;
+      return true;
+
+   default:
+      ps->str += len;
+      ps->maxLen -= len;
+      ps->ch = wc;
+   }
+   return true;
+#else
+   // Convert through "unsigned char": where plain "char" is signed, a byte
+   // such as 0xFF would otherwise become -1, which equals EOF and would be
+   // mistaken for the "invalid sequence" marker on the next call.
+   ps->ch = (unsigned char)*ps->str;
+   if (ps->ch == 0) {
+      ps->str = NULL;
+      ps->maxLen = 0;
+   } else {
+      ps->str++;
+      ps->maxLen--;
+   }
+   return true;
+#endif
+}
+
+#ifndef HAVE_STRNLEN
+// Fallback used when the C library does not provide strnlen(): returns the
+// number of bytes before the first NUL in "str", looking at no more than
+// "maxLen" bytes.
+static size_t strnlen(const char* str, size_t maxLen) {
+   size_t count = 0;
+   while (count < maxLen && str[count] != '\0') {
+      count++;
+   }
+   return count;
+}
+#endif
+
+/*
+ * Measures how many terminal columns the leading part of "*str" occupies,
+ * reading at most "maxLen" bytes and stopping at a NUL or before the first
+ * character that would make the total exceed "maxWidth" columns. A negative
+ * "maxWidth" means no width limit.
+ *
+ * Returns the accumulated width. On return "*str" points just past the last
+ * byte of the measured substring, so the caller can compute the substring's
+ * byte length by subtracting the pointer value it passed in.
+ *
+ * Without wide-character curses every byte counts as one column.
+ */
+int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
+   assert(*str || maxLen == 0);
+
+   if (maxWidth < 0)
+      maxWidth = INT_MAX;
+
+#ifdef HAVE_LIBNCURSESW
+   MBStringDecoderState state;
+   memset(&state, 0, sizeof(state));
+   state.str = *str;
+   state.maxLen = maxLen;
+
+   int totalWidth = 0;
+
+   while (String_decodeNextWChar(&state)) {
+      if (state.ch == 0)
+         break;
+
+      // wcwidth() reports a negative width for characters it cannot
+      // measure; debug builds treat that as a caller error, other builds
+      // stop measuring there.
+      int w = wcwidth((wchar_t)state.ch);
+      if (w < 0) {
+         assert(w >= 0);
+         break;
+      }
+
+      // Stop before a character that does not fit in the remaining columns
+      if (w > maxWidth - totalWidth)
+         break;
+
+      totalWidth += w;
+
+      // If the character takes zero columns, include the character in the
+      // substring if the working encoding is UTF-8, and ignore it otherwise.
+      // In Unicode, combining characters are always placed after the base
+      // character, but some legacy 8-bit encodings instead place combining
+      // characters before the base character.
+      if (w <= 0 && !CRT_utf8) {
+         continue;
+      }
+
+      // Commit the end of the substring: the distance from the caller's
+      // original pointer to *str is the byte length of the substring
+      // bounded by the width limit.
+      *str = state.str;
+   }
+
+   // The decoder marks an invalid sequence with WEOF; debug builds assert
+   // that the input contained none.
+   assert(state.ch != WEOF);
+   return totalWidth;
+#else
+   // One byte per column: the width limit is also a byte limit
+   maxLen = MINIMUM((unsigned int)maxWidth, maxLen);
+   size_t len = strnlen(*str, maxLen);
+   *str += len;
+   return (int)len;
+#endif
+}
+
 int xAsprintf(char** strp, const char* fmt, ...) {
    va_list vl;
    va_start(vl, fmt);
diff --git a/XUtils.h b/XUtils.h
@@ -23,7 +23,32 @@ in the source distribution for its full text.
 
 #include "Compat.h"
 #include "Macros.h"
+#include "ProvideCurses.h"
+
+
+/* Output cursor handed to an EncodeWChar callback. */
+typedef struct WCharEncoderState_ {
+   size_t pos;        // number of bytes produced so far
+   size_t size;       // capacity of "buf" in bytes
+   void* buf;         // output buffer, or NULL to only count bytes in "pos"
+   mbstate_t mbState; // conversion state for the wide-to-multibyte encoder
+} WCharEncoderState;
+
+/* Cursor used by String_decodeNextWChar(); zero-initialize before use. */
+typedef struct MBStringDecoderState_ {
+   const char* str;   // next byte to decode; NULL once the NUL was decoded
+   size_t maxLen;     // number of bytes still available at "str"
+#ifdef HAVE_LIBNCURSESW
+   wint_t ch;         // last decoded character; WEOF marks an invalid sequence
+   mbstate_t mbState; // conversion state for mbrtowc()
+#else
+   int ch;            // last decoded byte; EOF marks an invalid sequence
+#endif
+} MBStringDecoderState;
 
+/*
+ * Callback type used by EncodePrintableString() to emit one output
+ * character. The character is a wchar_t when building with wide-character
+ * curses and a plain int otherwise.
+ */
+#ifdef HAVE_LIBNCURSESW
+typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, wchar_t wc);
+#else
+typedef ATTR_NONNULL void (*EncodeWChar)(WCharEncoderState* ps, int c);
+#endif
 
 ATTR_NORETURN
 void fail(void);
@@ -102,6 +127,18 @@ static inline char* String_strchrnul(const char* s, int c) {
 ATTR_NONNULL ATTR_ACCESS3_W(1, 3) ATTR_ACCESS3_R(2, 3)
 size_t String_safeStrncpy(char* restrict dest, const char* restrict src, size_t size);
 
+/* Passes a printable rendition of up to "maxLen" bytes of "src", character
+   by character, to "encodeWChar"; see the definition for details. */
+ATTR_NONNULL_N(1, 4) ATTR_ACCESS2_W(1) ATTR_ACCESS3_R(2, 3)
+void EncodePrintableString(WCharEncoderState* ps, const char* src, size_t maxLen, EncodeWChar encodeWChar);
+
+/* Returns a newly allocated printable copy of up to "maxLen" bytes of "str". */
+ATTR_RETNONNULL ATTR_MALLOC ATTR_ACCESS3_R(1, 2)
+char* String_makePrintable(const char* str, size_t maxLen);
+
+/* Decodes the next character of the string tracked by "ps"; returns false
+   when no character could be decoded. */
+ATTR_NONNULL
+bool String_decodeNextWChar(MBStringDecoderState* ps);
+
+/* Returns the column width of the leading part of "*str" that fits within
+   "maxLen" bytes and "maxWidth" columns, and advances "*str" past it. */
+ATTR_NONNULL ATTR_ACCESS2_RW(1)
+int String_mbswidth(const char** str, size_t maxLen, int maxWidth);
+
 ATTR_FORMAT(printf, 2, 3) ATTR_NONNULL_N(1, 2)
 int xAsprintf(char** strp, const char* fmt, ...);
 
diff --git a/configure.ac b/configure.ac
@@ -372,6 +372,7 @@ AC_CHECK_FUNCS([ \
     sched_getscheduler \
     sched_setscheduler \
     strchrnul \
+    strnlen \
    ])
 
 if test "$my_htop_platform" = darwin; then

Original file line number	Diff line number	Diff line change
`@@ -408,13 +408,14 @@ Htop_Reaction Action_setScreenTab(State* st, int x) {`
`408`	`408`	`return 0;`
`409`	`409`	`}`
`410`	`410`	`const char* tab = settings->screens[i]->heading;`
`411`		`- int len = strlen(tab);`
`412`		`- if (x < s + len + 2) {`
	`411`	`+ const char* ptr = tab;`
	`412`	`+ int width = String_mbswidth(&ptr, SIZE_MAX, INT_MAX);`
	`413`	`+ if (x < s + width + 2) {`
`413`	`414`	`settings->ssIndex = i;`
`414`	`415`	`setActiveScreen(settings, st, i);`
`415`	`416`	`return HTOP_UPDATE_PANELHDR \| HTOP_REFRESH \| HTOP_REDRAW_BAR;`
`416`	`417`	`}`
`417`		`- s += len + 2 + SCREEN_TAB_COLUMN_GAP;`
	`418`	`+ s += width + 2 + SCREEN_TAB_COLUMN_GAP;`
`418`	`419`	`}`
`419`	`420`	`return 0;`
`420`	`421`	`}`