@@ -10,8 +10,10 @@ in the source distribution for its full text.
10
10
#include "XUtils.h"
11
11
12
12
#include <assert.h>
13
+ #include <ctype.h> // IWYU pragma: keep
13
14
#include <errno.h>
14
15
#include <fcntl.h>
16
+ #include <limits.h> // IWYU pragma: keep
15
17
#include <math.h>
16
18
#include <stdarg.h>
17
19
#include <stdint.h>
@@ -224,6 +226,257 @@ size_t String_safeStrncpy(char* restrict dest, const char* restrict src, size_t
224
226
return i ;
225
227
}
226
228
229
+ #ifdef HAVE_LIBNCURSESW
230
+ static void String_encodeWChar (WCharEncoderState * ps , wchar_t wc ) {
231
+ assert (!ps -> buf || ps -> pos < ps -> size );
232
+
233
+ char tempBuf [MB_LEN_MAX ];
234
+ char * dest = ps -> buf ? (char * )ps -> buf + ps -> pos : tempBuf ;
235
+
236
+ // It is unnecessarily expensive to fix the output string if the caller
237
+ // gives an incorrect buffer size. This function would not support any
238
+ // truncation of the output string.
239
+ size_t len = wcrtomb (dest , wc , & ps -> mbState );
240
+ assert (len > 0 );
241
+ if (len == (size_t )-1 ) {
242
+ assert (len != (size_t )-1 );
243
+ fail ();
244
+ }
245
+ if (ps -> buf && len > ps -> size - ps -> pos ) {
246
+ assert (!ps -> buf || len <= ps -> size - ps -> pos );
247
+ fail ();
248
+ }
249
+
250
+ ps -> pos += len ;
251
+ }
252
+ #else
253
+ static void String_encodeWChar (WCharEncoderState * ps , int c ) {
254
+ assert (!ps -> buf || ps -> pos < ps -> size );
255
+
256
+ char * buf = ps -> buf ;
257
+ if (buf ) {
258
+ buf [ps -> pos ] = (char )c ;
259
+ }
260
+
261
+ ps -> pos += 1 ;
262
+ }
263
+ #endif
264
+
265
+ void EncodePrintableString (WCharEncoderState * ps , const char * src , size_t maxLen , EncodeWChar encodeWChar ) {
266
+ assert (src || maxLen == 0 );
267
+
268
+ size_t pos = 0 ;
269
+ bool wasReplaced = false;
270
+
271
+ #ifdef HAVE_LIBNCURSESW
272
+ const wchar_t replacementChar = CRT_utf8 ? L'\xFFFD' : L'?' ;
273
+ wchar_t ch ;
274
+
275
+ mbstate_t decState ;
276
+ memset (& decState , 0 , sizeof (decState ));
277
+ #else
278
+ const char replacementChar = '?' ;
279
+ char ch ;
280
+ #endif
281
+
282
+ do {
283
+ size_t len = 0 ;
284
+ bool shouldReplace = false;
285
+ ch = 0 ;
286
+
287
+ if (pos < maxLen ) {
288
+ // Read the next character from the byte sequence
289
+ #ifdef HAVE_LIBNCURSESW
290
+ mbstate_t newState ;
291
+ memcpy (& newState , & decState , sizeof (newState ));
292
+ len = mbrtowc (& ch , & src [pos ], maxLen - pos , & newState );
293
+
294
+ assert (len != 0 || ch == 0 );
295
+ switch (len ) {
296
+ case (size_t )-2 :
297
+ errno = EILSEQ ;
298
+ shouldReplace = true;
299
+ len = maxLen - pos ;
300
+ break ;
301
+
302
+ case (size_t )-1 :
303
+ shouldReplace = true;
304
+ len = 1 ;
305
+ break ;
306
+
307
+ default :
308
+ memcpy (& decState , & newState , sizeof (decState ));
309
+ }
310
+ #else
311
+ len = 1 ;
312
+ ch = src [pos ];
313
+ #endif
314
+ }
315
+
316
+ pos += len ;
317
+
318
+ // Filter unprintable characters
319
+ if (!shouldReplace && ch != 0 ) {
320
+ #ifdef HAVE_LIBNCURSESW
321
+ shouldReplace = !iswprint (ch );
322
+ #else
323
+ shouldReplace = !isprint ((unsigned char )ch );
324
+ #endif
325
+ }
326
+
327
+ if (shouldReplace ) {
328
+ ch = replacementChar ;
329
+ if (wasReplaced ) {
330
+ continue ;
331
+ }
332
+ }
333
+ wasReplaced = shouldReplace ;
334
+
335
+ encodeWChar (ps , ch );
336
+ } while (ch != 0 );
337
+ }
338
+
339
+ char * String_makePrintable (const char * str , size_t maxLen ) {
340
+ WCharEncoderState encState ;
341
+
342
+ memset (& encState , 0 , sizeof (encState ));
343
+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
344
+ size_t size = encState .pos ;
345
+ assert (size > 0 );
346
+
347
+ memset (& encState , 0 , sizeof (encState ));
348
+ char * buf = xMalloc (size );
349
+ encState .size = size ;
350
+ encState .buf = buf ;
351
+ EncodePrintableString (& encState , str , maxLen , String_encodeWChar );
352
+ assert (encState .pos == size );
353
+
354
+ return buf ;
355
+ }
356
+
357
+ bool String_decodeNextWChar (MBStringDecoderState * ps ) {
358
+ if (!ps -> str || ps -> maxLen == 0 ) {
359
+ return false;
360
+ }
361
+
362
+ // If the previous call of this function encounters an invalid sequence,
363
+ // do not continue (because the "mbState" object for mbrtowc() is
364
+ // undefined). The caller is supposed to reset the state.
365
+ #ifdef HAVE_LIBNCURSESW
366
+ bool isStateDefined = ps -> ch != WEOF ;
367
+ #else
368
+ bool isStateDefined = ps -> ch != EOF ;
369
+ #endif
370
+ if (!isStateDefined ) {
371
+ return false;
372
+ }
373
+
374
+ #ifdef HAVE_LIBNCURSESW
375
+ wchar_t wc ;
376
+ size_t len = mbrtowc (& wc , ps -> str , ps -> maxLen , & ps -> mbState );
377
+ switch (len ) {
378
+ case (size_t )-1 :
379
+ // Invalid sequence
380
+ ps -> ch = WEOF ;
381
+ return false;
382
+
383
+ case (size_t )-2 :
384
+ // Incomplete sequence
385
+ ps -> str += ps -> maxLen ;
386
+ ps -> maxLen = 0 ;
387
+ return false;
388
+
389
+ case 0 :
390
+ assert (wc == 0 );
391
+
392
+ ps -> str = NULL ;
393
+ ps -> maxLen = 0 ;
394
+ ps -> ch = wc ;
395
+ return true;
396
+
397
+ default :
398
+ ps -> str += len ;
399
+ ps -> maxLen -= len ;
400
+ ps -> ch = wc ;
401
+ }
402
+ return true;
403
+ #else
404
+ ps -> ch = * ps -> str ;
405
+ if (ps -> ch == 0 ) {
406
+ ps -> str = NULL ;
407
+ ps -> maxLen = 0 ;
408
+ } else {
409
+ ps -> str ++ ;
410
+ ps -> maxLen -- ;
411
+ }
412
+ return true;
413
+ #endif
414
+ }
415
+
416
#ifndef HAVE_STRNLEN
/*
 * Fallback for systems without strnlen(): returns the number of bytes in
 * "str" before the first NUL byte, looking at no more than maxLen bytes.
 * If none of the first maxLen bytes is NUL, maxLen is returned.
 */
static size_t strnlen(const char* str, size_t maxLen) {
   size_t count = 0;
   while (count < maxLen && str[count] != '\0') {
      count++;
   }
   return count;
}
#endif
426
+
427
/*
 * Measures how many terminal columns the beginning of a string occupies.
 *
 * str      - in/out. On entry *str points at the string to measure (it may
 *            be NULL only when maxLen is 0). On return *str has been moved
 *            forward past the characters that were counted, so the caller
 *            can obtain the byte length of that prefix by subtracting the
 *            pointer it passed in.
 * maxLen   - maximum number of bytes to examine.
 * maxWidth - maximum number of columns to count; a negative value means
 *            "no limit" (INT_MAX).
 *
 * Returns the width, in columns, of the characters that were counted.
 *
 * With HAVE_LIBNCURSESW the string is decoded character by character and
 * measured with wcwidth(); scanning stops at the NUL character, at a
 * character that does not fit in the remaining width, at a character for
 * which wcwidth() reports a negative width, or when the decoder cannot
 * deliver another character. Without it every byte counts as one column.
 */
int String_mbswidth(const char** str, size_t maxLen, int maxWidth) {
   assert(*str || maxLen == 0);

   const int widthLimit = (maxWidth < 0) ? INT_MAX : maxWidth;

#ifdef HAVE_LIBNCURSESW
   MBStringDecoderState decoder;
   memset(&decoder, 0, sizeof(decoder));
   decoder.str = *str;
   decoder.maxLen = maxLen;

   int columns = 0;

   for (;;) {
      if (!String_decodeNextWChar(&decoder) || decoder.ch == 0) {
         break;
      }

      const int cellWidth = wcwidth((wchar_t)decoder.ch);
      if (cellWidth < 0) {
         // Not expected to happen; stop here in release builds.
         assert(cellWidth >= 0);
         break;
      }

      if (cellWidth > widthLimit - columns) {
         break;
      }

      columns += cellWidth;

      // A zero-width character extends the substring only when the working
      // encoding is UTF-8. In Unicode, combining characters always follow
      // their base character, but some legacy 8-bit encodings place them
      // before the base character, so there they are not committed until a
      // character with a width follows.
      if (cellWidth > 0 || CRT_utf8) {
         // Commit: everything up to here belongs to the measured prefix.
         *str = decoder.str;
      }
   }

   assert(decoder.ch != WEOF);
   return columns;
#else
   const size_t byteLimit = MINIMUM((unsigned int)widthLimit, maxLen);
   const size_t len = strnlen(*str, byteLimit);
   *str += len;
   return (int)len;
#endif
}
479
+
227
480
int xAsprintf (char * * strp , const char * fmt , ...) {
228
481
va_list vl ;
229
482
va_start (vl , fmt );
0 commit comments