/*
 * Boundaries of the two surrogate ranges, each written as a UTF32-cast
 * constant:
 *   high surrogates: 0xD800 .. 0xDBFF
 *   low  surrogates: 0xDC00 .. 0xDFFF
 * The START/END names suggest both bounds are inclusive -- confirm at the
 * use sites.
 * NOTE(review): the (UTF32) cast presumably lets these be compared directly
 * against UTF32 code-point variables; verify against the callers.
 * The expansions are not wrapped in an outer pair of parentheses, so avoid
 * applying postfix operators or sizeof directly to these names.
 */
60 #define UNI_SUR_HIGH_START (UTF32)0xD800
61 #define UNI_SUR_HIGH_END (UTF32)0xDBFF
62 #define UNI_SUR_LOW_START (UTF32)0xDC00
63 #define UNI_SUR_LOW_END (UTF32)0xDFFF
77 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
78 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
79 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
80 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
81 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
82 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
83 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
84 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
93 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
118 const UTF32** sourceStart,
const UTF32* sourceEnd,
121 const UTF32* source = *sourceStart;
122 UTF16* target = *targetStart;
123 while (source < sourceEnd) {
125 if (target >= targetEnd) {
140 *target++ = (
UTF16)ch;
150 if (target + 1 >= targetEnd) {
159 *sourceStart = source;
160 *targetStart = target;
167 const UTF16** sourceStart,
const UTF16* sourceEnd,
170 const UTF16* source = *sourceStart;
171 UTF32* target = *targetStart;
173 while (source < sourceEnd) {
174 const UTF16* oldSource = source;
179 if (source < sourceEnd) {
204 if (target >= targetEnd) {
210 *sourceStart = source;
211 *targetStart = target;
214 fprintf(stderr,
"ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
221 const UTF16** sourceStart,
const UTF16* sourceEnd,
224 const UTF16* source = *sourceStart;
225 UTF8* target = *targetStart;
226 while (source < sourceEnd) {
228 unsigned short bytesToWrite = 0;
229 const UTF32 byteMask = 0xBF;
230 const UTF32 byteMark = 0x80;
231 const UTF16* oldSource = source;
236 if (source < sourceEnd) {
262 if (ch < (
UTF32)0x80) { bytesToWrite = 1;
263 }
else if (ch < (
UTF32)0x800) { bytesToWrite = 2;
264 }
else if (ch < (
UTF32)0x10000) { bytesToWrite = 3;
265 }
else if (ch < (
UTF32)0x110000) { bytesToWrite = 4;
266 }
else { bytesToWrite = 3;
270 target += bytesToWrite;
271 if (target > targetEnd) {
275 switch (bytesToWrite) {
276 case 4: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
277 case 3: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
278 case 2: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
281 target += bytesToWrite;
283 *sourceStart = source;
284 *targetStart = target;
291 const UTF32** sourceStart,
const UTF32* sourceEnd,
294 const UTF32* source = *sourceStart;
295 UTF8* target = *targetStart;
296 while (source < sourceEnd) {
298 unsigned short bytesToWrite = 0;
299 const UTF32 byteMask = 0xBF;
300 const UTF32 byteMark = 0x80;
314 if (ch < (
UTF32)0x80) { bytesToWrite = 1;
315 }
else if (ch < (
UTF32)0x800) { bytesToWrite = 2;
316 }
else if (ch < (
UTF32)0x10000) { bytesToWrite = 3;
318 }
else { bytesToWrite = 3;
323 target += bytesToWrite;
324 if (target > targetEnd) {
328 switch (bytesToWrite) {
329 case 4: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
330 case 3: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
331 case 2: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
334 target += bytesToWrite;
336 *sourceStart = source;
337 *targetStart = target;
356 const UTF8 *srcptr = source+length;
358 default:
return false;
360 case 4:
if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
return false;
361 case 3:
if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
return false;
362 case 2:
if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
return false;
366 case 0xE0:
if (a < 0xA0)
return false;
break;
367 case 0xED:
if (a > 0x9F)
return false;
break;
368 case 0xF0:
if (a < 0x90)
return false;
break;
369 case 0xF4:
if (a > 0x8F)
return false;
break;
370 default:
if (a < 0x80)
return false;
373 case 1:
if (*source >= 0x80 && *source < 0xC2)
return false;
375 if (*source > 0xF4)
return false;
387 if (length > sourceEnd - source) {
410 while (*source != sourceEnd) {
412 if (length > sourceEnd - *source || !
isLegalUTF8(*source, length))
422 const UTF8** sourceStart,
const UTF8* sourceEnd,
425 const UTF8* source = *sourceStart;
426 UTF16* target = *targetStart;
427 while (source < sourceEnd) {
430 if (extraBytesToRead >= sourceEnd - source) {
441 switch (extraBytesToRead) {
442 case 5: ch += *source++; ch <<= 6;
443 case 4: ch += *source++; ch <<= 6;
444 case 3: ch += *source++; ch <<= 6;
445 case 2: ch += *source++; ch <<= 6;
446 case 1: ch += *source++; ch <<= 6;
447 case 0: ch += *source++;
451 if (target >= targetEnd) {
452 source -= (extraBytesToRead+1);
459 source -= (extraBytesToRead+1);
466 *target++ = (
UTF16)ch;
471 source -= (extraBytesToRead+1);
478 if (target + 1 >= targetEnd) {
479 source -= (extraBytesToRead+1);
487 *sourceStart = source;
488 *targetStart = target;
495 const UTF8** sourceStart,
const UTF8* sourceEnd,
498 const UTF8* source = *sourceStart;
499 UTF32* target = *targetStart;
500 while (source < sourceEnd) {
503 if (extraBytesToRead >= sourceEnd - source) {
514 switch (extraBytesToRead) {
515 case 5: ch += *source++; ch <<= 6;
516 case 4: ch += *source++; ch <<= 6;
517 case 3: ch += *source++; ch <<= 6;
518 case 2: ch += *source++; ch <<= 6;
519 case 1: ch += *source++; ch <<= 6;
520 case 0: ch += *source++;
524 if (target >= targetEnd) {
525 source -= (extraBytesToRead+1);
535 source -= (extraBytesToRead+1);
549 *sourceStart = source;
550 *targetStart = target;
static const UTF8 firstByteMark[7]
#define UNI_MAX_LEGAL_UTF32
#define UNI_REPLACEMENT_CHAR
ConversionResult ConvertUTF8toUTF16(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
ConversionResult ConvertUTF16toUTF8(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd)
#define UNI_SUR_LOW_START
ConversionResult ConvertUTF16toUTF32(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
unsigned getNumBytesForUTF8(UTF8 first)
#define UNI_SUR_HIGH_START
ConversionResult ConvertUTF32toUTF16(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)
static const int halfShift
int fprintf(FILE *stream, const char *format, ...);
int fflush(FILE *stream);
static const UTF32 offsetsFromUTF8[6]
static const char trailingBytesForUTF8[256]
static const UTF32 halfMask
ConversionResult ConvertUTF32toUTF8(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
static const UTF32 halfBase
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd)
static Boolean isLegalUTF8(const UTF8 *source, int length)