You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
66 lines
1.8 KiB
66 lines
1.8 KiB
/**
|
|
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
*
|
|
* This source code is licensed under the MIT license found in the
|
|
* LICENSE file in the root directory of this source tree.
|
|
*
|
|
* @flow strict
|
|
* @format
|
|
*/
|
|
|
|
'use strict';
|
|
|
|
/**
|
|
* Decode a UTF-8 encoded string from Hermes with a known length.
|
|
* Based on Emscripten's UTF8ToString with the following differences:
|
|
* - Always reads all bytes up to the given length, including null bytes. This
|
|
* means that we can decode strings that contain null bytes in the middle.
|
|
* - Allow UTF-8 encoded code points that are part of a surrogate pair, even though
|
|
* this is technically invalid UTF-8 that UTF8ToString would convert to 0xfffd.
|
|
*/
|
|
export default function HermesParserDecodeUTF8String(
|
|
ptrIn: number,
|
|
length: number,
|
|
heap: Uint8Array,
|
|
): string {
|
|
let ptr = ptrIn;
|
|
const endPtr = ptr + length;
|
|
let str = '';
|
|
|
|
while (ptr < endPtr) {
|
|
// ASCII characters fit in single byte code point
|
|
let u0 = heap[ptr++];
|
|
if (!(u0 & 0x80)) {
|
|
str += String.fromCharCode(u0);
|
|
continue;
|
|
}
|
|
|
|
// Two byte code point
|
|
const u1 = heap[ptr++] & 0x3f;
|
|
if ((u0 & 0xe0) === 0xc0) {
|
|
str += String.fromCharCode(((u0 & 0x1f) << 6) | u1);
|
|
continue;
|
|
}
|
|
|
|
const u2 = heap[ptr++] & 0x3f;
|
|
if ((u0 & 0xf0) === 0xe0) {
|
|
// Three byte code point
|
|
u0 = ((u0 & 0x0f) << 12) | (u1 << 6) | u2;
|
|
} else {
|
|
// Four byte code point
|
|
u0 = ((u0 & 0x07) << 18) | (u1 << 12) | (u2 << 6) | (heap[ptr++] & 0x3f);
|
|
}
|
|
|
|
if (u0 < 0x10000) {
|
|
// Code point fits into a single UTF-16 code unit
|
|
str += String.fromCharCode(u0);
|
|
} else {
|
|
// Code point does not fit into single UTF-16 code unit so convert to surrogate pair
|
|
u0 -= 0x10000;
|
|
str += String.fromCharCode(0xd800 | (u0 >> 10), 0xdc00 | (u0 & 0x3ff));
|
|
}
|
|
}
|
|
|
|
return str;
|
|
}
|