You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
68 lines
1.9 KiB
68 lines
1.9 KiB
/**
|
|
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
*
|
|
* This source code is licensed under the MIT license found in the
|
|
* LICENSE file in the root directory of this source tree.
|
|
*
|
|
*
|
|
* @format
|
|
*/
|
|
'use strict';
|
|
/**
|
|
* Decode a UTF-8 encoded string from Hermes with a known length.
|
|
* Based on Emscripten's UTF8ToString with the following differences:
|
|
* - Always reads all bytes up to the given length, including null bytes. This
|
|
* means that we can decode strings that contain null bytes in the middle.
|
|
* - Allow UTF-8 encoded code points that are part of a surrogate pair, even though
|
|
* this is technically invalid UTF-8 that UTF8ToString would convert to 0xfffd.
|
|
*/
|
|
|
|
// Tag the CommonJS exports object with a read-only, non-enumerable
// `__esModule: true` property (the ES-module interop marker).
Object.defineProperty(exports, '__esModule', {value: true});

// The decoder below is this module's default export; the function declaration
// is hoisted, so it can be referenced before its definition.
exports.default = HermesParserDecodeUTF8String;
|
|
|
|
/**
 * Decode `length` bytes of UTF-8 from `heap`, starting at index `ptrIn`, into
 * a JavaScript string.
 *
 * - Null bytes are decoded like any other single-byte code point; they do not
 *   terminate the string.
 * - Three-byte sequences that encode a lone surrogate are passed through as
 *   that UTF-16 code unit instead of being replaced.
 * - Bytes at or beyond `ptrIn + length` are never read. If the range ends in
 *   the middle of a multi-byte sequence, that sequence decodes to a single
 *   U+FFFD replacement character and decoding stops.
 *
 * @param {number} ptrIn - Index in `heap` of the first byte to decode.
 * @param {number} length - Number of bytes to decode.
 * @param {ArrayLike<number>} heap - Indexable collection of byte values
 *   (presumably the Hermes WASM heap as a Uint8Array; confirm against caller).
 * @returns {string} The decoded string.
 */
function HermesParserDecodeUTF8String(ptrIn, length, heap) {
  let ptr = ptrIn;
  const endPtr = ptr + length;
  let str = '';

  while (ptr < endPtr) {
    const lead = heap[ptr++];

    // ASCII characters fit in a single-byte code point
    if (!(lead & 0x80)) {
      str += String.fromCharCode(lead);
      continue;
    }

    // Number of continuation bytes implied by the lead byte. Any lead byte
    // that is not a two- or three-byte prefix is treated as a four-byte prefix.
    let continuationCount;
    if ((lead & 0xe0) === 0xc0) {
      continuationCount = 1;
    } else if ((lead & 0xf0) === 0xe0) {
      continuationCount = 2;
    } else {
      continuationCount = 3;
    }

    // Never read past the requested range: a sequence cut off by `endPtr`
    // would otherwise pull in bytes that are not part of this string.
    if (ptr + continuationCount > endPtr) {
      str += '\ufffd';
      break;
    }

    const u1 = heap[ptr++] & 0x3f;

    // Two byte code point
    if (continuationCount === 1) {
      str += String.fromCharCode(((lead & 0x1f) << 6) | u1);
      continue;
    }

    const u2 = heap[ptr++] & 0x3f;

    let codePoint;
    if (continuationCount === 2) {
      // Three byte code point
      codePoint = ((lead & 0x0f) << 12) | (u1 << 6) | u2;
    } else {
      // Four byte code point
      codePoint =
        ((lead & 0x07) << 18) | (u1 << 12) | (u2 << 6) | (heap[ptr++] & 0x3f);
    }

    if (codePoint < 0x10000) {
      // Code point fits into a single UTF-16 code unit
      str += String.fromCharCode(codePoint);
    } else {
      // Code point does not fit into a single UTF-16 code unit, so convert it
      // to a surrogate pair
      const offset = codePoint - 0x10000;
      str += String.fromCharCode(
        0xd800 | (offset >> 10),
        0xdc00 | (offset & 0x3ff),
      );
    }
  }

  return str;
}