Source code
Revision control
Copy as Markdown
Other Tools
// generated by diplomat-tool
import { DataError } from "./DataError.mjs"
import { DataProvider } from "./DataProvider.mjs"
import { Locale } from "./Locale.mjs"
import { WordBreakIteratorUtf16 } from "./WordBreakIteratorUtf16.mjs"
import wasm from "./diplomat-wasm.mjs";
import * as diplomatRuntime from "./diplomat-runtime.mjs";
/**
* An ICU4X word-break segmenter, capable of finding word breakpoints in strings.
*
* See the [Rust documentation for `WordSegmenter`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html) for more information.
*/
const WordSegmenter_box_destroy_registry = new FinalizationRegistry(function destroyWordSegmenter(heldPtr) {
    // The held value is the raw pointer supplied at registration time;
    // pass it to the WASM destructor export once the JS wrapper is collected.
    wasm.icu4x_WordSegmenter_destroy_mv1(heldPtr);
});
/**
 * An ICU4X word-break segmenter, capable of finding word breakpoints in strings.
 *
 * This is an opaque wrapper around a pointer into WASM memory. Instances are
 * obtained through the static `create*` factories, not through `new`.
 *
 * See the [Rust documentation for `WordSegmenter`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html) for more information.
 */
export class WordSegmenter {
    // Internal ptr reference:
    #ptr = null;

    // Lifetimes are only to keep dependencies alive.
    // Since JS won't garbage collect until there are no incoming edges.
    #selfEdge = [];

    /**
     * Only usable with the runtime's internal constructor symbol; any other
     * symbol logs an error and leaves the instance without a pointer.
     *
     * @param {symbol} symbol - must be `diplomatRuntime.internalConstructor`.
     * @param {number} ptr - pointer to the underlying segmenter.
     * @param {Array} selfEdge - objects this instance borrows from (empty when owned).
     */
    constructor(symbol, ptr, selfEdge) {
        // Forward the named parameters rather than the legacy `arguments` object.
        return this.#internalConstructor(symbol, ptr, selfEdge);
    }

    /**
     * Performs the actual initialisation behind the public constructor.
     *
     * @returns {WordSegmenter|undefined} `this` on success; `undefined` when the
     * symbol check fails (the constructor then yields an instance whose
     * `ffiValue` is still `null`).
     */
    #internalConstructor(symbol, ptr, selfEdge) {
        if (symbol !== diplomatRuntime.internalConstructor) {
            console.error("WordSegmenter is an Opaque type. You cannot call its constructor.");
            return;
        }
        this.#ptr = ptr;
        this.#selfEdge = selfEdge;

        // Are we being borrowed? If not, we can register.
        if (this.#selfEdge.length === 0) {
            WordSegmenter_box_destroy_registry.register(this, this.#ptr);
        }

        return this;
    }

    /**
     * Wraps a pointer returned by an infallible WASM constructor in an owned
     * (non-borrowing) `WordSegmenter`.
     */
    static #fromOwnedPtr(ptr) {
        return new WordSegmenter(diplomatRuntime.internalConstructor, ptr, []);
    }

    /**
     * Runs a fallible WASM constructor and converts its result into either a
     * `WordSegmenter` or a thrown error.
     *
     * The WASM call happens inside the `try` so the receive buffer is freed
     * even when evaluating the call's arguments throws (for example a
     * null `locale`) or the call itself traps.
     *
     * @param {(receiveBuffer: number) => void} invoke - performs the WASM call,
     * writing its result into the given receive buffer.
     * @returns {WordSegmenter}
     * @throws {Error} with a `DataError` as `cause` when the result flag is unset.
     */
    static #createFromResult(invoke) {
        // NOTE(review): 5/4 are presumably the result's size and alignment, and
        // `true` marks the trailing result flag — confirm against diplomat-runtime.
        const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true);

        try {
            invoke(diplomatReceive.buffer);

            if (!diplomatReceive.resultFlag) {
                const cause = new DataError(diplomatRuntime.internalConstructor, diplomatRuntime.enumDiscriminant(wasm, diplomatReceive.buffer));
                throw new globalThis.Error('DataError: ' + cause.value, { cause });
            }
            return new WordSegmenter(diplomatRuntime.internalConstructor, diplomatRuntime.ptrRead(wasm, diplomatReceive.buffer), []);
        } finally {
            diplomatReceive.free();
        }
    }

    /**
     * The raw pointer handed to WASM functions.
     *
     * @returns {number|null} `null` if the instance was never initialised.
     */
    get ffiValue() {
        return this.#ptr;
    }

    /**
     * Construct a [`WordSegmenter`] with automatically selecting the best available LSTM
     * or dictionary payload data, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_auto) for more information.
     *
     * @returns {WordSegmenter}
     */
    static createAuto() {
        return WordSegmenter.#fromOwnedPtr(wasm.icu4x_WordSegmenter_create_auto_mv1());
    }

    /**
     * Construct a [`WordSegmenter`] with automatically selecting the best available LSTM
     * or dictionary payload data, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     *
     * @param {Locale} locale - content locale.
     * @returns {WordSegmenter}
     * @throws {Error} with a `DataError` cause when construction fails.
     */
    static createAutoWithContentLocale(locale) {
        return WordSegmenter.#createFromResult((receiveBuffer) => {
            wasm.icu4x_WordSegmenter_create_auto_with_content_locale_mv1(receiveBuffer, locale.ffiValue);
        });
    }

    /**
     * Construct a [`WordSegmenter`] with automatically selecting the best available LSTM
     * or dictionary payload data, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
     *
     * @param {DataProvider} provider - data source.
     * @param {Locale} locale - content locale.
     * @returns {WordSegmenter}
     * @throws {Error} with a `DataError` cause when construction fails.
     */
    static createAutoWithContentLocaleAndProvider(provider, locale) {
        return WordSegmenter.#createFromResult((receiveBuffer) => {
            wasm.icu4x_WordSegmenter_create_auto_with_content_locale_and_provider_mv1(receiveBuffer, provider.ffiValue, locale.ffiValue);
        });
    }

    /**
     * Construct a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_lstm) for more information.
     *
     * @returns {WordSegmenter}
     */
    static createLstm() {
        return WordSegmenter.#fromOwnedPtr(wasm.icu4x_WordSegmenter_create_lstm_mv1());
    }

    /**
     * Construct a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     *
     * @param {Locale} locale - content locale.
     * @returns {WordSegmenter}
     * @throws {Error} with a `DataError` cause when construction fails.
     */
    static createLstmWithContentLocale(locale) {
        return WordSegmenter.#createFromResult((receiveBuffer) => {
            wasm.icu4x_WordSegmenter_create_lstm_with_content_locale_mv1(receiveBuffer, locale.ffiValue);
        });
    }

    /**
     * Construct a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
     * Thai, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
     *
     * @param {DataProvider} provider - data source.
     * @param {Locale} locale - content locale.
     * @returns {WordSegmenter}
     * @throws {Error} with a `DataError` cause when construction fails.
     */
    static createLstmWithContentLocaleAndProvider(provider, locale) {
        return WordSegmenter.#createFromResult((receiveBuffer) => {
            wasm.icu4x_WordSegmenter_create_lstm_with_content_locale_and_provider_mv1(receiveBuffer, provider.ffiValue, locale.ffiValue);
        });
    }

    /**
     * Construct a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using compiled data. This does not assume any content locale.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_dictionary) for more information.
     *
     * @returns {WordSegmenter}
     */
    static createDictionary() {
        return WordSegmenter.#fromOwnedPtr(wasm.icu4x_WordSegmenter_create_dictionary_mv1());
    }

    /**
     * Construct a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using compiled data.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
     *
     * @param {Locale} locale - content locale.
     * @returns {WordSegmenter}
     * @throws {Error} with a `DataError` cause when construction fails.
     */
    static createDictionaryWithContentLocale(locale) {
        return WordSegmenter.#createFromResult((receiveBuffer) => {
            wasm.icu4x_WordSegmenter_create_dictionary_with_content_locale_mv1(receiveBuffer, locale.ffiValue);
        });
    }

    /**
     * Construct a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
     * Burmese, Khmer, Lao, and Thai, using a particular data source.
     *
     * Note: currently, it uses dictionary for Chinese and Japanese, and dictionary for Burmese,
     * Khmer, Lao, and Thai.
     *
     * See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
     *
     * @param {DataProvider} provider - data source.
     * @param {Locale} locale - content locale.
     * @returns {WordSegmenter}
     * @throws {Error} with a `DataError` cause when construction fails.
     */
    static createDictionaryWithContentLocaleAndProvider(provider, locale) {
        return WordSegmenter.#createFromResult((receiveBuffer) => {
            wasm.icu4x_WordSegmenter_create_dictionary_with_content_locale_and_provider_mv1(receiveBuffer, provider.ffiValue, locale.ffiValue);
        });
    }

    /**
     * Segments a string.
     *
     * Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
     * to the WHATWG Encoding Standard.
     *
     * See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_utf16) for more information.
     *
     * @param {string} input - text to segment.
     * @returns {WordBreakIteratorUtf16} iterator that keeps this segmenter and
     * the input buffer alive through its lifetime edges.
     */
    segment(input) {
        const functionGarbageCollectorGrip = new diplomatRuntime.GarbageCollectorGrip();

        try {
            const inputSlice = diplomatRuntime.DiplomatBuf.str16(wasm, input);
            // Track the buffer in the grip so the release in `finally` actually
            // covers it; previously the grip was created and released empty.
            functionGarbageCollectorGrip.alloc(inputSlice);

            // This lifetime edge depends on lifetimes 'a
            const aEdges = [this, inputSlice];

            const result = wasm.icu4x_WordSegmenter_segment_utf16_mv1(this.ffiValue, ...inputSlice.splat());
            return new WordBreakIteratorUtf16(diplomatRuntime.internalConstructor, result, [], aEdges);
        } finally {
            functionGarbageCollectorGrip.releaseToGarbageCollector();
        }
    }
}