Source code
Revision control
Copy as Markdown
Other Tools
// generated by diplomat-tool
import type { DataError } from "./DataError"
import type { DataProvider } from "./DataProvider"
import type { Locale } from "./Locale"
import type { WordBreakIteratorUtf16 } from "./WordBreakIteratorUtf16"
import type { pointer, codepoint } from "./diplomat-runtime.d.ts";
/**
* An ICU4X word-break segmenter, capable of finding word breakpoints in strings.
*
* See the [Rust documentation for `WordSegmenter`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html) for more information.
*/
export class WordSegmenter {
/**
* Read-only accessor (no setter is declared) exposing a `pointer` value from the Diplomat runtime.
*
* NOTE(review): presumably the handle to the underlying native object; confirm against
* `diplomat-runtime` before relying on it.
*/
get ffiValue(): pointer;
/**
* Constructs a [`WordSegmenter`] that automatically selects the best available LSTM
* or dictionary payload data, using compiled data. This does not assume any content locale.
*
* Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
* Khmer, Lao, and Thai.
*
* See the [Rust documentation for `new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_auto) for more information.
*
* @returns A new `WordSegmenter`.
*/
static createAuto(): WordSegmenter;
/**
* Constructs a [`WordSegmenter`] that automatically selects the best available LSTM
* or dictionary payload data, using compiled data.
*
* Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
* Khmer, Lao, and Thai.
*
* See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
*
* NOTE(review): the linked Rust constructor is fallible (`try_new_auto`) and this file imports
* `DataError`, but the signature does not show how a failure is reported; presumably it is
* thrown. Confirm against the generated implementation.
*
* @param locale - The content locale to construct the segmenter for.
* @returns A new `WordSegmenter`.
*/
static createAutoWithContentLocale(locale: Locale): WordSegmenter;
/**
* Constructs a [`WordSegmenter`] that automatically selects the best available LSTM
* or dictionary payload data, using a particular data source.
*
* Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
* Khmer, Lao, and Thai.
*
* See the [Rust documentation for `try_new_auto`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_auto) for more information.
*
* NOTE(review): the linked Rust constructor is fallible (`try_new_auto`) and this file imports
* `DataError`, but the signature does not show how a failure is reported; presumably it is
* thrown. Confirm against the generated implementation.
*
* @param provider - The data source to use instead of compiled data.
* @param locale - The content locale to construct the segmenter for.
* @returns A new `WordSegmenter`.
*/
static createAutoWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;
/**
* Constructs a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
* Thai, using compiled data. This does not assume any content locale.
*
* Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
* Khmer, Lao, and Thai.
*
* NOTE(review): the note above is worded identically to the one on the auto constructors;
* confirm whether an LSTM-only segmenter really uses a dictionary for Chinese and Japanese.
*
* See the [Rust documentation for `new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_lstm) for more information.
*
* @returns A new `WordSegmenter`.
*/
static createLstm(): WordSegmenter;
/**
* Constructs a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
* Thai, using compiled data.
*
* Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
* Khmer, Lao, and Thai.
*
* NOTE(review): the note above is worded identically to the one on the auto constructors;
* confirm whether an LSTM-only segmenter really uses a dictionary for Chinese and Japanese.
*
* See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
*
* NOTE(review): the linked Rust constructor is fallible (`try_new_lstm`) and this file imports
* `DataError`, but the signature does not show how a failure is reported; presumably it is
* thrown. Confirm against the generated implementation.
*
* @param locale - The content locale to construct the segmenter for.
* @returns A new `WordSegmenter`.
*/
static createLstmWithContentLocale(locale: Locale): WordSegmenter;
/**
* Constructs a [`WordSegmenter`] with LSTM payload data for Burmese, Khmer, Lao, and
* Thai, using a particular data source.
*
* Note: currently, it uses dictionary for Chinese and Japanese, and LSTM for Burmese,
* Khmer, Lao, and Thai.
*
* NOTE(review): the note above is worded identically to the one on the auto constructors;
* confirm whether an LSTM-only segmenter really uses a dictionary for Chinese and Japanese.
*
* See the [Rust documentation for `try_new_lstm`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_lstm) for more information.
*
* NOTE(review): the linked Rust constructor is fallible (`try_new_lstm`) and this file imports
* `DataError`, but the signature does not show how a failure is reported; presumably it is
* thrown. Confirm against the generated implementation.
*
* @param provider - The data source to use instead of compiled data.
* @param locale - The content locale to construct the segmenter for.
* @returns A new `WordSegmenter`.
*/
static createLstmWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;
/**
* Constructs a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
* Burmese, Khmer, Lao, and Thai, using compiled data. This does not assume any content locale.
*
* Note: currently, it uses dictionary for Chinese, Japanese, Burmese, Khmer, Lao, and Thai.
*
* See the [Rust documentation for `new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.new_dictionary) for more information.
*
* @returns A new `WordSegmenter`.
*/
static createDictionary(): WordSegmenter;
/**
* Constructs a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
* Burmese, Khmer, Lao, and Thai, using compiled data.
*
* Note: currently, it uses dictionary for Chinese, Japanese, Burmese, Khmer, Lao, and Thai.
*
* See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
*
* NOTE(review): the linked Rust constructor is fallible (`try_new_dictionary`) and this file
* imports `DataError`, but the signature does not show how a failure is reported; presumably
* it is thrown. Confirm against the generated implementation.
*
* @param locale - The content locale to construct the segmenter for.
* @returns A new `WordSegmenter`.
*/
static createDictionaryWithContentLocale(locale: Locale): WordSegmenter;
/**
* Constructs a [`WordSegmenter`] with dictionary payload data for Chinese, Japanese,
* Burmese, Khmer, Lao, and Thai, using a particular data source.
*
* Note: currently, it uses dictionary for Chinese, Japanese, Burmese, Khmer, Lao, and Thai.
*
* See the [Rust documentation for `try_new_dictionary`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenter.html#method.try_new_dictionary) for more information.
*
* NOTE(review): the linked Rust constructor is fallible (`try_new_dictionary`) and this file
* imports `DataError`, but the signature does not show how a failure is reported; presumably
* it is thrown. Confirm against the generated implementation.
*
* @param provider - The data source to use instead of compiled data.
* @param locale - The content locale to construct the segmenter for.
* @returns A new `WordSegmenter`.
*/
static createDictionaryWithContentLocaleAndProvider(provider: DataProvider, locale: Locale): WordSegmenter;
/**
* Segments a string.
*
* Ill-formed input is treated as if errors had been replaced with REPLACEMENT CHARACTERs according
* to the WHATWG Encoding Standard.
*
* See the [Rust documentation for `segment_utf16`](https://docs.rs/icu/latest/icu/segmenter/struct.WordSegmenterBorrowed.html#method.segment_utf16) for more information.
*
* @param input - The string to segment.
* @returns A `WordBreakIteratorUtf16` for `input`.
* NOTE(review): presumably it yields breakpoints as UTF-16 code unit offsets, given the
* `segment_utf16` backing; confirm against `WordBreakIteratorUtf16`.
*/
segment(input: string): WordBreakIteratorUtf16;
}