// macros.rs - mozsearch

// Copyright Mozilla Foundation. See the COPYRIGHT

// file at the top-level directory of this distribution.

//

// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or

// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license

// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your

// option. This file may not be copied, modified, or distributed

// except according to those terms.

// Generates one decode method,
// `pub fn $name(&mut self, src: &[u8], dst: &mut [$code_unit], last: bool)
//      -> (DecoderResult, usize, usize)`,
// built around a read-one-byte-at-a-time loop.
//
// The generated method wraps `src` in a `ByteSource` (bound to `$source`) and
// `dst` in a `$dest_struct` (bound to `$dest`). `$preamble` runs at the top of
// the outer loop; then, on each pass of the inner loop:
//
// 1. `$loop_preable` runs.
// 2. If the source has no more bytes, `$eof` runs when `last` is `true`, and
//    unless `$eof` returned, the method returns `DecoderResult::InputEmpty`
//    with the consumed count bound to `$src_consumed` and `$dest.written()`.
// 3. If `$dest.$destination_check()` reports no space, the method returns
//    `DecoderResult::OutputFull` with the source's consumed count and the
//    written count reported by the destination.
// 4. Otherwise one byte is read into `$b` (with the matching unread handle in
//    `$unread_handle`, and the destination handle in `$destination_handle`)
//    and `$body` runs.
//
// The four block arguments are supplied by the caller and can only see the
// locals whose names are passed in as the identifier arguments, because
// `macro_rules!` hygiene hides locals introduced by the macro itself.
// `$slf` is expected to be `self`.
macro_rules! decoder_function {
    ($preamble:block,
     $loop_preable:block,
     $eof:block,
     $body:block,
     $slf:ident,
     $src_consumed:ident,
     $dest:ident,
     $source:ident,
     $b:ident,
     $destination_handle:ident,
     $unread_handle:ident,
     $destination_check:ident,
     $name:ident,
     $code_unit:ty,
     $dest_struct:ident) => (
    pub fn $name(&mut $slf,
                 src: &[u8],
                 dst: &mut [$code_unit],
                 last: bool)
                 -> (DecoderResult, usize, usize) {
        let mut $source = ByteSource::new(src);
        let mut $dest = $dest_struct::new(dst);
        loop { // TODO: remove this loop
            {
                // Start non-boilerplate
                $preamble
                // End non-boilerplate
            }
            loop {
                {
                    $loop_preable
                }
                match $source.check_available() {
                    Space::Full($src_consumed) => {
                        if last {
                            // Start non-boilerplate
                            $eof
                            // End non-boilerplate
                        }
                        return (DecoderResult::InputEmpty, $src_consumed, $dest.written());
                    }
                    Space::Available(source_handle) => {
                        match $dest.$destination_check() {
                            Space::Full(dst_written) => {
                                return (DecoderResult::OutputFull,
                                        source_handle.consumed(),
                                        dst_written);
                            }
                            Space::Available($destination_handle) => {
                                let ($b, $unread_handle) = source_handle.read();
                                // Start non-boilerplate
                                $body
                                // End non-boilerplate
                            }
                        }
                    }
                }
            }
        }
    });
}

// Expands `decoder_function!` twice with the same caller-supplied blocks and
// identifiers: once as `decode_to_utf8_raw` over `u8` code units with
// `Utf8Destination`, and once as `decode_to_utf16_raw` over `u16` code units
// with `Utf16Destination`.
//
// The arguments are forwarded positionally and unchanged; only the method
// name, code unit type and destination struct differ between the two
// expansions.
macro_rules! decoder_functions {
    (
        $preamble:block,
        $loop_preable:block,
        $eof:block,
        $body:block,
        $slf:ident,
        $src_consumed:ident,
        $dest:ident,
        $source:ident,
        $b:ident,
        $destination_handle:ident,
        $unread_handle:ident,
        $destination_check:ident
    ) => {
        decoder_function!(
            $preamble,
            $loop_preable,
            $eof,
            $body,
            $slf,
            $src_consumed,
            $dest,
            $source,
            $b,
            $destination_handle,
            $unread_handle,
            $destination_check,
            decode_to_utf8_raw,
            u8,
            Utf8Destination
        );
        decoder_function!(
            $preamble,
            $loop_preable,
            $eof,
            $body,
            $slf,
            $src_consumed,
            $dest,
            $source,
            $b,
            $destination_handle,
            $unread_handle,
            $destination_check,
            decode_to_utf16_raw,
            u16,
            Utf16Destination
        );
    };
}

// Generates one decode method,
// `pub fn $name(&mut self, src: &[u8], dst: &mut [$code_unit], last: bool)
//      -> (DecoderResult, usize, usize)`,
// for a decoder that keeps at most one pending value in `$slf.lead`
// (an `Option`) between calls.
//
// Shape of the generated method:
//
// * Prolog: if `$slf.lead` holds a value from a previous call, it is taken
//   (the field is reset to `None`), bound to `$lead_minus_offset`, and the
//   trail handling is run once up front: the next byte is read into `$byte`
//   and `$trail` runs. If the source is already empty the method returns
//   `Malformed(1, 0)` when `last` is set and `InputEmpty` otherwise.
// * Main loop (`$outermost`): `dest.$copy_ascii(&mut $source)` either stops
//   with a ready-made return value or hands back a byte (`$non_ascii`) and a
//   destination handle (`$handle`).
// * `'middle` loop: `$lead` is evaluated to produce `$lead_minus_offset`, then
//   one more byte is read into `$byte` and `$trail` runs. If the source ends
//   here, the method returns `Malformed(1, 0)` when `last` is set; otherwise
//   it stores the value in `$slf.lead` and returns `InputEmpty`.
// * After a sequence, one more byte is read directly. A byte above 127 goes
//   straight back to `'middle`. Any other byte is written with `write_ascii`;
//   then, if `$ascii_punctuation` is true and the byte is below 60, the
//   `'innermost` loop reads the next byte the same way, else control returns
//   to `$outermost`.
//
// `$trail` is used as the value of `dest` / `dest_again`, so it has to
// evaluate to the destination to keep writing to (or return). `$slf` is
// expected to be `self`; `$outermost` is the loop label to use for the main
// loop.
macro_rules! ascii_compatible_two_byte_decoder_function {
    ($lead:block,
     $trail:block,
     $slf:ident,
     $non_ascii:ident,
     $byte:ident,
     $lead_minus_offset:ident,
     $unread_handle_trail:ident,
     $source:ident,
     $handle:ident,
     $outermost:tt,
     $copy_ascii:ident,
     $destination_check:ident,
     $name:ident,
     $code_unit:ty,
     $dest_struct:ident,
     $ascii_punctuation:expr) => (
    pub fn $name(&mut $slf,
                 src: &[u8],
                 dst: &mut [$code_unit],
                 last: bool)
                 -> (DecoderResult, usize, usize) {
        let mut $source = ByteSource::new(src);
        let mut dest_prolog = $dest_struct::new(dst);
        let dest = match $slf.lead {
            Some(lead) => {
                let $lead_minus_offset = lead;
                $slf.lead = None;
                // Since we don't have `goto` we could use to jump into the trail
                // handling part of the main loop, we need to repeat trail handling
                // here.
                match $source.check_available() {
                    Space::Full(src_consumed_prolog) => {
                        if last {
                            return (DecoderResult::Malformed(1, 0),
                                    src_consumed_prolog,
                                    dest_prolog.written());
                        }
                        return (DecoderResult::InputEmpty, src_consumed_prolog, dest_prolog.written());
                    }
                    Space::Available(source_handle_prolog) => {
                        match dest_prolog.$destination_check() {
                            Space::Full(dst_written_prolog) => {
                                return (DecoderResult::OutputFull,
                                        source_handle_prolog.consumed(),
                                        dst_written_prolog);
                            }
                            Space::Available($handle) => {
                                let ($byte, $unread_handle_trail) = source_handle_prolog.read();
                                // Start non-boilerplate
                                $trail
                                // End non-boilerplate
                            }
                        }
                    }
                }
            },
            None => {
                &mut dest_prolog
            }
        };
        $outermost: loop {
            match dest.$copy_ascii(&mut $source) {
                CopyAsciiResult::Stop(ret) => return ret,
                CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => {
                    'middle: loop {
                        let dest_again = {
                            let $lead_minus_offset = {
                                // Start non-boilerplate
                                $lead
                                // End non-boilerplate
                            };
                            match $source.check_available() {
                                Space::Full(src_consumed_trail) => {
                                    if last {
                                        return (DecoderResult::Malformed(1, 0),
                                                src_consumed_trail,
                                                $handle.written());
                                    }
                                    // Remember the pending value so the next call's
                                    // prolog can finish the sequence.
                                    $slf.lead = Some($lead_minus_offset);
                                    return (DecoderResult::InputEmpty,
                                            src_consumed_trail,
                                            $handle.written());
                                }
                                Space::Available(source_handle_trail) => {
                                    let ($byte, $unread_handle_trail) = source_handle_trail.read();
                                    // Start non-boilerplate
                                    $trail
                                    // End non-boilerplate
                                }
                            }
                        };
                        match $source.check_available() {
                            Space::Full(src_consumed) => {
                                return (DecoderResult::InputEmpty,
                                        src_consumed,
                                        dest_again.written());
                            }
                            Space::Available(source_handle) => {
                                match dest_again.$destination_check() {
                                    Space::Full(dst_written) => {
                                        return (DecoderResult::OutputFull,
                                                source_handle.consumed(),
                                                dst_written);
                                    }
                                    Space::Available(mut destination_handle) => {
                                        let (mut b, unread_handle) = source_handle.read();
                                        let source_again = unread_handle.commit();
                                        'innermost: loop {
                                            if b > 127 {
                                                $non_ascii = b;
                                                $handle = destination_handle;
                                                continue 'middle;
                                            }
                                            // Testing on Haswell says that we should write the
                                            // byte unconditionally instead of trying to unread it
                                            // to make it part of the next SIMD stride.
                                            let dest_again_again =
                                                destination_handle.write_ascii(b);
                                            if $ascii_punctuation && b < 60 {
                                                // We've got punctuation
                                                match source_again.check_available() {
                                                    Space::Full(src_consumed_again) => {
                                                        return (DecoderResult::InputEmpty,
                                                                src_consumed_again,
                                                                dest_again_again.written());
                                                    }
                                                    Space::Available(source_handle_again) => {
                                                        match dest_again_again.$destination_check() {
                                                            Space::Full(dst_written_again) => {
                                                                return (DecoderResult::OutputFull,
                                                                        source_handle_again.consumed(),
                                                                        dst_written_again);
                                                            }
                                                            Space::Available(destination_handle_again) => {
                                                                {
                                                                    let (b_again, _unread_handle_again) =
                                                                        source_handle_again.read();
                                                                    b = b_again;
                                                                    destination_handle = destination_handle_again;
                                                                    continue 'innermost;
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                            // We've got markup or ASCII text
                                            continue $outermost;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    });
}

// Expands `ascii_compatible_two_byte_decoder_function!` twice with the same
// caller-supplied blocks, identifiers, loop label and `$ascii_punctuation`
// expression: once as `decode_to_utf8_raw` over `u8` code units with
// `Utf8Destination`, and once as `decode_to_utf16_raw` over `u16` code units
// with `Utf16Destination`.
//
// Note that the inner macro takes `$ascii_punctuation` as its last argument,
// after the method name, code unit type and destination struct.
macro_rules! ascii_compatible_two_byte_decoder_functions {
    (
        $lead:block,
        $trail:block,
        $slf:ident,
        $non_ascii:ident,
        $byte:ident,
        $lead_minus_offset:ident,
        $unread_handle_trail:ident,
        $source:ident,
        $handle:ident,
        $outermost:tt,
        $copy_ascii:ident,
        $destination_check:ident,
        $ascii_punctuation:expr
    ) => {
        ascii_compatible_two_byte_decoder_function!(
            $lead,
            $trail,
            $slf,
            $non_ascii,
            $byte,
            $lead_minus_offset,
            $unread_handle_trail,
            $source,
            $handle,
            $outermost,
            $copy_ascii,
            $destination_check,
            decode_to_utf8_raw,
            u8,
            Utf8Destination,
            $ascii_punctuation
        );
        ascii_compatible_two_byte_decoder_function!(
            $lead,
            $trail,
            $slf,
            $non_ascii,
            $byte,
            $lead_minus_offset,
            $unread_handle_trail,
            $source,
            $handle,
            $outermost,
            $copy_ascii,
            $destination_check,
            decode_to_utf16_raw,
            u16,
            Utf16Destination,
            $ascii_punctuation
        );
    };
}

// Generates one decode method,
// `pub fn $name(&mut self, src: &[u8], dst: &mut [$code_unit], last: bool)
//      -> (DecoderResult, usize, usize)`,
// for a decoder whose sequences are up to four bytes long and whose
// cross-call state lives in `$slf.pending_ascii` (an `Option`) and
// `$slf.pending` (a `Gb18030Pending`).
//
// Shape of the generated method:
//
// * Pending ASCII: if `$slf.pending_ascii` is set, it is written first via
//   `check_space_bmp()`; with no room the method returns
//   `(OutputFull, 0, 0)` without touching the source.
// * Pending sequence: while `$slf.pending` is not `None`, one byte at a time
//   is read and dispatched on the pending state (`One`, `Two`, `Three`),
//   running `$second_body`, `$third_body` or `$fourth_body` respectively, or
//   advancing to the next pending state. If the source is empty, the method
//   returns `Malformed($slf.pending.count() as u8, 0)` when `last` is set
//   (after clearing the pending state) and `InputEmpty` otherwise.
// * Main loop (`$outermost`): `copy_ascii_from_check_space_astral` either
//   stops with a ready-made return value or hands back a byte (`$non_ascii`)
//   and a destination handle (`$handle`). In the `'middle` loop `$first_body`
//   yields `$first_minus_offset`; the second byte minus 0x30 decides between
//   the `$second_body` path (value above 9) and the four-byte path, which
//   reads the third and fourth bytes and runs `$third_body` and
//   `$fourth_body`. Whenever the source runs out mid-sequence the method
//   returns `Malformed(n, 0)` with `n` = bytes of the sequence seen so far
//   when `last` is set, else records the state in `$slf.pending` and returns
//   `InputEmpty`.
// * After a sequence, one more byte is read directly: above 127 it restarts
//   `'middle`, otherwise it is written with `write_ascii` and control returns
//   to `$outermost`.
//
// `$slf` is expected to be `self`; `$outermost` is the loop label to use for
// the main loop. The remaining identifier arguments name the locals that the
// caller-supplied blocks refer to.
macro_rules! gb18030_decoder_function {
    ($first_body:block,
     $second_body:block,
     $third_body:block,
     $fourth_body:block,
     $slf:ident,
     $non_ascii:ident,
     $first_minus_offset:ident,
     $second:ident,
     $second_minus_offset:ident,
     $unread_handle_second:ident,
     $third:ident,
     $third_minus_offset:ident,
     $unread_handle_third:ident,
     $fourth:ident,
     $fourth_minus_offset:ident,
     $unread_handle_fourth:ident,
     $source:ident,
     $handle:ident,
     $outermost:tt,
     $name:ident,
     $code_unit:ty,
     $dest_struct:ident) => (
    #[cfg_attr(feature = "cargo-clippy", allow(never_loop))]
    pub fn $name(&mut $slf,
                 src: &[u8],
                 dst: &mut [$code_unit],
                 last: bool)
                 -> (DecoderResult, usize, usize) {
        let mut $source = ByteSource::new(src);
        let mut dest = $dest_struct::new(dst);
        {
            if let Some(ascii) = $slf.pending_ascii {
                match dest.check_space_bmp() {
                    Space::Full(_) => {
                        return (DecoderResult::OutputFull, 0, 0);
                    }
                    Space::Available(pending_ascii_handle) => {
                        $slf.pending_ascii = None;
                        pending_ascii_handle.write_ascii(ascii);
                    }
                }
            }
        }
        while !$slf.pending.is_none() {
            match $source.check_available() {
                Space::Full(src_consumed) => {
                    if last {
                        // Start non-boilerplate
                        let count = $slf.pending.count();
                        $slf.pending = Gb18030Pending::None;
                        return (DecoderResult::Malformed(count as u8, 0),
                                src_consumed,
                                dest.written());
                        // End non-boilerplate
                    }
                    return (DecoderResult::InputEmpty, src_consumed, dest.written());
                }
                Space::Available(source_handle) => {
                    match dest.check_space_astral() {
                        Space::Full(dst_written) => {
                            return (DecoderResult::OutputFull,
                                    source_handle.consumed(),
                                    dst_written);
                        }
                        Space::Available($handle) => {
                            let (byte, unread_handle) = source_handle.read();
                            match $slf.pending {
                                Gb18030Pending::One($first_minus_offset) => {
                                    $slf.pending = Gb18030Pending::None;
                                    let $second = byte;
                                    let $unread_handle_second = unread_handle;
                                    // If second is between 0x40 and 0x7E,
                                    // inclusive, subtract offset 0x40. Else if
                                    // second is between 0x80 and 0xFE, inclusive,
                                    // subtract offset 0x41. In both cases,
                                    // handle as a two-byte sequence.
                                    // Else if second is between 0x30 and 0x39,
                                    // inclusive, subtract offset 0x30 and
                                    // handle as a four-byte sequence.
                                    let $second_minus_offset = $second.wrapping_sub(0x30);
                                    // It's not optimal to do this check first,
                                    // but this results in more readable code.
                                    if $second_minus_offset > (0x39 - 0x30) {
                                        // Start non-boilerplate
                                        $second_body
                                        // End non-boilerplate
                                    } else {
                                        // Four-byte!
                                        $slf.pending = Gb18030Pending::Two($first_minus_offset,
                                                                           $second_minus_offset);
                                        $handle.commit()
                                    }
                                }
                                Gb18030Pending::Two($first_minus_offset, $second_minus_offset) => {
                                    $slf.pending = Gb18030Pending::None;
                                    let $third = byte;
                                    let $unread_handle_third = unread_handle;
                                    let $third_minus_offset = {
                                        // Start non-boilerplate
                                        $third_body
                                        // End non-boilerplate
                                    };
                                    $slf.pending = Gb18030Pending::Three($first_minus_offset,
                                                                         $second_minus_offset,
                                                                         $third_minus_offset);
                                    $handle.commit()
                                }
                                Gb18030Pending::Three($first_minus_offset,
                                                      $second_minus_offset,
                                                      $third_minus_offset) => {
                                    $slf.pending = Gb18030Pending::None;
                                    let $fourth = byte;
                                    let $unread_handle_fourth = unread_handle;
                                    // Start non-boilerplate
                                    $fourth_body
                                    // End non-boilerplate
                                }
                                Gb18030Pending::None => unreachable!("Checked in loop condition"),
                            };
                        }
                    }
                }
            }
        }
        $outermost: loop {
            match dest.copy_ascii_from_check_space_astral(&mut $source) {
                CopyAsciiResult::Stop(ret) => return ret,
                CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => {
                    'middle: loop {
                        let dest_again = {
                            let $first_minus_offset = {
                                // Start non-boilerplate
                                $first_body
                                // End non-boilerplate
                            };
                            match $source.check_available() {
                                Space::Full(src_consumed_trail) => {
                                    if last {
                                        return (DecoderResult::Malformed(1, 0),
                                                src_consumed_trail,
                                                $handle.written());
                                    }
                                    $slf.pending = Gb18030Pending::One($first_minus_offset);
                                    return (DecoderResult::InputEmpty,
                                            src_consumed_trail,
                                            $handle.written());
                                }
                                Space::Available(source_handle_trail) => {
                                    let ($second, $unread_handle_second) = source_handle_trail.read();
                                    // Start non-boilerplate
                                    // If second is between 0x40 and 0x7E,
                                    // inclusive, subtract offset 0x40. Else if
                                    // second is between 0x80 and 0xFE, inclusive,
                                    // subtract offset 0x41. In both cases,
                                    // handle as a two-byte sequence.
                                    // Else if second is between 0x30 and 0x39,
                                    // inclusive, subtract offset 0x30 and
                                    // handle as a four-byte sequence.
                                    let $second_minus_offset = $second.wrapping_sub(0x30);
                                    // It's not optimal to do this check first,
                                    // but this results in more readable code.
                                    if $second_minus_offset > (0x39 - 0x30) {
                                        // Start non-boilerplate
                                        $second_body
                                        // End non-boilerplate
                                    } else {
                                        // Four-byte!
                                        match $unread_handle_second.commit().check_available() {
                                            Space::Full(src_consumed_third) => {
                                                if last {
                                                    return (DecoderResult::Malformed(2, 0),
                                                            src_consumed_third,
                                                            $handle.written());
                                                }
                                                $slf.pending =
                                                    Gb18030Pending::Two($first_minus_offset,
                                                                        $second_minus_offset);
                                                return (DecoderResult::InputEmpty,
                                                        src_consumed_third,
                                                        $handle.written());
                                            }
                                            Space::Available(source_handle_third) => {
                                                let ($third, $unread_handle_third) =
                                                    source_handle_third.read();
                                                let $third_minus_offset = {
                                                    // Start non-boilerplate
                                                    $third_body
                                                    // End non-boilerplate
                                                };
                                                match $unread_handle_third.commit()
                                                                         .check_available() {
                                                    Space::Full(src_consumed_fourth) => {
                                                        if last {
                                                            return (DecoderResult::Malformed(3, 0),
                                                                    src_consumed_fourth,
                                                                    $handle.written());
                                                        }
                                                        $slf.pending = Gb18030Pending::Three($first_minus_offset, $second_minus_offset, $third_minus_offset);
                                                        return (DecoderResult::InputEmpty,
                                                                src_consumed_fourth,
                                                                $handle.written());
                                                    }
                                                    Space::Available(source_handle_fourth) => {
                                                        let ($fourth, $unread_handle_fourth) =
                                                            source_handle_fourth.read();
                                                        // Start non-boilerplate
                                                        $fourth_body
                                                        // End non-boilerplate
                                                    }
                                                }
                                            }
                                        }
                                    }
                                    // End non-boilerplate
                                }
                            }
                        };
                        match $source.check_available() {
                            Space::Full(src_consumed) => {
                                return (DecoderResult::InputEmpty,
                                        src_consumed,
                                        dest_again.written());
                            }
                            Space::Available(source_handle) => {
                                match dest_again.check_space_astral() {
                                    Space::Full(dst_written) => {
                                        return (DecoderResult::OutputFull,
                                                source_handle.consumed(),
                                                dst_written);
                                    }
                                    Space::Available(destination_handle) => {
                                        let (b, _) = source_handle.read();
                                        loop {
                                            if b > 127 {
                                                $non_ascii = b;
                                                $handle = destination_handle;
                                                continue 'middle;
                                            }
                                            // Testing on Haswell says that we should write the
                                            // byte unconditionally instead of trying to unread it
                                            // to make it part of the next SIMD stride.
                                            destination_handle.write_ascii(b);
                                            // We've got markup or ASCII text
                                            continue $outermost;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    });
}

// Expands `gb18030_decoder_function!` twice with the same caller-supplied
// blocks, identifiers and loop label: once as `decode_to_utf8_raw` over `u8`
// code units with `Utf8Destination`, and once as `decode_to_utf16_raw` over
// `u16` code units with `Utf16Destination`.
//
// The arguments are forwarded positionally and unchanged; only the method
// name, code unit type and destination struct differ between the two
// expansions.
macro_rules! gb18030_decoder_functions {
    (
        $first_body:block,
        $second_body:block,
        $third_body:block,
        $fourth_body:block,
        $slf:ident,
        $non_ascii:ident,
        $first_minus_offset:ident,
        $second:ident,
        $second_minus_offset:ident,
        $unread_handle_second:ident,
        $third:ident,
        $third_minus_offset:ident,
        $unread_handle_third:ident,
        $fourth:ident,
        $fourth_minus_offset:ident,
        $unread_handle_fourth:ident,
        $source:ident,
        $handle:ident,
        $outermost:tt
    ) => {
        gb18030_decoder_function!(
            $first_body,
            $second_body,
            $third_body,
            $fourth_body,
            $slf,
            $non_ascii,
            $first_minus_offset,
            $second,
            $second_minus_offset,
            $unread_handle_second,
            $third,
            $third_minus_offset,
            $unread_handle_third,
            $fourth,
            $fourth_minus_offset,
            $unread_handle_fourth,
            $source,
            $handle,
            $outermost,
            decode_to_utf8_raw,
            u8,
            Utf8Destination
        );
        gb18030_decoder_function!(
            $first_body,
            $second_body,
            $third_body,
            $fourth_body,
            $slf,
            $non_ascii,
            $first_minus_offset,
            $second,
            $second_minus_offset,
            $unread_handle_second,
            $third,
            $third_minus_offset,
            $unread_handle_third,
            $fourth,
            $fourth_minus_offset,
            $unread_handle_fourth,
            $source,
            $handle,
            $outermost,
            decode_to_utf16_raw,
            u16,
            Utf16Destination
        );
    };
}

macro_rules! euc_jp_decoder_function {

    ($jis0802_trail_body:block,

     $jis0812_lead_body:block,

     $jis0812_trail_body:block,

     $half_width_katakana_body:block,

     $slf:ident,

     $non_ascii:ident,

     $jis0208_lead_minus_offset:ident,

     $byte:ident,

     $unread_handle_trail:ident,

     $jis0212_lead_minus_offset:ident,

     $lead:ident,

     $unread_handle_jis0212:ident,

     $source:ident,

     $handle:ident,

     $name:ident,

     $code_unit:ty,

     $dest_struct:ident) => (

    #[cfg_attr(feature = "cargo-clippy", allow(never_loop))]

    pub fn $name(&mut $slf,

                 src: &[u8],

                 dst: &mut [$code_unit],

                 last: bool)

                 -> (DecoderResult, usize, usize) {

        let mut $source = ByteSource::new(src);

        let mut dest = $dest_struct::new(dst);

        while !$slf.pending.is_none() {

            match $source.check_available() {

                Space::Full(src_consumed) => {

                    if last {

                        // Start non-boilerplate

                        let count = $slf.pending.count();

                        $slf.pending = EucJpPending::None;

                        return (DecoderResult::Malformed(count as u8, 0),

                                src_consumed,

                                dest.written());

                        // End non-boilerplate

                    return (DecoderResult::InputEmpty, src_consumed, dest.written());

                Space::Available(source_handle) => {

                    match dest.check_space_bmp() {

                        Space::Full(dst_written) => {

                            return (DecoderResult::OutputFull,

                                    source_handle.consumed(),

                                    dst_written);

                        Space::Available($handle) => {

                            let ($byte, $unread_handle_trail) = source_handle.read();

                            match $slf.pending {

                                EucJpPending::Jis0208Lead($jis0208_lead_minus_offset) => {

                                    $slf.pending = EucJpPending::None;

                                    // Start non-boilerplate

                                    $jis0802_trail_body

                                    // End non-boilerplate

                                EucJpPending::Jis0212Shift => {

                                    $slf.pending = EucJpPending::None;

                                    let $lead = $byte;

                                    let $unread_handle_jis0212 = $unread_handle_trail;

                                    let $jis0212_lead_minus_offset = {

                                        // Start non-boilerplate

                                        $jis0812_lead_body

                                        // End non-boilerplate

};

                                    $slf.pending =

                                        EucJpPending::Jis0212Lead($jis0212_lead_minus_offset);

                                    $handle.commit()

                                EucJpPending::Jis0212Lead($jis0212_lead_minus_offset) => {

                                    $slf.pending = EucJpPending::None;

                                    // Start non-boilerplate

                                    $jis0812_trail_body

                                    // End non-boilerplate

                                EucJpPending::HalfWidthKatakana => {

                                    $slf.pending = EucJpPending::None;

                                    // Start non-boilerplate

                                    $half_width_katakana_body

                                    // End non-boilerplate

                                EucJpPending::None => unreachable!("Checked in loop condition"),

};

        'outermost: loop {

            match dest.copy_ascii_from_check_space_bmp(&mut $source) {

                CopyAsciiResult::Stop(ret) => return ret,

                CopyAsciiResult::GoOn((mut $non_ascii, mut $handle)) => {

                    'middle: loop {

                        let dest_again = {

                            // If lead is between 0xA1 and 0xFE, inclusive,

                            // subtract 0xA1. Else if lead is 0x8E, handle the

                            // next byte as half-width Katakana. Else if lead is

                            // 0x8F, expect JIS 0212.

                            let $jis0208_lead_minus_offset = $non_ascii.wrapping_sub(0xA1);

                            if $jis0208_lead_minus_offset <= (0xFE - 0xA1) {

                                // JIS 0208

                                match $source.check_available() {

                                    Space::Full(src_consumed_trail) => {

                                        if last {

                                            return (DecoderResult::Malformed(1, 0),

                                                    src_consumed_trail,

                                                    $handle.written());

                                        $slf.pending =

                                            EucJpPending::Jis0208Lead($jis0208_lead_minus_offset);

                                        return (DecoderResult::InputEmpty,

                                                src_consumed_trail,

                                                $handle.written());

                                    Space::Available(source_handle_trail) => {

                                        let ($byte, $unread_handle_trail) =

                                            source_handle_trail.read();

                                        // Start non-boilerplate

                                        $jis0802_trail_body

                                        // End non-boilerplate

                            } else if $non_ascii == 0x8F {

                                match $source.check_available() {

                                    Space::Full(src_consumed_jis0212) => {

                                        if last {

                                            return (DecoderResult::Malformed(1, 0),

                                                    src_consumed_jis0212,

                                                    $handle.written());

                                        $slf.pending = EucJpPending::Jis0212Shift;

                                        return (DecoderResult::InputEmpty,

                                                src_consumed_jis0212,

                                                $handle.written());

                                    Space::Available(source_handle_jis0212) => {

                                        let ($lead, $unread_handle_jis0212) =

                                            source_handle_jis0212.read();

                                        let $jis0212_lead_minus_offset = {

                                            // Start non-boilerplate

                                            $jis0812_lead_body

                                            // End non-boilerplate

};

                                        match $unread_handle_jis0212.commit().check_available() {

                                            Space::Full(src_consumed_trail) => {

                                                if last {

                                                    return (DecoderResult::Malformed(2, 0),

                                                            src_consumed_trail,

                                                            $handle.written());

                                                $slf.pending = EucJpPending::Jis0212Lead($jis0212_lead_minus_offset);

                                                return (DecoderResult::InputEmpty,

                                                        src_consumed_trail,

                                                        $handle.written());

                                            Space::Available(source_handle_trail) => {

                                                let ($byte, $unread_handle_trail) =

                                                    source_handle_trail.read();

                                                // Start non-boilerplate

                                                $jis0812_trail_body

                                                // End non-boilerplate

                            } else if $non_ascii == 0x8E {

                                match $source.check_available() {

                                    Space::Full(src_consumed_trail) => {

                                        if last {

                                            return (DecoderResult::Malformed(1, 0),

                                                    src_consumed_trail,

                                                    $handle.written());

                                        $slf.pending = EucJpPending::HalfWidthKatakana;

                                        return (DecoderResult::InputEmpty,

                                                src_consumed_trail,

                                                $handle.written());

                                    Space::Available(source_handle_trail) => {

                                        let ($byte, $unread_handle_trail) =

                                            source_handle_trail.read();

                                        // Start non-boilerplate

                                        $half_width_katakana_body

                                        // End non-boilerplate

                            } else {

                                return (DecoderResult::Malformed(1, 0),

                                        $source.consumed(),

                                        $handle.written());

};

                        match $source.check_available() {

                            Space::Full(src_consumed) => {

                                return (DecoderResult::InputEmpty,

                                        src_consumed,

                                        dest_again.written());

                            Space::Available(source_handle) => {

                                match dest_again.check_space_bmp() {

                                    Space::Full(dst_written) => {

                                        return (DecoderResult::OutputFull,

                                                source_handle.consumed(),

                                                dst_written);

                                    Space::Available(destination_handle) => {

                                        let (b, _) = source_handle.read();

                                        loop {

                                            if b > 127 {

                                                $non_ascii = b;

                                                $handle = destination_handle;

                                                continue 'middle;

                                            // Testing on Haswell says that we should write the

                                            // byte unconditionally instead of trying to unread it

                                            // to make it part of the next SIMD stride.

                                            destination_handle.write_ascii(b);

                                            // We've got markup or ASCII text

                                            continue 'outermost;

});

// Generates both raw EUC-JP decode methods from one set of body blocks.
//
// Every argument is forwarded unchanged to `euc_jp_decoder_function!`, which
// is invoked twice with three extra trailing arguments each time:
//
// * `decode_to_utf8_raw`, `u8`, `Utf8Destination`
// * `decode_to_utf16_raw`, `u16`, `Utf16Destination`
//
// The four `block` arguments are the non-boilerplate bodies; the `ident`
// arguments are the names under which the caller's blocks refer to the
// bindings that the generated function introduces.
macro_rules! euc_jp_decoder_functions {
    (
        $jis0802_trail_body:block,
        $jis0812_lead_body:block,
        $jis0812_trail_body:block,
        $half_width_katakana_body:block,
        $slf:ident,
        $non_ascii:ident,
        $jis0208_lead_minus_offset:ident,
        $byte:ident,
        $unread_handle_trail:ident,
        $jis0212_lead_minus_offset:ident,
        $lead:ident,
        $unread_handle_jis0212:ident,
        $source:ident,
        $handle:ident
    ) => {
        // UTF-8 output flavor.
        euc_jp_decoder_function!(
            $jis0802_trail_body,
            $jis0812_lead_body,
            $jis0812_trail_body,
            $half_width_katakana_body,
            $slf,
            $non_ascii,
            $jis0208_lead_minus_offset,
            $byte,
            $unread_handle_trail,
            $jis0212_lead_minus_offset,
            $lead,
            $unread_handle_jis0212,
            $source,
            $handle,
            decode_to_utf8_raw,
            u8,
            Utf8Destination
        );
        // UTF-16 output flavor.
        euc_jp_decoder_function!(
            $jis0802_trail_body,
            $jis0812_lead_body,
            $jis0812_trail_body,
            $half_width_katakana_body,
            $slf,
            $non_ascii,
            $jis0208_lead_minus_offset,
            $byte,
            $unread_handle_trail,
            $jis0212_lead_minus_offset,
            $lead,
            $unread_handle_jis0212,
            $source,
            $handle,
            decode_to_utf16_raw,
            u16,
            Utf16Destination
        );
    };
}

// Generates one raw encode method named `$name` with the signature
// `(&mut self, src: &$input, dst: &mut [u8], last: bool)
//     -> (EncoderResult, usize, usize)`.
//
// The generated method wraps `src` in `$source_struct` and `dst` in
// `ByteDestination`, then loops:
//
// 1. If the source reports `Space::Full`, `$eof` is run when `last` is true
//    and the method returns `EncoderResult::InputEmpty` with the consumed
//    and written counts.
// 2. Otherwise `$dest.$destination_check()` is consulted; if it reports
//    `Space::Full` the method returns `EncoderResult::OutputFull`.
// 3. Otherwise one unit is read from the source and `$body` is run.
//
// The identifiers `$slf`, `$src_consumed`, `$source`, `$dest`, `$c`,
// `$destination_handle` and `$unread_handle` are supplied by the caller so
// that the caller's `$eof` and `$body` blocks can refer to the bindings
// introduced here.
macro_rules! encoder_function {
    ($eof:block,
     $body:block,
     $slf:ident,
     $src_consumed:ident,
     $source:ident,
     $dest:ident,
     $c:ident,
     $destination_handle:ident,
     $unread_handle:ident,
     $destination_check:ident,
     $name:ident,
     $input:ty,
     $source_struct:ident) => (
    pub fn $name(&mut $slf,
                 src: &$input,
                 dst: &mut [u8],
                 last: bool)
                 -> (EncoderResult, usize, usize) {
        let mut $source = $source_struct::new(src);
        let mut $dest = ByteDestination::new(dst);
        loop {
            match $source.check_available() {
                Space::Full($src_consumed) => {
                    if last {
                        // Start non-boilerplate
                        $eof
                        // End non-boilerplate
                    }
                    return (EncoderResult::InputEmpty, $src_consumed, $dest.written());
                }
                Space::Available(source_handle) => {
                    match $dest.$destination_check() {
                        Space::Full(dst_written) => {
                            // Nothing has been read from `source_handle` yet,
                            // so its consumed count excludes the pending unit.
                            return (EncoderResult::OutputFull,
                                    source_handle.consumed(),
                                    dst_written);
                        }
                        Space::Available($destination_handle) => {
                            let ($c, $unread_handle) = source_handle.read();
                            // Start non-boilerplate
                            $body
                            // End non-boilerplate
                        }
                    }
                }
            }
        }
    });
}

// Generates both raw encode methods from one `$eof` / `$body` pair.
//
// All arguments are forwarded unchanged to `encoder_function!`, which is
// invoked twice with three extra trailing arguments each time:
//
// * `encode_from_utf8_raw`, `str`, `Utf8Source`
// * `encode_from_utf16_raw`, `[u16]`, `Utf16Source`
macro_rules! encoder_functions {
    (
        $eof:block,
        $body:block,
        $slf:ident,
        $src_consumed:ident,
        $source:ident,
        $dest:ident,
        $c:ident,
        $destination_handle:ident,
        $unread_handle:ident,
        $destination_check:ident
    ) => {
        // UTF-8 input flavor.
        encoder_function!(
            $eof,
            $body,
            $slf,
            $src_consumed,
            $source,
            $dest,
            $c,
            $destination_handle,
            $unread_handle,
            $destination_check,
            encode_from_utf8_raw,
            str,
            Utf8Source
        );
        // UTF-16 input flavor.
        encoder_function!(
            $eof,
            $body,
            $slf,
            $src_consumed,
            $source,
            $dest,
            $c,
            $destination_handle,
            $unread_handle,
            $destination_check,
            encode_from_utf16_raw,
            [u16],
            Utf16Source
        );
    };
}

// Generates one raw encode method named `$name` with the signature
// `(&mut self, src: &$input, dst: &mut [u8], _last: bool)
//     -> (EncoderResult, usize, usize)`.
//
// Shape of the generated method:
//
// * `'outermost`: calls `$source.$copy_ascii(&mut dest)`. A
//   `CopyAsciiResult::Stop` value is returned as-is; `GoOn` yields the first
//   non-ASCII unit together with a destination handle bound to `$handle`.
// * `'middle`: dispatches on `NonAscii::BmpExclAscii($bmp)` versus
//   `NonAscii::Astral($astral)` and runs `$bmp_body` or `$astral_body`. The
//   value of the chosen block becomes `dest_again`, so each block must
//   either evaluate to the destination to continue with or diverge.
//   Afterwards the source and then the destination (`$destination_check`)
//   are checked, returning `InputEmpty` / `OutputFull` when exhausted, and
//   one more unit is read.
// * `'innermost`: a further non-ASCII unit restarts `'middle` directly. An
//   ASCII unit is written immediately; if `$ascii_punctuation` is true and
//   the unit is below 60, the next unit is examined without leaving the
//   loop, otherwise control goes back to `'outermost`.
macro_rules! ascii_compatible_encoder_function {
    ($bmp_body:block,
     $astral_body:block,
     $bmp:ident,
     $astral:ident,
     $slf:ident,
     $source:ident,
     $handle:ident,
     $copy_ascii:ident,
     $destination_check:ident,
     $name:ident,
     $input:ty,
     $source_struct:ident,
     $ascii_punctuation:expr) => (
    pub fn $name(&mut $slf,
                 src: &$input,
                 dst: &mut [u8],
                 _last: bool)
                 -> (EncoderResult, usize, usize) {
        let mut $source = $source_struct::new(src);
        let mut dest = ByteDestination::new(dst);
        'outermost: loop {
            match $source.$copy_ascii(&mut dest) {
                CopyAsciiResult::Stop(ret) => return ret,
                CopyAsciiResult::GoOn((mut non_ascii, mut $handle)) => {
                    'middle: loop {
                        let dest_again = match non_ascii {
                            NonAscii::BmpExclAscii($bmp) => {
                                // Start non-boilerplate
                                $bmp_body
                                // End non-boilerplate
                            }
                            NonAscii::Astral($astral) => {
                                // Start non-boilerplate
                                $astral_body
                                // End non-boilerplate
                            }
                        };
                        match $source.check_available() {
                            Space::Full(src_consumed) => {
                                return (EncoderResult::InputEmpty,
                                        src_consumed,
                                        dest_again.written());
                            }
                            Space::Available(source_handle) => {
                                match dest_again.$destination_check() {
                                    Space::Full(dst_written) => {
                                        return (EncoderResult::OutputFull,
                                                source_handle.consumed(),
                                                dst_written);
                                    }
                                    Space::Available(mut destination_handle) => {
                                        let (mut c, unread_handle) = source_handle.read_enum();
                                        let source_again = unread_handle.commit();
                                        'innermost: loop {
                                            let ascii = match c {
                                                Unicode::NonAscii(non_ascii_again) => {
                                                    // Hand the unit and the destination handle
                                                    // back to the non-ASCII dispatch above.
                                                    non_ascii = non_ascii_again;
                                                    $handle = destination_handle;
                                                    continue 'middle;
                                                }
                                                Unicode::Ascii(a) => a,
                                            };
                                            // Testing on Haswell says that we should write the
                                            // byte unconditionally instead of trying to unread it
                                            // to make it part of the next SIMD stride.
                                            let dest_again_again =
                                                destination_handle.write_one(ascii);
                                            if $ascii_punctuation && ascii < 60 {
                                                // We've got punctuation
                                                match source_again.check_available() {
                                                    Space::Full(src_consumed_again) => {
                                                        return (EncoderResult::InputEmpty,
                                                                src_consumed_again,
                                                                dest_again_again.written());
                                                    }
                                                    Space::Available(source_handle_again) => {
                                                        match dest_again_again.$destination_check() {
                                                            Space::Full(dst_written_again) => {
                                                                return (EncoderResult::OutputFull,
                                                                        source_handle_again.consumed(),
                                                                        dst_written_again);
                                                            }
                                                            Space::Available(destination_handle_again) => {
                                                                let (c_again, _unread_handle_again) =
                                                                    source_handle_again.read_enum();
                                                                c = c_again;
                                                                destination_handle = destination_handle_again;
                                                                continue 'innermost;
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                            // We've got markup or ASCII text
                                            continue 'outermost;
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    });
}

// Generates both raw encode methods for an ASCII-compatible encoder from
// one `$bmp_body` / `$astral_body` pair.
//
// The leading nine arguments are forwarded unchanged to
// `ascii_compatible_encoder_function!`, which is invoked twice. Each
// invocation inserts a method name, input type and source struct, followed
// by `$ascii_punctuation`:
//
// * `encode_from_utf8_raw`, `str`, `Utf8Source`
// * `encode_from_utf16_raw`, `[u16]`, `Utf16Source`
macro_rules! ascii_compatible_encoder_functions {
    (
        $bmp_body:block,
        $astral_body:block,
        $bmp:ident,
        $astral:ident,
        $slf:ident,
        $source:ident,
        $handle:ident,
        $copy_ascii:ident,
        $destination_check:ident,
        $ascii_punctuation:expr
    ) => {
        // UTF-8 input flavor.
        ascii_compatible_encoder_function!(
            $bmp_body,
            $astral_body,
            $bmp,
            $astral,
            $slf,
            $source,
            $handle,
            $copy_ascii,
            $destination_check,
            encode_from_utf8_raw,
            str,
            Utf8Source,
            $ascii_punctuation
        );
        // UTF-16 input flavor.
        ascii_compatible_encoder_function!(
            $bmp_body,
            $astral_body,
            $bmp,
            $astral,
            $slf,
            $source,
            $handle,
            $copy_ascii,
            $destination_check,
            encode_from_utf16_raw,
            [u16],
            Utf16Source,
            $ascii_punctuation
        );
    };
}

// Convenience wrapper around `ascii_compatible_encoder_function!` in which
// the caller supplies only the BMP body.
//
// The astral arm is filled in here: it binds the astral value to the name
// `astral` and immediately returns
// `(EncoderResult::Unmappable(astral), $source.consumed(), $handle.written())`.
// All other arguments are forwarded unchanged.
macro_rules! ascii_compatible_bmp_encoder_function {
    (
        $bmp_body:block,
        $bmp:ident,
        $slf:ident,
        $source:ident,
        $handle:ident,
        $copy_ascii:ident,
        $destination_check:ident,
        $name:ident,
        $input:ty,
        $source_struct:ident,
        $ascii_punctuation:expr
    ) => {
        ascii_compatible_encoder_function!(
            $bmp_body,
            {
                return (
                    EncoderResult::Unmappable(astral),
                    $source.consumed(),
                    $handle.written(),
                );
            },
            $bmp,
            // Binding name used by the block above; both originate in this
            // macro, so they refer to the same variable.
            astral,
            $slf,
            $source,
            $handle,
            $copy_ascii,
            $destination_check,
            $name,
            $input,
            $source_struct,
            $ascii_punctuation
        );
    };
}

// Convenience wrapper around `ascii_compatible_encoder_functions!` in which
// the caller supplies only the BMP body.
//
// The astral arm is filled in here: it binds the astral value to the name
// `astral` and immediately returns
// `(EncoderResult::Unmappable(astral), $source.consumed(), $handle.written())`.
// All other arguments are forwarded unchanged, so the wrapped macro decides
// which methods are generated.
macro_rules! ascii_compatible_bmp_encoder_functions {
    (
        $bmp_body:block,
        $bmp:ident,
        $slf:ident,
        $source:ident,
        $handle:ident,
        $copy_ascii:ident,
        $destination_check:ident,
        $ascii_punctuation:expr
    ) => {
        ascii_compatible_encoder_functions!(
            $bmp_body,
            {
                return (
                    EncoderResult::Unmappable(astral),
                    $source.consumed(),
                    $handle.written(),
                );
            },
            $bmp,
            // Binding name used by the block above; both originate in this
            // macro, so they refer to the same variable.
            astral,
            $slf,
            $source,
            $handle,
            $copy_ascii,
            $destination_check,
            $ascii_punctuation
        );
    };
}

macro_rules! public_decode_function{

    ($(#[$meta:meta])*,

     $decode_to_utf:ident,

     $decode_to_utf_raw:ident,

     $decode_to_utf_checking_end:ident,

     $decode_to_utf_after_one_potential_bom_byte:ident,

     $decode_to_utf_after_two_potential_bom_bytes:ident,

     $decode_to_utf_checking_end_with_offset:ident,

     $code_unit:ty) => (

    $(#[$meta])*

    pub fn $decode_to_utf(&mut self,

                           src: &[u8],

                           dst: &mut [$code_unit],

                           last: bool)

                           -> (DecoderResult, usize, usize) {

        let mut offset = 0usize;

        loop {

            match self.life_cycle {

                // The common case. (Post-sniffing.)

                DecoderLifeCycle::Converting => {

                    return self.$decode_to_utf_checking_end(src, dst, last);

                // The rest is all BOM sniffing!

                DecoderLifeCycle::AtStart => {

                    debug_assert_eq!(offset, 0usize);

                    if src.is_empty() {

                        return (DecoderResult::InputEmpty, 0, 0);

                    match src[0] {

                        0xEFu8 => {

                            self.life_cycle = DecoderLifeCycle::SeenUtf8First;

                            offset += 1;

                            continue;

                        0xFEu8 => {

                            self.life_cycle = DecoderLifeCycle::SeenUtf16BeFirst;

                            offset += 1;

                            continue;

                        0xFFu8 => {

                            self.life_cycle = DecoderLifeCycle::SeenUtf16LeFirst;

                            offset += 1;

                            continue;

                        _ => {

                            self.life_cycle = DecoderLifeCycle::Converting;

                            continue;

                DecoderLifeCycle::AtUtf8Start => {

                    debug_assert_eq!(offset, 0usize);

                    if src.is_empty() {

                        return (DecoderResult::InputEmpty, 0, 0);

                    match src[0] {

                        0xEFu8 => {

                            self.life_cycle = DecoderLifeCycle::SeenUtf8First;

                            offset += 1;

                            continue;

                        _ => {

                            self.life_cycle = DecoderLifeCycle::Converting;

                            continue;

                DecoderLifeCycle::AtUtf16BeStart => {

                    debug_assert_eq!(offset, 0usize);

                    if src.is_empty() {

                        return (DecoderResult::InputEmpty, 0, 0);

                    match src[0] {

                        0xFEu8 => {

                            self.life_cycle = DecoderLifeCycle::SeenUtf16BeFirst;

                            offset += 1;

                            continue;

                        _ => {

                            self.life_cycle = DecoderLifeCycle::Converting;

                            continue;

                DecoderLifeCycle::AtUtf16LeStart => {

                    debug_assert_eq!(offset, 0usize);

                    if src.is_empty() {

                        return (DecoderResult::InputEmpty, 0, 0);

                    match src[0] {

                        0xFFu8 => {

                            self.life_cycle = DecoderLifeCycle::SeenUtf16LeFirst;

                            offset += 1;

                            continue;

                        _ => {

                            self.life_cycle = DecoderLifeCycle::Converting;

                            continue;

                DecoderLifeCycle::SeenUtf8First => {

                    if offset >= src.len() {

                        if last {

                            return self.$decode_to_utf_after_one_potential_bom_byte(src,

                                                                                    dst,

                                                                                    last,

                                                                                    offset,

                                                                                    0xEFu8);

                        return (DecoderResult::InputEmpty, offset, 0);

                    if src[offset] == 0xBBu8 {

                        self.life_cycle = DecoderLifeCycle::SeenUtf8Second;

                        offset += 1;

                        continue;

                    return self.$decode_to_utf_after_one_potential_bom_byte(src,

                                                                            dst,

                                                                            last,

                                                                            offset,

                                                                            0xEFu8);

                DecoderLifeCycle::SeenUtf8Second => {

                    if offset >= src.len() {

                        if last {

                            return self.$decode_to_utf_after_two_potential_bom_bytes(src,

                                                                                     dst,

                                                                                     last,

                                                                                     offset);

                        return (DecoderResult::InputEmpty, offset, 0);

                    if src[offset] == 0xBFu8 {

                        self.life_cycle = DecoderLifeCycle::Converting;

                        offset += 1;

                        if self.encoding != UTF_8 {

                            self.encoding = UTF_8;

                            self.variant = UTF_8.new_variant_decoder();

                        return self.$decode_to_utf_checking_end_with_offset(src,

                                                                            dst,

                                                                            last,

                                                                            offset);

                    return self.$decode_to_utf_after_two_potential_bom_bytes(src,

                                                                             dst,

                                                                             last,

                                                                             offset);

                DecoderLifeCycle::SeenUtf16BeFirst => {

                    if offset >= src.len() {

                        if last {

                            return self.$decode_to_utf_after_one_potential_bom_byte(src,

                                                                                    dst,

                                                                                    last,

                                                                                    offset,

                                                                                    0xFEu8);

                        return (DecoderResult::InputEmpty, offset, 0);

                    if src[offset] == 0xFFu8 {

                        self.life_cycle = DecoderLifeCycle::Converting;

                        offset += 1;

                        if self.encoding != UTF_16BE {

                            self.encoding = UTF_16BE;

                            self.variant = UTF_16BE.new_variant_decoder();

                        return self.$decode_to_utf_checking_end_with_offset(src,

                                                                            dst,

                                                                            last,

                                                                            offset);

                    return self.$decode_to_utf_after_one_potential_bom_byte(src,

                                                                            dst,

                                                                            last,

                                                                            offset,

                                                                            0xFEu8);

                DecoderLifeCycle::SeenUtf16LeFirst => {

                    if offset >= src.len() {

                        if last {

                            return self.$decode_to_utf_after_one_potential_bom_byte(src,

                                                                                    dst,

                                                                                    last,

                                                                                    offset,

                                                                                    0xFFu8);

                        return (DecoderResult::InputEmpty, offset, 0);

                    if src[offset] == 0xFEu8 {

                        self.life_cycle = DecoderLifeCycle::Converting;

                        offset += 1;

                        if self.encoding != UTF_16LE {

                            self.encoding = UTF_16LE;

                            self.variant = UTF_16LE.new_variant_decoder();

                        return self.$decode_to_utf_checking_end_with_offset(src,

                                                                            dst,

                                                                            last,

                                                                            offset);

                    return self.$decode_to_utf_after_one_potential_bom_byte(src,

                                                                            dst,

                                                                            last,

                                                                            offset,

                                                                            0xFFu8);

                DecoderLifeCycle::ConvertingWithPendingBB => {

                    debug_assert_eq!(offset, 0usize);

                    return self.$decode_to_utf_after_one_potential_bom_byte(src,

                                                                            dst,

                                                                            last,

                                                                            0usize,

                                                                            0xBBu8);

                DecoderLifeCycle::Finished => panic!("Must not use a decoder that has finished."),

    fn $decode_to_utf_after_one_potential_bom_byte(&mut self,

                                                   src: &[u8],

                                                   dst: &mut [$code_unit],

                                                   last: bool,

                                                   offset: usize,

                                                   first_byte: u8)

                                                   -> (DecoderResult, usize, usize) {

        self.life_cycle = DecoderLifeCycle::Converting;

        if offset == 0usize {

            // First byte was seen previously.

            let first = [first_byte];

            let mut out_read = 0usize;

            let (mut first_result, _, mut first_written) =

                self.variant

                    .$decode_to_utf_raw(&first[..], dst, false);

            match first_result {

                DecoderResult::InputEmpty => {

                    let (result, read, written) =

                        self.$decode_to_utf_checking_end(src, &mut dst[first_written..], last);

                    first_result = result;

                    out_read = read; // Overwrite, don't add!

                    first_written += written;

                DecoderResult::Malformed(_, _) => {

                    // Wasn't read from `src`!, leave out_read to 0

                DecoderResult::OutputFull => {

                    panic!("Output buffer must have been too small.");

            return (first_result, out_read, first_written);

        debug_assert_eq!(offset, 1usize);

        // The first byte is in `src`, so no need to push it separately.

        self.$decode_to_utf_checking_end(src, dst, last)

    fn $decode_to_utf_after_two_potential_bom_bytes(&mut self,

                                                    src: &[u8],

                                                    dst: &mut [$code_unit],

                                                    last: bool,

                                                    offset: usize)

                                                    -> (DecoderResult, usize, usize) {

        self.life_cycle = DecoderLifeCycle::Converting;

        if offset == 0usize {

            // The first two bytes are not in the current buffer..

            let ef_bb = [0xEFu8, 0xBBu8];

            let (mut first_result, mut first_read, mut first_written) =

                self.variant

                    .$decode_to_utf_raw(&ef_bb[..], dst, false);

            match first_result {

                DecoderResult::InputEmpty => {

                    let (result, read, written) =

                        self.$decode_to_utf_checking_end(src, &mut dst[first_written..], last);

                    first_result = result;

                    first_read = read; // Overwrite, don't add!

                    first_written += written;

                DecoderResult::Malformed(_, _) => {

                    if first_read == 1usize {

                        // The first byte was malformed. We need to handle

                        // the second one, which isn't in `src`, later.

                        self.life_cycle = DecoderLifeCycle::ConvertingWithPendingBB;

                    first_read = 0usize; // Wasn't read from `src`!

                DecoderResult::OutputFull => {

                    panic!("Output buffer must have been too small.");

            return (first_result, first_read, first_written);

        if offset == 1usize {

            // The first byte isn't in the current buffer but the second one

            // is.

            return self.$decode_to_utf_after_one_potential_bom_byte(src,

                                                                    dst,

                                                                    last,

                                                                    0usize,

                                                                    0xEFu8);

        debug_assert_eq!(offset, 2usize);

        // The first two bytes are in `src`, so no need to push them separately.

        self.$decode_to_utf_checking_end(src, dst, last)

    /// Calls `$decode_to_utf_checking_end` with `offset` bytes omitted from

    /// the start of `src` but adjusting the return values to show those bytes

    /// as having been consumed.

    fn $decode_to_utf_checking_end_with_offset(&mut self,

                                               src: &[u8],

                                               dst: &mut [$code_unit],

                                               last: bool,

                                               offset: usize)

                                               -> (DecoderResult, usize, usize) {

        debug_assert_eq!(self.life_cycle, DecoderLifeCycle::Converting);

        let (result, read, written) = self.$decode_to_utf_checking_end(&src[offset..], dst, last);

        (result, read + offset, written)

    /// Calls through to the delegate and adjusts life cycle iff `last` is

    /// `true` and result is `DecoderResult::InputEmpty`.

    fn $decode_to_utf_checking_end(&mut self,

                                   src: &[u8],

                                   dst: &mut [$code_unit],

                                   last: bool)

                                   -> (DecoderResult, usize, usize) {

        debug_assert_eq!(self.life_cycle, DecoderLifeCycle::Converting);

        let (result, read, written) = self.variant

                                          .$decode_to_utf_raw(src, dst, last);

        if last {

            if let DecoderResult::InputEmpty = result {

                self.life_cycle = DecoderLifeCycle::Finished;

        (result, read, written)

});

Revision control

Copy as Markdown

Other Tools