decode.rs - mozsearch

comm-central/third_party/rust/base64/src/engine/general_purpose/decode.rs

Enable keyboard shortcuts

Revision control

Copy as Markdown

Other Tools

HG Web

use crate::{

    engine::{general_purpose::INVALID_VALUE, DecodeEstimate, DecodeMetadata, DecodePaddingMode},

    DecodeError, PAD_BYTE,

};

// decode logic operates on chunks of 8 input bytes without padding

const INPUT_CHUNK_LEN: usize = 8;

const DECODED_CHUNK_LEN: usize = 6;

// we read a u64 and write a u64, but a u64 of input only yields 6 bytes of output, so the last

// 2 bytes of any output u64 should not be counted as written to (but must be available in a

// slice).

const DECODED_CHUNK_SUFFIX: usize = 2;

// how many u64's of input to handle at a time

const CHUNKS_PER_FAST_LOOP_BLOCK: usize = 4;

const INPUT_BLOCK_LEN: usize = CHUNKS_PER_FAST_LOOP_BLOCK * INPUT_CHUNK_LEN;

// includes the trailing 2 bytes for the final u64 write

const DECODED_BLOCK_LEN: usize =

    CHUNKS_PER_FAST_LOOP_BLOCK * DECODED_CHUNK_LEN + DECODED_CHUNK_SUFFIX;

#[doc(hidden)]

pub struct GeneralPurposeEstimate {

    /// Total number of decode chunks, including a possibly partial last chunk

    num_chunks: usize,

    decoded_len_estimate: usize,

impl GeneralPurposeEstimate {

    pub(crate) fn new(encoded_len: usize) -> Self {

        // Formulas that won't overflow

        Self {

            num_chunks: encoded_len / INPUT_CHUNK_LEN

                + (encoded_len % INPUT_CHUNK_LEN > 0) as usize,

            decoded_len_estimate: (encoded_len / 4 + (encoded_len % 4 > 0) as usize) * 3,

impl DecodeEstimate for GeneralPurposeEstimate {

    fn decoded_len_estimate(&self) -> usize {

        self.decoded_len_estimate

/// Helper to avoid duplicating num_chunks calculation, which is costly on short inputs.

/// Returns the decode metadata, or an error.

// We're on the fragile edge of compiler heuristics here. If this is not inlined, slow. If this is

// inlined(always), a different slow. plain ol' inline makes the benchmarks happiest at the moment,

// but this is fragile and the best setting changes with only minor code modifications.

#[inline]

pub(crate) fn decode_helper(

    input: &[u8],

    estimate: GeneralPurposeEstimate,

    output: &mut [u8],

    decode_table: &[u8; 256],

    decode_allow_trailing_bits: bool,

    padding_mode: DecodePaddingMode,

) -> Result<DecodeMetadata, DecodeError> {

    let remainder_len = input.len() % INPUT_CHUNK_LEN;

    // Because the fast decode loop writes in groups of 8 bytes (unrolled to

    // CHUNKS_PER_FAST_LOOP_BLOCK times 8 bytes, where possible) and outputs 8 bytes at a time (of

    // which only 6 are valid data), we need to be sure that we stop using the fast decode loop

    // soon enough that there will always be 2 more bytes of valid data written after that loop.

    let trailing_bytes_to_skip = match remainder_len {

        // if input is a multiple of the chunk size, ignore the last chunk as it may have padding,

        // and the fast decode logic cannot handle padding

        0 => INPUT_CHUNK_LEN,

        // 1 and 5 trailing bytes are illegal: can't decode 6 bits of input into a byte

        1 | 5 => {

            // trailing whitespace is so common that it's worth it to check the last byte to

            // possibly return a better error message

            if let Some(b) = input.last() {

                if *b != PAD_BYTE && decode_table[*b as usize] == INVALID_VALUE {

                    return Err(DecodeError::InvalidByte(input.len() - 1, *b));

            return Err(DecodeError::InvalidLength);

        // This will decode to one output byte, which isn't enough to overwrite the 2 extra bytes

        // written by the fast decode loop. So, we have to ignore both these 2 bytes and the

        // previous chunk.

        2 => INPUT_CHUNK_LEN + 2,

        // If this is 3 un-padded chars, then it would actually decode to 2 bytes. However, if this

        // is an erroneous 2 chars + 1 pad char that would decode to 1 byte, then it should fail

        // with an error, not panic from going past the bounds of the output slice, so we let it

        // use stage 3 + 4.

        3 => INPUT_CHUNK_LEN + 3,

        // This can also decode to one output byte because it may be 2 input chars + 2 padding

        // chars, which would decode to 1 byte.

        4 => INPUT_CHUNK_LEN + 4,

        // Everything else is a legal decode len (given that we don't require padding), and will

        // decode to at least 2 bytes of output.

        _ => remainder_len,

};

    // rounded up to include partial chunks

    let mut remaining_chunks = estimate.num_chunks;

    let mut input_index = 0;

    let mut output_index = 0;

        let length_of_fast_decode_chunks = input.len().saturating_sub(trailing_bytes_to_skip);

        // Fast loop, stage 1

        // manual unroll to CHUNKS_PER_FAST_LOOP_BLOCK of u64s to amortize slice bounds checks

        if let Some(max_start_index) = length_of_fast_decode_chunks.checked_sub(INPUT_BLOCK_LEN) {

            while input_index <= max_start_index {

                let input_slice = &input[input_index..(input_index + INPUT_BLOCK_LEN)];

                let output_slice = &mut output[output_index..(output_index + DECODED_BLOCK_LEN)];

                decode_chunk(

                    &input_slice[0..],

                    input_index,

                    decode_table,

                    &mut output_slice[0..],

)?;

                decode_chunk(

                    &input_slice[8..],

                    input_index + 8,

                    decode_table,

                    &mut output_slice[6..],

)?;

                decode_chunk(

                    &input_slice[16..],

                    input_index + 16,

                    decode_table,

                    &mut output_slice[12..],

)?;

                decode_chunk(

                    &input_slice[24..],

                    input_index + 24,

                    decode_table,

                    &mut output_slice[18..],

)?;

                input_index += INPUT_BLOCK_LEN;

                output_index += DECODED_BLOCK_LEN - DECODED_CHUNK_SUFFIX;

                remaining_chunks -= CHUNKS_PER_FAST_LOOP_BLOCK;

        // Fast loop, stage 2 (aka still pretty fast loop)

        // 8 bytes at a time for whatever we didn't do in stage 1.

        if let Some(max_start_index) = length_of_fast_decode_chunks.checked_sub(INPUT_CHUNK_LEN) {

            while input_index < max_start_index {

                decode_chunk(

                    &input[input_index..(input_index + INPUT_CHUNK_LEN)],

                    input_index,

                    decode_table,

                    &mut output

                        [output_index..(output_index + DECODED_CHUNK_LEN + DECODED_CHUNK_SUFFIX)],

)?;

                output_index += DECODED_CHUNK_LEN;

                input_index += INPUT_CHUNK_LEN;

                remaining_chunks -= 1;

    // Stage 3

    // If input length was such that a chunk had to be deferred until after the fast loop

    // because decoding it would have produced 2 trailing bytes that wouldn't then be

    // overwritten, we decode that chunk here. This way is slower but doesn't write the 2

    // trailing bytes.

    // However, we still need to avoid the last chunk (partial or complete) because it could

    // have padding, so we always do 1 fewer to avoid the last chunk.

    for _ in 1..remaining_chunks {

        decode_chunk_precise(

            &input[input_index..],

            input_index,

            decode_table,

            &mut output[output_index..(output_index + DECODED_CHUNK_LEN)],

)?;

        input_index += INPUT_CHUNK_LEN;

        output_index += DECODED_CHUNK_LEN;

    // always have one more (possibly partial) block of 8 input

    debug_assert!(input.len() - input_index > 1 || input.is_empty());

    debug_assert!(input.len() - input_index <= 8);

    super::decode_suffix::decode_suffix(

        input,

        input_index,

        output,

        output_index,

        decode_table,

        decode_allow_trailing_bits,

        padding_mode,

/// Decode 8 bytes of input into 6 bytes of output. 8 bytes of output will be written, but only the

/// first 6 of those contain meaningful data.

///

/// `input` is the bytes to decode, of which the first 8 bytes will be processed.

/// `index_at_start_of_input` is the offset in the overall input (used for reporting errors

/// accurately)

/// `decode_table` is the lookup table for the particular base64 alphabet.

/// `output` will have its first 8 bytes overwritten, of which only the first 6 are valid decoded

/// data.

// yes, really inline (worth 30-50% speedup)

#[inline(always)]

fn decode_chunk(

    input: &[u8],

    index_at_start_of_input: usize,

    decode_table: &[u8; 256],

    output: &mut [u8],

) -> Result<(), DecodeError> {

    let morsel = decode_table[input[0] as usize];

    if morsel == INVALID_VALUE {

        return Err(DecodeError::InvalidByte(index_at_start_of_input, input[0]));

    let mut accum = (morsel as u64) << 58;

    let morsel = decode_table[input[1] as usize];

    if morsel == INVALID_VALUE {

        return Err(DecodeError::InvalidByte(

            index_at_start_of_input + 1,

            input[1],

));

    accum |= (morsel as u64) << 52;

    let morsel = decode_table[input[2] as usize];

    if morsel == INVALID_VALUE {

        return Err(DecodeError::InvalidByte(

            index_at_start_of_input + 2,

            input[2],

));

    accum |= (morsel as u64) << 46;

    let morsel = decode_table[input[3] as usize];

    if morsel == INVALID_VALUE {

        return Err(DecodeError::InvalidByte(

            index_at_start_of_input + 3,

            input[3],

));

    accum |= (morsel as u64) << 40;

    let morsel = decode_table[input[4] as usize];

    if morsel == INVALID_VALUE {

        return Err(DecodeError::InvalidByte(

            index_at_start_of_input + 4,

            input[4],

));

    accum |= (morsel as u64) << 34;

    let morsel = decode_table[input[5] as usize];

    if morsel == INVALID_VALUE {

        return Err(DecodeError::InvalidByte(

            index_at_start_of_input + 5,

            input[5],

));

    accum |= (morsel as u64) << 28;

    let morsel = decode_table[input[6] as usize];

    if morsel == INVALID_VALUE {

        return Err(DecodeError::InvalidByte(

            index_at_start_of_input + 6,

            input[6],

));

    accum |= (morsel as u64) << 22;

    let morsel = decode_table[input[7] as usize];

    if morsel == INVALID_VALUE {

        return Err(DecodeError::InvalidByte(

            index_at_start_of_input + 7,

            input[7],

));

    accum |= (morsel as u64) << 16;

    write_u64(output, accum);

    Ok(())

/// Decode an 8-byte chunk, but only write the 6 bytes actually decoded instead of including 2

/// trailing garbage bytes.

#[inline]

fn decode_chunk_precise(

    input: &[u8],

    index_at_start_of_input: usize,

    decode_table: &[u8; 256],

    output: &mut [u8],

) -> Result<(), DecodeError> {

    let mut tmp_buf = [0_u8; 8];

    decode_chunk(

        input,

        index_at_start_of_input,

        decode_table,

        &mut tmp_buf[..],

)?;

    output[0..6].copy_from_slice(&tmp_buf[0..6]);

    Ok(())

#[inline]

fn write_u64(output: &mut [u8], value: u64) {

    output[..8].copy_from_slice(&value.to_be_bytes());

#[cfg(test)]

mod tests {

    use super::*;

    use crate::engine::general_purpose::STANDARD;

    #[test]

    fn decode_chunk_precise_writes_only_6_bytes() {

        let input = b"Zm9vYmFy"; // "foobar"

        let mut output = [0_u8, 1, 2, 3, 4, 5, 6, 7];

        decode_chunk_precise(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap();

        assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 6, 7], &output);

    #[test]

    fn decode_chunk_writes_8_bytes() {

        let input = b"Zm9vYmFy"; // "foobar"

        let mut output = [0_u8, 1, 2, 3, 4, 5, 6, 7];

        decode_chunk(&input[..], 0, &STANDARD.decode_table, &mut output).unwrap();

        assert_eq!(&vec![b'f', b'o', b'o', b'b', b'a', b'r', 0, 0], &output);

    #[test]

    fn estimate_short_lengths() {

        for (range, (num_chunks, decoded_len_estimate)) in [

            (0..=0, (0, 0)),

            (1..=4, (1, 3)),

            (5..=8, (1, 6)),

            (9..=12, (2, 9)),

            (13..=16, (2, 12)),

            (17..=20, (3, 15)),

] {

            for encoded_len in range {

                let estimate = GeneralPurposeEstimate::new(encoded_len);

                assert_eq!(num_chunks, estimate.num_chunks);

                assert_eq!(decoded_len_estimate, estimate.decoded_len_estimate);

    #[test]

    fn estimate_via_u128_inflation() {

        // cover both ends of usize

        (0..1000)

            .chain(usize::MAX - 1000..=usize::MAX)

            .for_each(|encoded_len| {

                // inflate to 128 bit type to be able to safely use the easy formulas

                let len_128 = encoded_len as u128;

                let estimate = GeneralPurposeEstimate::new(encoded_len);

                assert_eq!(

                    ((len_128 + (INPUT_CHUNK_LEN - 1) as u128) / (INPUT_CHUNK_LEN as u128))

                        as usize,

                    estimate.num_chunks

);

                assert_eq!(

                    ((len_128 + 3) / 4 * 3) as usize,

                    estimate.decoded_len_estimate

);

})