groupbylazy.rs - mozsearch

comm-central/third_party/rust/itertools/src/groupbylazy.rs

Enable keyboard shortcuts

Revision control

Copy as Markdown

Other Tools

HG Web

use std::cell::{Cell, RefCell};

use alloc::vec::{self, Vec};

/// A trait to unify `FnMut` for `GroupBy` with the chunk key in `IntoChunks`.
///
/// `A` is the argument passed to the key function and `Key` is the grouping
/// key it produces for that argument.
trait KeyFunction<A> {
    /// The key type produced for each argument.
    type Key;
    /// Compute the key for `arg`. Takes `&mut self` so that implementors may
    /// keep state between calls.
    fn call_mut(&mut self, arg: A) -> Self::Key;
}

impl<A, K, F: ?Sized> KeyFunction<A> for F

    where F: FnMut(A) -> K

    type Key = K;

    #[inline]

    fn call_mut(&mut self, arg: A) -> Self::Key {

        (*self)(arg)

/// `ChunkIndex` acts like the grouping key function for `IntoChunks`
#[derive(Debug)]
struct ChunkIndex {
    /// Number of elements per chunk.
    size: usize,
    /// Number of elements already counted into the current chunk.
    index: usize,
    /// Key handed out for the current chunk; incremented when a chunk is full.
    key: usize,
}

impl ChunkIndex {

    #[inline(always)]

    fn new(size: usize) -> Self {

        ChunkIndex {

            size,

            index: 0,

            key: 0,

impl<A> KeyFunction<A> for ChunkIndex {

    type Key = usize;

    #[inline(always)]

    fn call_mut(&mut self, _arg: A) -> Self::Key {

        if self.index == self.size {

            self.key += 1;

            self.index = 0;

        self.index += 1;

        self.key

/// Shared state behind `GroupBy` and `IntoChunks`: the source iterator, the
/// key function, and the buffers holding elements of groups that were walked
/// past before being consumed.
struct GroupInner<K, I, F>
    where I: Iterator
{
    /// Key function, applied to a reference to each element.
    key: F,
    /// The underlying iterator being grouped.
    iter: I,
    /// Key of the most recently examined element, if any.
    current_key: Option<K>,
    /// Element already pulled from `iter` but not yet handed out.
    current_elt: Option<I::Item>,
    /// flag set if iterator is exhausted
    done: bool,
    /// Index of group we are currently buffering or visiting
    top_group: usize,
    /// Least index for which we still have elements buffered
    oldest_buffered_group: usize,
    /// Group index for `buffer[0]` -- the slots
    /// bottom_group..oldest_buffered_group are unused and will be erased when
    /// that range is large enough.
    bottom_group: usize,
    /// Buffered groups, from `bottom_group` (index 0) to `top_group`.
    buffer: Vec<vec::IntoIter<I::Item>>,
    /// index of last group iter that was dropped, usize::MAX == none
    dropped_group: usize,
}

impl<K, I, F> GroupInner<K, I, F>

    where I: Iterator,

          F: for<'a> KeyFunction<&'a I::Item, Key=K>,

          K: PartialEq,

    /// `client`: Index of group that requests next element

    #[inline(always)]

    fn step(&mut self, client: usize) -> Option<I::Item> {

/*

        println!("client={}, bottom_group={}, oldest_buffered_group={}, top_group={}, buffers=[{}]",

                 client, self.bottom_group, self.oldest_buffered_group,

                 self.top_group,

                 self.buffer.iter().map(|elt| elt.len()).format(", "));

*/

        if client < self.oldest_buffered_group {

            None

        } else if client < self.top_group ||

            (client == self.top_group &&

             self.buffer.len() > self.top_group - self.bottom_group)

            self.lookup_buffer(client)

        } else if self.done {

            None

        } else if self.top_group == client {

            self.step_current()

        } else {

            self.step_buffering(client)

    #[inline(never)]

    fn lookup_buffer(&mut self, client: usize) -> Option<I::Item> {

        // if `bufidx` doesn't exist in self.buffer, it might be empty

        let bufidx = client - self.bottom_group;

        if client < self.oldest_buffered_group {

            return None;

        let elt = self.buffer.get_mut(bufidx).and_then(|queue| queue.next());

        if elt.is_none() && client == self.oldest_buffered_group {

            // FIXME: VecDeque is unfortunately not zero allocation when empty,

            // so we do this job manually.

            // `bottom_group..oldest_buffered_group` is unused, and if it's large enough, erase it.

            self.oldest_buffered_group += 1;

            // skip forward further empty queues too

            while self.buffer.get(self.oldest_buffered_group - self.bottom_group)

                             .map_or(false, |buf| buf.len() == 0)

                self.oldest_buffered_group += 1;

            let nclear = self.oldest_buffered_group - self.bottom_group;

            if nclear > 0 && nclear >= self.buffer.len() / 2 {

                let mut i = 0;

                self.buffer.retain(|buf| {

                    i += 1;

                    debug_assert!(buf.len() == 0 || i > nclear);

                    i > nclear

});

                self.bottom_group = self.oldest_buffered_group;

elt

    /// Take the next element from the iterator, and set the done

    /// flag if exhausted. Must not be called after done.

    #[inline(always)]

    fn next_element(&mut self) -> Option<I::Item> {

        debug_assert!(!self.done);

        match self.iter.next() {

            None => { self.done = true; None }

            otherwise => otherwise,

    #[inline(never)]

    fn step_buffering(&mut self, client: usize) -> Option<I::Item> {

        // requested a later group -- walk through the current group up to

        // the requested group index, and buffer the elements (unless

        // the group is marked as dropped).

        // Because the `Groups` iterator is always the first to request

        // each group index, client is the next index efter top_group.

        debug_assert!(self.top_group + 1 == client);

        let mut group = Vec::new();

        if let Some(elt) = self.current_elt.take() {

            if self.top_group != self.dropped_group {

                group.push(elt);

        let mut first_elt = None; // first element of the next group

        while let Some(elt) = self.next_element() {

            let key = self.key.call_mut(&elt);

            match self.current_key.take() {

                None => {}

                Some(old_key) => if old_key != key {

                    self.current_key = Some(key);

                    first_elt = Some(elt);

                    break;

},

            self.current_key = Some(key);

            if self.top_group != self.dropped_group {

                group.push(elt);

        if self.top_group != self.dropped_group {

            self.push_next_group(group);

        if first_elt.is_some() {

            self.top_group += 1;

            debug_assert!(self.top_group == client);

        first_elt

    fn push_next_group(&mut self, group: Vec<I::Item>) {

        // When we add a new buffered group, fill up slots between oldest_buffered_group and top_group

        while self.top_group - self.bottom_group > self.buffer.len() {

            if self.buffer.is_empty() {

                self.bottom_group += 1;

                self.oldest_buffered_group += 1;

            } else {

                self.buffer.push(Vec::new().into_iter());

        self.buffer.push(group.into_iter());

        debug_assert!(self.top_group + 1 - self.bottom_group == self.buffer.len());

    /// This is the immediate case, where we use no buffering

    #[inline]

    fn step_current(&mut self) -> Option<I::Item> {

        debug_assert!(!self.done);

        if let elt @ Some(..) = self.current_elt.take() {

            return elt;

        match self.next_element() {

            None => None,

            Some(elt) => {

                let key = self.key.call_mut(&elt);

                match self.current_key.take() {

                    None => {}

                    Some(old_key) => if old_key != key {

                        self.current_key = Some(key);

                        self.current_elt = Some(elt);

                        self.top_group += 1;

                        return None;

},

                self.current_key = Some(key);

                Some(elt)

    /// Request the just started groups' key.

///

    /// `client`: Index of group

///

    /// **Panics** if no group key is available.

    fn group_key(&mut self, client: usize) -> K {

        // This can only be called after we have just returned the first

        // element of a group.

        // Perform this by simply buffering one more element, grabbing the

        // next key.

        debug_assert!(!self.done);

        debug_assert!(client == self.top_group);

        debug_assert!(self.current_key.is_some());

        debug_assert!(self.current_elt.is_none());

        let old_key = self.current_key.take().unwrap();

        if let Some(elt) = self.next_element() {

            let key = self.key.call_mut(&elt);

            if old_key != key {

                self.top_group += 1;

            self.current_key = Some(key);

            self.current_elt = Some(elt);

        old_key

impl<K, I, F> GroupInner<K, I, F>

    where I: Iterator,

    /// Called when a group is dropped

    fn drop_group(&mut self, client: usize) {

        // It's only useful to track the maximal index

        if self.dropped_group == !0 || client > self.dropped_group {

            self.dropped_group = client;

/// `GroupBy` is the storage for the lazy grouping operation.

///

/// If the groups are consumed in their original order, or if each

/// group is dropped without keeping it around, then `GroupBy` uses

/// no allocations. It needs allocations only if several group iterators

/// are alive at the same time.

///

/// This type implements [`IntoIterator`] (it is **not** an iterator

/// itself), because the group iterators need to borrow from this

/// value. It should be stored in a local variable or temporary and

/// iterated.

///

/// See [`.group_by()`](crate::Itertools::group_by) for more information.

#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]

pub struct GroupBy<K, I, F>

    where I: Iterator,

    inner: RefCell<GroupInner<K, I, F>>,

    // the group iterator's current index. Keep this in the main value

    // so that simultaneous iterators all use the same state.

    index: Cell<usize>,

/// Create a new

pub fn new<K, J, F>(iter: J, f: F) -> GroupBy<K, J::IntoIter, F>

    where J: IntoIterator,

          F: FnMut(&J::Item) -> K,

    GroupBy {

        inner: RefCell::new(GroupInner {

            key: f,

            iter: iter.into_iter(),

            current_key: None,

            current_elt: None,

            done: false,

            top_group: 0,

            oldest_buffered_group: 0,

            bottom_group: 0,

            buffer: Vec::new(),

            dropped_group: !0,

}),

        index: Cell::new(0),

impl<K, I, F> GroupBy<K, I, F>

    where I: Iterator,

    /// `client`: Index of group that requests next element

    fn step(&self, client: usize) -> Option<I::Item>

        where F: FnMut(&I::Item) -> K,

              K: PartialEq,

        self.inner.borrow_mut().step(client)

    /// `client`: Index of group

    fn drop_group(&self, client: usize) {

        self.inner.borrow_mut().drop_group(client);

impl<'a, K, I, F> IntoIterator for &'a GroupBy<K, I, F>

    where I: Iterator,

          I::Item: 'a,

          F: FnMut(&I::Item) -> K,

          K: PartialEq

    type Item = (K, Group<'a, K, I, F>);

    type IntoIter = Groups<'a, K, I, F>;

    fn into_iter(self) -> Self::IntoIter {

        Groups { parent: self }

/// An iterator that yields the Group iterators.

///

/// Iterator element type is `(K, Group)`:

/// the group's key `K` and the group's iterator.

///

/// See [`.group_by()`](crate::Itertools::group_by) for more information.

#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]

pub struct Groups<'a, K: 'a, I: 'a, F: 'a>

    where I: Iterator,

          I::Item: 'a

    parent: &'a GroupBy<K, I, F>,

impl<'a, K, I, F> Iterator for Groups<'a, K, I, F>

    where I: Iterator,

          I::Item: 'a,

          F: FnMut(&I::Item) -> K,

          K: PartialEq

    type Item = (K, Group<'a, K, I, F>);

    #[inline]

    fn next(&mut self) -> Option<Self::Item> {

        let index = self.parent.index.get();

        self.parent.index.set(index + 1);

        let inner = &mut *self.parent.inner.borrow_mut();

        inner.step(index).map(|elt| {

            let key = inner.group_key(index);

            (key, Group {

                parent: self.parent,

                index,

                first: Some(elt),

})

})

/// An iterator for the elements in a single group.

///

/// Iterator element type is `I::Item`.

pub struct Group<'a, K: 'a, I: 'a, F: 'a>

    where I: Iterator,

          I::Item: 'a,

    parent: &'a GroupBy<K, I, F>,

    index: usize,

    first: Option<I::Item>,

impl<'a, K, I, F> Drop for Group<'a, K, I, F>

    where I: Iterator,

          I::Item: 'a,

    fn drop(&mut self) {

        self.parent.drop_group(self.index);

impl<'a, K, I, F> Iterator for Group<'a, K, I, F>

    where I: Iterator,

          I::Item: 'a,

          F: FnMut(&I::Item) -> K,

          K: PartialEq,

    type Item = I::Item;

    #[inline]

    fn next(&mut self) -> Option<Self::Item> {

        if let elt @ Some(..) = self.first.take() {

            return elt;

        self.parent.step(self.index)

///// IntoChunks /////

/// Create a new

pub fn new_chunks<J>(iter: J, size: usize) -> IntoChunks<J::IntoIter>

    where J: IntoIterator,

    IntoChunks {

        inner: RefCell::new(GroupInner {

            key: ChunkIndex::new(size),

            iter: iter.into_iter(),

            current_key: None,

            current_elt: None,

            done: false,

            top_group: 0,

            oldest_buffered_group: 0,

            bottom_group: 0,

            buffer: Vec::new(),

            dropped_group: !0,

}),

        index: Cell::new(0),

/// `ChunkLazy` is the storage for a lazy chunking operation.

///

/// `IntoChunks` behaves just like `GroupBy`: it is iterable, and

/// it only buffers if several chunk iterators are alive at the same time.

///

/// This type implements [`IntoIterator`] (it is **not** an iterator

/// itself), because the chunk iterators need to borrow from this

/// value. It should be stored in a local variable or temporary and

/// iterated.

///

/// Iterator element type is `Chunk`, each chunk's iterator.

///

/// See [`.chunks()`](crate::Itertools::chunks) for more information.

#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]

pub struct IntoChunks<I>

    where I: Iterator,

    inner: RefCell<GroupInner<usize, I, ChunkIndex>>,

    // the chunk iterator's current index. Keep this in the main value

    // so that simultaneous iterators all use the same state.

    index: Cell<usize>,

impl<I> IntoChunks<I>

    where I: Iterator,

    /// `client`: Index of chunk that requests next element

    fn step(&self, client: usize) -> Option<I::Item> {

        self.inner.borrow_mut().step(client)

    /// `client`: Index of chunk

    fn drop_group(&self, client: usize) {

        self.inner.borrow_mut().drop_group(client);

impl<'a, I> IntoIterator for &'a IntoChunks<I>

    where I: Iterator,

          I::Item: 'a,

    type Item = Chunk<'a, I>;

    type IntoIter = Chunks<'a, I>;

    fn into_iter(self) -> Self::IntoIter {

        Chunks {

            parent: self,

/// An iterator that yields the Chunk iterators.

///

/// Iterator element type is `Chunk`.

///

/// See [`.chunks()`](crate::Itertools::chunks) for more information.

#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]

pub struct Chunks<'a, I: 'a>

    where I: Iterator,

          I::Item: 'a,

    parent: &'a IntoChunks<I>,

impl<'a, I> Iterator for Chunks<'a, I>

    where I: Iterator,

          I::Item: 'a,

    type Item = Chunk<'a, I>;

    #[inline]

    fn next(&mut self) -> Option<Self::Item> {

        let index = self.parent.index.get();

        self.parent.index.set(index + 1);

        let inner = &mut *self.parent.inner.borrow_mut();

        inner.step(index).map(|elt| {

            Chunk {

                parent: self.parent,

                index,

                first: Some(elt),

})

/// An iterator for the elements in a single chunk.

///

/// Iterator element type is `I::Item`.

pub struct Chunk<'a, I: 'a>

    where I: Iterator,

          I::Item: 'a,

    parent: &'a IntoChunks<I>,

    index: usize,

    first: Option<I::Item>,

impl<'a, I> Drop for Chunk<'a, I>

    where I: Iterator,

          I::Item: 'a,

    fn drop(&mut self) {

        self.parent.drop_group(self.index);

impl<'a, I> Iterator for Chunk<'a, I>

    where I: Iterator,

          I::Item: 'a,

    type Item = I::Item;

    #[inline]

    fn next(&mut self) -> Option<Self::Item> {

        if let elt @ Some(..) = self.first.take() {

            return elt;

        self.parent.step(self.index)