AudioSegment.h - mozsearch

mozilla-central/dom/media/AudioSegment.h (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this file,

 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef MOZILLA_AUDIOSEGMENT_H_

#define MOZILLA_AUDIOSEGMENT_H_

#include <speex/speex_resampler.h>

#include "MediaSegment.h"

#include "AudioSampleFormat.h"

#include "AudioChannelFormat.h"

#include "SharedBuffer.h"

#include "WebAudioUtils.h"

#include "mozilla/ScopeExit.h"

#include "nsAutoRef.h"

#ifdef MOZILLA_INTERNAL_API

#  include "mozilla/TimeStamp.h"

#endif

#include <float.h>

namespace mozilla {

struct AudioChunk;

class AudioSegment;

}  // namespace mozilla

MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioChunk)

/**

 * This allows compilation of nsTArray<AudioSegment> and

 * AutoTArray<AudioSegment> since without it, static analysis fails on the

 * mChunks member being a non-memmovable AutoTArray.

 * Note that AudioSegment(const AudioSegment&) is deleted, so this should

 * never come into effect.

*/

MOZ_DECLARE_RELOCATE_USING_MOVE_CONSTRUCTOR(mozilla::AudioSegment)

namespace mozilla {

template <typename T>

class SharedChannelArrayBuffer : public ThreadSharedObject {

 public:

  explicit SharedChannelArrayBuffer(nsTArray<nsTArray<T> >&& aBuffers)

      : mBuffers(std::move(aBuffers)) {}

  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override {

    size_t amount = 0;

    amount += mBuffers.ShallowSizeOfExcludingThis(aMallocSizeOf);

    for (size_t i = 0; i < mBuffers.Length(); i++) {

      amount += mBuffers[i].ShallowSizeOfExcludingThis(aMallocSizeOf);

    return amount;

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {

    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);

  nsTArray<nsTArray<T> > mBuffers;

};

class AudioMixer;

/**

 * For auto-arrays etc, guess this as the common number of channels.

*/

const int GUESS_AUDIO_CHANNELS = 2;

// We ensure that the graph advances in steps that are multiples of the Web

// Audio block size

const uint32_t WEBAUDIO_BLOCK_SIZE_BITS = 7;

const uint32_t WEBAUDIO_BLOCK_SIZE = 1 << WEBAUDIO_BLOCK_SIZE_BITS;

template <typename SrcT, typename DestT>

static void InterleaveAndConvertBuffer(const SrcT* const* aSourceChannels,

                                       uint32_t aLength, float aVolume,

                                       uint32_t aChannels, DestT* aOutput) {

  DestT* output = aOutput;

  for (size_t i = 0; i < aLength; ++i) {

    for (size_t channel = 0; channel < aChannels; ++channel) {

      float v =

          ConvertAudioSample<float>(aSourceChannels[channel][i]) * aVolume;

      *output = FloatToAudioSample<DestT>(v);

      ++output;

template <typename SrcT, typename DestT>

static void DeinterleaveAndConvertBuffer(const SrcT* aSourceBuffer,

                                         uint32_t aFrames, uint32_t aChannels,

                                         DestT** aOutput) {

  for (size_t i = 0; i < aChannels; i++) {

    size_t interleavedIndex = i;

    for (size_t j = 0; j < aFrames; j++) {

      aOutput[i][j] =

          ConvertAudioSample<DestT>(aSourceBuffer[interleavedIndex]);

      interleavedIndex += aChannels;

class SilentChannel {

 public:

  static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48KHz audio */

  static const uint8_t

      gZeroChannel[MAX_AUDIO_SAMPLE_SIZE * AUDIO_PROCESSING_FRAMES];

  // We take advantage of the fact that zero in float and zero in int have the

  // same all-zeros bit layout.

  template <typename T>

  static const T* ZeroChannel();

};

/**

 * Given an array of input channels (aChannelData), downmix to aOutputChannels,

 * interleave the channel data. A total of aOutputChannels*aDuration

 * interleaved samples will be copied to a channel buffer in aOutput.

*/

template <typename SrcT, typename DestT>

void DownmixAndInterleave(Span<const SrcT* const> aChannelData,

                          int32_t aDuration, float aVolume,

                          uint32_t aOutputChannels, DestT* aOutput) {

  if (aChannelData.Length() == aOutputChannels) {

    InterleaveAndConvertBuffer(aChannelData.Elements(), aDuration, aVolume,

                               aOutputChannels, aOutput);

  } else {

    AutoTArray<SrcT*, GUESS_AUDIO_CHANNELS> outputChannelData;

    AutoTArray<SrcT,

               SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS>

        outputBuffers;

    outputChannelData.SetLength(aOutputChannels);

    outputBuffers.SetLength(aDuration * aOutputChannels);

    for (uint32_t i = 0; i < aOutputChannels; i++) {

      outputChannelData[i] = outputBuffers.Elements() + aDuration * i;

    AudioChannelsDownMix<SrcT, SrcT>(aChannelData, outputChannelData,

                                     aDuration);

    InterleaveAndConvertBuffer(outputChannelData.Elements(), aDuration, aVolume,

                               aOutputChannels, aOutput);

/**

 * An AudioChunk represents a multi-channel buffer of audio samples.

 * It references an underlying ThreadSharedObject which manages the lifetime

 * of the buffer. An AudioChunk maintains its own duration and channel data

 * pointers so it can represent a subinterval of a buffer without copying.

 * An AudioChunk can store its individual channels anywhere; it maintains

 * separate pointers to each channel's buffer.

*/

struct AudioChunk {

  using SampleFormat = mozilla::AudioSampleFormat;

  AudioChunk() = default;

  template <typename T>

  AudioChunk(already_AddRefed<ThreadSharedObject> aBuffer,

             const nsTArray<const T*>& aChannelData, TrackTime aDuration,

             PrincipalHandle aPrincipalHandle)

      : mDuration(aDuration),

        mBuffer(aBuffer),

        mBufferFormat(AudioSampleTypeToFormat<T>::Format),

        mPrincipalHandle(std::move(aPrincipalHandle)) {

    MOZ_ASSERT(!mBuffer == aChannelData.IsEmpty(), "Appending invalid data ?");

    for (const T* data : aChannelData) {

      mChannelData.AppendElement(data);

  // Generic methods

  void SliceTo(TrackTime aStart, TrackTime aEnd) {

    MOZ_ASSERT(aStart >= 0, "Slice out of bounds: invalid start");

    MOZ_ASSERT(aStart < aEnd, "Slice out of bounds: invalid range");

    MOZ_ASSERT(aEnd <= mDuration, "Slice out of bounds: invalid end");

    if (mBuffer) {

      MOZ_ASSERT(aStart < INT32_MAX,

                 "Can't slice beyond 32-bit sample lengths");

      for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {

        mChannelData[channel] = AddAudioSampleOffset(

            mChannelData[channel], mBufferFormat, int32_t(aStart));

    mDuration = aEnd - aStart;

  TrackTime GetDuration() const { return mDuration; }

  bool CanCombineWithFollowing(const AudioChunk& aOther) const {

    if (aOther.mBuffer != mBuffer) {

      return false;

    if (!mBuffer) {

      return true;

    if (aOther.mVolume != mVolume) {

      return false;

    if (aOther.mPrincipalHandle != mPrincipalHandle) {

      return false;

    NS_ASSERTION(aOther.mBufferFormat == mBufferFormat,

                 "Wrong metadata about buffer");

    NS_ASSERTION(aOther.mChannelData.Length() == mChannelData.Length(),

                 "Mismatched channel count");

    if (mDuration > INT32_MAX) {

      return false;

    for (uint32_t channel = 0; channel < mChannelData.Length(); ++channel) {

      if (aOther.mChannelData[channel] !=

          AddAudioSampleOffset(mChannelData[channel], mBufferFormat,

                               int32_t(mDuration))) {

        return false;

    return true;

  bool IsNull() const { return mBuffer == nullptr; }

  void SetNull(TrackTime aDuration) {

    mBuffer = nullptr;

    mChannelData.Clear();

    mDuration = aDuration;

    mVolume = 1.0f;

    mBufferFormat = AUDIO_FORMAT_SILENCE;

    mPrincipalHandle = PRINCIPAL_HANDLE_NONE;

  uint32_t ChannelCount() const { return mChannelData.Length(); }

  bool IsMuted() const { return mVolume == 0.0f; }

  size_t SizeOfExcludingThisIfUnshared(MallocSizeOf aMallocSizeOf) const {

    return SizeOfExcludingThis(aMallocSizeOf, true);

  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf, bool aUnshared) const {

    size_t amount = 0;

    // Possibly owned:

    // - mBuffer - Can hold data that is also in the decoded audio queue. If it

    //             is not shared, or unshared == false it gets counted.

    if (mBuffer && (!aUnshared || !mBuffer->IsShared())) {

      amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);

    // Memory in the array is owned by mBuffer.

    amount += mChannelData.ShallowSizeOfExcludingThis(aMallocSizeOf);

    return amount;

  template <typename T>

  Span<const T* const> ChannelData() const {

    MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);

    return Span(reinterpret_cast<const T* const*>(mChannelData.Elements()),

                mChannelData.Length());

/**

   * ChannelFloatsForWrite() should be used only when mBuffer is owned solely

   * by the calling thread.

*/

  template <typename T>

  T* ChannelDataForWrite(size_t aChannel) {

    MOZ_ASSERT(AudioSampleTypeToFormat<T>::Format == mBufferFormat);

    MOZ_ASSERT(!mBuffer->IsShared());

    return static_cast<T*>(const_cast<void*>(mChannelData[aChannel]));

  template <typename T>

  static AudioChunk FromInterleavedBuffer(

      const T* aBuffer, size_t aFrames, uint32_t aChannels,

      const PrincipalHandle& aPrincipalHandle) {

    CheckedInt<size_t> bufferSize(sizeof(T));

    bufferSize *= aFrames;

    bufferSize *= aChannels;

    RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize);

    AutoTArray<T*, 8> deinterleaved;

    if (aChannels == 1) {

      PodCopy(static_cast<T*>(buffer->Data()), aBuffer, aFrames);

      deinterleaved.AppendElement(static_cast<T*>(buffer->Data()));

    } else {

      deinterleaved.SetLength(aChannels);

      T* samples = static_cast<T*>(buffer->Data());

      size_t offset = 0;

      for (uint32_t i = 0; i < aChannels; ++i) {

        deinterleaved[i] = samples + offset;

        offset += aFrames;

      DeinterleaveAndConvertBuffer(aBuffer, static_cast<uint32_t>(aFrames),

                                   aChannels, deinterleaved.Elements());

    AutoTArray<const T*, GUESS_AUDIO_CHANNELS> channelData;

    channelData.AppendElements(deinterleaved);

    return AudioChunk(buffer.forget(), channelData,

                      static_cast<TrackTime>(aFrames), aPrincipalHandle);

  const PrincipalHandle& GetPrincipalHandle() const { return mPrincipalHandle; }

  // aOutputChannels must contain pointers to channel data of length mDuration.

  void DownMixTo(Span<AudioDataValue* const> aOutputChannels) const;

  TrackTime mDuration = 0;             // in frames within the buffer

  RefPtr<ThreadSharedObject> mBuffer;  // the buffer object whose lifetime is

                                       // managed; null means data is all zeroes

  // one pointer per channel; empty if and only if mBuffer is null

  CopyableAutoTArray<const void*, GUESS_AUDIO_CHANNELS> mChannelData;

  float mVolume = 1.0f;  // volume multiplier to apply

  // format of frames in mBuffer (or silence if mBuffer is null)

  SampleFormat mBufferFormat = AUDIO_FORMAT_SILENCE;

  // principalHandle for the data in this chunk.

  // This can be compared to an nsIPrincipal* when back on main thread.

  PrincipalHandle mPrincipalHandle = PRINCIPAL_HANDLE_NONE;

};

/**

 * A list of audio samples consisting of a sequence of slices of SharedBuffers.

 * The audio rate is determined by the track, not stored in this class.

*/

class AudioSegment final : public MediaSegmentBase<AudioSegment, AudioChunk> {

  // The channel count that MaxChannelCount() returned last time it was called.

  uint32_t mMemoizedMaxChannelCount = 0;

 public:

  typedef mozilla::AudioSampleFormat SampleFormat;

  AudioSegment() : MediaSegmentBase<AudioSegment, AudioChunk>(AUDIO) {}

  AudioSegment(AudioSegment&& aSegment) = default;

  AudioSegment(const AudioSegment&) = delete;

  AudioSegment& operator=(const AudioSegment&) = delete;

  ~AudioSegment() = default;

  // Resample the whole segment in place.  `aResampler` is an instance of a

  // resampler, initialized with `aResamplerChannelCount` channels. If this

  // function finds a chunk with more channels, `aResampler` is destroyed and a

  // new resampler is created, and `aResamplerChannelCount` is updated with the

  // new channel count value.

  void ResampleChunks(nsAutoRef<SpeexResamplerState>& aResampler,

                      uint32_t* aResamplerChannelCount, uint32_t aInRate,

                      uint32_t aOutRate);

  template <typename T>

  void AppendFrames(already_AddRefed<ThreadSharedObject> aBuffer,

                    const nsTArray<const T*>& aChannelData, TrackTime aDuration,

                    const PrincipalHandle& aPrincipalHandle) {

    AppendAndConsumeChunk(AudioChunk(std::move(aBuffer), aChannelData,

                                     aDuration, aPrincipalHandle));

  void AppendSegment(const AudioSegment* aSegment) {

    MOZ_ASSERT(aSegment);

    for (const AudioChunk& c : aSegment->mChunks) {

      AudioChunk* chunk = AppendChunk(c.GetDuration());

      chunk->mBuffer = c.mBuffer;

      chunk->mChannelData = c.mChannelData;

      chunk->mBufferFormat = c.mBufferFormat;

      chunk->mPrincipalHandle = c.mPrincipalHandle;

  template <typename T>

  void AppendFromInterleavedBuffer(const T* aBuffer, size_t aFrames,

                                   uint32_t aChannels,

                                   const PrincipalHandle& aPrincipalHandle) {

    AppendAndConsumeChunk(AudioChunk::FromInterleavedBuffer<T>(

        aBuffer, aFrames, aChannels, aPrincipalHandle));

  // Write the segement data into an interleaved buffer. Do mixing if the

  // AudioChunk's channel count in the segment is different from aChannels.

  // Returns sample count of the converted audio data. The converted data will

  // be stored into aBuffer.

  size_t WriteToInterleavedBuffer(nsTArray<AudioDataValue>& aBuffer,

                                  uint32_t aChannels) const;

  // Consumes aChunk, and append it to the segment if its duration is not zero.

  void AppendAndConsumeChunk(AudioChunk&& aChunk) {

    AudioChunk unused;

    AudioChunk* chunk = &unused;

    // Always consume aChunk. The chunk's mBuffer can be non-null even if its

    // duration is 0.

    auto consume = MakeScopeExit([&] {

      chunk->mBuffer = std::move(aChunk.mBuffer);

      chunk->mChannelData = std::move(aChunk.mChannelData);

      MOZ_ASSERT(chunk->mBuffer || chunk->mChannelData.IsEmpty(),

                 "Appending invalid data ?");

      chunk->mVolume = aChunk.mVolume;

      chunk->mBufferFormat = aChunk.mBufferFormat;

      chunk->mPrincipalHandle = std::move(aChunk.mPrincipalHandle);

});

    if (aChunk.GetDuration() == 0) {

      return;

    if (!mChunks.IsEmpty() &&

        mChunks.LastElement().CanCombineWithFollowing(aChunk)) {

      mChunks.LastElement().mDuration += aChunk.GetDuration();

      mDuration += aChunk.GetDuration();

      return;

    chunk = AppendChunk(aChunk.mDuration);

  void ApplyVolume(float aVolume);

  // Mix the segment into a mixer, keeping it planar, up or down mixing to

  // aChannelCount channels.

  void Mix(AudioMixer& aMixer, uint32_t aChannelCount, uint32_t aSampleRate);

  // Returns the maximum channel count across all chunks in this segment.

  // Should there be no chunk with a channel count we return the memoized return

  // value from last time this method was called.

  uint32_t MaxChannelCount() {

    uint32_t channelCount = 0;

    for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {

      if (ci->ChannelCount()) {

        channelCount = std::max(channelCount, ci->ChannelCount());

    if (channelCount == 0) {

      return mMemoizedMaxChannelCount;

    return mMemoizedMaxChannelCount = channelCount;

  static Type StaticType() { return AUDIO; }

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {

    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);

  PrincipalHandle GetOldestPrinciple() const {

    const AudioChunk* chunk = mChunks.IsEmpty() ? nullptr : &mChunks[0];

    return chunk ? chunk->GetPrincipalHandle() : PRINCIPAL_HANDLE_NONE;

  // Iterate on each chunks until the input function returns true.

  template <typename Function>

  void IterateOnChunks(const Function&& aFunction) {

    for (uint32_t idx = 0; idx < mChunks.Length(); idx++) {

      if (aFunction(&mChunks[idx])) {

        return;

 private:

  template <typename T>

  void Resample(nsAutoRef<SpeexResamplerState>& aResampler,

                uint32_t* aResamplerChannelCount, uint32_t aInRate,

                uint32_t aOutRate);

};

template <typename SrcT>

void WriteChunk(const AudioChunk& aChunk, uint32_t aOutputChannels,

                float aVolume, AudioDataValue* aOutputBuffer) {

  CopyableAutoTArray<const SrcT*, GUESS_AUDIO_CHANNELS> channelData;

  channelData.AppendElements(aChunk.ChannelData<SrcT>());

  if (channelData.Length() < aOutputChannels) {

    // Up-mix. Note that this might actually make channelData have more

    // than aOutputChannels temporarily.

    AudioChannelsUpMix(&channelData, aOutputChannels,

                       SilentChannel::ZeroChannel<SrcT>());

  if (channelData.Length() > aOutputChannels) {

    // Down-mix.

    DownmixAndInterleave<SrcT>(channelData, aChunk.mDuration, aVolume,

                               aOutputChannels, aOutputBuffer);

  } else {

    InterleaveAndConvertBuffer(channelData.Elements(), aChunk.mDuration,

                               aVolume, aOutputChannels, aOutputBuffer);

}  // namespace mozilla

#endif /* MOZILLA_AUDIOSEGMENT_H_ */