ConvolverNode.cpp - mozsearch

mozilla-central/dom/media/webaudio/ConvolverNode.cpp (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* vim:set ts=2 sw=2 sts=2 et cindent: */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "ConvolverNode.h"

#include "mozilla/dom/ConvolverNodeBinding.h"

#include "AlignmentUtils.h"

#include "AudioNodeEngine.h"

#include "AudioNodeTrack.h"

#include "blink/Reverb.h"

#include "PlayingRefChangeHandler.h"

#include "Tracing.h"

namespace mozilla::dom {

NS_IMPL_CYCLE_COLLECTION_INHERITED(ConvolverNode, AudioNode, mBuffer)

NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(ConvolverNode)

NS_INTERFACE_MAP_END_INHERITING(AudioNode)

NS_IMPL_ADDREF_INHERITED(ConvolverNode, AudioNode)

NS_IMPL_RELEASE_INHERITED(ConvolverNode, AudioNode)

class ConvolverNodeEngine final : public AudioNodeEngine {

  typedef PlayingRefChangeHandler PlayingRefChanged;

 public:

  ConvolverNodeEngine(AudioNode* aNode, bool aNormalize)

      : AudioNodeEngine(aNode) {}

  // Indicates how the right output channel is generated.

  enum class RightConvolverMode {

    // A right convolver is always used when there is more than one impulse

    // response channel.

    Always,

    // With a single response channel, the mode may be either Direct or

    // Difference.  The decision on which to use is made when stereo input is

    // received.  Once the right convolver is in use, convolver state is

    // suitable only for the selected mode, and so the mode cannot change

    // until the right convolver contains only silent history.

//

    // With Direct mode, each convolver processes a corresponding channel.

    // This mode is selected when input is initially stereo or

    // channelInterpretation is "discrete" at the time or starting the right

    // convolver when input changes from non-silent mono to stereo.

    Direct,

    // Difference mode is selected if channelInterpretation is "speakers" at

    // the time starting the right convolver when the input changes from mono

    // to stereo.

//

    // When non-silent input is initially mono, with a single response

    // channel, the right output channel is not produced until input becomes

    // stereo.  Only a single convolver is used for mono processing.  When

    // stereo input arrives after mono input, output must be as if the mono

    // signal remaining in the left convolver is up-mixed, but the right

    // convolver has not been initialized with the history of the mono input.

    // Copying the state of the left convolver into the right convolver is not

    // desirable, because there is considerable state to copy, and the

    // different convolvers are intended to process out of phase, which means

    // that state from one convolver would not directly map to state in

    // another convolver.

//

    // Instead the distributive property of convolution is used to generate

    // the right output channel using information in the left output channel.

    // Using l and r to denote the left and right channel input signals, g the

    // impulse response, and * convolution, the convolution of the right

    // channel can be given by

//

    //   r * g = (l + (r - l)) * g

    //         = l * g + (r - l) * g

//

    // The left convolver continues to process the left channel l to produce

    // l * g.  The right convolver processes the difference of input channel

    // signals r - l to produce (r - l) * g.  The outputs of the two

    // convolvers are added to generate the right channel output r * g.

//

    // The benefit of doing this is that the history of the r - l input for a

    // "speakers" up-mixed mono signal is zero, and so an empty convolver

    // already has exactly the right history for mixing the previous mono

    // signal with the new stereo signal.

    Difference

};

  void SetReverb(WebCore::Reverb* aReverb,

                 uint32_t aImpulseChannelCount) override {

    mRemainingLeftOutput = INT32_MIN;

    mRemainingRightOutput = 0;

    mRemainingRightHistory = 0;

    // Assume for now that convolution of channel difference is not required.

    // Direct may change to Difference during processing.

    if (aReverb) {

      mRightConvolverMode = aImpulseChannelCount == 1

                                ? RightConvolverMode::Direct

                                : RightConvolverMode::Always;

    } else {

      mRightConvolverMode = RightConvolverMode::Always;

    mReverb.reset(aReverb);

  void AllocateReverbInput(const AudioBlock& aInput,

                           uint32_t aTotalChannelCount) {

    uint32_t inputChannelCount = aInput.ChannelCount();

    MOZ_ASSERT(inputChannelCount <= aTotalChannelCount);

    mReverbInput.AllocateChannels(aTotalChannelCount);

    // Pre-multiply the input's volume

    for (uint32_t i = 0; i < inputChannelCount; ++i) {

      const float* src = static_cast<const float*>(aInput.mChannelData[i]);

      float* dest = mReverbInput.ChannelFloatsForWrite(i);

      AudioBlockCopyChannelWithScale(src, aInput.mVolume, dest);

    // Fill remaining channels with silence

    for (uint32_t i = inputChannelCount; i < aTotalChannelCount; ++i) {

      float* dest = mReverbInput.ChannelFloatsForWrite(i);

      std::fill_n(dest, WEBAUDIO_BLOCK_SIZE, 0.0f);

  void ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,

                    const AudioBlock& aInput, AudioBlock* aOutput,

                    bool* aFinished) override;

  bool IsActive() const override { return mRemainingLeftOutput != INT32_MIN; }

  size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override {

    size_t amount = AudioNodeEngine::SizeOfExcludingThis(aMallocSizeOf);

    amount += mReverbInput.SizeOfExcludingThis(aMallocSizeOf, false);

    if (mReverb) {

      amount += mReverb->sizeOfIncludingThis(aMallocSizeOf);

    return amount;

  size_t SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const override {

    return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);

 private:

  // Keeping mReverbInput across process calls avoids unnecessary reallocation.

  AudioBlock mReverbInput;

  UniquePtr<WebCore::Reverb> mReverb;

  // Tracks samples of the tail remaining to be output.  INT32_MIN is a

  // special value to indicate that the end of any previous tail has been

  // handled.

  int32_t mRemainingLeftOutput = INT32_MIN;

  // mRemainingRightOutput and mRemainingRightHistory are only used when

  // mRightOutputMode != Always.  There is no special handling required at the

  // end of tail times and so INT32_MIN is not used.

  // mRemainingRightOutput tracks how much longer this node needs to continue

  // to produce a right output channel.

  int32_t mRemainingRightOutput = 0;

  // mRemainingRightHistory tracks how much silent input would be required to

  // drain the right convolver, which may sometimes be longer than the period

  // a right output channel is required.

  int32_t mRemainingRightHistory = 0;

  RightConvolverMode mRightConvolverMode = RightConvolverMode::Always;

};

static void AddScaledLeftToRight(AudioBlock* aBlock, float aScale) {

  const float* left = static_cast<const float*>(aBlock->mChannelData[0]);

  float* right = aBlock->ChannelFloatsForWrite(1);

  AudioBlockAddChannelWithScale(left, aScale, right);

void ConvolverNodeEngine::ProcessBlock(AudioNodeTrack* aTrack, GraphTime aFrom,

                                       const AudioBlock& aInput,

                                       AudioBlock* aOutput, bool* aFinished) {

  TRACE("ConvolverNodeEngine::ProcessBlock");

  if (!mReverb) {

    aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);

    return;

  uint32_t inputChannelCount = aInput.ChannelCount();

  if (aInput.IsNull()) {

    if (mRemainingLeftOutput > 0) {

      mRemainingLeftOutput -= WEBAUDIO_BLOCK_SIZE;

      AllocateReverbInput(aInput, 1);  // floats for silence

    } else {

      if (mRemainingLeftOutput != INT32_MIN) {

        mRemainingLeftOutput = INT32_MIN;

        MOZ_ASSERT(mRemainingRightOutput <= 0);

        MOZ_ASSERT(mRemainingRightHistory <= 0);

        aTrack->ScheduleCheckForInactive();

        RefPtr<PlayingRefChanged> refchanged =

            new PlayingRefChanged(aTrack, PlayingRefChanged::RELEASE);

        aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());

      aOutput->SetNull(WEBAUDIO_BLOCK_SIZE);

      return;

  } else {

    if (mRemainingLeftOutput <= 0) {

      RefPtr<PlayingRefChanged> refchanged =

          new PlayingRefChanged(aTrack, PlayingRefChanged::ADDREF);

      aTrack->Graph()->DispatchToMainThreadStableState(refchanged.forget());

    // Use mVolume as a flag to detect whether AllocateReverbInput() gets

    // called.

    mReverbInput.mVolume = 0.0f;

    // Special handling of input channel count changes is used when there is

    // only a single impulse response channel.  See RightConvolverMode.

    if (mRightConvolverMode != RightConvolverMode::Always) {

      ChannelInterpretation channelInterpretation =

          aTrack->GetChannelInterpretation();

      if (inputChannelCount == 2) {

        if (mRemainingRightHistory <= 0) {

          // Will start the second convolver.  Choose to convolve the right

          // channel directly if there is no left tail to up-mix or up-mixing

          // is "discrete".

          mRightConvolverMode =

              (mRemainingLeftOutput <= 0 ||

               channelInterpretation == ChannelInterpretation::Discrete)

                  ? RightConvolverMode::Direct

                  : RightConvolverMode::Difference;

        // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.

        mRemainingRightOutput =

            mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;

        mRemainingRightHistory = mRemainingRightOutput;

        if (mRightConvolverMode == RightConvolverMode::Difference) {

          AllocateReverbInput(aInput, 2);

          // Subtract left from right.

          AddScaledLeftToRight(&mReverbInput, -1.0f);

      } else if (mRemainingRightHistory > 0) {

        // There is one channel of input, but a second convolver also

        // requires input.  Up-mix appropriately for the second convolver.

        if ((mRightConvolverMode == RightConvolverMode::Difference) ^

            (channelInterpretation == ChannelInterpretation::Discrete)) {

          MOZ_ASSERT(

              (mRightConvolverMode == RightConvolverMode::Difference &&

               channelInterpretation == ChannelInterpretation::Speakers) ||

              (mRightConvolverMode == RightConvolverMode::Direct &&

               channelInterpretation == ChannelInterpretation::Discrete));

          // The state is one of the following combinations:

          // 1) Difference and speakers.

          //    Up-mixing gives r = l.

          //    The input to the second convolver is r - l.

          // 2) Direct and discrete.

          //    Up-mixing gives r = 0.

          //    The input to the second convolver is r.

//

          // In each case the input for the second convolver is silence, which

          // will drain the convolver.

          AllocateReverbInput(aInput, 2);

        } else {

          if (channelInterpretation == ChannelInterpretation::Discrete) {

            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Difference);

            // channelInterpretation has changed since the second convolver

            // was added.  "discrete" up-mixing of input would produce a

            // silent right channel r = 0, but the second convolver needs

            // r - l for RightConvolverMode::Difference.

            AllocateReverbInput(aInput, 2);

            AddScaledLeftToRight(&mReverbInput, -1.0f);

          } else {

            MOZ_ASSERT(channelInterpretation ==

                       ChannelInterpretation::Speakers);

            MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Direct);

            // The Reverb will essentially up-mix the single input channel by

            // feeding it into both convolvers.

          // The second convolver does not have silent input, and so it will

          // not drain.  It will need to continue processing up-mixed input

          // because the next input block may be stereo, which would be mixed

          // with the signal remaining in the convolvers.

          // The extra WEBAUDIO_BLOCK_SIZE is subtracted below.

          mRemainingRightHistory =

              mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;

    if (mReverbInput.mVolume == 0.0f) {  // not yet set

      if (aInput.mVolume != 1.0f) {

        AllocateReverbInput(aInput, inputChannelCount);  // pre-multiply

      } else {

        mReverbInput = aInput;

    mRemainingLeftOutput = mReverb->impulseResponseLength();

    MOZ_ASSERT(mRemainingLeftOutput > 0);

  // "The ConvolverNode produces a mono output only in the single case where

  // there is a single input channel and a single-channel buffer."

  uint32_t outputChannelCount = 2;

  uint32_t reverbOutputChannelCount = 2;

  if (mRightConvolverMode != RightConvolverMode::Always) {

    // When the input changes from stereo to mono, the output continues to be

    // stereo for the length of the tail time, during which the two channels

    // may differ.

    if (mRemainingRightOutput > 0) {

      MOZ_ASSERT(mRemainingRightHistory > 0);

      mRemainingRightOutput -= WEBAUDIO_BLOCK_SIZE;

    } else {

      outputChannelCount = 1;

    // The second convolver keeps processing until it drains.

    if (mRemainingRightHistory > 0) {

      mRemainingRightHistory -= WEBAUDIO_BLOCK_SIZE;

    } else {

      reverbOutputChannelCount = 1;

  // If there are two convolvers, then they each need an output buffer, even

  // if the second convolver is only processing to keep history of up-mixed

  // input.

  aOutput->AllocateChannels(reverbOutputChannelCount);

  mReverb->process(&mReverbInput, aOutput);

  if (mRightConvolverMode == RightConvolverMode::Difference &&

      outputChannelCount == 2) {

    // Add left to right.

    AddScaledLeftToRight(aOutput, 1.0f);

  } else {

    // Trim if outputChannelCount < reverbOutputChannelCount

    aOutput->mChannelData.TruncateLength(outputChannelCount);

ConvolverNode::ConvolverNode(AudioContext* aContext)

    : AudioNode(aContext, 2, ChannelCountMode::Clamped_max,

                ChannelInterpretation::Speakers),

      mNormalize(true) {

  ConvolverNodeEngine* engine = new ConvolverNodeEngine(this, mNormalize);

  mTrack = AudioNodeTrack::Create(

      aContext, engine, AudioNodeTrack::NO_TRACK_FLAGS, aContext->Graph());

/* static */

already_AddRefed<ConvolverNode> ConvolverNode::Create(

    JSContext* aCx, AudioContext& aAudioContext,

    const ConvolverOptions& aOptions, ErrorResult& aRv) {

  RefPtr<ConvolverNode> audioNode = new ConvolverNode(&aAudioContext);

  audioNode->Initialize(aOptions, aRv);

  if (NS_WARN_IF(aRv.Failed())) {

    return nullptr;

  // This must be done before setting the buffer.

  audioNode->SetNormalize(!aOptions.mDisableNormalization);

  if (aOptions.mBuffer.WasPassed()) {

    MOZ_ASSERT(aCx);

    audioNode->SetBuffer(aCx, aOptions.mBuffer.Value(), aRv);

    if (NS_WARN_IF(aRv.Failed())) {

      return nullptr;

  return audioNode.forget();

size_t ConvolverNode::SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const {

  size_t amount = AudioNode::SizeOfExcludingThis(aMallocSizeOf);

  if (mBuffer) {

    // NB: mBuffer might be shared with the associated engine, by convention

    //     the AudioNode will report.

    amount += mBuffer->SizeOfIncludingThis(aMallocSizeOf);

  return amount;

size_t ConvolverNode::SizeOfIncludingThis(MallocSizeOf aMallocSizeOf) const {

  return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);

JSObject* ConvolverNode::WrapObject(JSContext* aCx,

                                    JS::Handle<JSObject*> aGivenProto) {

  return ConvolverNode_Binding::Wrap(aCx, this, aGivenProto);

void ConvolverNode::SetBuffer(JSContext* aCx, AudioBuffer* aBuffer,

                              ErrorResult& aRv) {

  if (aBuffer) {

    switch (aBuffer->NumberOfChannels()) {

      case 1:

      case 2:

      case 4:

        // Supported number of channels

        break;

      default:

        aRv.ThrowNotSupportedError(

            nsPrintfCString("%u is not a supported number of channels",

                            aBuffer->NumberOfChannels()));

        return;

  if (aBuffer && (aBuffer->SampleRate() != Context()->SampleRate())) {

    aRv.ThrowNotSupportedError(nsPrintfCString(

        "Buffer sample rate (%g) does not match AudioContext sample rate (%g)",

        aBuffer->SampleRate(), Context()->SampleRate()));

    return;

  // Send the buffer to the track

  AudioNodeTrack* ns = mTrack;

  MOZ_ASSERT(ns, "Why don't we have a track here?");

  if (aBuffer) {

    AudioChunk data = aBuffer->GetThreadSharedChannelsForRate(aCx);

    if (data.mBufferFormat == AUDIO_FORMAT_S16) {

      // Reverb expects data in float format.

      // Convert on the main thread so as to minimize allocations on the audio

      // thread.

      // Reverb will dispose of the buffer once initialized, so convert here

      // and leave the smaller arrays in the AudioBuffer.

      // There is currently no value in providing 16/32-byte aligned data

      // because PadAndMakeScaledDFT() will copy the data (without SIMD

      // instructions) to aligned arrays for the FFT.

      CheckedInt<size_t> bufferSize(sizeof(float));

      bufferSize *= data.mDuration;

      bufferSize *= data.ChannelCount();

      RefPtr<SharedBuffer> floatBuffer =

          SharedBuffer::Create(bufferSize, fallible);

      if (!floatBuffer) {

        aRv.Throw(NS_ERROR_OUT_OF_MEMORY);

        return;

      auto floatData = static_cast<float*>(floatBuffer->Data());

      for (size_t i = 0; i < data.ChannelCount(); ++i) {

        ConvertAudioSamples(data.ChannelData<int16_t>()[i], floatData,

                            data.mDuration);

        data.mChannelData[i] = floatData;

        floatData += data.mDuration;

      data.mBuffer = std::move(floatBuffer);

      data.mBufferFormat = AUDIO_FORMAT_FLOAT32;

    } else if (data.mBufferFormat == AUDIO_FORMAT_SILENCE) {

      // This is valid, but a signal convolved by a silent signal is silent, set

      // the reverb to nullptr and return.

      ns->SetReverb(nullptr, 0);

      mBuffer = aBuffer;

      return;

    // Note about empirical tuning (this is copied from Blink)

    // The maximum FFT size affects reverb performance and accuracy.

    // If the reverb is single-threaded and processes entirely in the real-time

    // audio thread, it's important not to make this too high.  In this case

    // 8192 is a good value. But, the Reverb object is multi-threaded, so we

    // want this as high as possible without losing too much accuracy. Very

    // large FFTs will have worse phase errors. Given these constraints 32768 is

    // a good compromise.

    const size_t MaxFFTSize = 32768;

    bool allocationFailure = false;

    UniquePtr<WebCore::Reverb> reverb(new WebCore::Reverb(

        data, MaxFFTSize, !Context()->IsOffline(), mNormalize,

        aBuffer->SampleRate(), &allocationFailure));

    if (!allocationFailure) {

      ns->SetReverb(reverb.release(), data.ChannelCount());

    } else {

      aRv.Throw(NS_ERROR_OUT_OF_MEMORY);

      return;

  } else {

    ns->SetReverb(nullptr, 0);

  mBuffer = aBuffer;

void ConvolverNode::SetNormalize(bool aNormalize) { mNormalize = aNormalize; }

}  // namespace mozilla::dom