WMFAudioMFTManager.cpp

mozilla-central/dom/media/platforms/wmf/WMFAudioMFTManager.cpp (file symbol)

Enable keyboard shortcuts

Source code

Revision control

Copy as Markdown

Other Tools

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */

/* vim:set ts=2 sw=2 sts=2 et cindent: */

/* This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "WMFAudioMFTManager.h"

#include "MediaInfo.h"

#include "TimeUnits.h"

#include "VideoUtils.h"

#include "WMFUtils.h"

#include "mozilla/AbstractThread.h"

#include "mozilla/Logging.h"

#include "mozilla/Telemetry.h"

#include "nsTArray.h"

#include "BufferReader.h"

#include "mozilla/ScopeExit.h"

#define LOG(...) MOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, (__VA_ARGS__))

namespace mozilla {

using media::TimeUnit;

WMFAudioMFTManager::WMFAudioMFTManager(const AudioInfo& aConfig)

    : mAudioChannels(aConfig.mChannels),

      mChannelsMap(AudioConfig::ChannelLayout::UNKNOWN_MAP),

      mAudioRate(aConfig.mRate),

      mStreamType(GetStreamTypeFromMimeType(aConfig.mMimeType)) {

  MOZ_COUNT_CTOR(WMFAudioMFTManager);

  if (mStreamType == WMFStreamType::AAC) {

    const uint8_t* audioSpecConfig;

    uint32_t configLength;

    if (aConfig.mCodecSpecificConfig.is<AacCodecSpecificData>()) {

      const AacCodecSpecificData& aacCodecSpecificData =

          aConfig.mCodecSpecificConfig.as<AacCodecSpecificData>();

      audioSpecConfig =

          aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Elements();

      configLength =

          aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Length();

      mRemainingEncoderDelay = mEncoderDelay =

          aacCodecSpecificData.mEncoderDelayFrames;

      mTotalMediaFrames = aacCodecSpecificData.mMediaFrameCount;

      LOG("AudioMFT decoder: Found AAC decoder delay (%" PRIu32

          "frames) and total media frames (%" PRIu64 " frames)\n",

          mEncoderDelay, mTotalMediaFrames);

    } else {

      // Gracefully handle failure to cover all codec specific cases above. Once

      // we're confident there is no fall through from these cases above, we

      // should remove this code.

      RefPtr<MediaByteBuffer> audioCodecSpecificBinaryBlob =

          GetAudioCodecSpecificBlob(aConfig.mCodecSpecificConfig);

      audioSpecConfig = audioCodecSpecificBinaryBlob->Elements();

      configLength = audioCodecSpecificBinaryBlob->Length();

    // If no extradata has been provided, assume this is ADTS. Otherwise,

    // assume raw AAC packets.

    mIsADTS = !configLength;

    AACAudioSpecificConfigToUserData(aConfig.mExtendedProfile, audioSpecConfig,

                                     configLength, mUserData);

WMFAudioMFTManager::~WMFAudioMFTManager() {

  MOZ_COUNT_DTOR(WMFAudioMFTManager);

const GUID& WMFAudioMFTManager::GetMediaSubtypeGUID() {

  MOZ_ASSERT(StreamTypeIsAudio(mStreamType));

  switch (mStreamType) {

    case WMFStreamType::AAC:

      return MFAudioFormat_AAC;

    case WMFStreamType::MP3:

      return MFAudioFormat_MP3;

    default:

      return GUID_NULL;

};

bool WMFAudioMFTManager::Init() {

  NS_ENSURE_TRUE(StreamTypeIsAudio(mStreamType), false);

  RefPtr<MFTDecoder> decoder(new MFTDecoder());

  // Note: MP3 MFT isn't registered as supporting Float output, but it works.

  // Find PCM output MFTs as this is the common type.

  HRESULT hr = WMFDecoderModule::CreateMFTDecoder(mStreamType, decoder);

  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  // Setup input/output media types

  RefPtr<IMFMediaType> inputType;

  hr = wmf::MFCreateMediaType(getter_AddRefs(inputType));

  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);

  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = inputType->SetGUID(MF_MT_SUBTYPE, GetMediaSubtypeGUID());

  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = inputType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, mAudioRate);

  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = inputType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, mAudioChannels);

  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  if (mStreamType == WMFStreamType::AAC) {

    UINT32 payloadType = mIsADTS ? 1 : 0;

    hr = inputType->SetUINT32(MF_MT_AAC_PAYLOAD_TYPE, payloadType);

    NS_ENSURE_TRUE(SUCCEEDED(hr), false);

    hr = inputType->SetBlob(MF_MT_USER_DATA, mUserData.Elements(),

                            mUserData.Length());

    NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  RefPtr<IMFMediaType> outputType;

  hr = wmf::MFCreateMediaType(getter_AddRefs(outputType));

  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);

  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = outputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_Float);

  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = outputType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, 32);

  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = decoder->SetMediaTypes(inputType, outputType);

  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  mDecoder = decoder;

  return true;

HRESULT

WMFAudioMFTManager::Input(MediaRawData* aSample) {

  mLastInputTime = aSample->mTime;

  return mDecoder->Input(aSample->Data(), uint32_t(aSample->Size()),

                         aSample->mTime.ToMicroseconds(),

                         aSample->mDuration.ToMicroseconds());

nsCString WMFAudioMFTManager::GetCodecName() const {

  if (mStreamType == WMFStreamType::AAC) {

    return "aac"_ns;

  if (mStreamType == WMFStreamType::MP3) {

    return "mp3"_ns;

  return "unknown"_ns;

HRESULT

WMFAudioMFTManager::UpdateOutputType() {

  HRESULT hr;

  RefPtr<IMFMediaType> type;

  hr = mDecoder->GetOutputMediaType(type);

  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  hr = type->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &mAudioRate);

  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  hr = type->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &mAudioChannels);

  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  uint32_t channelsMap;

  hr = type->GetUINT32(MF_MT_AUDIO_CHANNEL_MASK, &channelsMap);

  if (SUCCEEDED(hr)) {

    mChannelsMap = channelsMap;

  } else {

    LOG("Unable to retrieve channel layout. Ignoring");

    mChannelsMap = AudioConfig::ChannelLayout::UNKNOWN_MAP;

  return S_OK;

HRESULT

WMFAudioMFTManager::Output(int64_t aStreamOffset, RefPtr<MediaData>& aOutput) {

  aOutput = nullptr;

  RefPtr<IMFSample> sample;

  HRESULT hr;

  int typeChangeCount = 0;

  const auto oldAudioRate = mAudioRate;

  while (true) {

    hr = mDecoder->Output(&sample);

    if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {

      return hr;

    if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {

      hr = mDecoder->FindDecoderOutputType();

      NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

      hr = UpdateOutputType();

      NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

      // Catch infinite loops, but some decoders perform at least 2 stream

      // changes on consecutive calls, so be permissive.

      // 100 is arbitrarily > 2.

      NS_ENSURE_TRUE(typeChangeCount < 100, MF_E_TRANSFORM_STREAM_CHANGE);

      ++typeChangeCount;

      continue;

    break;

  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  if (!sample) {

    LOG("Audio MFTDecoder returned success but null output.");

    return E_FAIL;

  UINT32 discontinuity = false;

  sample->GetUINT32(MFSampleExtension_Discontinuity, &discontinuity);

  if (mFirstFrame || discontinuity) {

    // Update the output type, in case this segment has a different

    // rate. This also triggers on the first sample, which can have a

    // different rate than is advertised in the container, and sometimes we

    // don't get a MF_E_TRANSFORM_STREAM_CHANGE when the rate changes.

    hr = UpdateOutputType();

    NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

    mFirstFrame = false;

  LONGLONG hns;

  hr = sample->GetSampleTime(&hns);

  if (FAILED(hr)) {

    return E_FAIL;

  TimeUnit pts = TimeUnit::FromHns(hns, mAudioRate);

  NS_ENSURE_TRUE(pts.IsValid(), E_FAIL);

  RefPtr<IMFMediaBuffer> buffer;

  hr = sample->ConvertToContiguousBuffer(getter_AddRefs(buffer));

  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  BYTE* data = nullptr;  // Note: *data will be owned by the IMFMediaBuffer, we

                         // don't need to free it.

  DWORD maxLength = 0, currentLength = 0;

  hr = buffer->Lock(&data, &maxLength, &currentLength);

  ScopeExit exit([buffer] { buffer->Unlock(); });

  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  // Output is made of floats.

  uint32_t numSamples = currentLength / sizeof(float);

  uint32_t numFrames = numSamples / mAudioChannels;

  MOZ_ASSERT(numFrames >= 0);

  MOZ_ASSERT(numSamples >= 0);

  if (numFrames == 0) {

    // All data from this chunk stripped, loop back and try to output the next

    // frame, if possible.

    return S_OK;

  if (oldAudioRate != mAudioRate) {

    LOG("Audio rate changed from %" PRIu32 " to %" PRIu32, oldAudioRate,

        mAudioRate);

  AlignedAudioBuffer audioData(numSamples);

  if (!audioData) {

    return E_OUTOFMEMORY;

  float* floatData = reinterpret_cast<float*>(data);

  PodCopy(audioData.Data(), floatData, numSamples);

  TimeUnit duration(numFrames, mAudioRate);

  NS_ENSURE_TRUE(duration.IsValid(), E_FAIL);

  const bool isAudioRateChangedToHigher = oldAudioRate < mAudioRate;

  if (IsPartialOutput(duration, isAudioRateChangedToHigher)) {

    LOG("Encounter a partial frame?! duration shrinks from %s to %s",

        mLastOutputDuration.ToString().get(), duration.ToString().get());

    return MF_E_TRANSFORM_NEED_MORE_INPUT;

  aOutput = new AudioData(aStreamOffset, pts, std::move(audioData),

                          mAudioChannels, mAudioRate, mChannelsMap);

  MOZ_DIAGNOSTIC_ASSERT(duration == aOutput->mDuration, "must be equal");

  mLastOutputDuration = aOutput->mDuration;

#ifdef LOG_SAMPLE_DECODE

  LOG("Decoded audio sample! timestamp=%lld duration=%lld currentLength=%u",

      pts.ToMicroseconds(), duration.ToMicroseconds(), currentLength);

#endif

  return S_OK;

bool WMFAudioMFTManager::IsPartialOutput(

    const media::TimeUnit& aNewOutputDuration,

    const bool aIsRateChangedToHigher) const {

  // This issue was found in Windows11, where AAC MFT decoder would incorrectly

  // output partial output samples to us, even if MS's documentation said it

  // won't happen [1]. More details are described in bug 1731430 comment 26.

  // If the audio rate isn't changed to higher, which would result in shorter

  // duration, but the new output duration is still shorter than the last one,

  // then new output is possible an incorrect partial output.

  // [1]

  // https://docs.microsoft.com/en-us/windows/win32/medfound/mft-message-command-drain

  if (mStreamType != WMFStreamType::AAC) {

    return false;

  if (mLastOutputDuration > aNewOutputDuration && !aIsRateChangedToHigher) {

    return true;

  return false;

void WMFAudioMFTManager::Shutdown() { mDecoder = nullptr; }

}  // namespace mozilla

#undef LOG