Source code
Revision control
Copy as Markdown
Other Tools
/*
* Copyright 2016 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#import "voice_processing_audio_unit.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/metrics.h"
#import "base/RTCLogging.h"
#import "sdk/objc/components/audio/RTCAudioSessionConfiguration.h"
#if !defined(NDEBUG)
// Logs every field of `description`. The numeric format ID is rendered as its
// four-character code so it is readable in the log.
static void LogStreamDescription(AudioStreamBasicDescription description) {
  // Unpack the format ID most-significant byte first; this yields the same
  // byte sequence as a host-to-big-endian swap followed by a raw copy.
  const UInt32 format_id = description.mFormatID;
  const char four_char_code[5] = {
      static_cast<char>((format_id >> 24) & 0xFF),
      static_cast<char>((format_id >> 16) & 0xFF),
      static_cast<char>((format_id >> 8) & 0xFF),
      static_cast<char>(format_id & 0xFF),
      '\0'};

  const unsigned int format_flags =
      static_cast<unsigned int>(description.mFormatFlags);
  const unsigned int bytes_per_packet =
      static_cast<unsigned int>(description.mBytesPerPacket);
  const unsigned int frames_per_packet =
      static_cast<unsigned int>(description.mFramesPerPacket);
  const unsigned int bytes_per_frame =
      static_cast<unsigned int>(description.mBytesPerFrame);
  const unsigned int channels_per_frame =
      static_cast<unsigned int>(description.mChannelsPerFrame);
  const unsigned int bits_per_channel =
      static_cast<unsigned int>(description.mBitsPerChannel);
  const unsigned int reserved =
      static_cast<unsigned int>(description.mReserved);

  RTCLog(@"AudioStreamBasicDescription: {\n"
          " mSampleRate: %.2f\n"
          " formatIDString: %s\n"
          " mFormatFlags: 0x%X\n"
          " mBytesPerPacket: %u\n"
          " mFramesPerPacket: %u\n"
          " mBytesPerFrame: %u\n"
          " mChannelsPerFrame: %u\n"
          " mBitsPerChannel: %u\n"
          " mReserved: %u\n}",
         description.mSampleRate, four_char_code, format_flags,
         bytes_per_packet, frames_per_packet, bytes_per_frame,
         channels_per_frame, bits_per_channel, reserved);
}
#endif
namespace webrtc {
namespace ios_adm {
// Calls to AudioUnitInitialize() can fail if called back-to-back on different
// ADM instances. A fall-back solution is to allow multiple sequential calls
// with a small delay between each. This constant sets the maximum number of
// initialization attempts before giving up.
static const int kMaxNumberOfAudioUnitInitializeAttempts = 5;
// A VP I/O unit's bus 1 connects to input hardware (microphone).
static const AudioUnitElement kInputBus = 1;
// A VP I/O unit's bus 0 connects to output hardware (speaker).
static const AudioUnitElement kOutputBus = 0;
// Queries the automatic gain control (AGC) state on the processed microphone
// signal of `audio_unit` and stores it in `*enabled` (callers treat non-zero
// as "enabled"). AGC should be on by default for Voice Processing audio units.
//
// Returns the OSStatus reported by AudioUnitGetProperty(). `*enabled` is only
// meaningful when that status is noErr, so the state is only logged in that
// case; on failure the caller is expected to log the returned error code.
static OSStatus GetAGCState(AudioUnit audio_unit, UInt32* enabled) {
  RTC_DCHECK(audio_unit);
  RTC_DCHECK(enabled);
  UInt32 size = sizeof(*enabled);
  OSStatus result = AudioUnitGetProperty(audio_unit,
                                         kAUVoiceIOProperty_VoiceProcessingEnableAGC,
                                         kAudioUnitScope_Global,
                                         kInputBus,
                                         enabled,
                                         &size);
  if (result == noErr) {
    // Only report the value when the query succeeded; otherwise `*enabled`
    // still holds whatever the caller passed in and would be misleading.
    RTCLog(@"VPIO unit AGC: %u", static_cast<unsigned int>(*enabled));
  }
  return result;
}
// Stores the configuration flags and the observer. No audio unit is created
// here: `vpio_unit_` starts out null and the state is kInitRequired until
// Init() is called. `observer` must not be null.
VoiceProcessingAudioUnit::VoiceProcessingAudioUnit(
    bool bypass_voice_processing,
    bool detect_mute_speech,
    VoiceProcessingAudioUnitObserver* observer)
    : bypass_voice_processing_{bypass_voice_processing},
      detect_mute_speech_{detect_mute_speech},
      observer_{observer},
      vpio_unit_{nullptr},
      state_{kInitRequired} {
  RTC_DCHECK(observer);
}
// Releases the underlying audio unit, if one was created, by delegating to
// DisposeAudioUnit().
VoiceProcessingAudioUnit::~VoiceProcessingAudioUnit() {
DisposeAudioUnit();
}
// Size in bytes of one audio sample. GetFormat() uses it for the bytes per
// packet, bytes per frame and (multiplied by 8) the bits per channel.
const UInt32 VoiceProcessingAudioUnit::kBytesPerSample = 2;
// Creates the Voice Processing I/O audio unit and applies the configuration
// that does not depend on the sample rate: enables input and output, installs
// the render (playout) and input (recording) callbacks, and disables the
// unit's own buffer allocation for recording.
//
// Must be called in state kInitRequired. Returns true and moves to
// kUninitialized on success. On any failure the error is logged, the audio
// unit (if it was created) is disposed, and false is returned.
bool VoiceProcessingAudioUnit::Init() {
  RTC_DCHECK_EQ(state_, kInitRequired);

  // Create an audio component description to identify the Voice Processing
  // I/O audio unit.
  AudioComponentDescription vpio_unit_description;
  vpio_unit_description.componentType = kAudioUnitType_Output;
  vpio_unit_description.componentSubType = kAudioUnitSubType_VoiceProcessingIO;
  vpio_unit_description.componentManufacturer = kAudioUnitManufacturer_Apple;
  vpio_unit_description.componentFlags = 0;
  vpio_unit_description.componentFlagsMask = 0;

  // Obtain an audio unit instance given the description.
  AudioComponent found_vpio_unit_ref =
      AudioComponentFindNext(nullptr, &vpio_unit_description);
  if (found_vpio_unit_ref == nullptr) {
    // Without a matching component there is nothing to instantiate; fail here
    // with a specific message instead of handing a null component to
    // AudioComponentInstanceNew().
    RTCLogError(
        @"AudioComponentFindNext failed to find a Voice Processing I/O unit.");
    return false;
  }

  // Create a Voice Processing IO audio unit.
  OSStatus result = noErr;
  result = AudioComponentInstanceNew(found_vpio_unit_ref, &vpio_unit_);
  if (result != noErr) {
    vpio_unit_ = nullptr;
    RTCLogError(@"AudioComponentInstanceNew failed. Error=%ld.", (long)result);
    return false;
  }

  // Enable input on the input scope of the input element.
  UInt32 enable_input = 1;
  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
                                kAudioUnitScope_Input, kInputBus, &enable_input,
                                sizeof(enable_input));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to enable input on input scope of input element. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Enable output on the output scope of the output element.
  UInt32 enable_output = 1;
  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
                                kAudioUnitScope_Output, kOutputBus,
                                &enable_output, sizeof(enable_output));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to enable output on output scope of output element. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Specify the callback function that provides audio samples to the audio
  // unit.
  AURenderCallbackStruct render_callback;
  render_callback.inputProc = OnGetPlayoutData;
  render_callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Input,
      kOutputBus, &render_callback, sizeof(render_callback));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to specify the render callback on the output bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Disable AU buffer allocation for the recorder, we allocate our own.
  // TODO(henrika): not sure that it actually saves resource to make this call.
  UInt32 flag = 0;
  result = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_ShouldAllocateBuffer,
      kAudioUnitScope_Output, kInputBus, &flag, sizeof(flag));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to disable buffer allocation on the input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  // Specify the callback to be called by the I/O thread to us when input audio
  // is available. The recorded samples can then be obtained by calling the
  // AudioUnitRender() method.
  AURenderCallbackStruct input_callback;
  input_callback.inputProc = OnDeliverRecordedData;
  input_callback.inputProcRefCon = this;
  result = AudioUnitSetProperty(vpio_unit_,
                                kAudioOutputUnitProperty_SetInputCallback,
                                kAudioUnitScope_Global, kInputBus,
                                &input_callback, sizeof(input_callback));
  if (result != noErr) {
    DisposeAudioUnit();
    RTCLogError(@"Failed to specify the input callback on the input bus. "
                 "Error=%ld.",
                (long)result);
    return false;
  }

  state_ = kUninitialized;
  return true;
}
// Returns the current value of `state_`, which is updated by Init(),
// Initialize(), Start(), Stop() and Uninitialize().
VoiceProcessingAudioUnit::State VoiceProcessingAudioUnit::GetState() const {
return state_;
}
// Applies the stream format for `sample_rate` to both buses, initializes the
// Voice Processing I/O unit (retrying on failure) and then configures the
// optional features: the muted-speech listener, voice-processing bypass or,
// when not bypassing, the built-in AGC.
//
// Returns false if a stream format cannot be set or if the unit still fails
// to initialize after kMaxNumberOfAudioUnitInitializeAttempts attempts.
// Failures in the optional steps are logged (and, for AGC, reported to UMA)
// but do not fail the call. On success `state_` becomes kInitialized.
bool VoiceProcessingAudioUnit::Initialize(Float64 sample_rate) {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Initializing audio unit with sample rate: %f", sample_rate);

  AudioStreamBasicDescription stream_format = GetFormat(sample_rate);
  const UInt32 stream_format_size = sizeof(stream_format);
#if !defined(NDEBUG)
  LogStreamDescription(stream_format);
#endif

  // Set the format on the output scope of the input element/bus.
  OSStatus status = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Output,
      kInputBus, &stream_format, stream_format_size);
  if (status != noErr) {
    RTCLogError(@"Failed to set format on output scope of input bus. "
                 "Error=%ld.",
                (long)status);
    return false;
  }

  // Set the format on the input scope of the output element/bus.
  status = AudioUnitSetProperty(
      vpio_unit_, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Input,
      kOutputBus, &stream_format, stream_format_size);
  if (status != noErr) {
    RTCLogError(@"Failed to set format on input scope of output bus. "
                 "Error=%ld.",
                (long)status);
    return false;
  }

  // Initialize the Voice Processing I/O unit instance.
  // Calls to AudioUnitInitialize() can fail if called back-to-back on
  // different ADM instances. The error message in this case is -66635 which is
  // undocumented. Tests have shown that calling AudioUnitInitialize a second
  // time, after a short sleep, avoids this issue.
  // See webrtc:5166 for details.
  int failed_attempts = 0;
  while (true) {
    status = AudioUnitInitialize(vpio_unit_);
    if (status == noErr) {
      break;
    }
    RTCLogError(@"Failed to initialize the Voice Processing I/O unit. "
                 "Error=%ld.",
                (long)status);
    if (++failed_attempts == kMaxNumberOfAudioUnitInitializeAttempts) {
      // Max number of initialization attempts exceeded, hence abort.
      RTCLogError(@"Too many initialization attempts.");
      return false;
    }
    RTCLog(@"Pause 100ms and try audio unit initialization again...");
    [NSThread sleepForTimeInterval:0.1f];
  }
  // The loop above only exits normally once initialization has succeeded.
  RTCLog(@"Voice Processing I/O unit is now initialized.");

  if (detect_mute_speech_) {
    if (@available(iOS 15, *)) {
      // Set listener for muted speech event. The block forwards each event
      // to the observer.
      AUVoiceIOMutedSpeechActivityEventListener muted_speech_listener =
          ^(AUVoiceIOSpeechActivityEvent event) {
            observer_->OnReceivedMutedSpeechActivity(event);
          };
      status = AudioUnitSetProperty(
          vpio_unit_,
          kAUVoiceIOProperty_MutedSpeechActivityEventListener,
          kAudioUnitScope_Global,
          0,
          &muted_speech_listener,
          sizeof(AUVoiceIOMutedSpeechActivityEventListener));
      if (status != noErr) {
        RTCLog(@"Failed to set muted speech activity event listener. Error=%ld.", (long)status);
      }
    }
  }

  if (bypass_voice_processing_) {
    // Attempt to disable builtin voice processing. This is best effort: a
    // failure is logged but the unit is still considered initialized.
    UInt32 bypass = 1;
    status = AudioUnitSetProperty(vpio_unit_,
                                  kAUVoiceIOProperty_BypassVoiceProcessing,
                                  kAudioUnitScope_Global,
                                  kInputBus,
                                  &bypass,
                                  sizeof(bypass));
    if (status != noErr) {
      RTCLogError(@"Failed to bypass voice processing. Error=%ld.", (long)status);
    } else {
      RTCLog(@"Successfully bypassed voice processing.");
    }
    state_ = kInitialized;
    return true;
  }

  // AGC should be enabled by default for Voice Processing I/O units but it is
  // checked below and enabled explicitly if needed. This scheme is used
  // to be absolutely sure that the AGC is enabled since we have seen cases
  // where only zeros are recorded and a disabled AGC could be one of the
  // reasons why it happens.
  int agc_enabled_by_default = 0;
  UInt32 agc_enabled = 0;
  status = GetAGCState(vpio_unit_, &agc_enabled);
  if (status == noErr) {
    if (agc_enabled) {
      // Remember that the AGC was enabled by default. Will be used in UMA.
      agc_enabled_by_default = 1;
    } else {
      // AGC was initially disabled => try to enable it explicitly.
      UInt32 agc_on = 1;
      status = AudioUnitSetProperty(
          vpio_unit_, kAUVoiceIOProperty_VoiceProcessingEnableAGC,
          kAudioUnitScope_Global, kInputBus, &agc_on, sizeof(agc_on));
      if (status != noErr) {
        RTCLogError(@"Failed to enable the built-in AGC. "
                     "Error=%ld.",
                    (long)status);
        RTC_HISTOGRAM_COUNTS_SPARSE_100000(
            "WebRTC.Audio.SetAGCStateErrorCode", (-1) * status);
      }
      // Read the state back regardless of whether enabling succeeded.
      status = GetAGCState(vpio_unit_, &agc_enabled);
      if (status != noErr) {
        RTCLogError(@"Failed to get AGC state (2nd attempt). "
                     "Error=%ld.",
                    (long)status);
        RTC_HISTOGRAM_COUNTS_SPARSE_100000(
            "WebRTC.Audio.GetAGCStateErrorCode2", (-1) * status);
      }
    }
  } else {
    RTCLogError(@"Failed to get AGC state (1st attempt). "
                 "Error=%ld.",
                (long)status);
    // Example of error code: kAudioUnitErr_NoConnection (-10876).
    // All error codes related to audio units are negative and are therefore
    // converted into a positive value to match the UMA APIs.
    RTC_HISTOGRAM_COUNTS_SPARSE_100000(
        "WebRTC.Audio.GetAGCStateErrorCode1", (-1) * status);
  }

  // Track if the built-in AGC was enabled by default (as it should) or not.
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCWasEnabledByDefault",
                        agc_enabled_by_default);
  RTCLog(@"WebRTC.Audio.BuiltInAGCWasEnabledByDefault: %d",
         agc_enabled_by_default);

  // As a final step, add an UMA histogram for tracking the AGC state.
  // At this stage, the AGC should be enabled, and if it is not, more work is
  // needed to find out the root cause.
  RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.BuiltInAGCIsEnabled", agc_enabled);
  RTCLog(@"WebRTC.Audio.BuiltInAGCIsEnabled: %u",
         static_cast<unsigned int>(agc_enabled));

  state_ = kInitialized;
  return true;
}
// Starts the audio unit via AudioOutputUnitStart(). Returns noErr and moves
// to kStarted on success; otherwise logs and returns the error status and
// leaves `state_` unchanged.
OSStatus VoiceProcessingAudioUnit::Start() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Starting audio unit.");

  const OSStatus status = AudioOutputUnitStart(vpio_unit_);
  if (status == noErr) {
    RTCLog(@"Started audio unit");
    state_ = kStarted;
    return noErr;
  }

  RTCLogError(@"Failed to start audio unit. Error=%ld", (long)status);
  return status;
}
// Stops the audio unit via AudioOutputUnitStop(). Returns true and moves to
// kInitialized on success; otherwise logs the error, leaves `state_`
// unchanged and returns false.
bool VoiceProcessingAudioUnit::Stop() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Stopping audio unit.");

  const OSStatus status = AudioOutputUnitStop(vpio_unit_);
  if (status == noErr) {
    RTCLog(@"Stopped audio unit");
    state_ = kInitialized;
    return true;
  }

  RTCLogError(@"Failed to stop audio unit. Error=%ld", (long)status);
  return false;
}
// Uninitializes the audio unit via AudioUnitUninitialize(). Returns true and
// moves to kUninitialized on success; otherwise logs the error, leaves
// `state_` unchanged and returns false.
bool VoiceProcessingAudioUnit::Uninitialize() {
  RTC_DCHECK_GE(state_, kUninitialized);
  RTCLog(@"Unintializing audio unit.");

  const OSStatus status = AudioUnitUninitialize(vpio_unit_);
  if (status == noErr) {
    RTCLog(@"Uninitialized audio unit.");
    state_ = kUninitialized;
    return true;
  }

  RTCLogError(@"Failed to uninitialize audio unit. Error=%ld", (long)status);
  return false;
}
// Mutes (`enable` == true) or unmutes (`enable` == false) the microphone.
//
// When muted-speech detection is enabled, the kAUVoiceIOProperty_MuteOutput
// property is toggled; otherwise input I/O on the input bus is disabled or
// re-enabled. Returns true if the property was set, false (after logging the
// error) otherwise.
bool VoiceProcessingAudioUnit::SetMicrophoneMute(bool enable) {
  RTC_DCHECK_GE(state_, kUninitialized);
  const char* const action = enable ? "mute" : "unmute";
  RTCLog(@"Setting microphone %s.", action);

  OSStatus status = noErr;
  if (!detect_mute_speech_) {
    // Muting means turning input off, so the flag is the inverse of `enable`.
    UInt32 input_enabled = enable ? 0 : 1;
    status = AudioUnitSetProperty(vpio_unit_,
                                  kAudioOutputUnitProperty_EnableIO,
                                  kAudioUnitScope_Input,
                                  kInputBus,
                                  &input_enabled,
                                  sizeof(input_enabled));
  } else {
    UInt32 mute_uplink_output = enable ? 1 : 0;
    status = AudioUnitSetProperty(vpio_unit_,
                                  kAUVoiceIOProperty_MuteOutput,
                                  kAudioUnitScope_Global,
                                  kInputBus,
                                  &mute_uplink_output,
                                  sizeof(mute_uplink_output));
  }

  if (status == noErr) {
    RTCLog(@"Set microphone %s.", action);
    return true;
  }

  RTCLogError(@"Failed to %s microphone. Error=%ld", action, (long)status);
  return false;
}
// Thin wrapper around AudioUnitRender() for this unit: passes all arguments
// through unchanged and returns the resulting status, logging it first when
// it is not noErr. Requires that the audio unit has been created by Init().
OSStatus VoiceProcessingAudioUnit::Render(AudioUnitRenderActionFlags* flags,
                                          const AudioTimeStamp* time_stamp,
                                          UInt32 output_bus_number,
                                          UInt32 num_frames,
                                          AudioBufferList* io_data) {
  RTC_DCHECK(vpio_unit_) << "Init() not called.";

  const OSStatus status = AudioUnitRender(vpio_unit_, flags, time_stamp,
                                          output_bus_number, num_frames,
                                          io_data);
  if (status == noErr) {
    return noErr;
  }

  RTCLogError(@"Failed to render audio unit. Error=%ld", (long)status);
  return status;
}
// Static render callback installed by Init() on the output bus. `in_ref_con`
// is the VoiceProcessingAudioUnit that registered the callback; the call is
// forwarded to that instance's NotifyGetPlayoutData().
OSStatus VoiceProcessingAudioUnit::OnGetPlayoutData(
    void* in_ref_con,
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  return static_cast<VoiceProcessingAudioUnit*>(in_ref_con)
      ->NotifyGetPlayoutData(flags, time_stamp, bus_number, num_frames,
                             io_data);
}
// Static input callback installed by Init() on the input bus. `in_ref_con`
// is the VoiceProcessingAudioUnit that registered the callback; the call is
// forwarded to that instance's NotifyDeliverRecordedData().
OSStatus VoiceProcessingAudioUnit::OnDeliverRecordedData(
    void* in_ref_con,
    AudioUnitRenderActionFlags* flags,
    const AudioTimeStamp* time_stamp,
    UInt32 bus_number,
    UInt32 num_frames,
    AudioBufferList* io_data) {
  return static_cast<VoiceProcessingAudioUnit*>(in_ref_con)
      ->NotifyDeliverRecordedData(flags, time_stamp, bus_number, num_frames,
                                  io_data);
}
// Forwards a playout-data request to the observer's OnGetPlayoutData() with
// the arguments unchanged and returns the observer's status.
OSStatus VoiceProcessingAudioUnit::NotifyGetPlayoutData(
AudioUnitRenderActionFlags* flags,
const AudioTimeStamp* time_stamp,
UInt32 bus_number,
UInt32 num_frames,
AudioBufferList* io_data) {
return observer_->OnGetPlayoutData(flags, time_stamp, bus_number, num_frames,
io_data);
}
// Forwards a recorded-data notification to the observer's
// OnDeliverRecordedData() with the arguments unchanged and returns the
// observer's status.
OSStatus VoiceProcessingAudioUnit::NotifyDeliverRecordedData(
AudioUnitRenderActionFlags* flags,
const AudioTimeStamp* time_stamp,
UInt32 bus_number,
UInt32 num_frames,
AudioBufferList* io_data) {
return observer_->OnDeliverRecordedData(flags, time_stamp, bus_number,
num_frames, io_data);
}
// Builds the stream description applied to both the input and output bus:
// packed, signed-integer linear PCM at `sample_rate`, with
// kBytesPerSample bytes per sample, one frame per packet and
// kRTCAudioSessionPreferredNumberOfChannels channels (checked to be 1).
// Every field of the returned struct is defined; mReserved is zero.
AudioStreamBasicDescription VoiceProcessingAudioUnit::GetFormat(
    Float64 sample_rate) const {
  // Set the application formats for input and output:
  // - use same format in both directions
  // - avoid resampling in the I/O unit by using the hardware sample rate
  // - linear PCM => noncompressed audio data format with one frame per packet
  // - no need to specify interleaving since only mono is supported
  //
  // Value-initialize the struct so that mReserved, which is not assigned
  // below, is zero instead of indeterminate when the description is handed
  // to the audio unit or logged.
  AudioStreamBasicDescription format = {};
  RTC_DCHECK_EQ(1, kRTCAudioSessionPreferredNumberOfChannels);
  format.mSampleRate = sample_rate;
  format.mFormatID = kAudioFormatLinearPCM;
  format.mFormatFlags =
      kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
  format.mBytesPerPacket = kBytesPerSample;
  format.mFramesPerPacket = 1;  // uncompressed.
  format.mBytesPerFrame = kBytesPerSample;
  format.mChannelsPerFrame = kRTCAudioSessionPreferredNumberOfChannels;
  format.mBitsPerChannel = 8 * kBytesPerSample;
  return format;
}
// Releases the audio unit, if one exists. A started unit is first stopped and
// then uninitialized; an initialized unit is only uninitialized. The results
// of those calls are not checked here (each logs its own failures). Finally
// the component instance is disposed and `vpio_unit_` is reset to null.
void VoiceProcessingAudioUnit::DisposeAudioUnit() {
  if (!vpio_unit_) {
    return;
  }

  // Decide the teardown steps from the state at entry, since Stop() and
  // Uninitialize() modify `state_` themselves.
  const bool was_started = (state_ == kStarted);
  const bool was_initialized = (state_ == kInitialized);
  if (was_started) {
    Stop();
  }
  if (was_started || was_initialized) {
    Uninitialize();
  }

  RTCLog(@"Disposing audio unit.");
  const OSStatus status = AudioComponentInstanceDispose(vpio_unit_);
  if (status != noErr) {
    RTCLogError(@"AudioComponentInstanceDispose failed. Error=%ld.",
                (long)status);
  }
  vpio_unit_ = nullptr;
}
} // namespace ios_adm
} // namespace webrtc