// transient_suppressor.h

/*

 *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.

 *  Use of this source code is governed by a BSD-style license

 *  that can be found in the LICENSE file in the root of the source

 *  tree. An additional intellectual property rights grant can be found

 *  in the file PATENTS.  All contributing project authors may

 *  be found in the AUTHORS file in the root of the source tree.

*/

#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_

#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_

#include <cstddef>

namespace webrtc {

// Detects transients in an audio stream and suppresses them using a simple
// restoration algorithm that attenuates unexpected spikes in the spectrum.
//
// This is a pure interface: it declares no state and every operation other
// than the destructor is pure virtual.
class TransientSuppressor {
 public:
  // Type of VAD used by the caller to compute the `voice_probability` argument
  // of `Suppress()`.
  enum class VadMode {
    // By default, `TransientSuppressor` assumes that `voice_probability` is
    // computed by `AgcManagerDirect`.
    kDefault = 0,
    // Use this mode when `TransientSuppressor` must assume that
    // `voice_probability` is computed by the RNN VAD.
    kRnnVad,
    // Use this mode to let `TransientSuppressor::Suppress()` ignore
    // `voice_probability` and behave as if voice information is unavailable
    // (regardless of the passed value).
    kNoVad,
  };

  // Virtual so that implementations can be destroyed through a pointer to this
  // interface.
  virtual ~TransientSuppressor() = default;

  // Configures the suppressor with the sample rate of the processed audio
  // (`sample_rate_hz`), the rate used for detection (`detector_rate_hz`) and
  // the number of channels (`num_channels`).
  // NOTE(review): presumably this must be called before `Suppress()` and
  // determines the chunk length and channel count that `Suppress()` expects -
  // confirm against the implementation.
  virtual void Initialize(int sample_rate_hz,
                          int detector_rate_hz,
                          int num_channels) = 0;

  // Processes a `data` chunk, and returns it with keystrokes suppressed from
  // it. The float format is assumed to be int16 ranged. If there is more than
  // one channel, the chunks are concatenated one after the other in `data`.
  // `data_length` must be equal to `data_length_`.
  // `num_channels` must be equal to `num_channels_`.
  // (`data_length_` and `num_channels_` are not declared by this interface.
  // NOTE(review): presumably they are the implementation's values configured
  // through `Initialize()` - confirm.)
  // A sub-band, ideally the higher, can be used as `detection_data`. If it is
  // nullptr, `data` is used for the detection too. The `detection_data` is
  // always assumed mono.
  // If a reference signal (e.g. keyboard microphone) is available, it can be
  // passed in as `reference_data`. It is assumed mono and must have the same
  // length as `data`. nullptr is accepted if unavailable.
  // NOTE(review): `detection_length` and `reference_length` are presumably the
  // number of samples in `detection_data` and `reference_data` - confirm.
  // This suppressor performs better if voice information is available.
  // `voice_probability` is the probability of voice being present in this chunk
  // of audio. If voice information is not available, `voice_probability` must
  // always be set to 1.
  // `key_pressed` determines if a key was pressed on this audio chunk.
  // Returns a delayed version of `voice_probability` according to the
  // algorithmic delay introduced by this method. In this way, the modified
  // `data` and the returned voice probability will be temporally aligned.
  virtual float Suppress(float* data,
                         size_t data_length,
                         int num_channels,
                         const float* detection_data,
                         size_t detection_length,
                         const float* reference_data,
                         size_t reference_length,
                         float voice_probability,
                         bool key_pressed) = 0;
};

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_