/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/*
 * Declarations for the legacy analog/digital automatic gain control (AGC):
 * state structures, error codes, mode enums and the WebRtcAgc_* API.
 */

#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_

// Define MIC_LEVEL_FEEDBACK to compile in the targetIdxOffset,
// numBlocksMicLvlSat and micLvlSat members of LegacyAgc.
//#define MIC_LEVEL_FEEDBACK

// NOTE(review): the header names of the #include directives were missing in
// the file as received (which also listed the same group of directives
// twice). The set below is reconstructed from the types this header uses
// (fixed-width integers, size_t, and FILE in debug-dump builds). Confirm
// against the upstream file that no translation unit relied on an additional
// header being pulled in from here.
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <stdio.h>  // FILE
#endif

#include <stdint.h>  // NOLINT(build/include)
#include <stddef.h>  // size_t

// allpass filter coefficients.
static const uint16_t kResampleAllpass1[3] = {3284, 24441, 49528};
static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255};

// Input/output sample buffers used by the AGC process wrapper.
typedef struct BufferAgc {
  size_t frame_size;
  size_t in_uselen;
  size_t out_uselen;
  int16_t *buffer_in;
  int16_t *buffer_out;
} BufferAgc;

// State of the energy based voice activity detector.
typedef struct VadEnergy {
  size_t sil2spe;       // silence to speech
  size_t spe2sil;       // speech to silence
  size_t amth;          // threshold for energy based vad
  size_t VAD;           // 0(silence) or 1(speech)
  size_t specnt;        // count for speech frame
  size_t silcnt;        // count for silence frame
  size_t silforReset;   // reset activeSpeech and Rxx16_LPw32Max
  size_t BeforeSilEnd;  // frames needed to enter the silent segment
  size_t lastSilcent;
} VadEnergy;

// State of the statistical VAD used by the digital AGC.
// The members add up to 54 bytes; sizeof(AgcVad) may be larger because of
// alignment padding.
typedef struct {
  int32_t downState[8];
  int16_t HPstate;
  int16_t counter;
  int16_t logRatio;           // log( P(active) / P(inactive) ) (Q10)
  int16_t meanLongTerm;       // Q10
  int32_t varianceLongTerm;   // Q8
  int16_t stdLongTerm;        // Q10
  int16_t meanShortTerm;      // Q10
  int32_t varianceShortTerm;  // Q8
  int16_t stdShortTerm;       // Q10
} AgcVad;

// State of the digital gain stage.
typedef struct {
  int32_t capacitorSlow;
  int32_t capacitorFast;
  int32_t gain;
  int32_t gainTable[32];
  int16_t gatePrevious;
  int16_t agcMode;
  AgcVad vadNearend;
  AgcVad vadFarend;
#ifdef WEBRTC_AGC_DEBUG_DUMP
  FILE *logFile;
  int frameCounter;
#endif
} DigitalAgc;

// NOTE(review): the prototypes in this group are declared outside the
// extern "C" block further down, so a C++ translation unit sees them with
// C++ linkage. Confirm this matches how the implementation is compiled.
void buffer_agcProcess_init(BufferAgc *hd);

void buffer_agcProcess_deinit(void *hd);

void vad_init(VadEnergy *state);

int32_t WebRtcAgc_InitDigital(DigitalAgc *digitalAgcInst, int16_t agcMode);

int32_t WebRtcAgc_ProcessDigital(DigitalAgc *digitalAgcInst,
                                 int16_t *const *inNear,
                                 size_t num_bands,
                                 int16_t *const *out,
                                 uint32_t FS,
                                 int16_t lowLevelSignal);

int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc *digitalAgcInst,
                                     const int16_t *inFar,
                                     size_t nrSamples);

void WebRtcAgc_InitVad(AgcVad *vadInst);

int16_t WebRtcAgc_ProcessVad(AgcVad *vadInst,    // (i) VAD state
                             const int16_t *in,  // (i) Speech signal
                             size_t nrSamples);  // (i) number of samples

int32_t WebRtcAgc_CalculateGainTable(int32_t *gainTable,         // Q16
                                     int16_t compressionGaindB,  // Q0 (in dB)
                                     int16_t targetLevelDbfs,    // Q0 (in dB)
                                     uint8_t limiterEnable,
                                     int16_t analogTarget);

// Errors
#define AGC_UNSPECIFIED_ERROR 18000
#define AGC_UNSUPPORTED_FUNCTION_ERROR 18001
#define AGC_UNINITIALIZED_ERROR 18002
#define AGC_NULL_POINTER_ERROR 18003
#define AGC_BAD_PARAMETER_ERROR 18004

// Warnings
#define AGC_BAD_PARAMETER_WARNING 18050

enum {
  kAgcModeUnchanged,
  kAgcModeAdaptiveAnalog,
  kAgcModeAdaptiveDigital,
  kAgcModeFixedDigital
};

enum { kAgcFalse = 0, kAgcTrue };

typedef struct {
  int16_t targetLevelDbfs;    // default 3 (-3 dBOv)
  int16_t compressionGaindB;  // default 9 dB
  uint8_t limiterEnable;      // default kAgcTrue (on)
} WebRtcAgcConfig;

#if defined(__cplusplus)
extern "C" {
#endif

/*
 * This function analyses the number of samples passed to
 * farend and produces any error code that could arise.
 *
 * Input:
 *      - state             : AGC instance.
 *      - samples           : Number of samples in input vector.
 *
 * Return value:
 *                          :  0 - Normal operation.
 *                          : -1 - Error.
 */
int WebRtcAgc_GetAddFarendError(void *state, size_t samples);

/*
 * This function processes a 10 ms frame of far-end speech to determine
 * if there is active speech. The length of the input speech vector must be
 * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
 * FS=48000).
 *
 * Input:
 *      - agcInst           : AGC instance.
 *      - inFar             : Far-end input speech vector
 *      - samples           : Number of samples in input vector
 *
 * Return value:
 *                          :  0 - Normal operation.
 *                          : -1 - Error
 */
int WebRtcAgc_AddFarend(void *agcInst, const int16_t *inFar, size_t samples);

/*
 * This function processes a 10 ms frame of microphone speech to determine
 * if there is active speech. The length of the input speech vector must be
 * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
 * FS=48000). For very low input levels, the input signal is increased in level
 * by multiplying and overwriting the samples in inMic[].
 *
 * This function should be called before any further processing of the
 * near-end microphone signal.
 *
 * Input:
 *      - agcInst           : AGC instance.
 *      - inMic             : Microphone input speech vector for each band
 *      - num_bands         : Number of bands in input vector
 *      - samples           : Number of samples in input vector
 *
 * Return value:
 *                          :  0 - Normal operation.
 *                          : -1 - Error
 */
int WebRtcAgc_AddMic(void *agcInst,
                     int16_t *const *inMic,
                     size_t num_bands,
                     size_t samples);

/*
 * This function replaces the analog microphone with a virtual one.
 * It is a digital gain applied to the input signal and is used in the
 * agcAdaptiveDigital mode where no microphone level is adjustable. The length
 * of the input speech vector must be given in samples (80 when FS=8000, and 160
 * when FS=16000, FS=32000 or FS=48000).
 *
 * Input:
 *      - agcInst           : AGC instance.
 *      - inMic             : Microphone input speech vector for each band
 *      - num_bands         : Number of bands in input vector
 *      - samples           : Number of samples in input vector
 *      - micLevelIn        : Input level of microphone (static)
 *
 * Output:
 *      - inMic             : Microphone output after processing, for each
 *                            band (there is no separate high-band argument
 *                            in this signature)
 *      - micLevelOut       : Adjusted microphone level after processing
 *
 * Return value:
 *                          :  0 - Normal operation.
 *                          : -1 - Error
 */
int WebRtcAgc_VirtualMic(void *agcInst,
                         int16_t *const *inMic,
                         size_t num_bands,
                         size_t samples,
                         int32_t micLevelIn,
                         int32_t *micLevelOut);

/*
 * This function processes a 10 ms frame and adjusts (normalizes) the gain both
 * analog and digitally. The gain adjustments are done only during active
 * periods of speech. The length of the speech vectors must be given in samples
 * (80 when FS=8000, and 160 when FS=16000, FS=32000 or FS=48000). The echo
 * parameter can be used to ensure the AGC will not adjust upward in the
 * presence of echo.
 *
 * This function should be called after processing the near-end microphone
 * signal, in any case after any echo cancellation.
 *
 * Input:
 *      - agcInst           : AGC instance
 *      - inNear            : Near-end input speech vector for each band
 *      - num_bands         : Number of bands in input/output vector
 *      - samples           : Number of samples in input/output vector
 *      - inMicLevel        : Current microphone volume level
 *      - echo              : Set to 0 if the signal passed to add_mic is
 *                            almost certainly free of echo; otherwise set
 *                            to 1. If you have no information regarding echo
 *                            set to 0.
 *
 * Output:
 *      - outMicLevel       : Adjusted microphone volume level
 *      - out               : Gain-adjusted near-end speech vector
 *                          : May be the same vector as the input.
 *      - saturationWarning : A returned value of 1 indicates a saturation event
 *                            has occurred and the volume cannot be further
 *                            reduced. Otherwise will be set to 0.
 *
 * Return value:
 *                          :  0 - Normal operation.
 *                          : -1 - Error
 */
int WebRtcAgc_Process(void *agcInst,
                      int16_t *const *inNear,
                      size_t num_bands,
                      size_t samples,
                      int16_t *const *out,
                      int32_t inMicLevel,
                      int32_t *outMicLevel,
                      int16_t echo,
                      uint8_t *saturationWarning);

/*
 * This function sets the config parameters (targetLevelDbfs,
 * compressionGaindB and limiterEnable).
 *
 * Input:
 *      - agcInst           : AGC instance
 *      - config            : config struct
 *
 * Output:
 *
 * Return value:
 *                          :  0 - Normal operation.
 *                          : -1 - Error
 */
int WebRtcAgc_set_config(void *agcInst, WebRtcAgcConfig config);

/*
 * This function returns the config parameters (targetLevelDbfs,
 * compressionGaindB and limiterEnable).
 *
 * Input:
 *      - agcInst           : AGC instance
 *
 * Output:
 *      - config            : config struct
 *
 * Return value:
 *                          :  0 - Normal operation.
 *                          : -1 - Error
 */
int WebRtcAgc_get_config(void *agcInst, WebRtcAgcConfig *config);

/*
 * This function creates and returns an AGC instance, which will contain the
 * state information for one (duplex) channel.
 */
void *WebRtcAgc_Create(void);

/*
 * This function frees the AGC instance created at the beginning.
 *
 * Input:
 *      - agcInst           : AGC instance.
 */
void WebRtcAgc_Free(void *agcInst);

/*
 * This function initializes an AGC instance.
 *
 * Input:
 *      - agcInst           : AGC instance.
 *      - minLevel          : Minimum possible mic level
 *      - maxLevel          : Maximum possible mic level
 *      - agcMode           : 0 - Unchanged
 *                          : 1 - Adaptive Analog Automatic Gain Control -3dBOv
 *                          : 2 - Adaptive Digital Automatic Gain Control -3dBOv
 *                          : 3 - Fixed Digital Gain 0dB
 *      - fs                : Sampling frequency
 *
 * Return value             :  0 - Ok
 *                            -1 - Error
 */
int WebRtcAgc_Init(void *agcInst,
                   int32_t minLevel,
                   int32_t maxLevel,
                   int16_t agcMode,
                   uint32_t fs);

#if defined(__cplusplus)
}
#endif

/* Analog Automatic Gain Control variables:
 * Constant declarations (inner limits inside which no changes are done)
 * In the beginning the range is narrower to widen as soon as the measure
 * 'Rxx160_LP' is inside it. Currently the starting limits are -22.2+/-1dBm0
 * and the final limits -22.2+/-2.5dBm0. These levels make the speech signal
 * go towards -25.4dBm0 (-31.4dBov). Tuned with wbfile-31.4dBov.pcm
 * The limits are created by running the AGC with a file having the desired
 * signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined
 * by out=10*log10(in/260537279.7); Set the target level to the average level
 * of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in
 * Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) )
 */
#define RXX_BUFFER_LEN 10

static const int16_t kMsecSpeechInner = 190;  // 520
static const int16_t kMsecSpeechOuter = 120;  // 340

static const int16_t kNormalVadThreshold = 400;

// NOTE(review): the previous comments here read "1 >> 6 = 0.0156" and
// "1 >> 10 = 0.000977", i.e. they described exponents 6 and 10 rather than
// the values defined below. The figures now match the defined values,
// presumably used as right-shift amounts; confirm the intended setting.
static const int16_t kAlphaShortTerm = 4;  // 2^-4 = 0.0625
static const int16_t kAlphaLongTerm = 8;   // 2^-8 = 0.0039

typedef struct {
  // Configurable parameters/variables
  uint32_t fs;                // Sampling frequency
  int16_t compressionGaindB;  // Fixed gain level in dB
  int16_t targetLevelDbfs;    // Target level in -dBfs of envelope (default -3)
  int16_t agcMode;            // Hard coded mode (adaptAna/adaptDig/fixedDig)
  uint8_t limiterEnable;      // Enabling limiter (on/off (default off))
  WebRtcAgcConfig defaultConfig;
  WebRtcAgcConfig usedConfig;

  // General variables
  int16_t initFlag;
  int16_t lastError;

  // Target level parameters
  // Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7)
  int32_t analogTargetLevel;    // = RXX_BUFFER_LEN * 846805;       -22 dBfs
  int32_t startUpperLimit;      // = RXX_BUFFER_LEN * 1066064;      -21 dBfs
  int32_t startLowerLimit;      // = RXX_BUFFER_LEN * 672641;       -23 dBfs
  int32_t upperPrimaryLimit;    // = RXX_BUFFER_LEN * 1342095;      -20 dBfs
  int32_t lowerPrimaryLimit;    // = RXX_BUFFER_LEN * 534298;       -24 dBfs
  int32_t upperSecondaryLimit;  // = RXX_BUFFER_LEN * 2677832;      -17 dBfs
  int32_t lowerSecondaryLimit;  // = RXX_BUFFER_LEN * 267783;       -27 dBfs
  uint16_t targetIdx;           // Table index for corresponding target level
#ifdef MIC_LEVEL_FEEDBACK
  uint16_t targetIdxOffset;  // Table index offset for level compensation
#endif
  int16_t analogTarget;  // Digital reference level in ENV scale

  // Analog AGC specific variables
  int32_t filterState[8];  // For downsampling wb to nb
  int32_t upperLimit;      // Upper limit for mic energy
  int32_t lowerLimit;      // Lower limit for mic energy
  int32_t Rxx160w32;       // Average energy for one frame
  int32_t Rxx16_LPw32;     // Low pass filtered subframe energies
  int32_t Rxx160_LPw32;    // Low pass filtered frame energies
  int32_t Rxx16_LPw32Max;  // Keeps track of largest energy subframe
  int32_t Rxx16_vectorw32[RXX_BUFFER_LEN];  // Array with subframe energies
  int32_t Rxx16w32_array[2][5];  // Energy values of microphone signal
  int32_t env[2][10];            // Envelope values of subframes

  int16_t Rxx16pos;          // Current position in the Rxx16_vectorw32
  int16_t envSum;            // Filtered scaled envelope in subframes
  int16_t vadThreshold;      // Threshold for VAD decision
  int16_t inActive;          // Inactive time in milliseconds
  int16_t msTooLow;          // Milliseconds of speech at a too low level
  int16_t msTooHigh;         // Milliseconds of speech at a too high level
  int16_t changeToSlowMode;  // Change to slow mode after some time at target
  int16_t firstCall;         // First call to the process-function
  int16_t msZero;            // Milliseconds of zero input
  int16_t msecSpeechOuterChange;  // Min ms of speech between volume changes
  int16_t msecSpeechInnerChange;  // Min ms of speech between volume changes
  int16_t activeSpeech;           // Milliseconds of active speech
  int16_t muteGuardMs;            // Counter to prevent mute action
  int16_t inQueue;                // 10 ms batch indicator

  // Microphone level variables
  int32_t micRef;         // Remember ref. mic level for virtual mic
  uint16_t gainTableIdx;  // Current position in virtual gain table
  int32_t micGainIdx;     // Gain index of mic level to increase slowly
  int32_t micVol;         // Remember volume between frames
  int32_t maxLevel;       // Max possible vol level, incl dig gain
  int32_t maxAnalog;      // Maximum possible analog volume level
  int32_t maxInit;        // Initial value of "max"
  int32_t minLevel;       // Minimum possible volume level
  int32_t minOutput;      // Minimum output volume level
  int32_t zeroCtrlMax;    // Remember max gain => don't amp low input
  int32_t lastInMicLevel;
  int16_t msChangeLarge;
  int32_t silEnd_micVol;  // volume when silent is (spe2sil-10)

  int16_t scale;  // Scale factor for internal volume levels
#ifdef MIC_LEVEL_FEEDBACK
  int16_t numBlocksMicLvlSat;
  uint8_t micLvlSat;
#endif

  // Structs for VAD and digital_agc
  AgcVad vadMic;
  DigitalAgc digitalAgc;

  // AGC process buffer
  BufferAgc bufferAgc;

  // Energy VAD
  VadEnergy vad_energy;

#ifdef WEBRTC_AGC_DEBUG_DUMP
  FILE *fpt;
  FILE *agcLog;
  int32_t fcount;
#endif

  int16_t lowLevelSignal;
} LegacyAgc;

#endif  // MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_