#include #include #include #include "timing.h" #include "log/logger.h" #include "common.h" #include #include #include #include #include "alsa_dev.h" using namespace std; using namespace toolkit; #define MIX_INPUT_CHANNELS 2 #define MIX_INPUT_SAMPLES (10 * MIX_INPUT_SAMPLE_RATE/1000) #define MIX_INPUT_SAMPLE_RATE 44100 struct audio_buf_t { uint8_t* data; int index; int size; }; struct RtmpConfig { char url[1024]; AVFormatContext *formatCtx; AVStream *stream; AVCodecContext *codecCtx; SwrContext *swrCtx; std::thread *thread; std::mutex *mutex; bool quit; }; static SampleInfo kPcmSampleInfo; //---------------------------------------------- #define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b)) // 最大/小音量（db） #define MIN_DB (-10) #define MAX_DB (20) // 最大/小音量： 0: 静音； 100:最大音量 #define MUTE_VOLUME (0) #define MAX_VOLUME (100) static int vol_scaler_init(int *scaler, int mindb, int maxdb); typedef struct VolumeCtlUnit { int scaler[MAX_VOLUME + 1]; // 音量表 int zeroDb; // 0db在scaler中的索引 // 自定义需要调节的音量 int micVolume; VolumeCtlUnit() { // 音量控制器初始化 zeroDb = vol_scaler_init(scaler, MIN_DB, MAX_DB); micVolume = 100; } } volume_ctl_unit_t; static volume_ctl_unit_t kVolCtrlUnit; static int vol_scaler_init(int *scaler, int mindb, int maxdb) { double tabdb[MAX_VOLUME + 1]; double tabf [MAX_VOLUME + 1]; int z, i; for (i = 0; i < (MAX_VOLUME + 1); i++) { // (mindb, maxdb)平均分成(MAX_VOLUME + 1)份 tabdb[i] = mindb + (maxdb - mindb) * i / (MAX_VOLUME + 1); // dB = 20 * log(A1 / A2)，当A1，A2相等时，db为0 // 这里以(1 << 14)作为原始声音振幅，得到调节后的振幅(A1),将A1存入音量表中 tabf [i] = pow(10.0, tabdb[i] / 20.0); scaler[i] = (int)((1 << 14) * tabf[i]); // Q14 fix point } z = -mindb * (MAX_VOLUME + 1) / (maxdb - mindb); z = MAX(z, 0 ); z = MIN(z, MAX_VOLUME); scaler[0] = 0; // 音量表中，0标识静音 scaler[z] = (1 << 14);// (mindb, maxdb)的中间值作为0db，即不做增益处理 return z; } static void vol_scaler_run(int16_t *buf, int n, int volume) { /* 简易版 while (n--) { *buf = (*buf) * multiplier / 100.0; *buf = std::max((int)*buf, -0x7fff); *buf = std::min((int)*buf, 0x7fff); buf++; } */ int multiplier = kVolCtrlUnit.scaler[volume]; if (multiplier > (1 << 14)) { int32_t v; while (n--) { v = ((int32_t)*buf * multiplier) >> 14; v = MAX(v,-0x7fff); v = MIN(v, 0x7fff); *buf++ = (int16_t)v; } } else if (multiplier < (1 << 14)) { while (n--) { *buf = ((int32_t)*buf * multiplier) >> 14; buf++; } } } //---------------------------------------------- webrtc::AudioProcessing::Config webtcConfigInit() { webrtc::AudioProcessing::Config apmConfig; apmConfig.pipeline.maximum_internal_processing_rate = MIX_INPUT_SAMPLE_RATE; apmConfig.pipeline.multi_channel_capture = true; apmConfig.pipeline.multi_channel_render = true; //PreAmplifier apmConfig.pre_amplifier.enabled = false; apmConfig.pre_amplifier.fixed_gain_factor = 0.7f; //HighPassFilter apmConfig.high_pass_filter.enabled = false; apmConfig.high_pass_filter.apply_in_full_band = false; //EchoCanceller apmConfig.echo_canceller.enabled = false; apmConfig.echo_canceller.mobile_mode = false; apmConfig.echo_canceller.export_linear_aec_output = false; apmConfig.echo_canceller.enforce_high_pass_filtering = true; //NoiseSuppression apmConfig.noise_suppression.enabled = true; apmConfig.noise_suppression.level = webrtc::AudioProcessing::Config::NoiseSuppression::kHigh; apmConfig.noise_suppression.analyze_linear_aec_output_when_available = false; //TransientSuppression apmConfig.transient_suppression.enabled = false; //VoiceDetection apmConfig.voice_detection.enabled = true; //GainController1 apmConfig.gain_controller1.enabled = true; apmConfig.gain_controller1.mode = webrtc::AudioProcessing::Config::GainController1::kAdaptiveAnalog; apmConfig.gain_controller1.target_level_dbfs = 3; apmConfig.gain_controller1.compression_gain_db = 12; apmConfig.gain_controller1.enable_limiter = true; apmConfig.gain_controller1.analog_level_minimum = 0; apmConfig.gain_controller1.analog_level_maximum = 496; apmConfig.gain_controller1.analog_gain_controller.enabled = true; apmConfig.gain_controller1.analog_gain_controller.startup_min_volume = webrtc::kAgcStartupMinVolume; apmConfig.gain_controller1.analog_gain_controller.clipped_level_min = webrtc::kClippedLevelMin; apmConfig.gain_controller1.analog_gain_controller.enable_agc2_level_estimator = false; apmConfig.gain_controller1.analog_gain_controller.enable_digital_adaptive = true; //GainController2 apmConfig.gain_controller2.enabled = false; apmConfig.gain_controller2.fixed_digital.gain_db = 0.f; apmConfig.gain_controller2.adaptive_digital.enabled = false; apmConfig.gain_controller2.adaptive_digital.vad_probability_attack = 1.f; apmConfig.gain_controller2.adaptive_digital.level_estimator = webrtc::AudioProcessing::Config::GainController2::kRms; apmConfig.gain_controller2.adaptive_digital.level_estimator_adjacent_speech_frames_threshold = 1; apmConfig.gain_controller2.adaptive_digital.use_saturation_protector = true; apmConfig.gain_controller2.adaptive_digital.initial_saturation_margin_db = 20.f; apmConfig.gain_controller2.adaptive_digital.extra_saturation_margin_db = 2.f; apmConfig.gain_controller2.adaptive_digital.gain_applier_adjacent_speech_frames_threshold = 1; apmConfig.gain_controller2.adaptive_digital.max_gain_change_db_per_second = 3.f; apmConfig.gain_controller2.adaptive_digital.max_output_noise_level_dbfs = -50.f; //ResidualEchoDetector apmConfig.residual_echo_detector.enabled = false; //LevelEstimation apmConfig.level_estimation.enabled = false; return apmConfig; } void pullDestory(RtmpConfig *config) { if (config->formatCtx) avformat_close_input(&config->formatCtx); if (config->codecCtx) { avcodec_close(config->codecCtx); avcodec_free_context(&config->codecCtx); } if (config->swrCtx) { swr_close(config->swrCtx); swr_free(&config->swrCtx); } } int pullInit(RtmpConfig *config, int channels, AVSampleFormat format, int sample_rate) { if (nullptr == strstr(config->url, "rtmp://")) { LogE("url error, url: %s\n", config->url); return -1; } int ret = 0; int scan_all_pmts_set = 0; int st_index = -1; AVDictionary *format_opts = nullptr; AVFormatContext *ic = nullptr; AVCodecParameters *codecPar = nullptr; AVCodec *codec = nullptr; AVCodecContext *codecCtx = nullptr; SwrContext *swrCtx = nullptr; ic = avformat_alloc_context(); if (!ic) { throw(std::runtime_error("avformat_alloc_context failed.")); } if (!av_dict_get(format_opts, "scan_all_pmts", NULL, AV_DICT_MATCH_CASE)) { av_dict_set(&format_opts, "scan_all_pmts", "1", AV_DICT_DONT_OVERWRITE); scan_all_pmts_set = 1; } // 禁用缓冲 av_dict_set(&format_opts, "fflags", "nobuffer", AV_DICT_MATCH_CASE); // 设置媒体流分析最大字节数 av_dict_set(&format_opts, "probesize", "10000", AV_DICT_MATCH_CASE); retry: // 打开输入流 ret = avformat_open_input(&ic, config->url, nullptr, &format_opts); if (ret < 0) { LogE("avformat_open_input failed.\n"); goto fail; } if (scan_all_pmts_set) av_dict_set(&format_opts, "scan_all_pmts", nullptr, AV_DICT_MATCH_CASE); av_format_inject_global_side_data(ic); ret = avformat_find_stream_info(ic, nullptr); if (ret < 0) { // LOG(ERROR) << url << ": could not find codec parameters"; LogE("{} : could not find codec parameters\n", config->url); goto fail; } if (ic->pb) ic->pb->eof_reached = 0; // 打印输入流参数 av_dump_format(ic, 0, config->url, 0); st_index = av_find_best_stream(ic, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0); if (st_index >= 0) { // config->stream = ic->streams[st_index]; } else { LogW("find audio stream failed, try again.\n"); avformat_close_input(&ic); goto retry; } // 初始化解码器 codecPar = config->stream->codecpar; codec = avcodec_find_decoder(codecPar->codec_id); if (!codec) { LogE("find codec failed.\n"); goto fail; } codecCtx = avcodec_alloc_context3(codec); if (!codecCtx) { LogE("avcodec_alloc_context3 failed.\n"); goto fail; } ret = avcodec_parameters_to_context(codecCtx, codecPar); if (ret < 0) { LogE("avcodec_parameters_to_context\n"); goto fail; } codecCtx->time_base = config->stream->time_base; // 打开解码器 if (avcodec_open2(codecCtx, codec, nullptr) < 0){ LogE("avcodec_open2 failed\n"); goto fail; } // 重采样初始化 swrCtx = swr_alloc_set_opts(nullptr, av_get_default_channel_layout(channels), format, sample_rate, codecCtx->channel_layout, codecCtx->sample_fmt, codecCtx->sample_rate, 0, nullptr); if (!swrCtx) { LogE("swr_alloc_set_opts failed.\n"); goto fail; } swr_init(swrCtx); config->formatCtx = ic; config->codecCtx = codecCtx; config->swrCtx = swrCtx; config->stream->discard = AVDISCARD_DEFAULT; av_dict_free(&format_opts); return 0; fail: if (format_opts) av_dict_free(&format_opts); if (ic) avformat_close_input(&ic); if (codecCtx) { avcodec_close(codecCtx); avcodec_free_context(&codecCtx); } if (swrCtx) { swr_close(swrCtx); swr_free(&swrCtx); } return -1; } void playbackLoop(RtmpConfig *rtmp, std::vector *list, webrtc::AudioProcessing *apm, alsa::AlsaDev* play); int main(int argc, char *argv[]) { if (argc < 3) { fprintf(stderr, "usage %s card_num url\n", argv[0]); return -1; } //初始化日志系统 Logger::Instance().add(std::make_shared ()); Logger::Instance().add(std::make_shared()); Logger::Instance().setWriter(std::make_shared()); // 初始化声卡设备 int card = atoi(argv[1]); alsa::Config alsaConfig; alsaConfig.period_time = 10000; alsaConfig.buffer_time = 50000; alsaConfig.channels = MIX_INPUT_CHANNELS; alsaConfig.format = SND_PCM_FORMAT_S16_LE; alsaConfig.rate = MIX_INPUT_SAMPLE_RATE; if (card < 0) sprintf(alsaConfig.device, "default"); else sprintf(alsaConfig.device, "plughw:%d", card); alsa::AlsaDev usbPlaybackDev; if (usbPlaybackDev.applyConfig(alsaConfig) < 0) { PrintE("alsa config failed.\n"); return -1; } // PrintI("alsa before init: %s\n", usbPlaybackDev.configToString()); if (usbPlaybackDev.init(SND_PCM_STREAM_PLAYBACK) < 0) { PrintE("alsa init failed.\n"); return -1; } PrintI("alsa init: %s\n", usbPlaybackDev.configToString()); // webrtc初始化 webrtc::AudioProcessing *apm = webrtc::AudioProcessingBuilder().Create(); if (!apm) { LogI("create apm failed.\n"); return -1; } webrtc::AudioProcessing::Config apmConfig = webtcConfigInit(); apm->ApplyConfig(apmConfig); apm->Initialize(); apm->set_stream_analog_level(408); LogI("webrtc params: {\n%s\n}\n", apmConfig.ToString().c_str()); // 拉流初始化 RtmpConfig rtmp; memset(&rtmp, 0, sizeof(rtmp)); strcpy(rtmp.url, argv[2]); if (pullInit(&rtmp, MIX_INPUT_CHANNELS, AV_SAMPLE_FMT_S16, MIX_INPUT_SAMPLE_RATE) < 0) { return -1; } AVPacket *pkt = av_packet_alloc(); AVFrame *outputFrame = av_frame_alloc(); int maxBuffSize = 1024 * 4 * 2; uint8_t *swrBuffer = (uint8_t *)calloc(maxBuffSize, sizeof(uint8_t)); int ret; std::vector swr_list; rtmp.mutex = new std::mutex; rtmp.thread = new std::thread(playbackLoop, &rtmp, &swr_list, apm, &usbPlaybackDev); rtmp.quit = false; while (true) { if (av_read_frame(rtmp.formatCtx, pkt) >= 0 && pkt->stream_index == rtmp.stream->index) { ret = avcodec_send_packet(rtmp.codecCtx, pkt); if (ret == AVERROR(EAGAIN)) { LogW("send packet again.\n"); av_usleep(10*1000); continue; } else if (ret < 0) { LogE("send packet error ret={}\n", ret); break; } while ( avcodec_receive_frame(rtmp.codecCtx, outputFrame) >= 0 ) { int outSamples = swr_convert(rtmp.swrCtx, &swrBuffer, maxBuffSize/(sizeof(int16_t) * MIX_INPUT_CHANNELS), (uint8_t const **) (outputFrame->data), outputFrame->nb_samples); int size = outSamples * MIX_INPUT_CHANNELS * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16); { int size = outSamples * MIX_INPUT_CHANNELS * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16); uint8_t *buffer = (uint8_t *)calloc(size, sizeof(uint8_t)); memcpy(buffer, swrBuffer, size); std::unique_lock lck(*rtmp.mutex); audio_buf_t out; out.data = buffer; out.index = 0; out.size = size; swr_list.emplace_back(out); // if (out_fp) fwrite(buffer, 1, size, out_fp); } } av_frame_unref(outputFrame); } av_packet_unref(pkt); } if (apm) { delete apm; apm = nullptr; } pullDestory(&rtmp); return 0; } void playbackLoop(RtmpConfig *rtmp, std::vector *list, webrtc::AudioProcessing *apm, alsa::AlsaDev* play) { // webrtc::StreamConfig playConfig; playConfig.set_has_keyboard(false); playConfig.set_num_channels(kPcmSampleInfo.channels); playConfig.set_sample_rate_hz(kPcmSampleInfo.sample_rate); int sampleSize = 0; int outSize = MIX_INPUT_SAMPLES * MIX_INPUT_CHANNELS * sizeof(int16_t); uint8_t *outBuffer = (uint8_t *)calloc(outSize, sizeof(uint8_t)); // FILE *out_fp = fopen("/root/swr_out.pcm", "wb"); while (!rtmp->quit) { // 获取 MIX_INPUT_SAMPLES 长度的解码音频，填充到outBuffer中 sampleSize = outSize; while (sampleSize > 0) { if (list->size() <= 0) { av_usleep(1000); continue; } std::unique_lock lck(*rtmp->mutex); auto data = list->begin(); int readSize = sampleSize < (data->size - data->index) ? sampleSize : (data->size - data->index); memcpy(outBuffer + outSize - sampleSize, data->data + data->index, readSize); sampleSize -= readSize; data->index += readSize; if (data->index >= data->size) { free(data->data); list->erase(list->begin()); } } // if (out_fp) fwrite(outBuffer, 1, outSize, out_fp); // 音频处理 { apm->ProcessStream((int16_t *)outBuffer, playConfig, playConfig, (int16_t *)outBuffer); } play->write(outBuffer, outSize); } }