audio_demo/main.cpp
2025-02-14 08:58:27 +08:00

430 lines
14 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <iostream>
#include "ns/noise_suppressor.h"
#include "common.h"
#include <cmath>
#include "agcm.h"
#define MIX_INPUT_CHANNELS 2
// #define MIX_INPUT_SAMPLES (10 * MIX_INPUT_SAMPLE_RATE/1000)
#define MIX_INPUT_SAMPLES 1440
// #define MIX_INPUT_SAMPLES 1024
#define MIX_INPUT_SAMPLE_RATE 48000
//----------------------------------------------
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
// 最大/小音量db
#define MIN_DB (-10)
#define MAX_DB (60)
// 最大/小音量: 0: 静音; 100:最大音量
#define MUTE_VOLUME (0)
#define MAX_VOLUME (100)
// Forward declaration: fills the Q14 gain table and returns the unity (0 dB) index.
static int vol_scaler_init(int *scaler, int mindb, int maxdb);
// Volume-control state: a precomputed per-level gain table plus the
// user-adjustable mic level that indexes into it.
typedef struct VolumeCtlUnit
{
int scaler[MAX_VOLUME + 1]; // gain table: Q14 fixed-point multiplier per volume level 0..100
int zeroDb; // index into scaler[] holding exactly unity gain (0 dB)
// user-adjustable volume applied to the mic capture path
int micVolume;
VolumeCtlUnit() {
// build the gain table over [MIN_DB, MAX_DB] at static-init time
zeroDb = vol_scaler_init(scaler, MIN_DB, MAX_DB);
micVolume = 100;
}
} volume_ctl_unit_t;
// Global singleton; constructed (and table built) before main() runs.
static volume_ctl_unit_t kVolCtrlUnit;
/*
 * Build a (MAX_VOLUME + 1)-entry Q14 gain table spanning [mindb, maxdb].
 *
 * scaler[i] = (1 << 14) * 10^(dB(i) / 20), where dB(i) is spread linearly
 * from mindb (i == 0) to maxdb (i == MAX_VOLUME).  scaler[0] is forced to 0
 * (mute) and the index closest to 0 dB is forced to exact unity gain.
 *
 * Returns the index of the unity-gain (0 dB) entry.
 *
 * Fixes vs. the original:
 *  - divide by MAX_VOLUME, not MAX_VOLUME + 1: with N+1 sample points the
 *    spacing divisor is N, otherwise index MAX_VOLUME never reaches maxdb
 *    (it stopped ~0.7 dB short with the default -10..60 range);
 *  - compute the dB value in floating point: the original
 *    `(maxdb - mindb) * i / (MAX_VOLUME + 1)` was all-integer division,
 *    quantizing every table entry to whole-dB steps.
 */
static int vol_scaler_init(int *scaler, int mindb, int maxdb)
{
    double tabdb[MAX_VOLUME + 1];
    double tabf [MAX_VOLUME + 1];
    int z, i;
    for (i = 0; i < (MAX_VOLUME + 1); i++) {
        // linear dB ramp: i == 0 -> mindb, i == MAX_VOLUME -> maxdb
        tabdb[i] = mindb + (double)(maxdb - mindb) * i / MAX_VOLUME;
        // amplitude ratio for that dB value (dB = 20 * log10(A1 / A2),
        // with (1 << 14) taken as the reference amplitude A2)
        tabf [i] = pow(10.0, tabdb[i] / 20.0);
        scaler[i] = (int)((1 << 14) * tabf[i]); // Q14 fixed point
    }
    // index whose dB value is closest to 0 dB (matching the divisor above)
    z = -mindb * MAX_VOLUME / (maxdb - mindb);
    z = MAX(z, 0);
    z = MIN(z, MAX_VOLUME);
    scaler[0] = 0;          // index 0 is mute
    scaler[z] = (1 << 14);  // force exact unity gain at the 0 dB index
    return z;
}
/*
 * Scale n interleaved int16 samples in place by the Q14 gain for `volume`
 * (0..100, an index into the global gain table).  Amplification (> unity)
 * saturates symmetrically to +/-0x7fff; attenuation cannot clip, so it is
 * applied without clamping.  Unity gain is a no-op.
 */
static void vol_scaler_run(int16_t *buf, int n, int volume)
{
    const int gain = kVolCtrlUnit.scaler[volume];
    if (gain == (1 << 14))
        return; // unity gain: leave the samples untouched

    const bool amplify = gain > (1 << 14);
    for (int i = 0; i < n; i++) {
        int32_t s = ((int32_t)buf[i] * gain) >> 14;
        if (amplify) {
            // only amplification can exceed the int16 range
            if (s > 0x7fff)
                s = 0x7fff;
            else if (s < -0x7fff)
                s = -0x7fff;
        }
        buf[i] = (int16_t)s;
    }
}
//----------------------------------------------
// USB sound-card capture device handle
static rkStreamPtr usbCaptureDev = nullptr;
static SampleInfo kPcmSampleInfo;
// noise suppression (WebRTC audio processing)
using namespace webrtc;
// A heap-allocated audio chunk with a read cursor; used to re-frame the
// variable-size capture buffers into the fixed block size required by the
// noise suppressor.  Whoever consumes the last byte must free() data.
struct audio_buf_t
{
uint8_t* data; // malloc'd payload
int index; // read offset in bytes
int size; // total payload size in bytes
};
/*
 * Demo pipeline: USB capture -> channel split -> volume scaling ->
 * WebRTC noise suppression -> swresample (S16 interleaved -> FLTP) ->
 * AAC encode -> FLV over RTMP.
 *
 * Fixes vs. the original:
 *  - argc check was `argc < 1` (never true) while argv[1] was read
 *    unconditionally -> out-of-bounds read when run with no arguments;
 *  - lineBuffer (malloc) was released with av_free (allocator mismatch);
 *  - av_write_trailer() was never called before closing the muxer;
 *  - avio_close() lacked the AVFMT_NOFILE guard used at open time;
 *  - avcodec_close() leaked the codec context (use avcodec_free_context);
 *  - samples_data and input_fp were leaked.
 */
int main(int argc, char *argv[])
{
    // argv[1] (the RTMP url) is required below, so demand at least 2 args.
    if (argc < 2) {
        printf("usage: %s rtmp_url \n", argv[0]);
        return 0;
    }
    std::string url = argv[1];
    // PCM capture parameters
    kPcmSampleInfo.channels = MIX_INPUT_CHANNELS;
    kPcmSampleInfo.fmt = SAMPLE_FMT_S16;
    kPcmSampleInfo.sample_rate = MIX_INPUT_SAMPLE_RATE;
    kPcmSampleInfo.nb_samples = MIX_INPUT_SAMPLES;
    // open the sound-card capture stream
    RkStreamInit(2, capture, kPcmSampleInfo, usbCaptureDev);
    // capture buffer
    int ret = 0;
    std::shared_ptr<easymedia::SampleBuffer> capData = nullptr;
    int buffer_size = GetSampleSize(kPcmSampleInfo) * kPcmSampleInfo.nb_samples;
    void *ptr = malloc(buffer_size);
    capData = std::make_shared<easymedia::SampleBuffer>(
        easymedia::MediaBuffer(ptr, buffer_size, -1, ptr, free_memory),
        kPcmSampleInfo);
    assert(capData);
    uint8_t *lineBuffer = nullptr;
    size_t lineSize = buffer_size;
    lineBuffer = (uint8_t*)malloc(lineSize);
    assert(lineBuffer);
    // FFmpeg output setup (RTMP carries FLV)
    AVFormatContext *oc = NULL;
    AVOutputFormat *fmt = NULL;
    AVStream *audio_st = NULL;
    AVCodecContext *c = NULL;
    AVCodecParameters *codec_par = nullptr;
    AVCodec *codec = NULL;
    avformat_alloc_output_context2(&oc, NULL, "flv", url.c_str());
    if (!oc) {
        fprintf(stderr, "Could not create output context\n");
        return -1;
    }
    fmt = oc->oformat;
    // create the audio stream
    audio_st = avformat_new_stream(oc, NULL);
    if (!audio_st) {
        fprintf(stderr, "Could not create audio stream\n");
        return -1;
    }
    codec_par = audio_st->codecpar;
    codec_par->codec_id = AV_CODEC_ID_AAC;
    codec_par->codec_type = AVMEDIA_TYPE_AUDIO;
    codec_par->codec_tag = 0;
    codec_par->bit_rate = 128 * 1024;
    codec_par->sample_rate = MIX_INPUT_SAMPLE_RATE;
    codec_par->channel_layout = av_get_default_channel_layout(MIX_INPUT_CHANNELS);
    codec_par->channels = MIX_INPUT_CHANNELS;
    codec_par->format = AV_SAMPLE_FMT_FLTP; // native AAC encoder wants planar float
    codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
    if (!codec) {
        fprintf(stderr, "Could not find encoder\n");
        return -1;
    }
    c = avcodec_alloc_context3(codec);
    if (avcodec_parameters_to_context(c, codec_par) < 0) {
        fprintf(stderr, "avcodec_parameters_to_context failed.\n");
        return -1;
    }
    // open the encoder
    if (avcodec_open2(c, codec, NULL) < 0) {
        fprintf(stderr, "Could not open encoder\n");
        return -1;
    }
    av_dump_format(oc, 0, url.c_str(), 1);
    // open the network/file I/O context unless the muxer does its own I/O
    if (!(fmt->flags & AVFMT_NOFILE)) {
        if (avio_open(&oc->pb, url.c_str(), AVIO_FLAG_WRITE) < 0) {
            fprintf(stderr, "Could not open '%s'\n", url.c_str());
            return -1;
        }
    }
    // write the container header
    if (avformat_write_header(oc, NULL) < 0) {
        fprintf(stderr, "Error occurred when opening output URL\n");
        return -1;
    }
    AVFrame *frame = av_frame_alloc();
    frame->format = c->sample_fmt;
    frame->channel_layout = c->channel_layout;
    frame->sample_rate = c->sample_rate;
    frame->nb_samples = c->frame_size;
    // allocate the encoder-side sample buffer; frame points into it but
    // does NOT own it (avcodec_fill_audio_frame makes no copy)
    int output_bz = av_samples_get_buffer_size(NULL, c->channels, c->frame_size, c->sample_fmt, 0);
    uint8_t *samples_data = (uint8_t *)av_malloc(output_bz);
    avcodec_fill_audio_frame(frame, c->channels, c->sample_fmt, samples_data, output_bz, 0);
    printf("cap_bz = %d, frame_bz=%d, chn=%d, frame_size=%d, fmt=%d\n",
           buffer_size, output_bz, c->channels, c->frame_size, c->sample_fmt);
    // resampler: S16 interleaved capture -> encoder's planar float
    uint64_t output_channel_layout = av_get_default_channel_layout(c->channels);
    SwrContext *swrCtx = swr_alloc_set_opts(nullptr,
                                            output_channel_layout,
                                            c->sample_fmt,
                                            c->sample_rate,
                                            av_get_default_channel_layout(MIX_INPUT_CHANNELS),
                                            AV_SAMPLE_FMT_S16,
                                            MIX_INPUT_SAMPLE_RATE,
                                            0, nullptr);
    if (!swrCtx) {
        printf("swr_alloc_set_opts failed.\n");
        return 0;
    }
    if (swr_init(swrCtx) < 0) {
        printf("swr_init failed.\n");
        return 0;
    }
    // noise-suppressor setup
    AudioBuffer audio(MIX_INPUT_SAMPLE_RATE, MIX_INPUT_CHANNELS,
                      MIX_INPUT_SAMPLE_RATE, MIX_INPUT_CHANNELS,
                      MIX_INPUT_SAMPLE_RATE, MIX_INPUT_CHANNELS);
    StreamConfig stream_config(MIX_INPUT_SAMPLE_RATE, MIX_INPUT_CHANNELS);
    NsConfig cfg;
    cfg.target_level = NsConfig::SuppressionLevel::k12dB;
    NoiseSuppressor ns(cfg, MIX_INPUT_SAMPLE_RATE, MIX_INPUT_CHANNELS);
    bool split_bands = MIX_INPUT_SAMPLE_RATE > 16000; // NS works on <=16k bands
    std::vector<audio_buf_t> cap_in_list;
    std::vector<audio_buf_t> ns_out_list;
    printf("ns config: %d\n", stream_config.num_samples());
    // streaming loop
    uint64_t pts = 0;
    // FILE *swr_fp = fopen("/root/swr_out.pcm", "wb");
    FILE *input_fp = fopen("/root/input_out.pcm", "wb");
    while (true)
    {
        // capture one buffer of samples
        size_t read_size = usbCaptureDev->Read(capData->GetPtr(), capData->GetSampleSize(), kPcmSampleInfo.nb_samples);
        if (!read_size && errno != EAGAIN) {
            printf("capture error: %s\n", strerror(errno));
            msleep(10);
            continue;
        }
        capData->SetSamples(read_size);
        // channel split: duplicate the right channel (LINE IN) onto both
        // output channels of lineBuffer
        uint8_t *src = (uint8_t*)capData->GetPtr();
        for (int i = 0; i < capData->GetSamples(); i++) {
            int size = capData->GetSampleSize() / capData->GetSampleInfo().channels;
            // left channel (2.4G wireless) — currently unused
            // memcpy(wirelessBuffer + i * 2 * size, src + i * 2 * size, size);
            // memcpy(wirelessBuffer + (i * 2 + 1) * size, src + i * 2 * size, size);
            // right channel (LINE IN)
            memcpy(lineBuffer + i * 2 * size, src + (i * 2 + 1) * size, size);
            memcpy(lineBuffer + (i * 2 + 1) * size, src + (i * 2 + 1) * size, size);
        }
        vol_scaler_run((int16_t *)lineBuffer, capData->GetSamples() * kPcmSampleInfo.channels, kVolCtrlUnit.micVolume);
        // noise suppression, re-framed to the NS block size
#if 1
        uint8_t *cap_in = (uint8_t *)malloc(lineSize);
        memcpy(cap_in, lineBuffer, lineSize);
        audio_buf_t cap_in_buf;
        cap_in_buf.data = cap_in;
        cap_in_buf.index = 0;
        cap_in_buf.size = lineSize;
        cap_in_list.emplace_back(cap_in_buf);
        int frames = capData->GetSamples();
        int frameSize = stream_config.num_samples() * sizeof(int16_t);
        while (frames > 0)
        {
            auto capBuf = cap_in_list.begin();
            int readSize = (capBuf->size - capBuf->index) >= frameSize ? frameSize : (capBuf->size - capBuf->index);
            uint8_t *data = (uint8_t *)malloc(readSize);
            memcpy(data, capBuf->data + capBuf->index, readSize);
            // run one NS block: analyze + process, band-split above 16 kHz
            {
                short *buffer = (short *)data;
                audio.CopyFrom(buffer, stream_config);
                if (split_bands)
                    audio.SplitIntoFrequencyBands();
                ns.Analyze(audio);
                ns.Process(&audio);
                if (split_bands)
                    audio.MergeFrequencyBands();
                audio.CopyTo(stream_config, buffer);
            }
            // queue the denoised block
            audio_buf_t out;
            out.data = data;
            out.index = 0;
            out.size = readSize;
            ns_out_list.emplace_back(out);
            // advance the read cursor
            frames -= stream_config.num_samples();
            capBuf->index += readSize;
            // drop fully-consumed input chunks
            if (capBuf->index >= capBuf->size) {
                free(capBuf->data);
                cap_in_list.erase(cap_in_list.begin());
            }
        }
        // wait until a few denoised blocks are queued before encoding
        if (ns_out_list.size() <= 2) {
            continue;
        }
        // reassemble exactly one encoder frame into lineBuffer.
        // NOTE(review): this writes c->frame_size * sizeof(int16_t) bytes,
        // which looks mono-sized while lineBuffer elsewhere holds interleaved
        // stereo — confirm the intended framing.
        frames = c->frame_size;
        while (frames > 0) {
            auto nsData = ns_out_list.begin();
            int needSize = frames * sizeof(int16_t);
            int readSize = (nsData->size - nsData->index) >= needSize ? needSize : (nsData->size - nsData->index);
            memcpy(lineBuffer + (c->frame_size - frames) * sizeof(int16_t), nsData->data + nsData->index, readSize);
            frames -= readSize / sizeof(int16_t);
            nsData->index += readSize;
            if (nsData->index >= nsData->size) {
                free(nsData->data);
                ns_out_list.erase(ns_out_list.begin());
            }
        }
#endif
        if (input_fp) fwrite(lineBuffer, 1, lineSize, input_fp);
        // resample into the encoder frame
        {
            const uint8_t** in = (const uint8_t**)&lineBuffer;
            uint8_t **out = frame->data;
            int len2, out_data_size;
            // NOTE(review): GetSamples() is passed as the input count even
            // though lineBuffer was just rewritten with c->frame_size samples
            // above — confirm which count swr_convert should receive.
            len2 = swr_convert(swrCtx, out, frame->nb_samples, in, capData->GetSamples());
            if (len2 < 0) {
                printf("swr_convert failed. \n");
                break;
            }
            out_data_size = len2 * c->channels * av_get_bytes_per_sample(c->sample_fmt);
            // if (swr_fp) fwrite(frame->data[0], 1, out_data_size, swr_fp);
        }
        // NOTE(review): pts accumulates milliseconds, but
        // av_packet_rescale_ts below converts from c->time_base — this is
        // only consistent if c->time_base is {1, 1000}; confirm.
        pts += c->frame_size * 1000 / MIX_INPUT_SAMPLE_RATE;
        frame->pts = pts;
        // send the frame to the encoder
        ret = avcodec_send_frame(c, frame);
        if (ret < 0) {
            fprintf(stderr, "Error sending a frame for encoding\n");
            break;
        }
        // drain all packets the encoder has ready
        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.data = NULL;
        pkt.size = 0;
        while (ret >= 0) {
            ret = avcodec_receive_packet(c, &pkt);
            if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
                break;
            } else if (ret < 0) {
                fprintf(stderr, "Error during encoding\n");
                break;
            }
            // rescale timestamps from encoder time base to stream time base
            pkt.stream_index = audio_st->index;
            av_packet_rescale_ts(&pkt, c->time_base, audio_st->time_base);
            pkt.duration = av_rescale_q(pkt.duration, c->time_base, audio_st->time_base);
            // mux the packet
            ret = av_interleaved_write_frame(oc, &pkt);
            if (ret < 0) {
                fprintf(stderr, "Error while writing audio frame\n");
                break;
            }
            av_packet_unref(&pkt);
        }
    }
    // finalize the container: flush buffered packets and write the trailer
    if (oc)
        av_write_trailer(oc);
    if (input_fp)
        fclose(input_fp);
    if (swrCtx)
        swr_free(&swrCtx);
    if (c)
        avcodec_free_context(&c); // closes and frees; avcodec_close alone leaked the context
    if (frame)
        av_frame_free(&frame);
    av_free(samples_data); // frame did not own this buffer
    if (oc) {
        if (!(fmt->flags & AVFMT_NOFILE))
            avio_close(oc->pb); // only close what avio_open opened
        avformat_free_context(oc);
    }
    free(lineBuffer); // malloc'd, so free() — av_free was an allocator mismatch
    return 0;
}