I have a running RTSP stream that streams a video file in a loop, using the following FFmpeg command:
ffmpeg -re -stream_loop -1 -i ./ffmpeg_c_test/small_bunny_1080p_60fps.mp4 -ac 2 -f rtsp -rtsp_transport tcp rtsp://localhost:8554/mystream
The video file comes from the GitHub repository https://github.com/leandromoreira/ffmpeg-libav-tutorial
I keep getting an error from av_interleaved_write_frame, called from the remux function in the program below. The output format is MP4, the output video codec is AV1, and the output audio codec is the same as the input audio codec. The error occurs on the audio stream.
I tried to create a "minimal reproducible example"; I don't think it is completely minimal, but it reproduces the exact error.
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/timestamp.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
typedef struct StreamingContext {
    AVFormatContext *avfc;
    const AVCodec *video_avc;
    const AVCodec *audio_avc;
    AVStream *video_avs;
    AVStream *audio_avs;
    AVCodecContext *video_avcc;
    AVCodecContext *audio_avcc;
    int video_index;
    int audio_index;
    char *filename;
    struct SwsContext *sws_ctx;
} StreamingContext;

typedef struct StreamingParams {
    char copy_video;
    char copy_audio;
    char *output_extension;
    char *muxer_opt_key;
    char *muxer_opt_value;
    char *video_codec;
    char *audio_codec;
    char *codec_priv_key;
    char *codec_priv_value;
} StreamingParams;

void logging(const char *fmt, ...)
{
    va_list args;
    fprintf(stderr, "LOG: ");
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    fprintf(stderr, "\n");
}
int fill_stream_info(AVStream *avs, const AVCodec **avc, AVCodecContext **avcc)
{
    *avc = avcodec_find_decoder(avs->codecpar->codec_id);
    *avcc = avcodec_alloc_context3(*avc);
    if (avcodec_parameters_to_context(*avcc, avs->codecpar) < 0)
    {
        logging("Failed to fill Codec Context.");
        return -1;
    }
    avcodec_open2(*avcc, *avc, NULL);
    return 0;
}

int open_media(const char *in_filename, AVFormatContext **avfc)
{
    *avfc = avformat_alloc_context();
    if (avformat_open_input(avfc, in_filename, NULL, NULL) != 0)
    {
        logging("Failed to open input file %s", in_filename);
        return -1;
    }
    if (avformat_find_stream_info(*avfc, NULL) < 0)
    {
        logging("Failed to get Stream Info.");
        return -1;
    }
    return 0;
}
int prepare_decoder(StreamingContext *sc)
{
    for (int i = 0; i < (int)sc->avfc->nb_streams; i++)
    {
        if (sc->avfc->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
        {
            sc->video_avs = sc->avfc->streams[i];
            sc->video_index = i;
            if (fill_stream_info(sc->video_avs, &sc->video_avc, &sc->video_avcc))
            {
                return -1;
            }
        }
        else if (sc->avfc->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            sc->audio_avs = sc->avfc->streams[i];
            sc->audio_index = i;
            if (fill_stream_info(sc->audio_avs, &sc->audio_avc, &sc->audio_avcc))
            {
                return -1;
            }
        }
        else
        {
            logging("Skipping Streams other than Audio and Video.");
        }
    }
    return 0;
}
int prepare_video_encoder(StreamingContext *encoder_sc, AVCodecContext *decoder_ctx, AVRational input_framerate,
                          StreamingParams sp, int scaled_frame_width, int scaled_frame_height)
{
    encoder_sc->video_avs = avformat_new_stream(encoder_sc->avfc, NULL);
    encoder_sc->video_avc = avcodec_find_encoder_by_name(sp.video_codec);
    if (!encoder_sc->video_avc)
    {
        logging("Cannot find the Codec.");
        return -1;
    }
    encoder_sc->video_avcc = avcodec_alloc_context3(encoder_sc->video_avc);
    if (!encoder_sc->video_avcc)
    {
        logging("Could not allocate memory for Codec Context.");
        return -1;
    }
    av_opt_set(encoder_sc->video_avcc->priv_data, "preset", "fast", 0);
    if (sp.codec_priv_key && sp.codec_priv_value)
        av_opt_set(encoder_sc->video_avcc->priv_data, sp.codec_priv_key, sp.codec_priv_value, 0);
    encoder_sc->video_avcc->height = scaled_frame_height;
    encoder_sc->video_avcc->width = scaled_frame_width;
    encoder_sc->video_avcc->sample_aspect_ratio = decoder_ctx->sample_aspect_ratio;
    if (encoder_sc->video_avc->pix_fmts)
        encoder_sc->video_avcc->pix_fmt = encoder_sc->video_avc->pix_fmts[0];
    else
        encoder_sc->video_avcc->pix_fmt = decoder_ctx->pix_fmt;
    encoder_sc->video_avcc->bit_rate = 2 * 1000 * 1000;
    encoder_sc->video_avcc->time_base = av_inv_q(input_framerate);
    encoder_sc->video_avs->time_base = encoder_sc->video_avcc->time_base;
    if (avcodec_open2(encoder_sc->video_avcc, encoder_sc->video_avc, NULL) < 0)
    {
        logging("Could not open the Codec.");
        return -1;
    }
    avcodec_parameters_from_context(encoder_sc->video_avs->codecpar, encoder_sc->video_avcc);
    return 0;
}

int prepare_copy(AVFormatContext *avfc, AVStream **avs, AVCodecParameters *decoder_par)
{
    *avs = avformat_new_stream(avfc, NULL);
    avcodec_parameters_copy((*avs)->codecpar, decoder_par);
    return 0;
}
int encode_video(StreamingContext *decoder, StreamingContext *encoder, AVFrame *input_frame)
{
    if (input_frame)
        input_frame->pict_type = AV_PICTURE_TYPE_NONE;
    AVPacket *output_packet = av_packet_alloc();
    int response = avcodec_send_frame(encoder->video_avcc, input_frame);
    while (response >= 0)
    {
        response = avcodec_receive_packet(encoder->video_avcc, output_packet);
        if (response == AVERROR(EAGAIN) || response == AVERROR_EOF)
        {
            break;
        }
        output_packet->stream_index = decoder->video_index;
        output_packet->duration = encoder->video_avs->time_base.den / encoder->video_avs->time_base.num;
        av_packet_rescale_ts(output_packet, decoder->video_avs->time_base, encoder->video_avs->time_base);
        response = av_interleaved_write_frame(encoder->avfc, output_packet);
    }
    av_packet_unref(output_packet);
    av_packet_free(&output_packet);
    return 0;
}

int remux(AVPacket **pkt, AVFormatContext **avfc, AVRational decoder_tb, AVRational encoder_tb)
{
    (*pkt)->duration = av_rescale_q((*pkt)->duration, decoder_tb, encoder_tb);
    (*pkt)->pos = -1;
    av_packet_rescale_ts(*pkt, decoder_tb, encoder_tb);
    if (av_interleaved_write_frame(*avfc, *pkt) < 0)
    {
        logging("Error while copying Stream Packet.");
        return -1;
    }
    return 0;
}

int transcode_video(StreamingContext *decoder, StreamingContext *encoder, AVPacket *input_packet, AVFrame *input_frame)
{
    int response = avcodec_send_packet(decoder->video_avcc, input_packet);
    while (response >= 0)
    {
        response = avcodec_receive_frame(decoder->video_avcc, input_frame);
        if (response == AVERROR(EAGAIN) || response == AVERROR_EOF)
        {
            break;
        }
        if (response >= 0)
        {
            if (encode_video(decoder, encoder, input_frame))
                return -1;
        }
        av_frame_unref(input_frame);
    }
    return 0;
}
int main(int argc, char *argv[])
{
    const int scaled_frame_width = 854;
    const int scaled_frame_height = 480;
    StreamingParams sp = {0};
    sp.copy_audio = 1;
    sp.copy_video = 0;
    sp.video_codec = "libsvtav1";

    StreamingContext *decoder = (StreamingContext *)calloc(1, sizeof(StreamingContext));
    decoder->filename = "rtsp://localhost:8554/mystream";

    StreamingContext *encoder = (StreamingContext *)calloc(1, sizeof(StreamingContext));
    encoder->filename = "small_bunny_9.mp4";
    if (sp.output_extension)
    {
        strcat(encoder->filename, sp.output_extension);
    }

    open_media(decoder->filename, &decoder->avfc);
    prepare_decoder(decoder);

    avformat_alloc_output_context2(&encoder->avfc, NULL, "mp4", encoder->filename);
    AVRational input_framerate = av_guess_frame_rate(decoder->avfc, decoder->video_avs, NULL);
    prepare_video_encoder(encoder, decoder->video_avcc, input_framerate, sp, scaled_frame_width, scaled_frame_height);
    prepare_copy(encoder->avfc, &encoder->audio_avs, decoder->audio_avs->codecpar);

    if (encoder->avfc->oformat->flags & AVFMT_GLOBALHEADER)
        encoder->avfc->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
    if (!(encoder->avfc->oformat->flags & AVFMT_NOFILE))
    {
        if (avio_open(&encoder->avfc->pb, encoder->filename, AVIO_FLAG_WRITE) < 0)
        {
            logging("could not open the output file");
            return -1;
        }
    }
    if (avformat_write_header(encoder->avfc, NULL) < 0)
    {
        logging("an error occurred when opening output file");
        return -1;
    }

    AVFrame *input_frame = av_frame_alloc();
    AVPacket *input_packet = av_packet_alloc();
    while (1)
    {
        int ret = av_read_frame(decoder->avfc, input_packet);
        if (ret < 0)
            break;
        if (decoder->avfc->streams[input_packet->stream_index]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
        {
            if (transcode_video(decoder, encoder, input_packet, input_frame))
                return -1;
            av_packet_unref(input_packet);
        }
        else if (decoder->avfc->streams[input_packet->stream_index]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO)
        {
            if (remux(&input_packet, &encoder->avfc, decoder->audio_avs->time_base, encoder->audio_avs->time_base))
                return -1;
        }
        else
        {
            logging("Ignoring all nonvideo or audio packets.");
        }
    }
    if (encode_video(decoder, encoder, NULL))
        return -1;

    av_write_trailer(encoder->avfc);

    if (input_frame != NULL)
    {
        av_frame_free(&input_frame);
        input_frame = NULL;
    }
    if (input_packet != NULL)
    {
        av_packet_free(&input_packet);
        input_packet = NULL;
    }

    avformat_close_input(&decoder->avfc);
    avformat_free_context(decoder->avfc);
    decoder->avfc = NULL;
    avformat_free_context(encoder->avfc);
    encoder->avfc = NULL;
    avcodec_free_context(&decoder->video_avcc);
    decoder->video_avcc = NULL;
    avcodec_free_context(&decoder->audio_avcc);
    decoder->audio_avcc = NULL;
    free(decoder);
    decoder = NULL;
    free(encoder);
    encoder = NULL;
    return 0;
}
We may correct the DTS timestamps when remuxing the audio packets, to make sure they are monotonically increasing.
The "non monotonically increasing dts" error is mainly a result of streaming the input video file in a loop.
Each time the loop restarts, the input timestamps start again from the beginning.
The remuxing procedure copies the timestamps from the input to the output, so every restart of the loop produces a DTS that is not larger than the previous one.
We have to fix the DTS timestamps to keep them monotonically increasing.
We may fix the PTS timestamps the same way, because in a valid audio stream the PTS should match the DTS.
Note:
To keep the audio and the video synchronized, we may also have to fix the timestamps of the video stream.
Even after fixing the audio, the video encoding still does not work.
This answer addresses only the "non monotonically increasing dts" issue; fixing the video stream seems too challenging.
Note:
Using an AV1 video encoder may raise other issues, since AV1 encoding may not meet the real-time constraint (it can't keep up with the rate of the streaming input).
To get a playable output file, I used the libx264 encoder instead of libsvtav1. If your CPU is "strong" enough, AV1 may also work.
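For reference, with the question's StreamingParams setup this swap is a one-line change (sketch only; the "preset" option already set in prepare_video_encoder also applies to libx264):

sp.video_codec = "libx264"; /* instead of "libsvtav1"; avcodec_find_encoder_by_name() then picks the H.264 encoder */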
For "manually" fixing the DTS (and PTS) timestamps, we may use the following structure for tracking the timestamps and the required offsets:
Pass a pointer to the structure to the remux function.
After av_packet_rescale_ts(*pkt, decoder_tb, encoder_tb), keep the rescaled timestamps and add the offsets that fix them.
After adding the offsets, check whether the timestamps are monotonically increasing.
If they are not, clamp them so that they are, and update the offsets (to be used for the next packet).
We also have to store the written timestamps, to be compared against the next packet.
With these changes, av_interleaved_write_frame executes without returning an error; a sketch of the modified remux follows the note below.
Complete updated code:
(I can't post both samples due to characters limit).
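Since the complete sample is not included, the following is only a sketch of a remux that applies the steps above. It uses the illustrative TimestampContext from the sketch further up and the question's logging helper; the call site in main gains a matching &ts argument, where ts is a TimestampContext initialized before the read loop.

int remux(AVPacket **pkt, AVFormatContext **avfc, AVRational decoder_tb, AVRational encoder_tb,
          TimestampContext *ts)
{
    (*pkt)->duration = av_rescale_q((*pkt)->duration, decoder_tb, encoder_tb);
    (*pkt)->pos = -1;
    av_packet_rescale_ts(*pkt, decoder_tb, encoder_tb);

    // Keep the rescaled input timestamps and apply the accumulated offsets.
    int64_t in_dts = (*pkt)->dts;
    int64_t in_pts = (*pkt)->pts;
    (*pkt)->dts += ts->dts_offset;
    (*pkt)->pts += ts->pts_offset;

    // If the result is not monotonically increasing (the looped input has
    // restarted from the beginning), clamp the timestamps and update the
    // offsets so the following packets stay monotonic as well.
    if (ts->prev_dts != AV_NOPTS_VALUE && (*pkt)->dts <= ts->prev_dts)
    {
        (*pkt)->dts = ts->prev_dts + 1;
        (*pkt)->pts = ts->prev_pts + 1;
        ts->dts_offset = (*pkt)->dts - in_dts;
        ts->pts_offset = (*pkt)->pts - in_pts;
    }

    // Store the timestamps that were actually written, for the next packet.
    ts->prev_dts = (*pkt)->dts;
    ts->prev_pts = (*pkt)->pts;

    if (av_interleaved_write_frame(*avfc, *pkt) < 0)
    {
        logging("Error while copying Stream Packet.");
        return -1;
    }
    return 0;
}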
Update:
The source of the problem is the timestamps of the input audio and video, which restart every time the input loops.
We may correct the input timestamps on the sending side using the setpts and asetpts filters:
ffmpeg -re -stream_loop -1 -i small_bunny_1080p_60fps.mp4 -filter_complex "[0:v]setpts=N/FRAME_RATE/TB[v];[0:a]asetpts=N/SR/TB[a]" -map "[v]" -map "[a]" -ac 2 -f rtsp -rtsp_transport tcp rtsp://localhost:8554/mystream
We may still need the DTS protection at the beginning (but the logic can be simplified).
To get a valid output file, we also have to close it gracefully: instead of while (1), loop until the Esc key is pressed. On Windows, the _kbhit() and _getch() functions read the last key pressed without blocking; a sketch of such a loop is shown below.
Updated code sample:
Now both the audio and the video look and sound OK.
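A minimal sketch of the described read loop (Windows-only: _kbhit() and _getch() come from <conio.h>, and the Esc key has code 27); the per-packet handling inside the loop stays the same as in the question's main:

#include <conio.h>  /* Windows-only header providing _kbhit() and _getch() */

    // Read packets until the input ends or Esc is pressed, so that
    // av_write_trailer() below still runs and the MP4 is closed gracefully.
    while (1)
    {
        if (_kbhit() && _getch() == 27)  // a key is waiting and it is Esc
            break;

        int ret = av_read_frame(decoder->avfc, input_packet);
        if (ret < 0)
            break;

        /* ... handle video and audio packets exactly as before ... */
    }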