I'm trying to combine multiple MP4 files in Delphi with the FFMPEG video library. I have the headers unit with all the functions. All videos are MPEG-4, and so is the destination output file.
I found this question on Stack Overflow asking the same thing: how to combine video files while keeping both the audio and video tracks. I translated the answer to Delphi, and while the code executes successfully, the output file is invalid and cannot be played.
Here is my implementation:
var
  Files: TArray<PAnsiChar>;
  Output: PAnsiChar;
  I, S: Integer;
  i_fmt_ctx: PAVFormatContext;
  i_video_stream: PAVStream;
  o_fmt_ctx: PAVFormatContext;
  o_video_stream: PAVStream;
  P: PPAVStream;
begin
  SetLength(Files, 2);
  Files[0] := PAnsiChar('.\Clips\file9.mp4');
  Files[1] := PAnsiChar('.\Clips\file10.mp4');
  Output := '.\Output\out.mp4';

  avcodec_register_all();
  av_register_all();
  (* should be set to nil so that avformat_open_input() allocates a new one *)
  i_fmt_ctx := nil;

  if avformat_open_input(@i_fmt_ctx, Files[0], nil, nil) <> 0 then
    raise Exception.Create('Could not open file');
  if avformat_find_stream_info(i_fmt_ctx, nil) < 0 then
    raise Exception.Create('Could not find stream info');

  (* Find 1st video stream *)
  i_video_stream := nil;
  P := i_fmt_ctx.streams;
  for I := 0 to i_fmt_ctx.nb_streams - 1 do
  begin
    if P^.codec.codec_type = AVMEDIA_TYPE_VIDEO then
    begin
      i_video_stream := P^;
      Break;
    end;
    Inc(P);
  end;
  if i_video_stream = nil then
    raise Exception.Create('Could not find video stream');
  avformat_alloc_output_context2(@o_fmt_ctx, nil, nil, Output);

  (*
    since all input files are supposed to be identical (framerate, dimension, color format, ...)
    we can safely set output codec values from first input file
  *)
  o_video_stream := avformat_new_stream(o_fmt_ctx, nil);

  var c: PAVCodecContext;
  c := o_video_stream.codec;
  c.bit_rate := 400000;
  c.codec_id := i_video_stream.codec.codec_id;
  c.codec_type := i_video_stream.codec.codec_type;
  c.time_base.num := i_video_stream.time_base.num;
  c.time_base.den := i_video_stream.time_base.den;
  //fprintf(stderr, "time_base.num = %d time_base.den = %d\n", c->time_base.num, c->time_base.den);
  c.width := i_video_stream.codec.width;
  c.height := i_video_stream.codec.height;
  c.pix_fmt := i_video_stream.codec.pix_fmt;
  //printf("%d %d %d", c->width, c->height, c->pix_fmt);
  c.flags := i_video_stream.codec.flags;
  c.flags := c.flags or CODEC_FLAG_GLOBAL_HEADER;
  c.me_range := i_video_stream.codec.me_range;
  c.max_qdiff := i_video_stream.codec.max_qdiff;
  c.qmin := i_video_stream.codec.qmin;
  c.qmax := i_video_stream.codec.qmax;
  c.qcompress := i_video_stream.codec.qcompress;
  c.extradata := i_video_stream.codec.extradata;
  c.extradata_size := i_video_stream.codec.extradata_size;

  avio_open(@o_fmt_ctx.pb, Output, AVIO_FLAG_WRITE);

  (* yes! this is redundant *)
  avformat_close_input(@i_fmt_ctx);

  avformat_write_header(o_fmt_ctx, nil);
  var last_pts: Integer := 0;
  var last_dts: Integer := 0;
  for I := 1 to High(Files) do
  begin
    i_fmt_ctx := nil;
    if avformat_open_input(@i_fmt_ctx, Files[I], nil, nil) <> 0 then
      raise Exception.Create('Could not open input file');
    if avformat_find_stream_info(i_fmt_ctx, nil) < 0 then
      raise Exception.Create('Could not find stream info');

    av_dump_format(i_fmt_ctx, 0, Files[I], 0);

    (* we only use first video stream of each input file *)
    i_video_stream := nil;
    P := i_fmt_ctx.streams;
    for S := 0 to i_fmt_ctx.nb_streams - 1 do
    begin
      if P^.codec.codec_type = AVMEDIA_TYPE_VIDEO then
      begin
        i_video_stream := P^;
        Break;
      end;
      Inc(P);
    end;
    if i_video_stream = nil then
      raise Exception.Create('Could not find video stream');

    var pts, dts: Int64;
    pts := 0;
    dts := 0;

    while True do
    begin
      var i_pkt: TAVPacket;
      av_init_packet(@i_pkt);
      i_pkt.size := 0;
      i_pkt.data := nil;

      if av_read_frame(i_fmt_ctx, @i_pkt) < 0 then
        Break;

      (*
        pts and dts should increase monotonically
        pts should be >= dts
      *)
      i_pkt.flags := i_pkt.flags or AV_PKT_FLAG_KEY;
      pts := i_pkt.pts;
      Inc(i_pkt.pts, last_pts);
      dts := i_pkt.dts;
      Inc(i_pkt.dts, last_dts);
      i_pkt.stream_index := 0;

      // Write
      av_interleaved_write_frame(o_fmt_ctx, @i_pkt);
    end;

    Inc(last_dts, dts);
    Inc(last_pts, pts);

    avformat_close_input(@i_fmt_ctx);
  end;
  av_write_trailer(o_fmt_ctx);

  avcodec_close(o_fmt_ctx.streams^.codec);
  av_freep(@o_fmt_ctx.streams^.codec);
  av_freep(@o_fmt_ctx.streams);

  avio_close(o_fmt_ctx.pb);
  av_free(o_fmt_ctx);
end;
This is a translation of Михаил Чеботарев's answer.
Even if the code worked, I see no handling of the AVMEDIA_TYPE_AUDIO stream, which means this answer only solves half of the problem, since it combines only the video stream.
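For what it's worth, I assume the missing audio part would need something roughly like the sketch below, mirroring what the answer does for video: find the first audio stream, create a second output stream for it, and route the audio packets to it with their own pts/dts offset instead of forcing stream_index to 0. This is my own untested guess (same deprecated stream^.codec API as above; i_audio_stream and o_audio_stream are my own names):

(* Untested sketch of the audio handling I assume is missing *)
i_audio_stream := nil;
P := i_fmt_ctx.streams;
for I := 0 to i_fmt_ctx.nb_streams - 1 do
begin
  if P^.codec.codec_type = AVMEDIA_TYPE_AUDIO then
  begin
    i_audio_stream := P^;
    Break;
  end;
  Inc(P);
end;

if i_audio_stream <> nil then
begin
  o_audio_stream := avformat_new_stream(o_fmt_ctx, nil);
  (* copy the codec parameters, as the answer does for the video stream *)
  o_audio_stream.codec.codec_id := i_audio_stream.codec.codec_id;
  o_audio_stream.codec.codec_type := i_audio_stream.codec.codec_type;
  o_audio_stream.codec.sample_rate := i_audio_stream.codec.sample_rate;
  o_audio_stream.codec.channels := i_audio_stream.codec.channels;
  o_audio_stream.codec.extradata := i_audio_stream.codec.extradata;
  o_audio_stream.codec.extradata_size := i_audio_stream.codec.extradata_size;
  o_audio_stream.time_base := i_audio_stream.time_base;
end;

(* In the packet loop, instead of forcing i_pkt.stream_index := 0, each packet
   would then have to be written to the matching output stream (video or audio),
   each with its own last_pts/last_dts offset. *)

But I have not been able to verify this, which is part of what I am asking about.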
Another approach I tried was the UBitmaps2Video FFmpeg implementation, which successfully merges the video files, but again only the video stream, with no audio.
I also tried converting the audio stream manually with the BASS audio library. It was able to read the audio and write it to a single WAV file, which I then converted to MP3, and finally I muxed the combined video file and the MP3 file with MuxStreams2. Unfortunately, the audio and video do not align properly, and I was unable to pinpoint the issue.
Currently, the only option that works is calling the precompiled FFmpeg executable via ShellExecute with the appropriate parameters to combine the videos, more precisely:
ffmpeg -f concat -safe 0 -i video-list.txt -c copy output.mp4
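For reference, this is roughly how I invoke it (assuming ffmpeg.exe is reachable on the PATH and video-list.txt uses the concat demuxer syntax; the file names here are just examples):

(* needs Winapi.Windows and Winapi.ShellAPI in the uses clause *)

(* video-list.txt contains one line per clip, e.g.:
   file '.\Clips\file9.mp4'
   file '.\Clips\file10.mp4' *)

ShellExecute(0, 'open', 'ffmpeg.exe',
  '-f concat -safe 0 -i video-list.txt -c copy .\Output\out.mp4',
  nil, SW_HIDE);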
But I would still rather combine the videos with the FFmpeg headers in Delphi, as that gives me the option of progress indicators, more control of the playback, and the ability to pause the thread at any point.
So, why does my implementation for merging the video files not work? And what is a good method to include the audio stream as well?