Many improvements to FFmpegReader, including

Better bounds checking, and proper handling when reaching the end of the video file
Improved debug output, which helped resolve many issues
Implemented the newest audio decoding API (the old one was deprecated)
Now support packed and planar audio data
Far superior audio resampling support
This commit is contained in:
Jonathan Thomas
2015-02-05 00:00:52 -06:00
parent de1a30b6cb
commit a6087dab3a
6 changed files with 146 additions and 89 deletions

View File

@@ -35,8 +35,8 @@ using namespace openshot;
FFmpegReader::FFmpegReader(string path) throw(InvalidFile, NoStreamsFound, InvalidCodec)
: last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0),
audio_pts_offset(99999), video_pts_offset(99999), path(path), is_video_seek(true), check_interlace(false),
check_fps(false), enable_seek(true), rescaler_position(0), num_of_rescalers(32), is_open(false),
seek_audio_frame_found(0), seek_video_frame_found(0), resampleCtx(NULL), prev_samples(0), prev_pts(0),
check_fps(false), enable_seek(true), rescaler_position(0), num_of_rescalers(OPEN_MP_NUM_PROCESSORS), is_open(false),
seek_audio_frame_found(0), seek_video_frame_found(0), prev_samples(0), prev_pts(0),
pts_total(0), pts_counter(0), is_duration_known(false), largest_frame_processed(0) {
// Initialize FFMpeg, and register all formats and codecs
@@ -230,10 +230,6 @@ void FFmpegReader::Close()
avcodec_close(aCodecCtx);
}
// Close audio resample context
if (resampleCtx)
audio_resample_close(resampleCtx);
// Clear final cache
final_cache.Clear();
working_cache.Clear();
@@ -262,6 +258,9 @@ void FFmpegReader::UpdateAudioInfo()
info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
info.acodec = aCodecCtx->codec->name;
info.channels = aCodecCtx->channels;
if (aCodecCtx->channel_layout == 0)
aCodecCtx->channel_layout = av_get_default_channel_layout( aCodecCtx->channels );;
info.channel_layout = (ChannelLayout) aCodecCtx->channel_layout;
info.sample_rate = aCodecCtx->sample_rate;
info.audio_bit_rate = aCodecCtx->bit_rate;
@@ -305,8 +304,17 @@ void FFmpegReader::UpdateVideoInfo()
info.video_bit_rate = pFormatCtx->bit_rate;
if (!check_fps)
{
// set frames per second (fps)
info.fps.num = pStream->r_frame_rate.num;
info.fps.den = pStream->r_frame_rate.den;
// Check for blank fps
if (info.fps.den == 0)
{
// use average fps
info.fps.num = pStream->avg_frame_rate.num;
info.fps.den = pStream->avg_frame_rate.den;
}
}
if (pStream->sample_aspect_ratio.num != 0)
{
@@ -399,11 +407,13 @@ tr1::shared_ptr<Frame> FFmpegReader::GetFrame(int requested_frame) throw(OutOfBo
throw InvalidFile("Could not detect the duration of the video or audio stream.", path);
// Debug output
#pragma omp critical (debug_output)
AppendDebugMethod("FFmpegReader::GetFrame", "requested_frame", requested_frame, "last_frame", last_frame, "", -1, "", -1, "", -1, "", -1);
// Check the cache for this frame
if (final_cache.Exists(requested_frame)) {
// Debug output
#pragma omp critical (debug_output)
AppendDebugMethod("FFmpegReader::GetFrame", "returned cached frame", requested_frame, "", -1, "", -1, "", -1, "", -1, "", -1);
// Return the cached frame
@@ -467,6 +477,7 @@ tr1::shared_ptr<Frame> FFmpegReader::ReadStream(int requested_frame)
omp_set_nested(true);
// Debug output
#pragma omp critical (debug_output)
AppendDebugMethod("FFmpegReader::ReadStream", "requested_frame", requested_frame, "OPEN_MP_NUM_PROCESSORS", OPEN_MP_NUM_PROCESSORS, "", -1, "", -1, "", -1, "", -1);
#pragma omp parallel
@@ -481,21 +492,20 @@ tr1::shared_ptr<Frame> FFmpegReader::ReadStream(int requested_frame)
// Wait if too many frames are being processed
while (processing_video_frames.size() + processing_audio_frames.size() >= minimum_packets)
usleep(20 * 1000);
usleep(50);
// Get the next packet (if any)
if (packet_error < 0)
{
// Break loop when no more packets found
end_of_stream = true;
// Wait for all frames to be processed
while (processing_video_frames.size() + processing_audio_frames.size() == 0)
usleep(20 * 1000);
break;
}
// Debug output
#pragma omp critical (debug_output)
AppendDebugMethod("FFmpegReader::ReadStream (GetNextPacket)", "requested_frame", requested_frame, "processing_video_frames.size()", processing_video_frames.size(), "processing_audio_frames.size()", processing_audio_frames.size(), "minimum_packets", minimum_packets, "packets_processed", packets_processed, "", -1);
// Video packet
if (packet->stream_index == videoStream)
{
@@ -581,7 +591,7 @@ tr1::shared_ptr<Frame> FFmpegReader::ReadStream(int requested_frame)
// Debug output
#pragma omp critical (debug_output)
AppendDebugMethod("FFmpegReader::ReadStream (Completed)", "packets_processed", packets_processed, "end_of_stream", end_of_stream, "largest_frame_processed", largest_frame_processed, "", -1, "", -1, "", -1);
AppendDebugMethod("FFmpegReader::ReadStream (Completed)", "packets_processed", packets_processed, "end_of_stream", end_of_stream, "largest_frame_processed", largest_frame_processed, "Working Cache Count", working_cache.Count(), "", -1, "", -1);
// End of stream?
if (end_of_stream) {
@@ -597,9 +607,20 @@ tr1::shared_ptr<Frame> FFmpegReader::ReadStream(int requested_frame)
if (final_cache.Exists(requested_frame))
// Return prepared frame
return final_cache.GetFrame(requested_frame);
else
// Frame not found (possibily end of stream)
throw OutOfBoundsFrame("Frame not found in video (invalid frame requested)", requested_frame, info.video_length);
else {
// Check if largest frame is still cached
if (final_cache.Exists(largest_frame_processed)) {
// return the largest processed frame (assuming it was the last in the video file)
return final_cache.GetFrame(largest_frame_processed);
}
else {
// The largest processed frame is no longer in cache, return a blank frame
tr1::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
f->AddColor(info.width, info.height, "#000");
return f;
}
}
}
@@ -692,7 +713,7 @@ bool FFmpegReader::CheckSeek(bool is_video)
{
// SEEKED TOO FAR
#pragma omp critical (debug_output)
AppendDebugMethod("FFmpegReader::CheckSeek (Too far, seek again)", "is_video_seek", is_video_seek, "current_pts", packet->pts, "seeking_pts", seeking_pts, "seeking_frame", seeking_frame, "seek_video_frame_found", seek_video_frame_found, "seek_audio_frame_found", seek_audio_frame_found);
AppendDebugMethod("FFmpegReader::CheckSeek (Too far, seek again)", "is_video_seek", is_video_seek, "max_seeked_frame", max_seeked_frame, "seeking_frame", seeking_frame, "seeking_pts", seeking_pts, "seek_video_frame_found", seek_video_frame_found, "seek_audio_frame_found", seek_audio_frame_found);
// Seek again... to the nearest Keyframe
Seek(seeking_frame - 10);
@@ -779,7 +800,7 @@ void FFmpegReader::ProcessVideoPacket(int requested_frame)
// Determine required buffer size and allocate buffer
numBytes = avpicture_get_size(PIX_FMT_RGBA, width, height);
buffer = (uint8_t *) av_malloc(numBytes * sizeof(uint8_t));
buffer = (uint8_t *) av_malloc(numBytes * sizeof(uint8_t) * 2);
// Assign appropriate parts of buffer to image planes in pFrameRGB
// Note that pFrameRGB is an AVFrame, but AVFrame is a superset
@@ -863,34 +884,38 @@ void FFmpegReader::ProcessAudioPacket(int requested_frame, int target_frame, int
#pragma omp critical (debug_output)
AppendDebugMethod("FFmpegReader::ProcessAudioPacket (Before)", "requested_frame", requested_frame, "target_frame", target_frame, "starting_sample", starting_sample, "", -1, "", -1, "", -1);
// Init an AVFrame to hold the decoded audio samples
int frame_finished = 0;
AVFrame *audio_frame = avcodec_alloc_frame();
avcodec_get_frame_defaults(audio_frame);
// Allocate audio buffer
int16_t *audio_buf = new int16_t[AVCODEC_MAX_AUDIO_FRAME_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
int packet_samples = 0;
uint8_t *original_packet_data = my_packet->data;
int original_packet_size = my_packet->size;
int data_size = 0;
while (my_packet->size > 0) {
// re-initialize buffer size (it gets changed in the avcodec_decode_audio2 method call)
int buf_size = AVCODEC_MAX_AUDIO_FRAME_SIZE + FF_INPUT_BUFFER_PADDING_SIZE;
int used = avcodec_decode_audio3(aCodecCtx, (short *)audio_buf, &buf_size, my_packet);
// re-initialize buffer size (it gets changed in the avcodec_decode_audio2 method call)
int buf_size = AVCODEC_MAX_AUDIO_FRAME_SIZE + FF_INPUT_BUFFER_PADDING_SIZE;
int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, my_packet);
if (used < 0) {
// Throw exception
throw ErrorDecodingAudio("Error decoding audio samples", target_frame);
}
if (used <= 0) {
// Throw exception
throw ErrorDecodingAudio("Error decoding audio samples", target_frame);
} else if (frame_finished) {
// determine how many samples were decoded
int planar = av_sample_fmt_is_planar(aCodecCtx->sample_fmt);
int plane_size = -1;
data_size = av_samples_get_buffer_size(&plane_size,
aCodecCtx->channels,
audio_frame->nb_samples,
aCodecCtx->sample_fmt, 1);
// Calculate total number of samples
packet_samples += (buf_size / av_get_bytes_per_sample(aCodecCtx->sample_fmt));
// process samples...
my_packet->data += used;
my_packet->size -= used;
packet_samples = audio_frame->nb_samples * aCodecCtx->channels;
}
// Restore packet size and data (to avoid crash in av_free_packet)
my_packet->data = original_packet_data;
my_packet->size = original_packet_size;
// Estimate the # of samples and the end of this packet's location (to prevent GAPS for the next timestamp)
int pts_remaining_samples = packet_samples / info.channels; // Adjust for zero based array
@@ -941,53 +966,59 @@ void FFmpegReader::ProcessAudioPacket(int requested_frame, int target_frame, int
#pragma omp critical (packet_cache)
RemoveAVPacket(my_packet);
#pragma omp task firstprivate(requested_frame, target_frame, my_cache, starting_sample, audio_buf)
// TODO: Disable OpenMP on audio packet processing. It is not currently possible to reassemble the packets
// in order without creating small gaps and/or overlapping sample values.
#pragma xxx omp task firstprivate(requested_frame, target_frame, my_cache, starting_sample, audio_buf)
{
// create a new array (to hold the re-sampled audio)
int16_t *converted_audio = NULL;
#pragma omp critical (debug_output)
AppendDebugMethod("FFmpegReader::ProcessAudioPacket (ReSample)", "packet_samples", packet_samples, "info.channels", info.channels, "info.sample_rate", info.sample_rate, "aCodecCtx->sample_fmt", aCodecCtx->sample_fmt, "AV_SAMPLE_FMT_S16", AV_SAMPLE_FMT_S16, "", -1);
// Re-sample audio samples (if needed)
if(aCodecCtx->sample_fmt != AV_SAMPLE_FMT_S16) {
// Init resample buffer
converted_audio = new int16_t[AVCODEC_MAX_AUDIO_FRAME_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
// Audio needs to be converted
// Create an audio resample context object (used to convert audio samples)
//if (!resampleCtx)
ReSampleContext *resampleCtx = av_audio_resample_init(
info.channels,
info.channels,
info.sample_rate,
info.sample_rate,
AV_SAMPLE_FMT_S16,
aCodecCtx->sample_fmt,
0, 0, 0, 0.0f);
// Create output frame
AVFrame *audio_converted = avcodec_alloc_frame();
avcodec_get_frame_defaults(audio_converted);
audio_converted->nb_samples = audio_frame->nb_samples;
av_samples_alloc(audio_converted->data, audio_converted->linesize, info.channels, audio_frame->nb_samples, AV_SAMPLE_FMT_S16, 1);
if (!resampleCtx)
throw InvalidCodec("Failed to convert audio samples to 16 bit (SAMPLE_FMT_S16).", path);
else
{
// Re-sample audio
audio_resample(resampleCtx, (short *)converted_audio, (short *)audio_buf, packet_samples);
// setup resample context
AVAudioResampleContext *avr = avresample_alloc_context();
av_opt_set_int(avr, "in_channel_layout", aCodecCtx->channel_layout, 0);
av_opt_set_int(avr, "out_channel_layout", aCodecCtx->channel_layout, 0);
av_opt_set_int(avr, "in_sample_fmt", aCodecCtx->sample_fmt, 0);
av_opt_set_int(avr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
av_opt_set_int(avr, "in_sample_rate", info.sample_rate, 0);
av_opt_set_int(avr, "out_sample_rate", info.sample_rate, 0);
av_opt_set_int(avr, "in_channels", info.channels, 0);
av_opt_set_int(avr, "out_channels", info.channels, 0);
int r = avresample_open(avr);
int nb_samples = 0;
// Copy audio samples over original samples
memcpy(audio_buf, converted_audio, packet_samples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16));
// Convert audio samples
nb_samples = avresample_convert(avr, // audio resample context
audio_converted->data, // output data pointers
audio_converted->linesize[0], // output plane size, in bytes. (0 if unknown)
audio_converted->nb_samples, // maximum number of samples that the output buffer can hold
audio_frame->data, // input data pointers
audio_frame->linesize[0], // input plane size, in bytes (0 if unknown)
audio_frame->nb_samples); // number of input samples to convert
// Copy audio samples over original samples
memcpy(audio_buf, audio_converted->data[0], audio_converted->nb_samples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) * info.channels);
// Deallocate resample buffer
avresample_close(avr);
avresample_free(&avr);
avr = NULL;
// Free frames
avcodec_free_frame(&audio_frame);
avcodec_free_frame(&audio_converted);
// Deallocate resample buffer
delete[] converted_audio;
converted_audio = NULL;
}
// Close audio resample context
if (resampleCtx)
{
audio_resample_close(resampleCtx);
resampleCtx = NULL;
}
}
int starting_frame_number = -1;
bool partial_frame = true;
for (int channel_filter = 0; channel_filter < info.channels; channel_filter++)
{
// Array of floats (to hold samples for each channel)
@@ -1005,7 +1036,6 @@ void FFmpegReader::ProcessAudioPacket(int requested_frame, int target_frame, int
int position = 0;
for (int sample = 0; sample < packet_samples; sample++)
{
// Only add samples for current channel
if (channel_filter == channel)
{
@@ -1052,6 +1082,16 @@ void FFmpegReader::ProcessAudioPacket(int requested_frame, int target_frame, int
// some louder samples from maxing out at 1.0 (not sure why this happens)
f->AddAudio(true, channel_filter, start, iterate_channel_buffer, samples, 0.98f);
// Determine if this frame was "partially" filled in
if (samples_per_frame == start + samples)
partial_frame = false;
else
partial_frame = true;
// Debug output
#pragma omp critical (debug_output)
AppendDebugMethod("FFmpegReader::ProcessAudioPacket (f->AddAudio)", "frame", starting_frame_number, "start", start, "samples", samples, "channel", channel_filter, "partial_frame", partial_frame, "samples_per_frame", samples_per_frame);
#pragma omp critical (openshot_cache)
// Add or update cache
my_cache->Add(f->number, f);
@@ -1085,6 +1125,9 @@ void FFmpegReader::ProcessAudioPacket(int requested_frame, int target_frame, int
{
// Update all frames as completed
for (int f = target_frame; f < starting_frame_number; f++) {
if (f == (starting_frame_number - 1) && partial_frame)
// ignore partial frames (always the last frame processed)
break;
processing_audio_frames.erase(f);
processed_audio_frames[f] = f;
}
@@ -1117,6 +1160,8 @@ void FFmpegReader::Seek(int requested_frame) throw(TooManySeeks)
working_cache.Clear();
// Clear processed lists
processing_audio_frames.clear();
processing_video_frames.clear();
processed_video_frames.clear();
processed_audio_frames.clear();
@@ -1195,12 +1240,11 @@ void FFmpegReader::Seek(int requested_frame) throw(TooManySeeks)
// init seek flags
is_seeking = true;
if (seeking_pts != 0) {
seeking_pts = seek_target;
seeking_frame = requested_frame;
seek_audio_frame_found = 0; // used to detect which frames to throw away after a seek
seek_video_frame_found = 0; // used to detect which frames to throw away after a seek
}
seeking_pts = seek_target;
seeking_frame = requested_frame;
seek_audio_frame_found = 0; // used to detect which frames to throw away after a seek
seek_video_frame_found = 0; // used to detect which frames to throw away after a seek
}
else
{
@@ -1319,8 +1363,7 @@ AudioLocation FFmpegReader::GetAudioPTSLocation(int pts)
AudioLocation location = {whole_frame, sample_start};
// Compare to previous audio packet (and fix small gaps due to varying PTS timestamps)
if (previous_packet_location.frame != -1 && location.is_near(previous_packet_location, samples_per_frame, samples_per_frame) &&
(location.frame != previous_packet_location.frame && location.sample_start != previous_packet_location.sample_start))
if (previous_packet_location.frame != -1 && location.is_near(previous_packet_location, samples_per_frame, samples_per_frame))
{
int orig_frame = location.frame;
int orig_start = location.sample_start;
@@ -1366,7 +1409,9 @@ tr1::shared_ptr<Frame> FFmpegReader::CreateFrame(int requested_frame)
{
// Create a new frame on the working cache
tr1::shared_ptr<Frame> f(new Frame(requested_frame, info.width, info.height, "#000000", Frame::GetSamplesPerFrame(requested_frame, info.fps, info.sample_rate), info.channels));
f->SetPixelRatio(info.pixel_ratio.num, info.pixel_ratio.den);
f->SetPixelRatio(info.pixel_ratio.num, info.pixel_ratio.den); // update pixel ratio
f->ChannelsLayout(info.channel_layout); // update audio channel layout from the parent reader
f->SampleRate(info.sample_rate); // update the frame's sample rate of the parent reader
working_cache.Add(requested_frame, f);
@@ -1412,6 +1457,10 @@ void FFmpegReader::CheckWorkingFrames(bool end_of_stream)
bool is_audio_ready = processed_audio_frames.count(f->number);
bool is_seek_trash = IsPartialFrame(f->number);
// Adjust for available streams
if (!info.has_video) is_video_ready = true;
if (!info.has_audio) is_audio_ready = true;
// Debug output
#pragma omp critical (debug_output)
AppendDebugMethod("FFmpegReader::CheckWorkingFrames", "frame_number", f->number, "is_video_ready", is_video_ready, "is_audio_ready", is_audio_ready, "processed_video_frames.count(f->number)", processed_video_frames.count(f->number), "processed_audio_frames.count(f->number)", processed_audio_frames.count(f->number), "", -1);
@@ -1556,13 +1605,11 @@ void FFmpegReader::CheckFPS()
{
// Update FPS for this reader instance
info.fps = Fraction(avg_fps, 1);
cout << "UPDATED FPS FROM " << fps << " to " << info.fps.ToDouble() << " (Average FPS)" << endl;
}
else
{
// Update FPS for this reader instance (to 1/2 the original framerate)
info.fps = Fraction(info.fps.num / 2, info.fps.den);
cout << "UPDATED FPS FROM " << fps << " to " << info.fps.ToDouble() << " (1/2 FPS)" << endl;
}
}