/**
 * @brief Location of an audio packet on the frame timeline: a frame number plus
 * the sample offset inside that frame at which the packet starts.
 *
 * Kept as a plain aggregate so it can still be brace-initialized as
 * {frame, sample_start}.
 */
struct audio_packet_location
{
	int frame;
	int sample_start;

	/**
	 * @brief Report whether another location lies within @p amount samples of this one.
	 *
	 * Both locations are projected onto one continuous sample timeline
	 * (frame * samples_per_frame + sample_start) and the absolute distance between
	 * them is compared against @p amount. Locations in the same frame, or on either
	 * side of a frame boundary, are therefore handled by the same rule, and locations
	 * that are many frames apart are never reported as near merely because one sits at
	 * the tail of its frame and the other at the head of a much later (or earlier) one.
	 *
	 * @param location          The location to compare against this one.
	 * @param samples_per_frame Number of audio samples in one frame. NOTE(review): assumed
	 *                          to be the same for every frame between the two locations.
	 * @param amount            Largest distance, in samples, that still counts as near.
	 *                          A negative value never matches.
	 * @return true when the two locations are at most @p amount samples apart.
	 */
	bool is_near(audio_packet_location location, int samples_per_frame, int amount) const
	{
		// Signed distance in samples; 64-bit so that large frame numbers cannot overflow
		// when multiplied by the per-frame sample count.
		long long distance =
			(static_cast<long long>(location.frame) - frame) * samples_per_frame
			+ (static_cast<long long>(location.sample_start) - sample_start);

		// Only the magnitude matters: the other location may be before or after this one.
		if (distance < 0)
			distance = -distance;

		return distance <= amount;
	}
};
prev_samples(0), prev_pts(0), + pts_total(0), pts_counter(0), display_debug(false) { // Init FileInfo struct (clear all values) InitFileInfo(); @@ -722,11 +723,10 @@ void FFmpegReader::ProcessAudioPacket(int requested_frame, int target_frame, int while (my_packet->size > 0) { // re-initialize buffer size (it gets changed in the avcodec_decode_audio2 method call) int buf_size = AVCODEC_MAX_AUDIO_FRAME_SIZE + FF_INPUT_BUFFER_PADDING_SIZE; - int used = avcodec_decode_audio3(aCodecCtx, audio_buf, &buf_size, my_packet); + int used = avcodec_decode_audio3(aCodecCtx, (short *)audio_buf, &buf_size, my_packet); if (used < 0) { // Throw exception - my_packet->size = 0; throw ErrorDecodingAudio("Error decoding audio samples", target_frame); } @@ -739,7 +739,23 @@ void FFmpegReader::ProcessAudioPacket(int requested_frame, int target_frame, int } // Estimate the # of samples and the end of this packet's location (to prevent GAPS for the next timestamp) - int pts_remaining_samples = round(packet_samples / info.channels) - 1; // Adjust for zero based array + int pts_remaining_samples = packet_samples / info.channels; // Adjust for zero based array + + // DEBUG (FOR AUDIO ISSUES) - Get the audio packet start time (in seconds) + int adjusted_pts = packet->pts + audio_pts_offset; + double audio_seconds = double(adjusted_pts) * info.audio_timebase.ToDouble(); + double sample_seconds = float(pts_total) / info.sample_rate; + + if (display_debug) + cout << pts_counter << ") PTS: " << adjusted_pts << ", Offset: " << audio_pts_offset << ", PTS Diff: " << (adjusted_pts - prev_pts) << ", Samples: " << pts_remaining_samples << ", Sample PTS ratio: " << (float(adjusted_pts - prev_pts) / pts_remaining_samples) << ", Sample Diff: " << (pts_remaining_samples - prev_samples) << ", Total: " << pts_total << ", PTS Seconds: " << audio_seconds << ", Sample Seconds: " << sample_seconds << ", Seconds Diff: " << (audio_seconds - sample_seconds) << ", raw samples: " << packet_samples << endl; + + // 
DEBUG (FOR AUDIO ISSUES) + prev_pts = adjusted_pts; + pts_total += pts_remaining_samples; + pts_counter++; + prev_samples = pts_remaining_samples; + + while (pts_remaining_samples) { // Get Samples per frame (for this frame number) @@ -784,8 +800,8 @@ void FFmpegReader::ProcessAudioPacket(int requested_frame, int target_frame, int // Audio needs to be converted // Create an audio resample context object (used to convert audio samples) - if (!resampleCtx) - resampleCtx = av_audio_resample_init( + //if (!resampleCtx) + ReSampleContext *resampleCtx = av_audio_resample_init( info.channels, info.channels, info.sample_rate, @@ -808,6 +824,13 @@ void FFmpegReader::ProcessAudioPacket(int requested_frame, int target_frame, int delete[] converted_audio; converted_audio = NULL; } + + // Close audio resample context + if (resampleCtx) + { + audio_resample_close(resampleCtx); + resampleCtx = NULL; + } } int starting_frame_number = -1; @@ -815,7 +838,7 @@ void FFmpegReader::ProcessAudioPacket(int requested_frame, int target_frame, int { // Array of floats (to hold samples for each channel) starting_frame_number = target_frame; - int channel_buffer_size = round(packet_samples / info.channels); + int channel_buffer_size = packet_samples / info.channels; float *channel_buffer = new float[channel_buffer_size]; // Init buffer array @@ -1109,19 +1132,28 @@ audio_packet_location FFmpegReader::GetAudioPTSLocation(int pts) // Calculate the sample # to start on int sample_start = round(double(samples_per_frame) * sample_start_percentage); + // Protect against broken (i.e. 
negative) timestamps + if (whole_frame < 1) + whole_frame = 1; + if (sample_start < 0) + sample_start = 0; + // Prepare final audio packet location audio_packet_location location = {whole_frame, sample_start}; + // Compare to previous audio packet (and fix small gaps due to varying PTS timestamps) - int diff_previous_packet = abs(location.sample_start - previous_packet_location.sample_start); - if (location.frame == previous_packet_location.frame && diff_previous_packet >= 0 && diff_previous_packet <= 100) + if (location.is_near(previous_packet_location, samples_per_frame, 1000)) { int orig_frame = location.frame; int orig_start = location.sample_start; // Update sample start, to prevent gaps in audio if (previous_packet_location.sample_start + 1 <= samples_per_frame) - location.sample_start = previous_packet_location.sample_start + 1; + { + location.sample_start = previous_packet_location.sample_start; + location.frame = previous_packet_location.frame; + } else { // set to next frame (since we exceeded the # of samples on a frame) @@ -1131,6 +1163,8 @@ audio_packet_location FFmpegReader::GetAudioPTSLocation(int pts) //cout << "GAP DETECTED!!! Changing frame " << orig_frame << ":" << orig_start << " to frame " << location.frame << ":" << location.sample_start << endl; } + //else + // cout << "NOT NEAR!!! frame " << location.frame << ":" << location.sample_start << " prev frame " << previous_packet_location.frame << ":" << previous_packet_location.sample_start << endl; // Set previous location previous_packet_location = location; diff --git a/src/FFmpegWriter.cpp b/src/FFmpegWriter.cpp index 60802c42..3de4408b 100644 --- a/src/FFmpegWriter.cpp +++ b/src/FFmpegWriter.cpp @@ -561,7 +561,7 @@ void FFmpegWriter::flush_encoders() cout << "Flushing AUDIO buffer!" 
<< endl; // Increment PTS (in samples and scaled to the codec's timebase) - write_audio_count += av_rescale_q(audio_codec->frame_size / av_get_bytes_per_sample(audio_codec->sample_fmt), (AVRational){1, info.sample_rate}, audio_codec->time_base); + write_audio_count += av_rescale_q(audio_codec->frame_size / audio_codec->channels, (AVRational){1, info.sample_rate}, audio_codec->time_base); AVPacket pkt; av_init_packet(&pkt); @@ -579,7 +579,7 @@ void FFmpegWriter::flush_encoders() error_description = av_err2str(error_code); #endif - cout << "error encoding audio: " << error_code << ": " << error_description << endl; + cout << "error encoding audio (flush): " << error_code << ": " << error_description << endl; //throw ErrorEncodingAudio("Error while flushing audio frame", -1); } if (!got_packet) { @@ -604,7 +604,7 @@ void FFmpegWriter::flush_encoders() pkt.flags |= AV_PKT_FLAG_KEY; // Write packet - error_code = av_write_frame(oc, &pkt); + error_code = av_interleaved_write_frame(oc, &pkt); if (error_code != 0) { string error_description = "Unknown"; @@ -1022,11 +1022,11 @@ void FFmpegWriter::write_audio_packets(bool final) break; // Increment PTS (in samples and scaled to the codec's timebase) - write_audio_count += av_rescale_q(audio_input_position / av_get_bytes_per_sample(audio_codec->sample_fmt), (AVRational){1, info.sample_rate}, audio_codec->time_base); + write_audio_count += av_rescale_q(audio_input_position / audio_codec->channels, (AVRational){1, info.sample_rate}, audio_codec->time_base); // Create AVFrame (and fill it with samples) AVFrame *frame_final = avcodec_alloc_frame(); - frame_final->nb_samples = audio_input_position / av_get_bytes_per_sample(audio_codec->sample_fmt); + frame_final->nb_samples = audio_input_frame_size / audio_codec->channels; //av_get_bytes_per_sample(audio_codec->sample_fmt); frame_final->pts = write_audio_count; // Set the AVFrame's PTS avcodec_fill_audio_frame(frame_final, audio_codec->channels, audio_codec->sample_fmt, (uint8_t 
*) samples, audio_input_position * av_get_bytes_per_sample(audio_codec->sample_fmt), 1); @@ -1079,7 +1079,14 @@ void FFmpegWriter::write_audio_packets(bool final) } if (error_code < 0) - cout << "Error encoding audio: " << error_code << endl; + { + string error_description = "Unknown"; + + #ifdef av_err2str + error_description = av_err2str(error_code); + #endif + cout << "Error encoding audio: " << error_code << ": " << error_description << endl; + } // deallocate AVFrame //av_free(frame_final->data[0]); diff --git a/src/Frame.cpp b/src/Frame.cpp index aa6fef6f..bc8a9b14 100644 --- a/src/Frame.cpp +++ b/src/Frame.cpp @@ -528,6 +528,11 @@ void Frame::AddImage(tr1::shared_ptr new_image, float alpha) // Add audio samples to a specific channel void Frame::AddAudio(bool replaceSamples, int destChannel, int destStartSample, const float* source, int numSamples, float gainToApplyToSource = 1.0f) { + + // DEBUG CODE (FOR AUDIO ISSUES) + //cout << "AddAudio: Frame: " << number << ", replaceSamples: " << replaceSamples << ", destChannel: " << destChannel << ", destStartSample: " << destStartSample << ", numSamples: " << numSamples << ", gain: " << gainToApplyToSource << ", end: " << (destStartSample + numSamples) << endl; + //cout << " " << source[0] << "," << source[1] << "," << source[2] << " ... 
" << "," << source[numSamples-2] << "," << source[numSamples-1] << "," << source[numSamples] << "," << source[numSamples+1] << endl; + // Extend audio buffer (if needed) if (destStartSample + numSamples > audio->getNumSamples()) audio->setSize(audio->getNumChannels(), destStartSample + numSamples, true, true, false); diff --git a/src/Main.cpp b/src/Main.cpp index ad805f5e..3cb5e0be 100644 --- a/src/Main.cpp +++ b/src/Main.cpp @@ -60,11 +60,12 @@ int main() // Add some clips //Clip c1(new FFmpegReader("/home/jonathan/Apps/videcho_site/media/user_files/videos/bd0bf442-3221-11e2-8bf6-001fd00ee3aa.webm")); - Clip c1(new FFmpegReader("/home/jonathan/Videos/Movie Music/02 - Shattered [Turn The Car Around] (Album Version).mp3")); - //Clip c1(new FFmpegReader("/home/jonathan/sintel.webm")); + //Clip c1(new FFmpegReader("/home/jonathan/Videos/Movie Music/02 - Shattered [Turn The Car Around] (Album Version).mp3")); + FFmpegReader r1("/home/jonathan/Desktop/sintel.webm"); + Clip c1(new FFmpegReader("/home/jonathan/Videos/big-buck-bunny_trailer.webm")); Clip c2(new ImageReader("/home/jonathan/Desktop/Logo.png")); - Clip c3(new FFmpegReader("/home/jonathan/Videos/Movie Music/01 Whip It.mp3")); - //Clip c3(new FFmpegReader("/home/jonathan/Desktop/IncognitoCory_-_April_Song.mp3")); + Clip c3(new FFmpegReader("/home/jonathan/Desktop/sintel.webm")); + //Clip c3(new FFmpegReader("/home/jonathan/Videos/Movie Music/01 Whip It.mp3")); c1.Position(0.0); c1.gravity = GRAVITY_CENTER; c1.scale = SCALE_FIT; @@ -183,15 +184,16 @@ int main() //t.AddClip(&c2); //t.AddClip(&c3); + r1.Open(); // Create a writer FFmpegWriter w("/home/jonathan/output.webm"); - w.DisplayInfo(); + r1.DisplayInfo(); // Set options //w.SetAudioOptions(true, "libmp3lame", 44100, 2, 128000, false); w.SetAudioOptions(true, "libvorbis", 44100, 2, 128000); - w.SetVideoOptions(true, "libvpx", Fraction(24, 1), 624, 348, Fraction(1,1), false, false, 2000000); + w.SetVideoOptions(true, "libvpx", Fraction(24,1), 624, 348, 
Fraction(1,1), false, false, 2000000); // Prepare Streams w.PrepareStreams(); @@ -202,12 +204,12 @@ int main() // Output stream info w.OutputStreamInfo(); - for (int frame = 1; frame <= 300; frame++) + for (int frame = 200; frame <= 400; frame++) { - tr1::shared_ptr f = t.GetFrame(frame); + tr1::shared_ptr f = r1.GetFrame(frame); if (f) { - //if (frame >= 20) + //if (frame >= 13) // f->DisplayWaveform(); // Write frame