diff --git a/include/FFmpegReader.h b/include/FFmpegReader.h index e2c4863a..fcc995ae 100644 --- a/include/FFmpegReader.h +++ b/include/FFmpegReader.h @@ -97,6 +97,9 @@ namespace openshot AVFormatContext *pFormatCtx; int i, videoStream, audioStream; AVCodecContext *pCodecCtx, *aCodecCtx; + #if (LIBAVFORMAT_VERSION_MAJOR >= 57) + AVBufferRef *hw_device_ctx = NULL; //PM + #endif AVStream *pStream, *aStream; AVPacket *packet; AVFrame *pFrame; diff --git a/include/FFmpegUtilities.h b/include/FFmpegUtilities.h index 346da541..0cc08f52 100644 --- a/include/FFmpegUtilities.h +++ b/include/FFmpegUtilities.h @@ -42,6 +42,9 @@ extern "C" { #include <libavcodec/avcodec.h> #include <libavformat/avformat.h> + #if (LIBAVFORMAT_VERSION_MAJOR >= 57) + #include <libavutil/hwcontext.h> //PM + #endif #include <libswscale/swscale.h> // Change this to the first version swrescale works #if (LIBAVFORMAT_VERSION_MAJOR >= 57) diff --git a/src/FFmpegReader.cpp b/src/FFmpegReader.cpp index adf957f1..86b7cb0d 100644 --- a/src/FFmpegReader.cpp +++ b/src/FFmpegReader.cpp @@ -32,6 +32,9 @@ using namespace openshot; +int hw_de_on = 1; // Is set in UI +int hw_de_supported = 0; // Is set by FFmpegReader + FFmpegReader::FFmpegReader(string path) : last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), audio_pts_offset(99999), video_pts_offset(99999), path(path), is_video_seek(true), check_interlace(false), @@ -103,6 +106,45 @@ bool AudioLocation::is_near(AudioLocation location, int samples_per_frame, int64 return false; } +#if IS_FFMPEG_3_2 +#if defined(__linux__) +#pragma message "You are compiling with experimental hardware decode" + +static enum AVPixelFormat get_vaapi_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts) +{ + const enum AVPixelFormat *p; + + for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) { + if (*p == AV_PIX_FMT_VAAPI) + return *p; + } + ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream (Unable to decode this file using VA-API.)", "", -1, "", -1, "", -1, "", -1, "", -1, "", -1); + hw_de_supported = 0; + return
AV_PIX_FMT_NONE; +} + +int is_hardware_decode_supported(int codecid) +{ + int ret; + switch (codecid) { + case AV_CODEC_ID_H264: + case AV_CODEC_ID_MPEG2VIDEO: + case AV_CODEC_ID_VC1: + case AV_CODEC_ID_WMV1: + case AV_CODEC_ID_WMV2: + case AV_CODEC_ID_WMV3: + ret = 1; + break; + default : + ret = 0; + break; + } + return ret; +} + +#endif +#endif + void FFmpegReader::Open() { // Open reader if not already open @@ -111,6 +153,14 @@ void FFmpegReader::Open() // Initialize format context pFormatCtx = NULL; + char * val = getenv( "OS2_DECODE_HW" ); + if (val == NULL) { + hw_de_on = 0; + } + else{ + hw_de_on = (val[0] == '1')? 1 : 0; + } + // Open video file if (avformat_open_input(&pFormatCtx, path.c_str(), NULL, NULL) != 0) throw InvalidFile("File could not be opened.", path); @@ -151,7 +201,11 @@ void FFmpegReader::Open() // Get codec and codec context from stream AVCodec *pCodec = avcodec_find_decoder(codecId); pCodecCtx = AV_GET_CODEC_CONTEXT(pStream, pCodec); - + #if IS_FFMPEG_3_2 + #if defined(__linux__) + hw_de_supported = is_hardware_decode_supported(pCodecCtx->codec_id); + #endif + #endif // Set number of threads equal to number of processors (not to exceed 16) pCodecCtx->thread_count = min(OPEN_MP_NUM_PROCESSORS, 16); @@ -163,6 +217,23 @@ void FFmpegReader::Open() AVDictionary *opts = NULL; av_dict_set(&opts, "strict", "experimental", 0); + #if IS_FFMPEG_3_2 + #if defined(__linux__) + if (hw_de_on & hw_de_supported) { + // Open Hardware Acceleration + hw_device_ctx = NULL; + pCodecCtx->get_format = get_vaapi_format; + if (av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_VAAPI, NULL, NULL, 0) >= 0) { + if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) { + throw InvalidCodec("Hardware device reference create failed.", path); + } + } + else { + throw InvalidCodec("Hardware device create failed.", path); + } + } + #endif + #endif // Open video codec if (avcodec_open2(pCodecCtx, pCodec, &opts) < 0) throw InvalidCodec("A video codec was found, 
but could not be opened.", path); @@ -252,6 +323,16 @@ void FFmpegReader::Close() { avcodec_flush_buffers(pCodecCtx); AV_FREE_CONTEXT(pCodecCtx); + #if IS_FFMPEG_3_2 + #if defined(__linux__) + if (hw_de_on) { + if (hw_device_ctx) { + av_buffer_unref(&hw_device_ctx); + hw_device_ctx = NULL; + } + } + #endif + #endif } if (info.has_audio) { @@ -703,9 +784,13 @@ std::shared_ptr FFmpegReader::ReadStream(int64_t requested_frame) int FFmpegReader::GetNextPacket() { int found_packet = 0; - AVPacket *next_packet = new AVPacket(); + AVPacket *next_packet; + #pragma omp critical(getnextpacket) + { + next_packet = new AVPacket(); found_packet = av_read_frame(pFormatCtx, next_packet); + if (packet) { // Remove previous packet before getting next one RemoveAVPacket(packet); @@ -717,7 +802,7 @@ int FFmpegReader::GetNextPacket() // Update current packet pointer packet = next_packet; } - +} // Return if packet was found (or error number) return found_packet; } @@ -734,17 +819,51 @@ bool FFmpegReader::GetAVFrame() { #if IS_FFMPEG_3_2 frameFinished = 0; + ret = avcodec_send_packet(pCodecCtx, packet); + if (ret < 0 || ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (Packet not sent)", "", -1, "", -1, "", -1, "", -1, "", -1, "", -1); } else { + AVFrame *next_frame2; + #if defined(__linux__) + if (hw_de_on && hw_de_supported) { + next_frame2 = AV_ALLOCATE_FRAME(); + } + else + #endif + { + next_frame2 = next_frame; + } pFrame = new AVFrame(); while (ret >= 0) { - ret = avcodec_receive_frame(pCodecCtx, next_frame); + ret = avcodec_receive_frame(pCodecCtx, next_frame2); if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { break; } + if (ret != 0) { + ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (invalid return frame received)", "", -1, "", -1, "", -1, "", -1, "", -1, "", -1); + } + #if defined(__linux__) + if (hw_de_on && hw_de_supported) { + int err; + if (next_frame2->format == AV_PIX_FMT_VAAPI) { + 
next_frame->format = AV_PIX_FMT_YUV420P; + if ((err = av_hwframe_transfer_data(next_frame,next_frame2,0)) < 0) { + ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (Failed to transfer data to output frame)", "", -1, "", -1, "", -1, "", -1, "", -1, "", -1); + } + if ((err = av_frame_copy_props(next_frame,next_frame2)) < 0) { + ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (Failed to copy props to output frame)", "", -1, "", -1, "", -1, "", -1, "", -1, "", -1); + } + } + } + else + #endif + { // No hardware acceleration used -> no copy from GPU memory needed + next_frame = next_frame2; + } + //} // TODO also handle possible further frames // Use only the first frame like avcodec_decode_video2 if (frameFinished == 0 ) { @@ -759,6 +878,11 @@ bool FFmpegReader::GetAVFrame() } } } + #if defined(__linux__) + if (hw_de_on && hw_de_supported) { + AV_FREE_FRAME(&next_frame2); + } + #endif } #else avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet); diff --git a/src/FFmpegWriter.cpp b/src/FFmpegWriter.cpp index ed4fc3fb..a62e8287 100644 --- a/src/FFmpegWriter.cpp +++ b/src/FFmpegWriter.cpp @@ -32,6 +32,49 @@ using namespace openshot; +#if IS_FFMPEG_3_2 +int hw_en_on = 1; // Is set in UI +int hw_en_supported = 0; // Is set by FFmpegWriter +static AVBufferRef *hw_device_ctx = NULL; +AVFrame *hw_frame = NULL; + +static int set_hwframe_ctx(AVCodecContext *ctx, AVBufferRef *hw_device_ctx, int64_t width, int64_t height) +{ + AVBufferRef *hw_frames_ref; + AVHWFramesContext *frames_ctx = NULL; + int err = 0; + + if (!(hw_frames_ref = av_hwframe_ctx_alloc(hw_device_ctx))) { + fprintf(stderr, "Failed to create VAAPI frame context.\n"); + return -1; + } + frames_ctx = (AVHWFramesContext *)(hw_frames_ref->data); + frames_ctx->format = AV_PIX_FMT_VAAPI; + frames_ctx->sw_format = AV_PIX_FMT_NV12; + frames_ctx->width = width; + frames_ctx->height = height; + frames_ctx->initial_pool_size = 20; + if ((err = 
av_hwframe_ctx_init(hw_frames_ref)) < 0) { + fprintf(stderr, "Failed to initialize VAAPI frame context." + "Error code: %s\n",av_err2str(err)); + av_buffer_unref(&hw_frames_ref); + return err; + } + ctx->hw_frames_ctx = av_buffer_ref(hw_frames_ref); + if (!ctx->hw_frames_ctx) + err = AVERROR(ENOMEM); + + av_buffer_unref(&hw_frames_ref); + return err; +} +#endif + +#if IS_FFMPEG_3_2 +#if defined(__linux__) +#pragma message "You are compiling with experimental hardware encode" +#endif +#endif + FFmpegWriter::FFmpegWriter(string path) : path(path), fmt(NULL), oc(NULL), audio_st(NULL), video_st(NULL), audio_pts(0), video_pts(0), samples(NULL), audio_outbuf(NULL), audio_outbuf_size(0), audio_input_frame_size(0), audio_input_position(0), @@ -116,7 +159,26 @@ void FFmpegWriter::SetVideoOptions(bool has_video, string codec, Fraction fps, i // Set the video options if (codec.length() > 0) { - AVCodec *new_codec = avcodec_find_encoder_by_name(codec.c_str()); + AVCodec *new_codec; + // Check if the codec selected is a hardware accelerated codec + #if IS_FFMPEG_3_2 + #if defined(__linux__) + if ( (strcmp(codec.c_str(),"h264_vaapi") == 0)) { + new_codec = avcodec_find_encoder_by_name(codec.c_str()); + hw_en_on = 1; + hw_en_supported = 1; + } + else { + new_codec = avcodec_find_encoder_by_name(codec.c_str()); + hw_en_on = 0; + hw_en_supported = 0; + } + #else // is FFmpeg 3 but not linux + new_codec = avcodec_find_encoder_by_name(codec.c_str()); + #endif //__linux__ + #else // not ffmpeg 3 + new_codec = avcodec_find_encoder_by_name(codec.c_str()); + #endif //IS_FFMPEG_3_2 if (new_codec == NULL) throw InvalidCodec("A valid video codec could not be found for this file.", path); else { @@ -506,6 +568,7 @@ void FFmpegWriter::write_queued_frames() is_writing = false; } // end omp single + } // end omp parallel // Raise exception from main thread @@ -735,6 +798,16 @@ void FFmpegWriter::close_video(AVFormatContext *oc, AVStream *st) { AV_FREE_CONTEXT(video_codec); video_codec = NULL; + 
#if IS_FFMPEG_3_2 + #if defined(__linux__) + if (hw_en_on && hw_en_supported) { + if (hw_device_ctx) { + av_buffer_unref(&hw_device_ctx); + hw_device_ctx = NULL; + } + } + #endif + #endif } // Close the audio codec @@ -1102,6 +1175,23 @@ void FFmpegWriter::open_video(AVFormatContext *oc, AVStream *st) // Set number of threads equal to number of processors (not to exceed 16) video_codec->thread_count = min(OPEN_MP_NUM_PROCESSORS, 16); + #if IS_FFMPEG_3_2 + #if defined(__linux__) + if (hw_en_on && hw_en_supported) { + // Use the hw device given in the environment variable HW_DEVICE_SET or the default if not set + char *dev_hw = getenv( "HW_DEVICE_SET" ); + // Check if it is there and writable + if( dev_hw != NULL && access( dev_hw, W_OK ) == -1 ) { + dev_hw = NULL; // use default + } + if (av_hwdevice_ctx_create(&hw_device_ctx, AV_HWDEVICE_TYPE_VAAPI, + dev_hw, NULL, 0) < 0) { + cerr << "FFmpegWriter::open_video : Codec name: " << info.vcodec.c_str() << " ERROR creating\n"; + throw InvalidCodec("Could not create hwdevice", path); + } + } + #endif + #endif /* find the video encoder */ codec = avcodec_find_encoder_by_name(info.vcodec.c_str()); if (!codec) @@ -1117,6 +1207,24 @@ void FFmpegWriter::open_video(AVFormatContext *oc, AVStream *st) AVDictionary *opts = NULL; av_dict_set(&opts, "strict", "experimental", 0); + #if IS_FFMPEG_3_2 + #if defined(__linux__) + if (hw_en_on && hw_en_supported) { + video_codec->max_b_frames = 0; // At least this GPU doesn't support b-frames + video_codec->pix_fmt = AV_PIX_FMT_VAAPI; + video_codec->profile = FF_PROFILE_H264_BASELINE | FF_PROFILE_H264_CONSTRAINED; + av_opt_set(video_codec->priv_data,"preset","slow",0); + av_opt_set(video_codec->priv_data,"tune","zerolatency",0); + av_opt_set(video_codec->priv_data, "vprofile", "baseline", AV_OPT_SEARCH_CHILDREN); + // set hw_frames_ctx for encoder's AVCodecContext + int err; + if ((err = set_hwframe_ctx(video_codec, hw_device_ctx, info.width, info.height)) < 0) { + fprintf(stderr, 
"Failed to set hwframe context.\n"); + } + } + #endif + #endif + /* open the codec */ if (avcodec_open2(video_codec, codec, &opts) < 0) throw InvalidCodec("Could not open codec", path); @@ -1566,7 +1674,15 @@ void FFmpegWriter::process_video_packet(std::shared_ptr frame) // Init AVFrame for source image & final (converted image) frame_source = allocate_avframe(PIX_FMT_RGBA, source_image_width, source_image_height, &bytes_source, (uint8_t*) pixels); #if IS_FFMPEG_3_2 - AVFrame *frame_final = allocate_avframe((AVPixelFormat)(video_st->codecpar->format), info.width, info.height, &bytes_final, NULL); + AVFrame *frame_final; + #if defined(__linux__) + if (hw_en_on && hw_en_supported) { + frame_final = allocate_avframe(AV_PIX_FMT_NV12, info.width, info.height, &bytes_final, NULL); + } else + #endif + { + frame_final = allocate_avframe((AVPixelFormat)(video_st->codecpar->format), info.width, info.height, &bytes_final, NULL); + } #else AVFrame *frame_final = allocate_avframe(video_codec->pix_fmt, info.width, info.height, &bytes_final, NULL); #endif @@ -1641,14 +1757,41 @@ bool FFmpegWriter::write_video_packet(std::shared_ptr frame, AVFrame* fra // Assign the initial AVFrame PTS from the frame counter frame_final->pts = write_video_count; - + #if IS_FFMPEG_3_2 + #if defined(__linux__) + if (hw_en_on && hw_en_supported) { + if (!(hw_frame = av_frame_alloc())) { + fprintf(stderr, "Error code: av_hwframe_alloc\n"); + } + if (av_hwframe_get_buffer(video_codec->hw_frames_ctx, hw_frame, 0) < 0) { + fprintf(stderr, "Error code: av_hwframe_get_buffer\n"); + } + if (!hw_frame->hw_frames_ctx) { + fprintf(stderr, "Error hw_frames_ctx.\n"); + } + hw_frame->format = AV_PIX_FMT_NV12; + if ( av_hwframe_transfer_data(hw_frame, frame_final, 0) < 0) { + fprintf(stderr, "Error while transferring frame data to surface.\n"); + } + av_frame_copy_props(hw_frame, frame_final); + } + #endif + #endif /* encode the image */ int got_packet_ptr = 0; int error_code = 0; #if IS_FFMPEG_3_2 // Write video 
packet (latest version of FFmpeg) int frameFinished = 0; - int ret = avcodec_send_frame(video_codec, frame_final); + int ret; + #if defined(__linux__) + #if IS_FFMPEG_3_2 + if (hw_en_on && hw_en_supported) { + ret = avcodec_send_frame(video_codec, hw_frame); //hw_frame!!! + } else + #endif + #endif + ret = avcodec_send_frame(video_codec, frame_final); error_code = ret; if (ret < 0 ) { ZmqLogger::Instance()->AppendDebugMethod("FFmpegWriter::write_video_packet (Frame not sent)", "", -1, "", -1, "", -1, "", -1, "", -1, "", -1); @@ -1709,6 +1852,7 @@ bool FFmpegWriter::write_video_packet(std::shared_ptr frame, AVFrame* fra //pkt.pts = pkt.dts = write_video_count; // set the timestamp +// av_packet_rescale_ts(&pkt, video_st->time_base,video_codec->time_base); if (pkt.pts != AV_NOPTS_VALUE) pkt.pts = av_rescale_q(pkt.pts, video_codec->time_base, video_st->time_base); if (pkt.dts != AV_NOPTS_VALUE) @@ -1732,6 +1876,16 @@ bool FFmpegWriter::write_video_packet(std::shared_ptr frame, AVFrame* fra // Deallocate packet AV_FREE_PACKET(&pkt); + #if IS_FFMPEG_3_2 + #if defined(__linux__) + if (hw_en_on && hw_en_supported) { + if (hw_frame) { + av_frame_free(&hw_frame); + hw_frame = NULL; + } + } + #endif + #endif } // Success @@ -1752,7 +1906,16 @@ void FFmpegWriter::InitScalers(int source_width, int source_height) for (int x = 0; x < num_of_rescalers; x++) { // Init the software scaler from FFMpeg + #if IS_FFMPEG_3_2 + #if defined(__linux__) + if (hw_en_on && hw_en_supported) { + img_convert_ctx = sws_getContext(source_width, source_height, PIX_FMT_RGBA, info.width, info.height, AV_PIX_FMT_NV12, SWS_BILINEAR, NULL, NULL, NULL); + } else + #endif + #endif + { img_convert_ctx = sws_getContext(source_width, source_height, PIX_FMT_RGBA, info.width, info.height, AV_GET_CODEC_PIXEL_FORMAT(video_st, video_st->codec), SWS_BILINEAR, NULL, NULL, NULL); + } // Add rescaler to vector image_rescalers.push_back(img_convert_ctx);