diff --git a/media/libstagefright/binding/H264.cpp b/media/libstagefright/binding/H264.cpp
new file mode 100644
index 00000000000..b3305482ff6
--- /dev/null
+++ b/media/libstagefright/binding/H264.cpp
@@ -0,0 +1,338 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mozilla/ArrayUtils.h"
+#include "mp4_demuxer/AnnexB.h"
+#include "mp4_demuxer/ByteReader.h"
+#include "mp4_demuxer/ByteWriter.h"
+#include "mp4_demuxer/H264.h"
+#include <media/stagefright/foundation/ABitReader.h>
+
+using namespace mozilla;
+
+namespace mp4_demuxer
+{
+
+// Thin wrapper around stagefright::ABitReader adding Exp-Golomb decoding.
+class BitReader
+{
+public:
+  explicit BitReader(const ByteBuffer& aBuffer)
+    : mBitReader(aBuffer.Elements(), aBuffer.Length())
+  {
+  }
+
+  // Read aNum (<= 32) bits. Returns 0 when fewer than aNum bits remain.
+  uint32_t ReadBits(size_t aNum)
+  {
+    MOZ_ASSERT(mBitReader.numBitsLeft());
+    MOZ_ASSERT(aNum <= 32);
+    if (mBitReader.numBitsLeft() < aNum) {
+      return 0;
+    }
+    return mBitReader.getBits(aNum);
+  }
+
+  uint32_t ReadBit()
+  {
+    return ReadBits(1);
+  }
+
+  // Read unsigned integer Exp-Golomb-coded.
+  uint32_t ReadUE()
+  {
+    uint32_t i = 0;
+
+    while (ReadBit() == 0 && i < 32) {
+      i++;
+    }
+    if (i == 32) {
+      MOZ_ASSERT(false);
+      return 0;
+    }
+    uint32_t r = ReadBits(i);
+    // Unsigned shift: (1 << 31) on a signed int would overflow.
+    r += (1u << i) - 1;
+    return r;
+  }
+
+  // Read signed integer Exp-Golomb-coded.
+  int32_t ReadSE()
+  {
+    int32_t r = ReadUE();
+    if (r & 1) {
+      return (r+1) / 2;
+    } else {
+      return -r / 2;
+    }
+  }
+
+private:
+  stagefright::ABitReader mBitReader;
+};
+
+SPSData::SPSData()
+{
+  PodZero(this);
+  chroma_format_idc = 1;
+}
+
+/* static */ already_AddRefed<ByteBuffer>
+H264::DecodeNALUnit(const ByteBuffer* aNAL)
+{
+  MOZ_ASSERT(aNAL);
+
+  if (aNAL->Length() < 4) {
+    return nullptr;
+  }
+
+  nsRefPtr<ByteBuffer> rbsp = new ByteBuffer;
+  ByteReader reader(*aNAL);
+  uint8_t nal_unit_type = reader.ReadU8() & 0x1f;
+  uint32_t nalUnitHeaderBytes = 1;
+  if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) {
+    bool svc_extension_flag = false;
+    bool avc_3d_extension_flag = false;
+    if (nal_unit_type != 21) {
+      svc_extension_flag = reader.PeekU8() & 0x80;
+    } else {
+      avc_3d_extension_flag = reader.PeekU8() & 0x80;
+    }
+    if (svc_extension_flag) {
+      nalUnitHeaderBytes += 3;
+    } else if (avc_3d_extension_flag) {
+      nalUnitHeaderBytes += 2;
+    } else {
+      nalUnitHeaderBytes += 3;
+    }
+  }
+  if (!reader.Read(nalUnitHeaderBytes - 1)) {
+    return nullptr;
+  }
+  // Copy the payload, dropping each emulation_prevention_three_byte (the
+  // 0x03 of a 0x000003 sequence) and keeping every other byte.
+  uint32_t zeros = 0;
+  while (reader.Remaining()) {
+    uint8_t byte = reader.ReadU8();
+    if (zeros >= 2 && byte == 0x03) {
+      // Restart the count so a following 0x000003 is detected again.
+      zeros = 0;
+      continue;
+    }
+    rbsp->AppendElement(byte);
+    if (byte == 0) {
+      zeros++;
+    } else {
+      zeros = 0;
+    }
+  }
+  return rbsp.forget();
+}
+
+/* static */ bool
+H264::DecodeSPS(const ByteBuffer* aSPS, SPSData& aDest)
+{
+  MOZ_ASSERT(aSPS);
+  BitReader br(*aSPS);
+
+  int32_t lastScale;
+  int32_t nextScale;
+  int32_t deltaScale;
+
+  aDest.profile_idc = br.ReadBits(8);
+  aDest.constraint_set0_flag = br.ReadBit();
+  aDest.constraint_set1_flag = br.ReadBit();
+  aDest.constraint_set2_flag = br.ReadBit();
+  aDest.constraint_set3_flag = br.ReadBit();
+  aDest.constraint_set4_flag = br.ReadBit();
+  aDest.constraint_set5_flag = br.ReadBit();
+  br.ReadBits(2); // reserved_zero_2bits
+  aDest.level_idc = br.ReadBits(8);
+  aDest.seq_parameter_set_id = br.ReadUE();
+  if (aDest.profile_idc == 100 || aDest.profile_idc == 110 ||
+      aDest.profile_idc == 122 || aDest.profile_idc == 244 ||
+      aDest.profile_idc == 44 || aDest.profile_idc == 83 ||
+      aDest.profile_idc == 86 || aDest.profile_idc == 118 ||
+      aDest.profile_idc == 128 || aDest.profile_idc == 138 ||
+      aDest.profile_idc == 139 || aDest.profile_idc == 134) {
+    if ((aDest.chroma_format_idc = br.ReadUE()) == 3) {
+      aDest.separate_colour_plane_flag = br.ReadBit();
+    }
+    br.ReadUE(); // bit_depth_luma_minus8
+    br.ReadUE(); // bit_depth_chroma_minus8
+    br.ReadBit(); // qpprime_y_zero_transform_bypass_flag
+    if (br.ReadBit()) { // seq_scaling_matrix_present_flag
+      for (int idx = 0; idx < ((aDest.chroma_format_idc != 3) ? 8 : 12); ++idx) {
+        if (br.ReadBit()) { // Scaling list present
+          lastScale = nextScale = 8;
+          int sl_n = (idx < 6) ? 16 : 64;
+          for (int sl_i = 0; sl_i < sl_n; sl_i++) {
+            if (nextScale) {
+              deltaScale = br.ReadSE();
+              nextScale = (lastScale + deltaScale + 256) % 256;
+            }
+            lastScale = (nextScale == 0) ? lastScale : nextScale;
+          }
+        }
+      }
+    }
+  }
+  aDest.log2_max_frame_num = br.ReadUE() + 4;
+  aDest.pic_order_cnt_type = br.ReadUE();
+  if (aDest.pic_order_cnt_type == 0) {
+    aDest.log2_max_pic_order_cnt_lsb = br.ReadUE() + 4;
+  } else if (aDest.pic_order_cnt_type == 1) {
+    aDest.delta_pic_order_always_zero_flag = br.ReadBit();
+    aDest.offset_for_non_ref_pic = br.ReadSE();
+    aDest.offset_for_top_to_bottom_field = br.ReadSE();
+    uint32_t num_ref_frames_in_pic_order_cnt_cycle = br.ReadUE();
+    for (uint32_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) {
+      br.ReadSE(); // offset_for_ref_frame[i]
+    }
+  }
+  aDest.max_num_ref_frames = br.ReadUE();
+  aDest.gaps_in_frame_num_allowed_flag = br.ReadBit();
+  aDest.pic_width_in_mbs = br.ReadUE() + 1;
+  aDest.pic_height_in_map_units = br.ReadUE() + 1;
+  aDest.frame_mbs_only_flag = br.ReadBit();
+  if (!aDest.frame_mbs_only_flag) {
+    aDest.pic_height_in_map_units *= 2;
+    aDest.mb_adaptive_frame_field_flag = br.ReadBit();
+  }
+  br.ReadBit(); // direct_8x8_inference_flag
+  aDest.frame_cropping_flag = br.ReadBit();
+  if (aDest.frame_cropping_flag) {
+    aDest.frame_crop_left_offset = br.ReadUE();
+    aDest.frame_crop_right_offset = br.ReadUE();
+    aDest.frame_crop_top_offset = br.ReadUE();
+    aDest.frame_crop_bottom_offset = br.ReadUE();
+  }
+
+  // The VUI block directly follows the cropping fields in the SPS syntax.
+  aDest.vui_parameters_present_flag = br.ReadBit();
+  if (aDest.vui_parameters_present_flag) {
+    vui_parameters(br, aDest);
+  }
+
+  // Calculate common values.
+
+  // FFmpeg and VLC ignore the left and top cropping. Do the same here.
+
+  uint8_t ChromaArrayType =
+    aDest.separate_colour_plane_flag ? 0 : aDest.chroma_format_idc;
+  // Calculate width.
+  uint32_t CropUnitX = 1;
+  uint32_t SubWidthC = aDest.chroma_format_idc == 3 ? 1 : 2;
+  if (ChromaArrayType != 0) {
+    CropUnitX = SubWidthC;
+  }
+  uint32_t cropX = CropUnitX * aDest.frame_crop_right_offset;
+  aDest.pic_width = aDest.pic_width_in_mbs * 16 - cropX;
+
+  // Calculate Height
+  uint32_t CropUnitY = 2 - aDest.frame_mbs_only_flag;
+  uint32_t SubHeightC = aDest.chroma_format_idc <= 1 ? 2 : 1;
+  if (ChromaArrayType != 0) {
+    CropUnitY *= SubHeightC;
+  }
+  uint32_t cropY = CropUnitY * aDest.frame_crop_bottom_offset;
+  aDest.pic_height = aDest.pic_height_in_map_units * 16 - cropY;
+
+  aDest.interlaced = !aDest.frame_mbs_only_flag;
+  return true;
+}
+
+/* static */ void
+H264::vui_parameters(BitReader& aBr, SPSData& aDest)
+{
+  aDest.aspect_ratio_info_present_flag = aBr.ReadBit();
+  if (aDest.aspect_ratio_info_present_flag)
+  {
+    aDest.aspect_ratio_idc = aBr.ReadBits(8);
+
+    if (aDest.aspect_ratio_idc == 255 /* EXTENDED_SAR */) {
+      aDest.sar_width = aBr.ReadBits(16);
+      aDest.sar_height = aBr.ReadBits(16);
+    }
+  }
+  else {
+    aDest.sar_width = aDest.sar_height = 0;
+  }
+
+  aDest.overscan_info_present_flag = aBr.ReadBit();
+  if (aDest.overscan_info_present_flag) {
+    aDest.overscan_appropriate_flag = aBr.ReadBit();
+  }
+  if (aBr.ReadBit()) { // video_signal_type_present_flag
+    aDest.video_format = aBr.ReadBits(3);
+    aDest.video_full_range_flag = aBr.ReadBit();
+    aDest.colour_description_present_flag = aBr.ReadBit();
+    if (aDest.colour_description_present_flag) {
+      aDest.colour_primaries = aBr.ReadBits(8);
+      aDest.transfer_characteristics = aBr.ReadBits(8);
+      aDest.matrix_coefficients = aBr.ReadBits(8);
+    }
+  }
+  aDest.chroma_loc_info_present_flag = aBr.ReadBit();
+
+  if (aDest.chroma_loc_info_present_flag) {
+    aDest.chroma_sample_loc_type_top_field = aBr.ReadUE();
+    aDest.chroma_sample_loc_type_bottom_field = aBr.ReadUE();
+  }
+
+  if (aBr.ReadBit()) { // timing_info_present_flag
+    aDest.num_units_in_tick = aBr.ReadBits(32);
+    aDest.time_scale = aBr.ReadBits(32);
+    aDest.fixed_frame_rate_flag = aBr.ReadBit();
+  }
+
+  // Here we ignore nal_hrd parameters and bitstream restriction content.
+}
+
+/* static */ bool
+H264::DecodeSPSFromExtraData(const ByteBuffer* aExtraData, SPSData& aDest)
+{
+  if (!AnnexB::HasSPS(aExtraData)) {
+    return false;
+  }
+  ByteReader reader(*aExtraData);
+
+  if (!reader.Read(5)) {
+    return false;
+  }
+
+  if (!(reader.ReadU8() & 0x1f)) {
+    // No SPS.
+    reader.DiscardRemaining();
+    return false;
+  }
+  uint16_t length = reader.ReadU16();
+
+  if ((reader.PeekU8() & 0x1f) != 7) {
+    // Not a SPS NAL type.
+    reader.DiscardRemaining();
+    return false;
+  }
+
+  const uint8_t* ptr = reader.Read(length);
+  if (!ptr) {
+    return false;
+  }
+
+  nsRefPtr<ByteBuffer> rawNAL = new ByteBuffer;
+  rawNAL->AppendElements(ptr, length);
+
+  nsRefPtr<ByteBuffer> sps = DecodeNALUnit(rawNAL);
+
+  reader.DiscardRemaining();
+
+  // DecodeNALUnit returns nullptr for NAL units it rejects.
+  if (!sps) {
+    return false;
+  }
+
+  return DecodeSPS(sps, aDest);
+}
+
+} // namespace mp4_demuxer
diff --git a/media/libstagefright/binding/include/mp4_demuxer/H264.h b/media/libstagefright/binding/include/mp4_demuxer/H264.h
new file mode 100644
index 00000000000..27abcddeb46
--- /dev/null
+++ b/media/libstagefright/binding/include/mp4_demuxer/H264.h
@@ -0,0 +1,261 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MP4_DEMUXER_H264_H_
+#define MP4_DEMUXER_H264_H_
+
+#include "mp4_demuxer/DecoderData.h"
+
+namespace mp4_demuxer
+{
+
+class BitReader;
+
+struct SPSData
+{
+  /* Decoded Members */
+  /*
+    pic_width is the decoded width (left cropping is ignored) according to:
+    pic_width = ((pic_width_in_mbs_minus1 + 1) * 16)
+                - CropUnitX * frame_crop_right_offset
+   */
+  uint32_t pic_width;
+  /*
+    pic_height is the decoded height (top cropping is ignored) according to:
+    pic_height = (2 - frame_mbs_only_flag) * ((pic_height_in_map_units_minus1 + 1) * 16)
+                 - CropUnitY * frame_crop_bottom_offset
+   */
+  uint32_t pic_height;
+
+  bool interlaced;
+
+  /*
+    H264 decoding parameters according to ITU-T H.264 (T-REC-H.264-201402-I/en)
+   http://www.itu.int/rec/T-REC-H.264-201402-I/en
+   */
+
+  bool constraint_set0_flag;
+  bool constraint_set1_flag;
+  bool constraint_set2_flag;
+  bool constraint_set3_flag;
+  bool constraint_set4_flag;
+  bool constraint_set5_flag;
+
+  /*
+    profile_idc and level_idc indicate the profile and level to which the coded
+    video sequence conforms when the SVC sequence parameter set is the active
+    SVC sequence parameter set.
+   */
+  uint8_t profile_idc;
+  uint8_t level_idc;
+
+  /*
+    seq_parameter_set_id identifies the sequence parameter set that is referred
+    to by the picture parameter set. The value of seq_parameter_set_id shall be
+    in the range of 0 to 31, inclusive.
+   */
+  uint8_t seq_parameter_set_id;
+
+  /*
+    When the value of chroma_format_idc is equal to 1, the nominal vertical
+    and horizontal relative locations of luma and chroma samples in frames are
+    shown in Figure 6-1. Alternative chroma sample relative locations may be
+    indicated in video usability information (see Annex E).
+   */
+  uint8_t chroma_format_idc;
+
+  /*
+    If separate_colour_plane_flag is equal to 0, each of the two chroma arrays
+    has the same height and width as the luma array. Otherwise
+    (separate_colour_plane_flag is equal to 1), the three colour planes are
+    separately processed as monochrome sampled pictures.
+   */
+  bool separate_colour_plane_flag;
+
+  /*
+    log2_max_frame_num_minus4 specifies the value of the variable
+    MaxFrameNum that is used in frame_num related derivations as
+    follows:
+
+    MaxFrameNum = 2^( log2_max_frame_num_minus4 + 4 ). The value of
+    log2_max_frame_num_minus4 shall be in the range of 0 to 12, inclusive.
+   */
+  uint8_t log2_max_frame_num;
+
+  /*
+    pic_order_cnt_type specifies the method to decode picture order
+    count (as specified in subclause 8.2.1). The value of
+    pic_order_cnt_type shall be in the range of 0 to 2, inclusive.
+   */
+  uint8_t pic_order_cnt_type;
+
+  /*
+    log2_max_pic_order_cnt_lsb_minus4 specifies the value of the
+    variable MaxPicOrderCntLsb that is used in the decoding
+    process for picture order count as specified in subclause
+    8.2.1 as follows:
+
+    MaxPicOrderCntLsb = 2^( log2_max_pic_order_cnt_lsb_minus4 + 4 )
+
+    The value of log2_max_pic_order_cnt_lsb_minus4 shall be in
+    the range of 0 to 12, inclusive.
+   */
+  uint8_t log2_max_pic_order_cnt_lsb;
+
+  /*
+    delta_pic_order_always_zero_flag equal to 1 specifies that
+    delta_pic_order_cnt[ 0 ] and delta_pic_order_cnt[ 1 ] are
+    not present in the slice headers of the sequence and shall
+    be inferred to be equal to
+    0. delta_pic_order_always_zero_flag
+   */
+  bool delta_pic_order_always_zero_flag;
+
+  /*
+    offset_for_non_ref_pic is used to calculate the picture
+    order count of a non-reference picture as specified in
+    8.2.1. The value of offset_for_non_ref_pic shall be in the
+    range of -2^31 to 2^31 - 1, inclusive.
+   */
+  int32_t offset_for_non_ref_pic;
+
+  /*
+    offset_for_top_to_bottom_field is used to calculate the
+    picture order count of a bottom field as specified in
+    subclause 8.2.1. The value of offset_for_top_to_bottom_field
+    shall be in the range of -2^31 to 2^31 - 1, inclusive.
+   */
+  int32_t offset_for_top_to_bottom_field;
+
+  /*
+    max_num_ref_frames specifies the maximum number of short-term and
+    long-term reference frames, complementary reference field pairs,
+    and non-paired reference fields that may be used by the decoding
+    process for inter prediction of any picture in the
+    sequence. max_num_ref_frames also determines the size of the sliding
+    window operation as specified in subclause 8.2.5.3. The value of
+    max_num_ref_frames shall be in the range of 0 to MaxDpbSize (as
+    specified in subclause A.3.1 or A.3.2), inclusive.
+   */
+  uint32_t max_num_ref_frames;
+
+  /*
+    gaps_in_frame_num_value_allowed_flag specifies the allowed
+    values of frame_num as specified in subclause 7.4.3 and the
+    decoding process in case of an inferred gap between values of
+    frame_num as specified in subclause 8.2.5.2.
+   */
+  bool gaps_in_frame_num_allowed_flag;
+
+  /*
+    pic_width_in_mbs_minus1 plus 1 specifies the width of each
+    decoded picture in units of macroblocks. 16 macroblocks in a row
+   */
+  uint32_t pic_width_in_mbs;
+
+  /*
+    pic_height_in_map_units_minus1 plus 1 specifies the height in
+    slice group map units of a decoded frame or field. 16
+    macroblocks in each column.
+   */
+  uint32_t pic_height_in_map_units;
+
+  /*
+    frame_mbs_only_flag equal to 0 specifies that coded pictures of
+    the coded video sequence may either be coded fields or coded
+    frames. frame_mbs_only_flag equal to 1 specifies that every
+    coded picture of the coded video sequence is a coded frame
+    containing only frame macroblocks.
+   */
+  bool frame_mbs_only_flag;
+
+  /*
+    mb_adaptive_frame_field_flag equal to 0 specifies no
+    switching between frame and field macroblocks within a
+    picture. mb_adaptive_frame_field_flag equal to 1 specifies
+    the possible use of switching between frame and field
+    macroblocks within frames. When mb_adaptive_frame_field_flag
+    is not present, it shall be inferred to be equal to 0.
+   */
+  bool mb_adaptive_frame_field_flag;
+
+  /*
+    frame_cropping_flag equal to 1 specifies that the frame cropping
+    offset parameters follow next in the sequence parameter
+    set. frame_cropping_flag equal to 0 specifies that the frame
+    cropping offset parameters are not present.
+   */
+  bool frame_cropping_flag;
+  uint32_t frame_crop_left_offset;
+  uint32_t frame_crop_right_offset;
+  uint32_t frame_crop_top_offset;
+  uint32_t frame_crop_bottom_offset;
+
+  // VUI Parameters
+
+  /*
+    vui_parameters_present_flag equal to 1 specifies that the
+    vui_parameters( ) syntax structure as specified in Annex E is
+    present. vui_parameters_present_flag equal to 0 specifies that
+    the vui_parameters( ) syntax structure as specified in Annex E
+    is not present.
+   */
+  bool vui_parameters_present_flag;
+
+  /*
+    aspect_ratio_info_present_flag equal to 1 specifies that
+    aspect_ratio_idc is present. aspect_ratio_info_present_flag
+    equal to 0 specifies that aspect_ratio_idc is not present.
+   */
+  bool aspect_ratio_info_present_flag;
+
+  /*
+    aspect_ratio_idc specifies the value of the sample aspect
+    ratio of the luma samples. Table E-1 shows the meaning of
+    the code. When aspect_ratio_idc indicates Extended_SAR, the
+    sample aspect ratio is represented by sar_width and
+    sar_height. When the aspect_ratio_idc syntax element is not
+    present, aspect_ratio_idc value shall be inferred to be
+    equal to 0.
+   */
+  uint8_t aspect_ratio_idc;
+  uint32_t sar_width;
+  uint32_t sar_height;
+
+  bool overscan_info_present_flag;
+  bool overscan_appropriate_flag;
+
+  uint8_t video_format;
+  bool video_full_range_flag;
+  bool colour_description_present_flag;
+  uint8_t colour_primaries;
+  uint8_t transfer_characteristics;
+  uint8_t matrix_coefficients;
+  bool chroma_loc_info_present_flag;
+  uint32_t chroma_sample_loc_type_top_field;
+  uint32_t chroma_sample_loc_type_bottom_field;
+  uint32_t num_units_in_tick;
+  uint32_t time_scale;
+  bool fixed_frame_rate_flag;
+
+  SPSData();
+};
+
+class H264
+{
+public:
+  static bool DecodeSPSFromExtraData(const ByteBuffer* aExtraData, SPSData& aDest);
+  /* Extract RAW BYTE SEQUENCE PAYLOAD from NAL content.
+     Returns nullptr if invalid content. */
+  static already_AddRefed<ByteBuffer> DecodeNALUnit(const ByteBuffer* aNAL);
+  /* Decode SPS NAL RBSP and fill SPSData structure */
+  static bool DecodeSPS(const ByteBuffer* aSPS, SPSData& aDest);
+
+private:
+  static void vui_parameters(BitReader& aBr, SPSData& aDest);
+};
+
+} // namespace mp4_demuxer
+
+#endif // MP4_DEMUXER_H264_H_
diff --git a/media/libstagefright/moz.build b/media/libstagefright/moz.build
index 4fdaf488f92..f8e7c91a870 100644
--- a/media/libstagefright/moz.build
+++ b/media/libstagefright/moz.build
@@ -55,6 +55,7 @@ EXPORTS.mp4_demuxer += [
     'binding/include/mp4_demuxer/ByteReader.h',
     'binding/include/mp4_demuxer/ByteWriter.h',
     'binding/include/mp4_demuxer/DecoderData.h',
+    'binding/include/mp4_demuxer/H264.h',
     'binding/include/mp4_demuxer/Interval.h',
     'binding/include/mp4_demuxer/MoofParser.h',
     'binding/include/mp4_demuxer/mp4_demuxer.h',
@@ -75,6 +76,7 @@ UNIFIED_SOURCES += [
     'binding/Box.cpp',
     'binding/BufferStream.cpp',
     'binding/DecoderData.cpp',
+    'binding/H264.cpp',
     'binding/Index.cpp',
     'binding/MoofParser.cpp',
     'binding/mp4_demuxer.cpp',