Bug 1111328: Add H264 SPS NAL decoder. r=kentuckyfriedtakahe

This commit is contained in:
Jean-Yves Avenard 2014-12-24 23:11:35 +11:00
parent 0de868f259
commit 186b832d70
3 changed files with 580 additions and 0 deletions

View File

@ -0,0 +1,317 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/ArrayUtils.h"
#include "mp4_demuxer/AnnexB.h"
#include "mp4_demuxer/ByteReader.h"
#include "mp4_demuxer/ByteWriter.h"
#include "mp4_demuxer/H264.h"
#include <media/stagefright/foundation/ABitReader.h>
using namespace mozilla;
namespace mp4_demuxer
{
// Bit-level reader over a ByteBuffer, layered on stagefright::ABitReader.
// Adds a bounds check to raw reads and the Exp-Golomb decoders (ue(v)/se(v))
// used by the H.264 parameter-set syntax.
class BitReader
{
public:
  explicit BitReader(const ByteBuffer& aBuffer)
    : mBitReader(aBuffer.Elements(), aBuffer.Length())
  {
  }

  // Read aNum bits (aNum <= 32) and return them as an unsigned value.
  // Returns 0 without consuming anything when fewer than aNum bits remain.
  uint32_t ReadBits(size_t aNum)
  {
    MOZ_ASSERT(mBitReader.numBitsLeft());
    MOZ_ASSERT(aNum <= 32);
    if (mBitReader.numBitsLeft() < aNum) {
      return 0;
    }
    return mBitReader.getBits(aNum);
  }

  // Read a single bit.
  uint32_t ReadBit()
  {
    return ReadBits(1);
  }

  // Read unsigned integer Exp-Golomb-coded.
  // Returns 0 if 32 or more leading zero bits are found (invalid or truncated
  // data: ReadBit() yields 0 once the buffer is exhausted).
  uint32_t ReadUE()
  {
    uint32_t leadingZeros = 0;
    while (ReadBit() == 0 && leadingZeros < 32) {
      leadingZeros++;
    }
    if (leadingZeros == 32) {
      MOZ_ASSERT(false);
      return 0;
    }
    uint32_t r = ReadBits(leadingZeros);
    // Unsigned shift: with 31 leading zeros a signed "1 << 31" followed by
    // "- 1" would overflow int, which is undefined behaviour.
    r += (uint32_t(1) << leadingZeros) - 1;
    return r;
  }

  // Read signed integer Exp-Golomb-coded.
  // Maps codeNum k to (-1)^(k+1) * Ceil(k / 2): odd k is positive, even k is
  // negative (or zero).
  int32_t ReadSE()
  {
    const uint32_t codeNum = ReadUE();
    // Ceil(codeNum / 2) computed in unsigned arithmetic so that large
    // codeNum values cannot overflow a signed intermediate.
    const uint32_t magnitude = (codeNum >> 1) + (codeNum & 1);
    if (codeNum & 1) {
      return int32_t(magnitude);
    }
    return -int32_t(magnitude);
  }

private:
  stagefright::ABitReader mBitReader;
};
// Construct an SPSData with every field zeroed, then apply the one non-zero
// default.
SPSData::SPSData()
{
// Zero the whole structure first; the assignment below must come after it.
PodZero(this);
// H264::DecodeSPS only reads chroma_format_idc from the bitstream for some
// profile_idc values; 1 is the value used when it is not present.
chroma_format_idc = 1;
}
// Extract the Raw Byte Sequence Payload (RBSP) from a NAL unit.
//
// Skips the NAL unit header (1 byte, plus the extension bytes used by NAL
// unit types 14, 20 and 21) and removes every emulation_prevention_three_byte
// (a 0x03 that follows two consecutive 0x00 bytes) from the payload.
//
// aNAL must not be null. Returns nullptr if aNAL is shorter than 4 bytes or
// if the header extension cannot be skipped; otherwise returns a new
// ByteBuffer holding the RBSP.
/* static */ already_AddRefed<ByteBuffer>
H264::DecodeNALUnit(const ByteBuffer* aNAL)
{
  MOZ_ASSERT(aNAL);

  if (aNAL->Length() < 4) {
    return nullptr;
  }

  nsRefPtr<ByteBuffer> rbsp = new ByteBuffer;
  ByteReader reader(*aNAL);

  // nal_unit_type is the low 5 bits of the first header byte.
  uint8_t nal_unit_type = reader.ReadU8() & 0x1f;
  uint32_t nalUnitHeaderBytes = 1;
  if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) {
    // These types carry a header extension whose size depends on the flag in
    // the top bit of the next byte.
    bool svc_extension_flag = false;
    bool avc_3d_extension_flag = false;
    if (nal_unit_type != 21) {
      svc_extension_flag = reader.PeekU8() & 0x80;
    } else {
      avc_3d_extension_flag = reader.PeekU8() & 0x80;
    }
    if (svc_extension_flag) {
      nalUnitHeaderBytes += 3;
    } else if (avc_3d_extension_flag) {
      nalUnitHeaderBytes += 2;
    } else {
      nalUnitHeaderBytes += 3;
    }
  }

  // The first header byte has already been consumed above.
  if (!reader.Read(nalUnitHeaderBytes - 1)) {
    return nullptr;
  }

  // Number of consecutive 0x00 bytes seen immediately before the current one.
  uint32_t zeros = 0;
  while (reader.Remaining()) {
    uint8_t byte = reader.ReadU8();
    if (zeros >= 2 && byte == 0x03) {
      // emulation_prevention_three_byte: not part of the RBSP, drop it and
      // start looking for the next 0x000003 sequence.
      zeros = 0;
      continue;
    }
    rbsp->AppendElement(byte);
    if (byte == 0) {
      zeros++;
    } else {
      zeros = 0;
    }
  }
  return rbsp.forget();
}
/* static */ bool
H264::DecodeSPS(const ByteBuffer* aSPS, SPSData& aDest)
{
MOZ_ASSERT(aSPS);
BitReader br(*aSPS);
int32_t lastScale;
int32_t nextScale;
int32_t deltaScale;
aDest.profile_idc = br.ReadBits(8);
aDest.constraint_set0_flag = br.ReadBit();
aDest.constraint_set1_flag = br.ReadBit();
aDest.constraint_set2_flag = br.ReadBit();
aDest.constraint_set3_flag = br.ReadBit();
aDest.constraint_set4_flag = br.ReadBit();
aDest.constraint_set5_flag = br.ReadBit();
br.ReadBits(2); // reserved_zero_2bits
aDest.level_idc = br.ReadBits(8);
aDest.seq_parameter_set_id = br.ReadUE();
if (aDest.profile_idc == 100 || aDest.profile_idc == 110 ||
aDest.profile_idc == 122 || aDest.profile_idc == 244 ||
aDest.profile_idc == 44 || aDest.profile_idc == 83 ||
aDest.profile_idc == 86 || aDest.profile_idc == 118 ||
aDest.profile_idc == 128 || aDest.profile_idc == 138 ||
aDest.profile_idc == 139 || aDest.profile_idc == 134) {
if ((aDest.chroma_format_idc = br.ReadUE()) == 3) {
aDest.separate_colour_plane_flag = br.ReadBit();
}
br.ReadUE(); // bit_depth_luma_minus8
br.ReadUE(); // bit_depth_chroma_minus8
br.ReadBit(); // qpprime_y_zero_transform_bypass_flag
if (br.ReadBit()) { // seq_scaling_matrix_present_flag
for (int idx = 0; idx < ((aDest.chroma_format_idc != 3) ? 8 : 12); ++idx) {
if (br.ReadBit()) { // Scaling list present
lastScale = nextScale = 8;
int sl_n = (idx < 6) ? 16 : 64;
for (int sl_i = 0; sl_i < sl_n; sl_i++) {
if (nextScale) {
deltaScale = br.ReadSE();
nextScale = (lastScale + deltaScale + 256) % 256;
}
lastScale = (nextScale == 0) ? lastScale : nextScale;
}
}
}
}
}
aDest.log2_max_frame_num = br.ReadUE() + 4;
aDest.pic_order_cnt_type = br.ReadUE();
if (aDest.pic_order_cnt_type == 0) {
aDest.log2_max_pic_order_cnt_lsb = br.ReadUE() + 4;
} else if (aDest.pic_order_cnt_type == 1) {
aDest.delta_pic_order_always_zero_flag = br.ReadBit();
aDest.offset_for_non_ref_pic = br.ReadSE();
aDest.offset_for_top_to_bottom_field = br.ReadSE();
uint32_t num_ref_frames_in_pic_order_cnt_cycle = br.ReadUE();
for (uint32_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) {
br.ReadSE(); // offset_for_ref_frame[i]
}
}
aDest.max_num_ref_frames = br.ReadUE();
aDest.gaps_in_frame_num_allowed_flag = br.ReadBit();
aDest.pic_width_in_mbs = br.ReadUE() + 1;
aDest.pic_height_in_map_units = br.ReadUE() + 1;
aDest.frame_mbs_only_flag = br.ReadBit();
if (!aDest.frame_mbs_only_flag) {
aDest.pic_height_in_map_units *= 2;
aDest.mb_adaptive_frame_field_flag = br.ReadBit();
}
br.ReadBit(); // direct_8x8_inference_flag
aDest.frame_cropping_flag = br.ReadBit();
if (aDest.frame_cropping_flag) {
aDest.frame_crop_left_offset = br.ReadUE();
aDest.frame_crop_right_offset = br.ReadUE();
aDest.frame_crop_top_offset = br.ReadUE();
aDest.frame_crop_bottom_offset = br.ReadUE();
}
// Calculate common values.
// FFmpeg and VLC ignore the left and top cropping. Do the same here.
uint8_t ChromaArrayType =
aDest.separate_colour_plane_flag ? 0 : aDest.chroma_format_idc;
// Calculate width.
uint32_t CropUnitX = 1;
uint32_t SubWidthC = aDest.chroma_format_idc == 3 ? 1 : 2;
if (ChromaArrayType != 0) {
CropUnitX = SubWidthC;
}
uint32_t cropX = CropUnitX * aDest.frame_crop_right_offset;
aDest.pic_width = aDest.pic_width_in_mbs * 16 - cropX;
// Calculate Height
uint32_t CropUnitY = 2 - aDest.frame_mbs_only_flag;
uint32_t SubHeightC = aDest.chroma_format_idc <= 1 ? 2 : 1;
if (ChromaArrayType != 0)
CropUnitY *= SubHeightC;
uint32_t cropY = CropUnitY * aDest.frame_crop_bottom_offset;
aDest.pic_height = aDest.pic_height_in_map_units * 16 - cropY;
aDest.interlaced = !aDest.frame_mbs_only_flag;
return true;
}
// Parse the vui_parameters() syntax structure from aBr into aDest.
//
// Parsing stops after the timing information: the HRD parameters and the
// bitstream restriction fields that may follow are not read.
/* static */ void
H264::vui_parameters(BitReader& aBr, SPSData& aDest)
{
  // sar_width / sar_height are only transmitted for Extended_SAR; make sure
  // no stale value survives in every other case.
  aDest.sar_width = aDest.sar_height = 0;
  aDest.aspect_ratio_info_present_flag = aBr.ReadBit();
  if (aDest.aspect_ratio_info_present_flag) {
    aDest.aspect_ratio_idc = aBr.ReadBits(8);
    if (aDest.aspect_ratio_idc == 255 /* EXTENDED_SAR */) {
      aDest.sar_width = aBr.ReadBits(16);
      aDest.sar_height = aBr.ReadBits(16);
    }
  }

  // Record the flag like the other *_present_flag members of SPSData.
  aDest.overscan_info_present_flag = aBr.ReadBit();
  if (aDest.overscan_info_present_flag) {
    aDest.overscan_appropriate_flag = aBr.ReadBit();
  }

  if (aBr.ReadBit()) { // video_signal_type_present_flag
    aDest.video_format = aBr.ReadBits(3);
    aDest.video_full_range_flag = aBr.ReadBit();
    aDest.colour_description_present_flag = aBr.ReadBit();
    if (aDest.colour_description_present_flag) {
      aDest.colour_primaries = aBr.ReadBits(8);
      aDest.transfer_characteristics = aBr.ReadBits(8);
      aDest.matrix_coefficients = aBr.ReadBits(8);
    }
  }

  aDest.chroma_loc_info_present_flag = aBr.ReadBit();
  if (aDest.chroma_loc_info_present_flag) {
    aDest.chroma_sample_loc_type_top_field = aBr.ReadUE();
    aDest.chroma_sample_loc_type_bottom_field = aBr.ReadUE();
  }

  if (aBr.ReadBit()) { // timing_info_present_flag
    aDest.num_units_in_tick = aBr.ReadBits(32);
    aDest.time_scale = aBr.ReadBits(32);
    aDest.fixed_frame_rate_flag = aBr.ReadBit();
  }
  // Here we ignore nal_hrd parameters and bitstream restriction content.
}
// Locate the first SPS NAL unit stored in aExtraData and decode it into
// aDest.
//
// Returns false if AnnexB::HasSPS() rejects the data, if the data is
// truncated, if it declares no SPS, if the first entry is not an SPS NAL
// (type 7), or if the NAL payload cannot be extracted. Otherwise returns the
// result of DecodeSPS().
/* static */ bool
H264::DecodeSPSFromExtraData(const ByteBuffer* aExtraData, SPSData& aDest)
{
  if (!AnnexB::HasSPS(aExtraData)) {
    return false;
  }
  ByteReader reader(*aExtraData);

  // Skip the 5 bytes preceding the SPS count (presumably the fixed part of
  // the AVCDecoderConfigurationRecord - confirm against AnnexB::HasSPS).
  if (!reader.Read(5)) {
    return false;
  }

  // The low 5 bits of the next byte hold the number of SPS entries.
  if (!(reader.ReadU8() & 0x1f)) {
    // No SPS.
    reader.DiscardRemaining();
    return false;
  }

  // Each entry is a 16-bit length followed by the NAL unit itself.
  uint16_t length = reader.ReadU16();

  if ((reader.PeekU8() & 0x1f) != 7) {
    // Not a SPS NAL type.
    reader.DiscardRemaining();
    return false;
  }

  const uint8_t* ptr = reader.Read(length);
  if (!ptr) {
    return false;
  }

  nsRefPtr<ByteBuffer> rawNAL = new ByteBuffer;
  rawNAL->AppendElements(ptr, length);

  nsRefPtr<ByteBuffer> sps = DecodeNALUnit(rawNAL);

  reader.DiscardRemaining();

  // DecodeNALUnit() returns nullptr for NAL units it cannot handle (e.g.
  // shorter than 4 bytes); DecodeSPS() requires a non-null buffer.
  if (!sps) {
    return false;
  }

  return DecodeSPS(sps, aDest);
}
} // namespace mp4_demuxer

View File

@ -0,0 +1,261 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef MP4_DEMUXER_H264_H_
#define MP4_DEMUXER_H264_H_
#include "mp4_demuxer/DecoderData.h"
namespace mp4_demuxer
{
class BitReader;
// Values decoded from an H.264 sequence parameter set (SPS) NAL unit, plus a
// few values derived from them. Filled by H264::DecodeSPS().
struct SPSData
{
  /* Decoded Members */
  /*
    pic_width is the decoded width in luma samples, computed from
    pic_width_in_mbs * 16 minus the right cropping. H264::DecodeSPS ignores
    frame_crop_left_offset (as FFmpeg and VLC do).
   */
  uint32_t pic_width;
  /*
    pic_height is the decoded height in luma samples, computed from
    pic_height_in_map_units * 16 minus the bottom cropping. H264::DecodeSPS
    ignores frame_crop_top_offset (as FFmpeg and VLC do).
   */
  uint32_t pic_height;

  /* True when frame_mbs_only_flag is 0. */
  bool interlaced;

  /*
    H264 decoding parameters according to ITU-T H.264 (T-REC-H.264-201402-I/en)
    http://www.itu.int/rec/T-REC-H.264-201402-I/en
   */

  bool constraint_set0_flag;
  bool constraint_set1_flag;
  bool constraint_set2_flag;
  bool constraint_set3_flag;
  bool constraint_set4_flag;
  bool constraint_set5_flag;

  /*
    profile_idc and level_idc indicate the profile and level to which the coded
    video sequence conforms.
   */
  uint8_t profile_idc;
  uint8_t level_idc;

  /*
    seq_parameter_set_id identifies the sequence parameter set that is referred
    to by the picture parameter set. The value of seq_parameter_set_id shall be
    in the range of 0 to 31, inclusive.
   */
  uint8_t seq_parameter_set_id;

  /*
    chroma_format_idc specifies the chroma sampling relative to the luma
    sampling. When the value of chroma_format_idc is equal to 1, the nominal
    vertical and horizontal relative locations of luma and chroma samples in
    frames are shown in Figure 6-1. Alternative chroma sample relative
    locations may be indicated in video usability information (see Annex E).
   */
  uint8_t chroma_format_idc;

  /*
    Only read when chroma_format_idc is equal to 3 (4:4:4 sampling).
    If separate_colour_plane_flag is equal to 0, each of the two chroma arrays
    has the same height and width as the luma array. Otherwise
    (separate_colour_plane_flag is equal to 1), the three colour planes are
    separately processed as monochrome sampled pictures.
   */
  bool separate_colour_plane_flag;

  /*
    log2_max_frame_num_minus4 specifies the value of the variable
    MaxFrameNum that is used in frame_num related derivations as
    follows:

     MaxFrameNum = 2^( log2_max_frame_num_minus4 + 4 ). The value of
    log2_max_frame_num_minus4 shall be in the range of 0 to 12, inclusive.

    This member stores log2_max_frame_num_minus4 + 4.
   */
  uint8_t log2_max_frame_num;

  /*
    pic_order_cnt_type specifies the method to decode picture order
    count (as specified in subclause 8.2.1). The value of
    pic_order_cnt_type shall be in the range of 0 to 2, inclusive.
   */
  uint8_t pic_order_cnt_type;

  /*
    log2_max_pic_order_cnt_lsb_minus4 specifies the value of the
    variable MaxPicOrderCntLsb that is used in the decoding
    process for picture order count as specified in subclause
    8.2.1 as follows:

    MaxPicOrderCntLsb = 2^( log2_max_pic_order_cnt_lsb_minus4 + 4 )

    The value of log2_max_pic_order_cnt_lsb_minus4 shall be in
    the range of 0 to 12, inclusive.

    This member stores log2_max_pic_order_cnt_lsb_minus4 + 4.
   */
  uint8_t log2_max_pic_order_cnt_lsb;

  /*
    delta_pic_order_always_zero_flag equal to 1 specifies that
    delta_pic_order_cnt[ 0 ] and delta_pic_order_cnt[ 1 ] are
    not present in the slice headers of the sequence and shall
    be inferred to be equal to 0.
   */
  bool delta_pic_order_always_zero_flag;

  /*
    offset_for_non_ref_pic is used to calculate the picture
    order count of a non-reference picture as specified in
    8.2.1. The value of offset_for_non_ref_pic shall be in the
    range of -2^31 to 2^31 - 1, inclusive.
   */
  int32_t offset_for_non_ref_pic;

  /*
    offset_for_top_to_bottom_field is used to calculate the
    picture order count of a bottom field as specified in
    subclause 8.2.1. The value of offset_for_top_to_bottom_field
    shall be in the range of -2^31 to 2^31 - 1, inclusive.
   */
  int32_t offset_for_top_to_bottom_field;

  /*
    max_num_ref_frames specifies the maximum number of short-term and
    long-term reference frames, complementary reference field pairs,
    and non-paired reference fields that may be used by the decoding
    process for inter prediction of any picture in the
    sequence. max_num_ref_frames also determines the size of the sliding
    window operation as specified in subclause 8.2.5.3. The value of
    max_num_ref_frames shall be in the range of 0 to MaxDpbSize (as
    specified in subclause A.3.1 or A.3.2), inclusive.
   */
  uint32_t max_num_ref_frames;

  /*
    gaps_in_frame_num_value_allowed_flag specifies the allowed
    values of frame_num as specified in subclause 7.4.3 and the
    decoding process in case of an inferred gap between values of
    frame_num as specified in subclause 8.2.5.2.
   */
  bool gaps_in_frame_num_allowed_flag;

  /*
    pic_width_in_mbs_minus1 plus 1 specifies the width of each
    decoded picture in units of macroblocks (a macroblock is 16 luma
    samples wide). This member stores pic_width_in_mbs_minus1 + 1.
   */
  uint32_t pic_width_in_mbs;

  /*
    pic_height_in_map_units_minus1 plus 1 specifies the height in
    slice group map units of a decoded frame or field. This member
    stores pic_height_in_map_units_minus1 + 1, doubled by
    H264::DecodeSPS when frame_mbs_only_flag is 0.
   */
  uint32_t pic_height_in_map_units;

  /*
    frame_mbs_only_flag equal to 0 specifies that coded pictures of
    the coded video sequence may either be coded fields or coded
    frames. frame_mbs_only_flag equal to 1 specifies that every
    coded picture of the coded video sequence is a coded frame
    containing only frame macroblocks.
   */
  bool frame_mbs_only_flag;

  /*
    mb_adaptive_frame_field_flag equal to 0 specifies no
    switching between frame and field macroblocks within a
    picture. mb_adaptive_frame_field_flag equal to 1 specifies
    the possible use of switching between frame and field
    macroblocks within frames. When mb_adaptive_frame_field_flag
    is not present, it shall be inferred to be equal to 0.
   */
  bool mb_adaptive_frame_field_flag;

  /*
    frame_cropping_flag equal to 1 specifies that the frame cropping
    offset parameters follow next in the sequence parameter
    set. frame_cropping_flag equal to 0 specifies that the frame
    cropping offset parameters are not present.
   */
  bool frame_cropping_flag;
  uint32_t frame_crop_left_offset;
  uint32_t frame_crop_right_offset;
  uint32_t frame_crop_top_offset;
  uint32_t frame_crop_bottom_offset;

  // VUI Parameters

  /*
    vui_parameters_present_flag equal to 1 specifies that the
    vui_parameters( ) syntax structure as specified in Annex E is
    present. vui_parameters_present_flag equal to 0 specifies that
    the vui_parameters( ) syntax structure as specified in Annex E
    is not present.
   */
  bool vui_parameters_present_flag;

  /*
    aspect_ratio_info_present_flag equal to 1 specifies that
    aspect_ratio_idc is present. aspect_ratio_info_present_flag
    equal to 0 specifies that aspect_ratio_idc is not present.
   */
  bool aspect_ratio_info_present_flag;

  /*
    aspect_ratio_idc specifies the value of the sample aspect
    ratio of the luma samples. Table E-1 shows the meaning of
    the code. When aspect_ratio_idc indicates Extended_SAR, the
    sample aspect ratio is represented by sar_width and
    sar_height. When the aspect_ratio_idc syntax element is not
    present, aspect_ratio_idc value shall be inferred to be
    equal to 0.
   */
  uint8_t aspect_ratio_idc;
  uint32_t sar_width;
  uint32_t sar_height;

  bool overscan_info_present_flag;
  bool overscan_appropriate_flag;

  uint8_t video_format;
  bool video_full_range_flag;
  bool colour_description_present_flag;
  uint8_t colour_primaries;
  uint8_t transfer_characteristics;
  uint8_t matrix_coefficients;
  bool chroma_loc_info_present_flag;
  uint32_t chroma_sample_loc_type_top_field;
  uint32_t chroma_sample_loc_type_bottom_field;
  uint32_t num_units_in_tick;
  uint32_t time_scale;
  bool fixed_frame_rate_flag;

  SPSData();
};
/* Static helpers to extract and decode an H.264 sequence parameter set. */
class H264
{
public:
/* Find the first SPS NAL in aExtraData, extract its payload with
DecodeNALUnit and decode it into aDest with DecodeSPS.
Returns false if aExtraData holds no SPS, is truncated, or its first
entry is not an SPS NAL. */
static bool DecodeSPSFromExtraData(const ByteBuffer* aExtraData, SPSData& aDest);
/* Extract RAW BYTE SEQUENCE PAYLOAD from NAL content.
aNAL must not be null.
Returns nullptr if invalid content. */
static already_AddRefed<ByteBuffer> DecodeNALUnit(const ByteBuffer* aNAL);
/* Decode SPS NAL RBSP (as returned by DecodeNALUnit) and fill SPSData
structure. aSPS must not be null. */
static bool DecodeSPS(const ByteBuffer* aSPS, SPSData& aDest);
private:
/* Parse the VUI parameters from aBr into aDest. The HRD parameters and
bitstream restriction content are not read. */
static void vui_parameters(BitReader& aBr, SPSData& aDest);
};
#endif // MP4_DEMUXER_H264_H_

View File

@ -55,6 +55,7 @@ EXPORTS.mp4_demuxer += [
'binding/include/mp4_demuxer/ByteReader.h',
'binding/include/mp4_demuxer/ByteWriter.h',
'binding/include/mp4_demuxer/DecoderData.h',
'binding/include/mp4_demuxer/H264.h',
'binding/include/mp4_demuxer/Interval.h',
'binding/include/mp4_demuxer/MoofParser.h',
'binding/include/mp4_demuxer/mp4_demuxer.h',
@ -75,6 +76,7 @@ UNIFIED_SOURCES += [
'binding/Box.cpp',
'binding/BufferStream.cpp',
'binding/DecoderData.cpp',
'binding/H264.cpp',
'binding/Index.cpp',
'binding/MoofParser.cpp',
'binding/mp4_demuxer.cpp',