Bug 862088 - Add raw mp3 sniffing. r=padenot

Attempt to sniff raw mp3 with no id3 tag by looking for a header followed by a second header at the expected offset from the packet length. Adds mp3 sniffing with and without id3 tags to the mediasniffer xpcshell tests. These files are truncated to 512 bytes. Our method can require up to 1.5k of data, but these have 208/209 byte packets so sniffing still works. notags-bad.mp3 has a corrupt header, and verifies sniffing fails. notags-scan.mp3 has garbage before the header, and verifies sniffing fails. he_free.mp3 is a vbr test vector which isn't sniffable with this method. fl10.mp2 is an mp2 file, and verifies sniffing fails. ff-inst.exe verifies bug 875769 doesn't regress.
2024-09-13 09:24:08 -07:00 · 2013-06-04 12:12:00 -07:00 · 2013-06-04 12:12:00 -07:00 · 69510f6483
commit 69510f6483
parent 884b129d5d
13 changed files with 203 additions and 2 deletions
--- a/toolkit/components/mediasniffer/moz.build
+++ b/toolkit/components/mediasniffer/moz.build
@ -17,3 +17,6 @@ CPP_SOURCES += [
    'nsMediaSnifferModule.cpp',
 ]

+CSRCS += [
+    'mp3sniff.c',
+]
--- a/toolkit/components/mediasniffer/mp3sniff.c
+++ b/toolkit/components/mediasniffer/mp3sniff.c
@ -0,0 +1,154 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* MPEG format parsing */
+
+#include "mp3sniff.h"
+
+/* Maximum packet size is 320 kbits/s * 144 / 32 kHz + 1 padding byte */
+#define MP3_MAX_SIZE 1441
+
+typedef struct { /* Fields unpacked from a 4-byte frame header by mp3_parse(). */
+  int version;   /* bit 3 of p[1] */
+  int layer;     /* 4 minus the 2-bit field in bits 2-1 of p[1] */
+  int errp;      /* bit 0 of p[1] */
+  int bitrate;   /* table value selected by the high nibble of p[2] */
+  int freq;      /* table value selected by bits 3-2 of p[2] */
+  int pad;       /* bit 1 of p[2]; adds one byte in mp3_framesize() */
+  int priv;      /* bit 0 of p[2] */
+  int mode;      /* bits 7-6 of p[3] */
+  int modex;     /* bits 5-4 of p[3] */
+  int copyright; /* bit 3 of p[3] */
+  int original;  /* bit 2 of p[3] */
+  int emphasis;  /* bits 1-0 of p[3] */
+} mp3_header;
+
+/* Parse the 4-byte header in p and fill in the header struct.
+ *
+ * p must point to at least 4 readable bytes; p[0] is not examined here.
+ * No validation is performed: the table entries that hold 0 (bitrate
+ * index 0 and 15, samplerate index 3) are passed through, so
+ * header->bitrate and header->freq may come back as 0.
+ * NOTE(review): only bit 3 of p[1] is stored as the version and a single
+ * bitrate/samplerate table is used regardless of it -- confirm this is
+ * adequate for the streams the caller accepts. */
+static void mp3_parse(const uint8_t *p, mp3_header *header)
+{
+  const int bitrates[16] =
+	{0,  32000,  40000,  48000,  56000,  64000,  80000,  96000,
+         112000, 128000, 160000, 192000, 224000, 256000, 320000, 0};
+  const int samplerates[4] = {44100, 48000, 32000, 0};
+
+  header->version = (p[1] & 0x08) >> 3;
+  header->layer = 4 - ((p[1] & 0x06) >> 1); /* field values 3,2,1,0 give 1,2,3,4 */
+  header->errp = (p[1] & 0x01);
+
+  header->bitrate = bitrates[(p[2] & 0xf0) >> 4]; /* index is 0..15, always in range */
+  header->freq = samplerates[(p[2] & 0x0c) >> 2]; /* index is 0..3, always in range */
+  header->pad = (p[2] & 0x02) >> 1;
+  header->priv = (p[2] & 0x01);
+
+  header->mode = (p[3] & 0xc0) >> 6;
+  header->modex = (p[3] & 0x30) >> 4;
+  header->copyright = (p[3] & 0x08) >> 3;
+  header->original = (p[3] & 0x04) >> 2;
+  header->emphasis = (p[3] & 0x03);
+}
+
+/* calculate the size of an mp3 frame from its header
+ *
+ * Returns bitrate * scale / freq using integer division, plus one when the
+ * padding field is set; scale is 48 for layer 1 and 144 for any other layer.
+ * header->freq must be non-zero: the division below is unguarded, and
+ * mp3_parse() stores 0 there for samplerate index 3.
+ * A zero bitrate yields a size of 0 (or 1 with padding). */
+static int mp3_framesize(mp3_header *header)
+{
+  int size;
+  int scale;
+
+  if (header->layer == 1) scale = 48;
+  else scale = 144;
+
+  size = header->bitrate * scale / header->freq; /* truncating integer division */
+  /* divide by an extra factor of 2 for MPEG-2? */
+
+  if (header->pad) size += 1;
+
+  return size;
+}
+
+static int is_mp3(const uint8_t *p, long length) {
+  /* Return 1 if p begins with a plausible layer 3 frame header, else 0.
+   * length is the number of bytes available at p; only p[0..2] are read.
+   *
+   * Do we have enough room to see a 4 byte header? */
+  if (length < 4) return 0;
+  /* Do we have a sync pattern? */
+  if (p[0] == 0xff && (p[1]&0xe0) == 0xe0) {
+    /* Do we have any illegal field values? */
+    if (((p[1] & 0x06) >> 1) == 0) return 0;  /* No layer 4 */
+    if (((p[2] & 0xf0) >> 4) == 15) return 0; /* Bitrate can't be 1111 */
+    if (((p[2] & 0x0c) >> 2) == 3) return 0;  /* Samplerate can't be 11 */
+    /* Looks like a header. */
+    if ((4 - ((p[1] & 0x06) >> 1)) != 3) return 0; /* Only want layer 3 */
+    return 1;
+  }
+  return 0;
+}
+
+/* Identify an ID3 tag based on its header.
+ * Returns 1 when at least 10 bytes are available, p starts with "ID3",
+ * neither of p[3] and p[4] is 0xff, and the size bytes p[6], p[7] and p[8]
+ * have their top bit clear; returns 0 otherwise.
+ * NOTE(review): p[9] also feeds the size in id3_framesize() but its top bit
+ * is not tested here -- confirm whether that is intentional. */
+/* http://id3.org/id3v2.4.0-structure */
+static int is_id3(const uint8_t *p, long length) {
+  /* Do we have enough room to see the header? */
+  if (length < 10) return 0;
+  /* Do we have a sync pattern? */
+  if (p[0] == 'I' && p[1] == 'D' && p[2] == '3') {
+    if (p[3] == 0xff || p[4] == 0xff) return 0; /* Illegal version. */
+    if (p[6] & 0x80 || p[7] & 0x80 ||
+        p[8] & 0x80) return 0; /* Bad length encoding. */
+    /* Looks like an id3 header. */
+    return 1;
+  }
+  return 0;
+}
+
+/* Calculate the size of an id3 tag structure from its header.
+ * Returns 0 when fewer than 10 bytes are available; otherwise 10 plus the
+ * value assembled from p[6..9], each byte shifted 7 bits further left than
+ * the one after it, with p[6] most significant.
+ * The "ID3" signature is not checked here and the bytes are not masked, so
+ * the result is only meaningful for a header whose size bytes all have the
+ * top bit clear. */
+static int id3_framesize(const uint8_t *p, long length)
+{
+  int size;
+
+  /* Header is 10 bytes. */
+  if (length < 10) {
+    return 0;
+  }
+  /* Frame is header plus declared size. */
+  size = 10 + (p[9] | (p[8] << 7) | (p[7] << 14) | (p[6] << 21));
+
+  return size;
+}
+
+int mp3_sniff(const uint8_t *buf, long length)
+{ /* Decide whether buf, holding length readable bytes, starts like an mp3
+   * stream. Returns 1 when, after skipping any leading ID3 tags, a layer 3
+   * frame header is followed by a second such header exactly one computed
+   * frame size later; returns 0 in every other case. No scanning is done:
+   * a tag or header must sit exactly at the current position. */
+  mp3_header header;
+  const uint8_t *p, *q; /* NOTE(review): q is assigned below but never read. */
+  long skip;
+  long avail;
+
+  p = buf;
+  q = p;
+  avail = length;
+  while (avail >= 4) {
+    if (is_id3(p, avail)) {
+      /* Skip over any id3 tags
+       * NOTE(review): skip comes from the tag header and is not compared
+       * with avail; when it is larger, avail goes negative and the loop
+       * ends without p being read again. */
+      skip = id3_framesize(p, avail);
+      p += skip;
+      avail -= skip;
+    } else if (is_mp3(p, avail)) {
+      mp3_parse(p, &header);
+      skip = mp3_framesize(&header);
+      if (skip < 4 || skip + 4 > avail) { /* need the whole frame plus the next 4-byte header */
+        return 0;
+      }
+      p += skip;
+      avail -= skip;
+      /* Check for a second header at the expected offset. */
+      if (is_mp3(p, avail)) {
+        /* Looks like mp3. */
+        return 1;
+      } else {
+        /* No second header. Not mp3. */
+        return 0;
+      }
+    } else {
+      /* No id3 tag or mp3 header. Not mp3. */
+      return 0;
+    }
+  }
+
+  return 0; /* ran out of data before two frame headers were seen */
+}
--- a/toolkit/components/mediasniffer/mp3sniff.h
+++ b/toolkit/components/mediasniffer/mp3sniff.h
@ -0,0 +1,15 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <mozilla/StandardInteger.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int mp3_sniff(const uint8_t *buf, long length); /* Returns 1 if the length bytes at buf look like mp3 data, 0 otherwise. */
+
+#ifdef __cplusplus
+}
+#endif
--- a/toolkit/components/mediasniffer/nsMediaSniffer.cpp
+++ b/toolkit/components/mediasniffer/nsMediaSniffer.cpp
@ -10,6 +10,7 @@
 #include "nsString.h"
 #include "nsMimeTypes.h"
 #include "mozilla/ModuleUtils.h"
+#include "mp3sniff.h"
 #ifdef MOZ_WEBM
 #include "nestegg/nestegg.h"
 #endif
@ -84,6 +85,13 @@ static bool MatchesWebM(const uint8_t* aData, const uint32_t aLength)
 #endif
 }

+// Sniffs mp3 by handing the buffer to mp3_sniff(), which parses
+// packet headers and looks for expected boundaries.
+static bool MatchesMP3(const uint8_t* aData, const uint32_t aLength)
+{
+  return mp3_sniff(aData, (long)aLength); // int result converts to bool
+}
+
 NS_IMETHODIMP
 nsMediaSniffer::GetMIMETypeFromContent(nsIRequest* aRequest,
                                       const uint8_t* aData,
@ -134,6 +142,11 @@ nsMediaSniffer::GetMIMETypeFromContent(nsIRequest* aRequest,
    return NS_OK;
  }

+  if (MatchesMP3(aData, clampedLength)) {
+    aSniffedType.AssignLiteral(AUDIO_MP3);
+    return NS_OK;
+  }
+
  // Could not sniff the media type, we are required to set it to
  // application/octet-stream.
  aSniffedType.AssignLiteral(APPLICATION_OCTET_STREAM);
--- a/toolkit/components/mediasniffer/test/unit/data/ff-inst.exe
+++ b/toolkit/components/mediasniffer/test/unit/data/ff-inst.exe
--- a/toolkit/components/mediasniffer/test/unit/data/fl10.mp2
+++ b/toolkit/components/mediasniffer/test/unit/data/fl10.mp2
--- a/toolkit/components/mediasniffer/test/unit/data/he_free.mp3
+++ b/toolkit/components/mediasniffer/test/unit/data/he_free.mp3
--- a/toolkit/components/mediasniffer/test/unit/data/id3tags.mp3
+++ b/toolkit/components/mediasniffer/test/unit/data/id3tags.mp3
--- a/toolkit/components/mediasniffer/test/unit/data/notags-bad.mp3
+++ b/toolkit/components/mediasniffer/test/unit/data/notags-bad.mp3
--- a/toolkit/components/mediasniffer/test/unit/data/notags-scan.mp3
+++ b/toolkit/components/mediasniffer/test/unit/data/notags-scan.mp3
--- a/toolkit/components/mediasniffer/test/unit/data/notags.mp3
+++ b/toolkit/components/mediasniffer/test/unit/data/notags.mp3
--- a/toolkit/components/mediasniffer/test/unit/test_mediasniffer_webm.js
+++ b/toolkit/components/mediasniffer/test/unit/test_mediasniffer_webm.js
@ -18,15 +18,31 @@ var httpserver = new HttpServer();
 var testRan = 0;

 // The tests files we want to test, and the type we should have after sniffing.
-// Those file are real webm and mkv files truncated to 512 bytes.
 const tests = [
+  // Real webm and mkv files truncated to 512 bytes.
  { path: "data/file.webm", expected: "video/webm" },
  { path: "data/file.mkv", expected: "application/octet-stream" },
+  // MP3 files with and without id3 headers truncated to 512 bytes.
+  // NB these have 208/209 byte frames, but mp3 can require up to
+  // 1445 bytes to detect with our method.
+  { path: "data/id3tags.mp3", expected: "audio/mpeg" },
+  { path: "data/notags.mp3", expected: "audio/mpeg" },
+  // Padding bit flipped in the first header: sniffing should fail.
+  { path: "data/notags-bad.mp3", expected: "application/octet-stream" },
+  // Garbage before header: sniffing should fail.
+  { path: "data/notags-scan.mp3", expected: "application/octet-stream" },
+  // VBR from the layer III test patterns. We can't sniff this.
+  { path: "data/he_free.mp3", expected: "application/octet-stream" },
+  // Make sure we reject mp2, which has a similar header.
+  { path: "data/fl10.mp2", expected: "application/octet-stream" },
+  // Truncated ff installer regression test for bug 875769.
+  { path: "data/ff-inst.exe", expected: "application/octet-stream" },
 ];

 // A basic listener that checks whether we sniffed properly.
 var listener = {
  onStartRequest: function(request, context) {
+    do_print("Sniffing " + tests[testRan].path);
    do_check_eq(request.QueryInterface(Ci.nsIChannel).contentType, tests[testRan].expected);
  },

--- a/toolkit/components/mediasniffer/test/unit/xpcshell.ini
+++ b/toolkit/components/mediasniffer/test/unit/xpcshell.ini
@ -3,4 +3,4 @@ head =
 tail =

 [test_mediasniffer.js]
-[test_mediasniffer_webm.js]
+[test_mediasniffer_ext.js]