2012-05-29 08:52:43 -07:00
|
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
2009-06-28 15:44:22 -07:00
|
|
|
|
|
|
|
#include "nsICharsetConverterManager.h"
|
|
|
|
#include "nsServiceManagerUtils.h"
|
|
|
|
#include "nsEncoderDecoderUtils.h"
|
|
|
|
#include "nsTraceRefcnt.h"
|
|
|
|
|
2012-11-07 15:04:22 -08:00
|
|
|
#include "mozilla/dom/EncodingUtils.h"
|
|
|
|
|
|
|
|
using mozilla::dom::EncodingUtils;
|
2009-06-28 15:44:22 -07:00
|
|
|
|
|
|
|
void
|
|
|
|
nsHtml5MetaScanner::sniff(nsHtml5ByteReadable* bytes, nsIUnicodeDecoder** decoder, nsACString& charset)
|
|
|
|
{
|
|
|
|
readable = bytes;
|
|
|
|
stateLoop(stateSave);
|
2012-07-30 07:20:58 -07:00
|
|
|
readable = nullptr;
|
2009-06-28 15:44:22 -07:00
|
|
|
if (mUnicodeDecoder) {
|
|
|
|
mUnicodeDecoder.forget(decoder);
|
|
|
|
charset.Assign(mCharset);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-09-28 23:19:26 -07:00
|
|
|
bool
|
2009-06-28 15:44:22 -07:00
|
|
|
nsHtml5MetaScanner::tryCharset(nsString* charset)
|
|
|
|
{
|
2010-07-30 03:03:54 -07:00
|
|
|
// This code needs to stay in sync with
|
|
|
|
// nsHtml5StreamParser::internalEncodingDeclaration. Unfortunately, the
|
|
|
|
// trickery with member fields here leads to some copy-paste reuse. :-(
|
2009-06-28 15:44:22 -07:00
|
|
|
nsresult res = NS_OK;
|
|
|
|
nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res);
|
|
|
|
if (NS_FAILED(res)) {
|
|
|
|
NS_ERROR("Could not get CharsetConverterManager service.");
|
2011-10-17 07:59:28 -07:00
|
|
|
return false;
|
2009-06-28 15:44:22 -07:00
|
|
|
}
|
2012-09-01 19:35:17 -07:00
|
|
|
nsAutoCString encoding;
|
2009-06-28 15:44:22 -07:00
|
|
|
CopyUTF16toUTF8(*charset, encoding);
|
2011-01-12 00:05:09 -08:00
|
|
|
encoding.Trim(" \t\r\n\f");
|
2010-07-30 03:03:54 -07:00
|
|
|
if (encoding.LowerCaseEqualsLiteral("utf-16") ||
|
|
|
|
encoding.LowerCaseEqualsLiteral("utf-16be") ||
|
|
|
|
encoding.LowerCaseEqualsLiteral("utf-16le")) {
|
2009-10-21 05:21:27 -07:00
|
|
|
mCharset.Assign("UTF-8");
|
2009-06-28 15:44:22 -07:00
|
|
|
res = convManager->GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
|
|
|
|
if (NS_FAILED(res)) {
|
|
|
|
NS_ERROR("Could not get decoder for UTF-8.");
|
2011-10-17 07:59:28 -07:00
|
|
|
return false;
|
2009-06-28 15:44:22 -07:00
|
|
|
}
|
2011-10-17 07:59:28 -07:00
|
|
|
return true;
|
2009-06-28 15:44:22 -07:00
|
|
|
}
|
2012-09-01 19:35:17 -07:00
|
|
|
nsAutoCString preferred;
|
2012-11-07 15:04:22 -08:00
|
|
|
if (!EncodingUtils::FindEncodingForLabel(encoding, preferred)) {
|
2011-10-17 07:59:28 -07:00
|
|
|
return false;
|
2009-06-28 15:44:22 -07:00
|
|
|
}
|
2010-07-30 03:03:54 -07:00
|
|
|
if (preferred.LowerCaseEqualsLiteral("utf-16") ||
|
|
|
|
preferred.LowerCaseEqualsLiteral("utf-16be") ||
|
|
|
|
preferred.LowerCaseEqualsLiteral("utf-16le") ||
|
|
|
|
preferred.LowerCaseEqualsLiteral("utf-7") ||
|
2012-10-16 00:42:54 -07:00
|
|
|
preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7")) {
|
2011-10-17 07:59:28 -07:00
|
|
|
return false;
|
2009-06-28 15:44:22 -07:00
|
|
|
}
|
|
|
|
res = convManager->GetUnicodeDecoderRaw(preferred.get(), getter_AddRefs(mUnicodeDecoder));
|
|
|
|
if (res == NS_ERROR_UCONV_NOCONV) {
|
2011-10-17 07:59:28 -07:00
|
|
|
return false;
|
2009-06-28 15:44:22 -07:00
|
|
|
} else if (NS_FAILED(res)) {
|
|
|
|
NS_ERROR("Getting an encoding decoder failed in a bad way.");
|
2012-07-30 07:20:58 -07:00
|
|
|
mUnicodeDecoder = nullptr;
|
2011-10-17 07:59:28 -07:00
|
|
|
return false;
|
2009-06-28 15:44:22 -07:00
|
|
|
} else {
|
|
|
|
NS_ASSERTION(mUnicodeDecoder, "Getter nsresult and object don't match.");
|
|
|
|
mCharset.Assign(preferred);
|
2011-10-17 07:59:28 -07:00
|
|
|
return true;
|
2009-06-28 15:44:22 -07:00
|
|
|
}
|
|
|
|
}
|