2012-05-29 08:52:43 -07:00
|
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
2009-06-28 15:44:22 -07:00
|
|
|
|
|
|
|
#include "nsICharsetConverterManager.h"
|
|
|
|
#include "nsServiceManagerUtils.h"
|
2012-03-04 19:57:51 -08:00
|
|
|
#include "nsCharsetAlias.h"
|
2009-06-28 15:44:22 -07:00
|
|
|
#include "nsEncoderDecoderUtils.h"
|
|
|
|
#include "nsTraceRefcnt.h"
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
nsHtml5MetaScanner::sniff(nsHtml5ByteReadable* bytes, nsIUnicodeDecoder** decoder, nsACString& charset)
|
|
|
|
{
|
|
|
|
readable = bytes;
|
|
|
|
stateLoop(stateSave);
|
|
|
|
readable = nsnull;
|
|
|
|
if (mUnicodeDecoder) {
|
|
|
|
mUnicodeDecoder.forget(decoder);
|
|
|
|
charset.Assign(mCharset);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-09-28 23:19:26 -07:00
|
|
|
bool
|
2009-06-28 15:44:22 -07:00
|
|
|
nsHtml5MetaScanner::tryCharset(nsString* charset)
|
|
|
|
{
|
2010-07-30 03:03:54 -07:00
|
|
|
// This code needs to stay in sync with
|
|
|
|
// nsHtml5StreamParser::internalEncodingDeclaration. Unfortunately, the
|
|
|
|
// trickery with member fields here leads to some copy-paste reuse. :-(
|
2009-06-28 15:44:22 -07:00
|
|
|
nsresult res = NS_OK;
|
|
|
|
nsCOMPtr<nsICharsetConverterManager> convManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &res);
|
|
|
|
if (NS_FAILED(res)) {
|
|
|
|
NS_ERROR("Could not get CharsetConverterManager service.");
|
2011-10-17 07:59:28 -07:00
|
|
|
return false;
|
2009-06-28 15:44:22 -07:00
|
|
|
}
|
|
|
|
nsCAutoString encoding;
|
|
|
|
CopyUTF16toUTF8(*charset, encoding);
|
2011-01-12 00:05:09 -08:00
|
|
|
encoding.Trim(" \t\r\n\f");
|
2010-07-30 03:03:54 -07:00
|
|
|
if (encoding.LowerCaseEqualsLiteral("utf-16") ||
|
|
|
|
encoding.LowerCaseEqualsLiteral("utf-16be") ||
|
|
|
|
encoding.LowerCaseEqualsLiteral("utf-16le")) {
|
2009-10-21 05:21:27 -07:00
|
|
|
mCharset.Assign("UTF-8");
|
2009-06-28 15:44:22 -07:00
|
|
|
res = convManager->GetUnicodeDecoderRaw(mCharset.get(), getter_AddRefs(mUnicodeDecoder));
|
|
|
|
if (NS_FAILED(res)) {
|
|
|
|
NS_ERROR("Could not get decoder for UTF-8.");
|
2011-10-17 07:59:28 -07:00
|
|
|
return false;
|
2009-06-28 15:44:22 -07:00
|
|
|
}
|
2011-10-17 07:59:28 -07:00
|
|
|
return true;
|
2009-06-28 15:44:22 -07:00
|
|
|
}
|
|
|
|
nsCAutoString preferred;
|
2012-03-04 19:57:51 -08:00
|
|
|
res = nsCharsetAlias::GetPreferred(encoding, preferred);
|
2009-06-28 15:44:22 -07:00
|
|
|
if (NS_FAILED(res)) {
|
2011-10-17 07:59:28 -07:00
|
|
|
return false;
|
2009-06-28 15:44:22 -07:00
|
|
|
}
|
2010-07-30 03:03:54 -07:00
|
|
|
if (preferred.LowerCaseEqualsLiteral("utf-16") ||
|
|
|
|
preferred.LowerCaseEqualsLiteral("utf-16be") ||
|
|
|
|
preferred.LowerCaseEqualsLiteral("utf-16le") ||
|
|
|
|
preferred.LowerCaseEqualsLiteral("utf-7") ||
|
|
|
|
preferred.LowerCaseEqualsLiteral("jis_x0212-1990") ||
|
|
|
|
preferred.LowerCaseEqualsLiteral("x-jis0208") ||
|
|
|
|
preferred.LowerCaseEqualsLiteral("x-imap4-modified-utf7") ||
|
|
|
|
preferred.LowerCaseEqualsLiteral("x-user-defined")) {
|
2011-10-17 07:59:28 -07:00
|
|
|
return false;
|
2009-06-28 15:44:22 -07:00
|
|
|
}
|
|
|
|
res = convManager->GetUnicodeDecoderRaw(preferred.get(), getter_AddRefs(mUnicodeDecoder));
|
|
|
|
if (res == NS_ERROR_UCONV_NOCONV) {
|
2011-10-17 07:59:28 -07:00
|
|
|
return false;
|
2009-06-28 15:44:22 -07:00
|
|
|
} else if (NS_FAILED(res)) {
|
|
|
|
NS_ERROR("Getting an encoding decoder failed in a bad way.");
|
|
|
|
mUnicodeDecoder = nsnull;
|
2011-10-17 07:59:28 -07:00
|
|
|
return false;
|
2009-06-28 15:44:22 -07:00
|
|
|
} else {
|
|
|
|
NS_ASSERTION(mUnicodeDecoder, "Getter nsresult and object don't match.");
|
|
|
|
mCharset.Assign(preferred);
|
2011-10-17 07:59:28 -07:00
|
|
|
return true;
|
2009-06-28 15:44:22 -07:00
|
|
|
}
|
|
|
|
}
|