2007-03-22 10:30:00 -07:00
|
|
|
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
2012-05-21 04:12:37 -07:00
|
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
2007-03-22 10:30:00 -07:00
|
|
|
#include "nscore.h"
|
|
|
|
#include "nsCyrillicProb.h"
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
#include "nsCOMPtr.h"
|
|
|
|
#include "nsISupports.h"
|
|
|
|
#include "nsICharsetDetector.h"
|
2013-01-13 10:06:28 -08:00
|
|
|
#include "nsICharsetDetectionObserver.h"
|
|
|
|
#include "nsIStringCharsetDetector.h"
|
2007-03-22 10:30:00 -07:00
|
|
|
#include "nsCyrillicDetector.h"
|
|
|
|
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
// Interface nsISupports [implementation]
|
2014-04-27 00:06:00 -07:00
|
|
|
// nsISupports implementation for nsCyrXPCOMDetector; the only interface
// listed for it is nsICharsetDetector.
NS_IMPL_ISUPPORTS(nsCyrXPCOMDetector, nsICharsetDetector)
|
|
|
|
// nsISupports implementation for nsCyrXPCOMStringDetector; the only interface
// listed for it is nsIStringCharsetDetector.
NS_IMPL_ISUPPORTS(nsCyrXPCOMStringDetector, nsIStringCharsetDetector)
|
2007-03-22 10:30:00 -07:00
|
|
|
|
2012-08-22 08:56:38 -07:00
|
|
|
void nsCyrillicDetector::HandleData(const char* aBuf, uint32_t aLen)
|
2007-03-22 10:30:00 -07:00
|
|
|
{
|
2012-08-22 08:56:38 -07:00
|
|
|
uint8_t cls;
|
2007-03-22 10:30:00 -07:00
|
|
|
const char* b;
|
2012-08-22 08:56:38 -07:00
|
|
|
uint32_t i;
|
2007-03-22 10:30:00 -07:00
|
|
|
if(mDone)
|
|
|
|
return;
|
|
|
|
for(i=0, b=aBuf;i<aLen;i++,b++)
|
|
|
|
{
|
2012-08-22 08:56:38 -07:00
|
|
|
for(unsigned j=0;j<mItems;j++)
|
2007-03-22 10:30:00 -07:00
|
|
|
{
|
|
|
|
if( 0x80 & *b)
|
|
|
|
cls = mCyrillicClass[j][(*b) & 0x7F];
|
|
|
|
else
|
|
|
|
cls = 0;
|
|
|
|
NS_ASSERTION( cls <= 32 , "illegal character class");
|
|
|
|
mProb[j] += gCyrillicProb[mLastCls[j]][cls];
|
|
|
|
mLastCls[j] = cls;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// We now only based on the first block we receive
|
|
|
|
DataEnd();
|
|
|
|
}
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
#define THRESHOLD_RATIO 1.5f
|
|
|
|
void nsCyrillicDetector::DataEnd()
|
|
|
|
{
|
2012-08-22 08:56:38 -07:00
|
|
|
uint32_t max=0;
|
|
|
|
uint8_t maxIdx=0;
|
|
|
|
uint8_t j;
|
2007-03-22 10:30:00 -07:00
|
|
|
if(mDone)
|
|
|
|
return;
|
|
|
|
for(j=0;j<mItems;j++) {
|
|
|
|
if(mProb[j] > max)
|
|
|
|
{
|
|
|
|
max = mProb[j];
|
|
|
|
maxIdx= j;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if( 0 == max ) // if we didn't get any 8 bits data
|
|
|
|
return;
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
for(j=0;j<mItems;j++)
|
|
|
|
printf("Charset %s->\t%d\n", mCharsets[j], mProb[j]);
|
|
|
|
#endif
|
|
|
|
this->Report(mCharsets[maxIdx]);
|
2011-10-17 07:59:28 -07:00
|
|
|
mDone = true;
|
2007-03-22 10:30:00 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
2012-08-22 08:56:38 -07:00
|
|
|
// Constructs the observer-based detector.
//
// aItems, aCyrillicClass and aCharsets are forwarded unchanged to the
// nsCyrillicDetector base constructor. The observer starts out null and is
// expected to be supplied later through Init().
nsCyrXPCOMDetector::nsCyrXPCOMDetector(uint8_t aItems,
                                       const uint8_t** aCyrillicClass,
                                       const char** aCharsets)
  : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
{
  mObserver = nullptr;
}
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
// Destructor: no explicit cleanup is performed here.
nsCyrXPCOMDetector::~nsCyrXPCOMDetector() = default;
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
// Registers the observer that will be notified of the detection result.
//
// aObserver - observer to store; must not be null.
//
// Returns NS_ERROR_ILLEGAL_VALUE when aObserver is null, NS_OK otherwise.
// Calling Init() while an observer is already set trips an assertion, but the
// new observer still replaces the old one.
NS_IMETHODIMP
nsCyrXPCOMDetector::Init(nsICharsetDetectionObserver* aObserver)
{
  NS_ASSERTION(mObserver == nullptr , "Init twice");

  if (!aObserver) {
    return NS_ERROR_ILLEGAL_VALUE;
  }

  mObserver = aObserver;
  return NS_OK;
}
|
|
|
|
|
|
|
|
//----------------------------------------------------------
|
|
|
|
// Feeds a chunk of data to the detector.
//
// aBuf        - bytes to analyse; must not be null.
// aLen        - number of bytes in aBuf.
// oDontFeedMe - out parameter; must not be null. Always set to false on
//               success.
//
// Returns NS_ERROR_ILLEGAL_VALUE if aBuf or oDontFeedMe is null, otherwise
// NS_OK after passing the data to HandleData().
NS_IMETHODIMP
nsCyrXPCOMDetector::DoIt(const char* aBuf, uint32_t aLen, bool* oDontFeedMe)
{
  NS_ASSERTION(mObserver != nullptr , "have not init yet");

  const bool argsValid = (aBuf != nullptr) && (oDontFeedMe != nullptr);
  if (!argsValid) {
    return NS_ERROR_ILLEGAL_VALUE;
  }

  this->HandleData(aBuf, aLen);
  *oDontFeedMe = false;
  return NS_OK;
}
|
|
|
|
|
|
|
|
//----------------------------------------------------------
|
|
|
|
// Signals that no more data will be supplied: runs DataEnd() so that a result
// can be reported from whatever has been accumulated. Always returns NS_OK.
NS_IMETHODIMP
nsCyrXPCOMDetector::Done()
{
  NS_ASSERTION(mObserver != nullptr , "have not init yet");

  this->DataEnd();

  return NS_OK;
}
|
|
|
|
|
|
|
|
//----------------------------------------------------------
|
|
|
|
void nsCyrXPCOMDetector::Report(const char* aCharset)
|
|
|
|
{
|
2012-07-30 07:20:58 -07:00
|
|
|
NS_ASSERTION(mObserver != nullptr , "have not init yet");
|
2007-03-22 10:30:00 -07:00
|
|
|
mObserver->Notify(aCharset, eBestAnswer);
|
|
|
|
}
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
2012-08-22 08:56:38 -07:00
|
|
|
// Constructs the string-based detector. All three arguments are forwarded
// unchanged to the nsCyrillicDetector base constructor; nothing else is
// initialised here.
nsCyrXPCOMStringDetector::nsCyrXPCOMStringDetector(
    uint8_t aItems,
    const uint8_t** aCyrillicClass,
    const char** aCharsets)
  : nsCyrillicDetector(aItems, aCyrillicClass, aCharsets)
{
}
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
// Destructor: no explicit cleanup is performed here.
nsCyrXPCOMStringDetector::~nsCyrXPCOMStringDetector() = default;
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
void nsCyrXPCOMStringDetector::Report(const char *aCharset)
|
|
|
|
{
|
|
|
|
mResult = aCharset;
|
|
|
|
}
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
2012-08-22 08:56:38 -07:00
|
|
|
// Runs detection over a complete buffer and returns the result directly.
//
// aBuf     - bytes to analyse; may be null only when aLen is 0.
// aLen     - number of bytes in aBuf.
// oCharset - out parameter; must not be null. Receives the charset stored by
//            Report(), or null when nothing was reported.
// oConf    - out parameter; set to eBestAnswer on success.
//
// Returns NS_ERROR_ILLEGAL_VALUE (leaving the out parameters untouched) when
// oCharset is null or when aBuf is null with a non-zero length; both cases
// would otherwise dereference a null pointer. Returns NS_OK in all other
// cases.
//
// NOTE(review): only mResult and mDone are reset here; the per-charset scores
// updated by HandleData() are not, so they appear to carry over between calls
// on the same instance. Confirm whether that is intended.
NS_IMETHODIMP nsCyrXPCOMStringDetector::DoIt(const char* aBuf, uint32_t aLen,
                                             const char** oCharset,
                                             nsDetectionConfident &oConf)
{
  if (nullptr == oCharset || (nullptr == aBuf && aLen != 0)) {
    return NS_ERROR_ILLEGAL_VALUE;
  }

  mResult = nullptr;
  mDone = false;

  this->HandleData(aBuf, aLen);
  // HandleData() already ends with DataEnd(); this second call only has an
  // effect if the detector is still not done at this point.
  this->DataEnd();

  *oCharset = mResult;
  oConf = eBestAnswer;
  return NS_OK;
}
|
|
|
|
|
|
|
|
|