KDECore
nsSJISProber.cpp
Go to the documentation of this file.00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 00002 /* -*- C++ -*- 00003 * Copyright (C) 1998 <developer@mozilla.org> 00004 * 00005 * 00006 * Permission is hereby granted, free of charge, to any person obtaining 00007 * a copy of this software and associated documentation files (the 00008 * "Software"), to deal in the Software without restriction, including 00009 * without limitation the rights to use, copy, modify, merge, publish, 00010 * distribute, sublicense, and/or sell copies of the Software, and to 00011 * permit persons to whom the Software is furnished to do so, subject to 00012 * the following conditions: 00013 * 00014 * The above copyright notice and this permission notice shall be included 00015 * in all copies or substantial portions of the Software. 00016 * 00017 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00018 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00019 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 00020 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 00021 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 00022 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 00023 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 00024 */ 00025 00026 // for S-JIS encoding, observe these characteristics: 00027 // 1, kana character (or hankaku?) 
often have hight frequency of appereance 00028 // 2, kana character often exist in group 00029 // 3, certain combination of kana is never used in japanese language 00030 00031 00032 00033 #include "nsSJISProber.h" 00034 00035 namespace kencodingprober { 00036 void nsSJISProber::Reset(void) 00037 { 00038 mCodingSM->Reset(); 00039 mState = eDetecting; 00040 mContextAnalyser.Reset(); 00041 mDistributionAnalyser.Reset(); 00042 } 00043 00044 nsProbingState nsSJISProber::HandleData(const char* aBuf, unsigned int aLen) 00045 { 00046 nsSMState codingState; 00047 00048 for (unsigned int i = 0; i < aLen; i++) 00049 { 00050 codingState = mCodingSM->NextState(aBuf[i]); 00051 if (codingState == eError) 00052 { 00053 mState = eNotMe; 00054 break; 00055 } 00056 if (codingState == eItsMe) 00057 { 00058 mState = eFoundIt; 00059 break; 00060 } 00061 if (codingState == eStart) 00062 { 00063 unsigned int charLen = mCodingSM->GetCurrentCharLen(); 00064 if (i == 0) 00065 { 00066 mLastChar[1] = aBuf[0]; 00067 mContextAnalyser.HandleOneChar(mLastChar+2-charLen, charLen); 00068 mDistributionAnalyser.HandleOneChar(mLastChar, charLen); 00069 } 00070 else 00071 { 00072 mContextAnalyser.HandleOneChar(aBuf+i+1-charLen, charLen); 00073 mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen); 00074 } 00075 } 00076 } 00077 00078 mLastChar[0] = aBuf[aLen-1]; 00079 00080 if (mState == eDetecting) 00081 if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) 00082 mState = eFoundIt; 00083 00084 return mState; 00085 } 00086 00087 float nsSJISProber::GetConfidence(void) 00088 { 00089 float contxtCf = mContextAnalyser.GetConfidence(); 00090 float distribCf = mDistributionAnalyser.GetConfidence(); 00091 00092 return (contxtCf > distribCf ? contxtCf : distribCf); 00093 } 00094 } 00095 00096