KDECore
CharDistribution.h
Go to the documentation of this file.
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #ifndef CharDistribution_h__
00027 #define CharDistribution_h__
00028
00029 #include "kdemacros.h"
00030
// Character-count threshold used by CharDistributionAnalysis::GotEnoughData():
// the analyser reports "enough data" once more than this many characters
// have been counted.
00031 #define ENOUGH_DATA_THRESHOLD 256
00032
00033 namespace kencodingprober {
00034 class KDE_NO_EXPORT CharDistributionAnalysis
00035 {
00036 public:
00037 CharDistributionAnalysis() {Reset();};
00038 virtual ~CharDistributionAnalysis() {};
00039
00040
00041 void HandleData(const char* , unsigned int ) {};
00042
00043
00044 void HandleOneChar(const char* aStr, unsigned int aCharLen)
00045 {
00046 int order;
00047
00048
00049 order = (aCharLen == 2) ? GetOrder(aStr) : -1;
00050
00051 if (order >= 0)
00052 {
00053 mTotalChars++;
00054
00055 if ((unsigned int)order < mTableSize)
00056 {
00057 if (512 > mCharToFreqOrder[order])
00058 mFreqChars++;
00059 }
00060 }
00061 };
00062
00063
00064 float GetConfidence();
00065
00066
00067 void Reset(void)
00068 {
00069 mDone = false;
00070 mTotalChars = 0;
00071 mFreqChars = 0;
00072 };
00073
00074
00075
00076 void SetOpion(){};
00077
00078
00079
00080 bool GotEnoughData() {return mTotalChars > ENOUGH_DATA_THRESHOLD;};
00081
00082 protected:
00083
00084
00085
00086 virtual int GetOrder(const char* ) {return -1;};
00087
00088
00089 bool mDone;
00090
00091
00092 unsigned int mFreqChars;
00093
00094
00095 unsigned int mTotalChars;
00096
00097
00098 const short *mCharToFreqOrder;
00099
00100
00101 unsigned int mTableSize;
00102
00103
00104
00105 float mTypicalDistributionRatio;
00106 };
00107
00108
00109 class KDE_NO_EXPORT EUCTWDistributionAnalysis: public CharDistributionAnalysis
00110 {
00111 public:
00112 EUCTWDistributionAnalysis();
00113 protected:
00114
00115
00116
00117
00118
00119 int GetOrder(const char* str)
00120 { if ((unsigned char)*str >= (unsigned char)0xc4)
00121 return 94*((unsigned char)str[0]-(unsigned char)0xc4) + (unsigned char)str[1] - (unsigned char)0xa1;
00122 else
00123 return -1;
00124 };
00125 };
00126
00127
00128 class KDE_NO_EXPORT EUCKRDistributionAnalysis : public CharDistributionAnalysis
00129 {
00130 public:
00131 EUCKRDistributionAnalysis();
00132 protected:
00133
00134
00135
00136
00137 int GetOrder(const char* str)
00138 { if ((unsigned char)*str >= (unsigned char)0xb0)
00139 return 94*((unsigned char)str[0]-(unsigned char)0xb0) + (unsigned char)str[1] - (unsigned char)0xa1;
00140 else
00141 return -1;
00142 };
00143 };
00144
00145 class KDE_NO_EXPORT GB2312DistributionAnalysis : public CharDistributionAnalysis
00146 {
00147 public:
00148 GB2312DistributionAnalysis();
00149 protected:
00150
00151
00152
00153
00154 int GetOrder(const char* str)
00155 { if ((unsigned char)*str >= (unsigned char)0xb0 && (unsigned char)str[1] >= (unsigned char)0xa1)
00156 return 94*((unsigned char)str[0]-(unsigned char)0xb0) + (unsigned char)str[1] - (unsigned char)0xa1;
00157 else
00158 return -1;
00159 };
00160 };
00161
00162
00163 class KDE_NO_EXPORT Big5DistributionAnalysis : public CharDistributionAnalysis
00164 {
00165 public:
00166 Big5DistributionAnalysis();
00167 protected:
00168
00169
00170
00171
00172 int GetOrder(const char* str)
00173 { if ((unsigned char)*str >= (unsigned char)0xa4)
00174 if ((unsigned char)str[1] >= (unsigned char)0xa1)
00175 return 157*((unsigned char)str[0]-(unsigned char)0xa4) + (unsigned char)str[1] - (unsigned char)0xa1 +63;
00176 else
00177 return 157*((unsigned char)str[0]-(unsigned char)0xa4) + (unsigned char)str[1] - (unsigned char)0x40;
00178 else
00179 return -1;
00180 };
00181 };
00182
00183 class KDE_NO_EXPORT SJISDistributionAnalysis : public CharDistributionAnalysis
00184 {
00185 public:
00186 SJISDistributionAnalysis();
00187 protected:
00188
00189
00190
00191
00192 int GetOrder(const char* str)
00193 {
00194 int order;
00195 if ((unsigned char)*str >= (unsigned char)0x81 && (unsigned char)*str <= (unsigned char)0x9f)
00196 order = 188 * ((unsigned char)str[0]-(unsigned char)0x81);
00197 else if ((unsigned char)*str >= (unsigned char)0xe0 && (unsigned char)*str <= (unsigned char)0xef)
00198 order = 188 * ((unsigned char)str[0]-(unsigned char)0xe0 + 31);
00199 else
00200 return -1;
00201 order += (unsigned char)*(str+1) - 0x40;
00202 if ((unsigned char)str[1] > (unsigned char)0x7f)
00203 order--;
00204 return order;
00205 };
00206 };
00207
00208 class KDE_NO_EXPORT EUCJPDistributionAnalysis : public CharDistributionAnalysis
00209 {
00210 public:
00211 EUCJPDistributionAnalysis();
00212 protected:
00213
00214
00215
00216
00217 int GetOrder(const char* str)
00218 { if ((unsigned char)*str >= (unsigned char)0xa0)
00219 return 94*((unsigned char)str[0]-(unsigned char)0xa1) + (unsigned char)str[1] - (unsigned char)0xa1;
00220 else
00221 return -1;
00222 };
00223 };
00224 }
00225 #endif //CharDistribution_h__
00226