KDECore
JpCntx.h
Go to the documentation of this file.
00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #ifndef __JPCNTX_H__
00027 #define __JPCNTX_H__
00028
00029 #include "kdemacros.h"
00030
00031 #define NUM_OF_CATEGORY 6 // number of counters in JapaneseContextAnalysis::mRelSample
00032
00033 #define ENOUGH_REL_THRESHOLD 100 // GotEnoughData() returns true once mTotalRel exceeds this
00034 #define MAX_REL_THRESHOLD 1000 // HandleOneChar() sets mDone once mTotalRel exceeds this
00035 namespace kencodingprober {
00036
00037 extern const char jp2CharContext[83][83]; // defined elsewhere; HandleOneChar() reads [previous order][current order] and uses the value as an index into mRelSample
00038
00039 class KDE_NO_EXPORT JapaneseContextAnalysis
00040 {
00041 public:
00042 JapaneseContextAnalysis() {Reset();};
00043 virtual ~JapaneseContextAnalysis() {};
00044
00045 void HandleData(const char* aBuf, unsigned int aLen);
00046
00047 void HandleOneChar(const char* aStr, unsigned int aCharLen)
00048 {
00049 int order;
00050
00051
00052 if (mTotalRel > MAX_REL_THRESHOLD) mDone = true;
00053 if (mDone) return;
00054
00055
00056 order = (aCharLen == 2) ? GetOrder(aStr) : -1;
00057 if (order != -1 && mLastCharOrder != -1)
00058 {
00059 mTotalRel++;
00060
00061 mRelSample[(int)jp2CharContext[mLastCharOrder][order]]++;
00062 }
00063 mLastCharOrder = order;
00064 };
00065
00066 float GetConfidence();
00067 void Reset(void);
00068 void SetOpion(){};
00069 bool GotEnoughData() {return mTotalRel > ENOUGH_REL_THRESHOLD;};
00070
00071 protected:
00072 virtual int GetOrder(const char* str, unsigned int *charLen) = 0;
00073 virtual int GetOrder(const char* str) = 0;
00074
00075
00076 unsigned int mRelSample[NUM_OF_CATEGORY];
00077
00078
00079 unsigned int mTotalRel;
00080
00081
00082 int mLastCharOrder;
00083
00084
00085
00086 unsigned int mNeedToSkipCharNum;
00087
00088
00089 bool mDone;
00090 };
00091
00092
00093 class KDE_NO_EXPORT SJISContextAnalysis : public JapaneseContextAnalysis
00094 {
00095
00096 protected:
00097 int GetOrder(const char* str, unsigned int *charLen);
00098
00099 int GetOrder(const char* str)
00100 {
00101
00102 if (*str == '\202' &&
00103 (unsigned char)*(str+1) >= (unsigned char)0x9f &&
00104 (unsigned char)*(str+1) <= (unsigned char)0xf1)
00105 return (unsigned char)*(str+1) - (unsigned char)0x9f;
00106 return -1;
00107 };
00108 };
00109
00110 class KDE_NO_EXPORT EUCJPContextAnalysis : public JapaneseContextAnalysis
00111 {
00112 protected:
00113 int GetOrder(const char* str, unsigned int *charLen);
00114 int GetOrder(const char* str)
00115
00116 {
00117 if (*str == '\244' &&
00118 (unsigned char)*(str+1) >= (unsigned char)0xa1 &&
00119 (unsigned char)*(str+1) <= (unsigned char)0xf3)
00120 return (unsigned char)*(str+1) - (unsigned char)0xa1;
00121 return -1;
00122 };
00123 };
00124 }
00125 #endif
00126