org.jcodings
Class Encoding

java.lang.Object
  extended by org.jcodings.Encoding
All Implemented Interfaces:
Cloneable
Direct Known Subclasses:
MultiByteEncoding, SingleByteEncoding

public abstract class Encoding
extends Object
implements Cloneable


Field Summary
static int CHAR_INVALID
           
protected  int hashCode
           
protected  boolean isAsciiCompatible
           
protected  boolean isDummy
           
protected  boolean isFixedWidth
           
protected  boolean isSingleByte
           
protected  int maxLength
           
protected  int minLength
           
protected  byte[] name
           
static byte NEW_LINE
           
 
Constructor Summary
protected Encoding(String name, int minLength, int maxLength)
           
protected Encoding(String name, int minLength, int maxLength, boolean isDummy)
           
 
Method Summary
abstract  void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, Object arg)
          Expand case folds given a character class (used for case insensitive matching)
static byte asciiToLower(int c)
           
static byte asciiToUpper(int c)
           
abstract  CaseFoldCodeItem[] caseFoldCodesByString(int flag, byte[] bytes, int p, int end)
          Expand AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString) Oniguruma equivalent: get_case_fold_codes_by_str
abstract  int codeToMbc(int code, byte[] bytes, int p)
          Extracts code point into its multibyte representation
abstract  int codeToMbcLength(int code)
          Returns character length given a code point Oniguruma equivalent: code_to_mbclen
abstract  int[] ctypeCodeRange(int ctype, IntHolder sbOut)
          Returns code range for a given character type Oniguruma equivalent: get_ctype_code_range
static int digitVal(int code)
           
 boolean equals(Object other)
           
 int getIndex()
           
 byte[] getName()
           
 int hashCode()
           
 boolean isAlnum(int code)
           
 boolean isAlpha(int code)
           
static boolean isAscii(byte b)
           
static boolean isAscii(int code)
           
 boolean isAsciiCompatible()
           
 boolean isBlank(int code)
           
 boolean isCntrl(int code)
           
abstract  boolean isCodeCType(int code, int ctype)
          Perform a check whether given code is of given character type (e.g. used by isWord(someByte) and similar methods)
 boolean isDigit(int code)
           
 boolean isDummy()
           
 boolean isFixedWidth()
           
 boolean isGraph(int code)
           
 boolean isLower(int code)
           
static boolean isMbcAscii(byte b)
           
 boolean isMbcCrnl(byte[] bytes, int p, int end)
           
 boolean isMbcHead(byte[] bytes, int p, int end)
           
 boolean isMbcWord(byte[] bytes, int p, int end)
           
abstract  boolean isNewLine(byte[] bytes, int p, int end)
          Returns true if bytes[p] is a head of a new line character Oniguruma equivalent: is_mbc_newline
 boolean isNewLine(int code)
           
 boolean isPrint(int code)
           
 boolean isPunct(int code)
           
abstract  boolean isReverseMatchAllowed(byte[] bytes, int p, int end)
          Returns true if it's safe to use the reverse Boyer-Moore fail-fast search algorithm. Oniguruma equivalent: is_allowed_reverse_match
 boolean isSbWord(int code)
           
 boolean isSingleByte()
           
 boolean isSpace(int code)
           
 boolean isUpper(int code)
           
 boolean isWord(int code)
           
static boolean isWordGraphPrint(int ctype)
           
 boolean isXDigit(int code)
           
abstract  int leftAdjustCharHead(byte[] bytes, int p, int s, int end)
          Seeks the previous character head in a stream Oniguruma equivalent: left_adjust_char_head
abstract  int length(byte c)
          Returns character length given character head. Returns 1 for singlebyte encodings or performs direct length table lookup for multibyte ones.
abstract  int length(byte[] bytes, int p, int end)
          Returns character length given stream, character position and stream end. Returns 1 for singlebyte encodings, or performs sanity validations for multibyte ones and returns the character length, or a value signalling missing characters in the stream otherwise.
static Encoding load(String name)
           
 int maxLength()
          Returns maximum character byte length that can appear in an encoding Oniguruma equivalent: max_enc_len
 int maxLengthDistance()
           
abstract  int mbcCaseFold(int flag, byte[] bytes, IntHolder pp, int end, byte[] to)
          Performs case folding for a character at bytes[pp.value]
 int mbcodeStartPosition()
           
abstract  int mbcToCode(byte[] bytes, int p, int end)
          Returns code point for a character Oniguruma equivalent: mbc_to_code
 int minLength()
          Returns minimum character byte length that can appear in an encoding Oniguruma equivalent: min_enc_len
static int odigitVal(int code)
           
 int prevCharHead(byte[] bytes, int p, int s, int end)
           
abstract  int propertyNameToCType(byte[] bytes, int p, int end)
          Returns character type given character type name (used when e.g. \p{Alpha}). Oniguruma equivalent: property_name_to_ctype
 Encoding replicate(byte[] name)
           
 int rightAdjustCharHead(byte[] bytes, int p, int s, int end)
           
 int rightAdjustCharHeadWithPrev(byte[] bytes, int p, int s, int end, IntHolder prev)
           
protected  void setName(byte[] name)
           
protected  void setName(String name)
           
 int step(byte[] bytes, int p, int end, int n)
           
 int stepBack(byte[] bytes, int p, int s, int end, int n)
           
 int strByteLengthNull(byte[] bytes, int p, int end)
           
abstract  int strCodeAt(byte[] bytes, int p, int end, int index)
           
abstract  int strLength(byte[] bytes, int p, int end)
           
 int strLengthNull(byte[] bytes, int p, int end)
           
 int strNCmp(byte[] bytes, int p, int end, byte[] ascii, int asciiP, int n)
           
 byte[] toLowerCaseTable()
          Returns lower case table if it's safe to use it directly, otherwise null. Used for fast case insensitive matching for some singlebyte encodings.
 String toString()
           
 int xdigitVal(int code)
           
 
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
 

Field Detail

CHAR_INVALID

public static final int CHAR_INVALID
See Also:
Constant Field Values

minLength

protected final int minLength

maxLength

protected final int maxLength

isFixedWidth

protected final boolean isFixedWidth

isSingleByte

protected final boolean isSingleByte

isDummy

protected final boolean isDummy

isAsciiCompatible

protected final boolean isAsciiCompatible

name

protected byte[] name

hashCode

protected int hashCode

NEW_LINE

public static final byte NEW_LINE
See Also:
Constant Field Values
Constructor Detail

Encoding

protected Encoding(String name,
                   int minLength,
                   int maxLength,
                   boolean isDummy)

Encoding

protected Encoding(String name,
                   int minLength,
                   int maxLength)
Method Detail

setName

protected final void setName(String name)

setName

protected final void setName(byte[] name)

toString

public final String toString()
Overrides:
toString in class Object

equals

public final boolean equals(Object other)
Overrides:
equals in class Object

hashCode

public final int hashCode()
Overrides:
hashCode in class Object

getIndex

public final int getIndex()

getName

public final byte[] getName()

isDummy

public final boolean isDummy()

isAsciiCompatible

public final boolean isAsciiCompatible()

replicate

public Encoding replicate(byte[] name)

length

public abstract int length(byte c)
Returns character length given character head. Returns 1 for singlebyte encodings or performs direct length table lookup for multibyte ones.

Parameters:
c - Character head. Oniguruma equivalent: mbc_enc_len. To be deprecated very soon (use the length(byte[] bytes, int p, int end) version instead).

length

public abstract int length(byte[] bytes,
                           int p,
                           int end)
Returns character length given stream, character position and stream end. Returns 1 for singlebyte encodings, or performs sanity validations for multibyte ones and returns the character length, or a value signalling missing characters in the stream otherwise.

Returns:
0: never returned; > 0: valid character, length returned; -1: illegal/malformed character; < -1: (-1 - n), where n is the number of missing bytes for the character in the p...end range. Oniguruma equivalent: mbc_enc_len, modified for 1.9 purposes.

maxLength

public final int maxLength()
Returns maximum character byte length that can appear in an encoding Oniguruma equivalent: max_enc_len


maxLengthDistance

public final int maxLengthDistance()

minLength

public final int minLength()
Returns minimum character byte length that can appear in an encoding Oniguruma equivalent: min_enc_len


isNewLine

public abstract boolean isNewLine(byte[] bytes,
                                  int p,
                                  int end)
Returns true if bytes[p] is a head of a new line character Oniguruma equivalent: is_mbc_newline


mbcToCode

public abstract int mbcToCode(byte[] bytes,
                              int p,
                              int end)
Returns code point for a character Oniguruma equivalent: mbc_to_code


codeToMbcLength

public abstract int codeToMbcLength(int code)
Returns character length given a code point Oniguruma equivalent: code_to_mbclen


codeToMbc

public abstract int codeToMbc(int code,
                              byte[] bytes,
                              int p)
Extracts code point into its multibyte representation

Returns:
character length for the given code point Oniguruma equivalent: code_to_mbc

mbcCaseFold

public abstract int mbcCaseFold(int flag,
                                byte[] bytes,
                                IntHolder pp,
                                int end,
                                byte[] to)
Performs case folding for a character at bytes[pp.value]

Parameters:
flag - case fold flag
pp - an IntHolder that points at character head
to - a buffer where to extract case folded character Oniguruma equivalent: mbc_case_fold

toLowerCaseTable

public byte[] toLowerCaseTable()
Returns lower case table if it's safe to use it directly, otherwise null. Used for fast case insensitive matching for some singlebyte encodings.

Returns:
lower case table

applyAllCaseFold

public abstract void applyAllCaseFold(int flag,
                                      ApplyAllCaseFoldFunction fun,
                                      Object arg)
Expand case folds given a character class (used for case insensitive matching)

Parameters:
flag - case fold flag
fun - case folding functor (look at: ApplyCaseFold)
arg - case folding functor argument (look at: ApplyCaseFoldArg) Oniguruma equivalent: apply_all_case_fold

caseFoldCodesByString

public abstract CaseFoldCodeItem[] caseFoldCodesByString(int flag,
                                                         byte[] bytes,
                                                         int p,
                                                         int end)
Expand AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString) Oniguruma equivalent: get_case_fold_codes_by_str


propertyNameToCType

public abstract int propertyNameToCType(byte[] bytes,
                                        int p,
                                        int end)
Returns character type given character type name (used when e.g. \p{Alpha}) Oniguruma equivalent: property_name_to_ctype


isCodeCType

public abstract boolean isCodeCType(int code,
                                    int ctype)
Perform a check whether given code is of given character type (e.g. used by isWord(someByte) and similar methods)

Parameters:
code - a code point of a character
ctype - a character type to check against Oniguruma equivalent: is_code_ctype

ctypeCodeRange

public abstract int[] ctypeCodeRange(int ctype,
                                     IntHolder sbOut)
Returns code range for a given character type Oniguruma equivalent: get_ctype_code_range


leftAdjustCharHead

public abstract int leftAdjustCharHead(byte[] bytes,
                                       int p,
                                       int s,
                                       int end)
Seeks the previous character head in a stream Oniguruma equivalent: left_adjust_char_head

Parameters:
bytes - byte stream
p - position
s - stop
end - end

isReverseMatchAllowed

public abstract boolean isReverseMatchAllowed(byte[] bytes,
                                              int p,
                                              int end)
Returns true if it's safe to use the reverse Boyer-Moore fail-fast search algorithm. Oniguruma equivalent: is_allowed_reverse_match


rightAdjustCharHead

public final int rightAdjustCharHead(byte[] bytes,
                                     int p,
                                     int s,
                                     int end)

rightAdjustCharHeadWithPrev

public final int rightAdjustCharHeadWithPrev(byte[] bytes,
                                             int p,
                                             int s,
                                             int end,
                                             IntHolder prev)

prevCharHead

public final int prevCharHead(byte[] bytes,
                              int p,
                              int s,
                              int end)

stepBack

public final int stepBack(byte[] bytes,
                          int p,
                          int s,
                          int end,
                          int n)

step

public final int step(byte[] bytes,
                      int p,
                      int end,
                      int n)

strLength

public abstract int strLength(byte[] bytes,
                              int p,
                              int end)

strCodeAt

public abstract int strCodeAt(byte[] bytes,
                              int p,
                              int end,
                              int index)

strLengthNull

public final int strLengthNull(byte[] bytes,
                               int p,
                               int end)

strByteLengthNull

public final int strByteLengthNull(byte[] bytes,
                                   int p,
                                   int end)

strNCmp

public final int strNCmp(byte[] bytes,
                         int p,
                         int end,
                         byte[] ascii,
                         int asciiP,
                         int n)

isNewLine

public final boolean isNewLine(int code)

isGraph

public final boolean isGraph(int code)

isPrint

public final boolean isPrint(int code)

isAlnum

public final boolean isAlnum(int code)

isAlpha

public final boolean isAlpha(int code)

isLower

public final boolean isLower(int code)

isUpper

public final boolean isUpper(int code)

isCntrl

public final boolean isCntrl(int code)

isPunct

public final boolean isPunct(int code)

isSpace

public final boolean isSpace(int code)

isBlank

public final boolean isBlank(int code)

isDigit

public final boolean isDigit(int code)

isXDigit

public final boolean isXDigit(int code)

isWord

public final boolean isWord(int code)

isMbcWord

public final boolean isMbcWord(byte[] bytes,
                               int p,
                               int end)

isSbWord

public final boolean isSbWord(int code)

isMbcHead

public final boolean isMbcHead(byte[] bytes,
                               int p,
                               int end)

isMbcCrnl

public boolean isMbcCrnl(byte[] bytes,
                         int p,
                         int end)

digitVal

public static int digitVal(int code)

odigitVal

public static int odigitVal(int code)

xdigitVal

public final int xdigitVal(int code)

isMbcAscii

public static boolean isMbcAscii(byte b)

isAscii

public static boolean isAscii(int code)

isAscii

public static boolean isAscii(byte b)

asciiToLower

public static byte asciiToLower(int c)

asciiToUpper

public static byte asciiToUpper(int c)

isWordGraphPrint

public static boolean isWordGraphPrint(int ctype)

mbcodeStartPosition

public final int mbcodeStartPosition()

isSingleByte

public final boolean isSingleByte()

isFixedWidth

public final boolean isFixedWidth()

load

public static Encoding load(String name)


Copyright © 2011. All Rights Reserved.