Package nu.validator.htmlparser.impl
Class MetaScanner
- java.lang.Object
-
- nu.validator.htmlparser.impl.MetaScanner
-
- Direct Known Subclasses:
MetaSniffer
public abstract class MetaScanner extends java.lang.Object
-
-
Field Summary
Fields
| Modifier and Type | Field | Description |
| --- | --- | --- |
| private static int | A | |
| private static int | AFTER_ATTRIBUTE_NAME | |
| private static int | AFTER_ATTRIBUTE_VALUE_QUOTED | |
| private static int | ATTRIBUTE_NAME | |
| private static int | ATTRIBUTE_VALUE_DOUBLE_QUOTED | |
| private static int | ATTRIBUTE_VALUE_SINGLE_QUOTED | |
| private static int | ATTRIBUTE_VALUE_UNQUOTED | |
| private static int | BEFORE_ATTRIBUTE_NAME | |
| private static int | BEFORE_ATTRIBUTE_VALUE | |
| private java.lang.String | charset | |
| private static char[] | CHARSET | Constant for "charset". |
| private int | charsetIndex | The current position in recognizing the attribute name "charset". |
| private static int | COMMENT | |
| private static int | COMMENT_END | |
| private static int | COMMENT_END_DASH | |
| private static int | COMMENT_START | |
| private static int | COMMENT_START_DASH | |
| private java.lang.String | content | |
| private static char[] | CONTENT | Constant for "content". |
| private static char[] | CONTENT_TYPE | Constant for "content-type". |
| private int | contentIndex | The current position in recognizing the attribute name "content". |
| private int | contentTypeIndex | The current position in recognizing the attribute value "content-type". |
| private static int | DATA | |
| private static int | E | |
| private static char[] | HTTP_EQUIV | Constant for "http-equiv". |
| private static int | HTTP_EQUIV_CONTENT_TYPE | |
| private static int | HTTP_EQUIV_NOT_SEEN | |
| private static int | HTTP_EQUIV_OTHER | |
| private int | httpEquivIndex | The current position in recognizing the attribute name "http-equiv". |
| private int | httpEquivState | |
| private static int | M | |
| private static int | MARKUP_DECLARATION_HYPHEN | |
| private static int | MARKUP_DECLARATION_OPEN | |
| private int | metaState | The state of the state machine that recognizes the tag name "meta". |
| private static int | NO | |
| protected ByteReadable | readable | The data source. |
| private static int | SCAN_UNTIL_GT | |
| private static int | SELF_CLOSING_START_TAG | |
| protected int | stateSave | The tokenizer state. |
| private char[] | strBuf | Accumulation buffer for attribute values. |
| private int | strBufLen | The currently filled length of strBuf. |
| private static int | T | |
| private static int | TAG_NAME | |
| private static int | TAG_OPEN | |
-
Constructor Summary
Constructors
| Constructor | Description |
| --- | --- |
| MetaScanner() | |
-
Method Summary
All Methods | Instance Methods | Abstract Methods | Concrete Methods
| Modifier and Type | Method | Description |
| --- | --- | --- |
| private void | addToBuffer(int c) | Adds a character to the accumulation buffer. |
| private void | destructor() | |
| private void | handleAttributeValue() | Attempts to extract a charset name from the accumulation buffer. |
| private void | handleCharInAttributeValue(int c) | |
| private boolean | handleTag() | |
| private boolean | handleTagInner() | |
| protected int | read() | Reads a byte from the data source. |
| protected void | stateLoop(int state) | Runs the meta scanning algorithm. |
| private int | toAsciiLowerCase(int c) | |
| protected abstract boolean | tryCharset(java.lang.String encoding) | Tries to switch to an encoding. |
-
-
-
Field Detail
-
CHARSET
private static final char[] CHARSET
Constant for "charset".
-
CONTENT
private static final char[] CONTENT
Constant for "content".
-
HTTP_EQUIV
private static final char[] HTTP_EQUIV
Constant for "http-equiv".
-
CONTENT_TYPE
private static final char[] CONTENT_TYPE
Constant for "content-type".
-
NO
private static final int NO
- See Also:
- Constant Field Values
-
M
private static final int M
- See Also:
- Constant Field Values
-
E
private static final int E
- See Also:
- Constant Field Values
-
T
private static final int T
- See Also:
- Constant Field Values
-
A
private static final int A
- See Also:
- Constant Field Values
-
DATA
private static final int DATA
- See Also:
- Constant Field Values
-
TAG_OPEN
private static final int TAG_OPEN
- See Also:
- Constant Field Values
-
SCAN_UNTIL_GT
private static final int SCAN_UNTIL_GT
- See Also:
- Constant Field Values
-
TAG_NAME
private static final int TAG_NAME
- See Also:
- Constant Field Values
-
BEFORE_ATTRIBUTE_NAME
private static final int BEFORE_ATTRIBUTE_NAME
- See Also:
- Constant Field Values
-
ATTRIBUTE_NAME
private static final int ATTRIBUTE_NAME
- See Also:
- Constant Field Values
-
AFTER_ATTRIBUTE_NAME
private static final int AFTER_ATTRIBUTE_NAME
- See Also:
- Constant Field Values
-
BEFORE_ATTRIBUTE_VALUE
private static final int BEFORE_ATTRIBUTE_VALUE
- See Also:
- Constant Field Values
-
ATTRIBUTE_VALUE_DOUBLE_QUOTED
private static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED
- See Also:
- Constant Field Values
-
ATTRIBUTE_VALUE_SINGLE_QUOTED
private static final int ATTRIBUTE_VALUE_SINGLE_QUOTED
- See Also:
- Constant Field Values
-
ATTRIBUTE_VALUE_UNQUOTED
private static final int ATTRIBUTE_VALUE_UNQUOTED
- See Also:
- Constant Field Values
-
AFTER_ATTRIBUTE_VALUE_QUOTED
private static final int AFTER_ATTRIBUTE_VALUE_QUOTED
- See Also:
- Constant Field Values
-
MARKUP_DECLARATION_OPEN
private static final int MARKUP_DECLARATION_OPEN
- See Also:
- Constant Field Values
-
MARKUP_DECLARATION_HYPHEN
private static final int MARKUP_DECLARATION_HYPHEN
- See Also:
- Constant Field Values
-
COMMENT_START
private static final int COMMENT_START
- See Also:
- Constant Field Values
-
COMMENT_START_DASH
private static final int COMMENT_START_DASH
- See Also:
- Constant Field Values
-
COMMENT
private static final int COMMENT
- See Also:
- Constant Field Values
-
COMMENT_END_DASH
private static final int COMMENT_END_DASH
- See Also:
- Constant Field Values
-
COMMENT_END
private static final int COMMENT_END
- See Also:
- Constant Field Values
-
SELF_CLOSING_START_TAG
private static final int SELF_CLOSING_START_TAG
- See Also:
- Constant Field Values
-
HTTP_EQUIV_NOT_SEEN
private static final int HTTP_EQUIV_NOT_SEEN
- See Also:
- Constant Field Values
-
HTTP_EQUIV_CONTENT_TYPE
private static final int HTTP_EQUIV_CONTENT_TYPE
- See Also:
- Constant Field Values
-
HTTP_EQUIV_OTHER
private static final int HTTP_EQUIV_OTHER
- See Also:
- Constant Field Values
-
readable
protected ByteReadable readable
The data source.
-
metaState
private int metaState
The state of the state machine that recognizes the tag name "meta".
-
contentIndex
private int contentIndex
The current position in recognizing the attribute name "content".
-
charsetIndex
private int charsetIndex
The current position in recognizing the attribute name "charset".
-
httpEquivIndex
private int httpEquivIndex
The current position in recognizing the attribute name "http-equiv".
-
contentTypeIndex
private int contentTypeIndex
The current position in recognizing the attribute value "content-type".
-
stateSave
protected int stateSave
The tokenizer state.
-
strBufLen
private int strBufLen
The currently filled length of strBuf.
-
strBuf
private char[] strBuf
Accumulation buffer for attribute values.
-
content
private java.lang.String content
-
charset
private java.lang.String charset
-
httpEquivState
private int httpEquivState
-
-
Method Detail
-
destructor
private void destructor()
-
read
protected int read() throws java.io.IOException
Reads a byte from the data source. -1 means end.
- Returns:
the byte read, or -1 at the end of the data source
- Throws:
java.io.IOException
-
stateLoop
protected final void stateLoop(int state) throws org.xml.sax.SAXException, java.io.IOException
Runs the meta scanning algorithm.
- Throws:
org.xml.sax.SAXException
java.io.IOException
-
handleCharInAttributeValue
private void handleCharInAttributeValue(int c)
-
toAsciiLowerCase
private int toAsciiLowerCase(int c)
-
addToBuffer
private void addToBuffer(int c)
Adds a character to the accumulation buffer.
- Parameters:
c - the character to add
-
handleAttributeValue
private void handleAttributeValue() throws org.xml.sax.SAXException
Attempts to extract a charset name from the accumulation buffer.
- Throws:
org.xml.sax.SAXException
-
handleTag
private boolean handleTag() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
handleTagInner
private boolean handleTagInner() throws org.xml.sax.SAXException
- Throws:
org.xml.sax.SAXException
-
tryCharset
protected abstract boolean tryCharset(java.lang.String encoding) throws org.xml.sax.SAXException
Tries to switch to an encoding.
- Parameters:
encoding - the encoding to try switching to
- Returns:
true if successful
- Throws:
org.xml.sax.SAXException
-
-