final class XmlParser extends Object
Use the SAXDriver
class as your entry point, as the
internal parser interfaces are subject to change.
See also: SAXDriver
Modifier and Type | Field and Description |
---|---|
static int |
ATTRIBUTE_CDATA
Constant: the attribute value is a string value.
|
static int |
ATTRIBUTE_DEFAULT_FIXED
Constant: the attribute was declared #FIXED.
|
static int |
ATTRIBUTE_DEFAULT_IMPLIED
Constant: the attribute was declared #IMPLIED.
|
static int |
ATTRIBUTE_DEFAULT_REQUIRED
Constant: the attribute was declared #REQUIRED.
|
static int |
ATTRIBUTE_DEFAULT_SPECIFIED
Constant: the attribute has a literal default value specified.
|
static int |
ATTRIBUTE_DEFAULT_UNDECLARED
Constant: the attribute is not declared.
|
static int |
ATTRIBUTE_ENTITIES
Constant: the attribute value is a list of entity names.
|
static int |
ATTRIBUTE_ENTITY
Constant: the attribute value is the name of an entity.
|
static int |
ATTRIBUTE_ENUMERATED
Constant: the attribute value is a token from an enumeration.
|
static int |
ATTRIBUTE_ID
Constant: the attribute value is a unique identifier.
|
static int |
ATTRIBUTE_IDREF
Constant: the attribute value is a reference to a unique identifier.
|
static int |
ATTRIBUTE_IDREFS
Constant: the attribute value is a list of ID references.
|
static int |
ATTRIBUTE_NMTOKEN
Constant: the attribute value is a name token.
|
static int |
ATTRIBUTE_NMTOKENS
Constant: the attribute value is a list of name tokens.
|
static int |
ATTRIBUTE_NOTATION
Constant: the attribute is the name of a notation.
|
static int |
ATTRIBUTE_UNDECLARED
Constant: the attribute has not been declared for this element type.
|
private static HashMap |
attributeTypeHash
Hash table of attribute types.
|
private int |
baseEncoding |
private char[] |
baseInputBuffer |
private int |
baseInputBufferLength |
private int |
baseInputBufferStart |
private InputStream |
baseInputStream |
private String |
basePublicId |
private Reader |
baseReader |
private String |
baseURI |
private int |
column |
static int |
CONTENT_ANY
Constant: the element has a content model of ANY.
|
static int |
CONTENT_ELEMENTS
Constant: the element has element content.
|
static int |
CONTENT_EMPTY
Constant: the element has declared content of EMPTY.
|
static int |
CONTENT_MIXED
Constant: the element has mixed content.
|
static int |
CONTENT_UNDECLARED
Constant: an element has not been declared.
|
private static int |
CONTEXT_LITERAL |
private static int |
CONTEXT_NORMAL |
private int |
currentByteCount |
private String |
currentElement |
private int |
currentElementContent |
private static int |
DATA_BUFFER_INITIAL |
private char[] |
dataBuffer |
private int |
dataBufferPos |
private HashMap |
elementInfo |
private int |
encoding |
private static int |
ENCODING_ASCII |
private static int |
ENCODING_EXTERNAL |
private static int |
ENCODING_ISO_8859_1 |
private static int |
ENCODING_UCS_2_12 |
private static int |
ENCODING_UCS_2_21 |
private static int |
ENCODING_UCS_4_1234 |
private static int |
ENCODING_UCS_4_2143 |
private static int |
ENCODING_UCS_4_3412 |
private static int |
ENCODING_UCS_4_4321 |
private static int |
ENCODING_UTF_8 |
static int |
ENTITY_INTERNAL
Constant: the entity is internal.
|
static int |
ENTITY_NDATA
Constant: the entity is external, non-XML data.
|
static int |
ENTITY_TEXT
Constant: the entity is external XML data.
|
static int |
ENTITY_UNDECLARED
Constant: the entity has not been declared.
|
private HashMap |
entityInfo |
private ArrayList |
entityStack |
private boolean |
expandPE |
private URLConnection |
externalEntity |
private SAXDriver |
handler |
private boolean |
inCDATA |
private boolean |
inLiteral |
private static int |
INPUT_BUFFER |
private static int |
INPUT_EXTERNAL |
private static int |
INPUT_INTERNAL |
private static int |
INPUT_NONE |
private static int |
INPUT_READER |
private static int |
INPUT_STREAM |
private ArrayList |
inputStack |
private InputStream |
is |
private int |
line |
private static int |
LIT_ATTRIBUTE |
private static int |
LIT_DISABLE_CREF |
private static int |
LIT_DISABLE_EREF |
private static int |
LIT_DISABLE_PE |
private static int |
LIT_ENTITY_CHECK |
private static int |
LIT_ENTITY_REF |
private static int |
LIT_NORMALIZE |
private static int |
NAME_BUFFER_INITIAL |
private char[] |
nameBuffer |
private int |
nameBufferPos |
private HashMap |
notationInfo |
private boolean |
peIsError |
private byte[] |
rawReadBuffer |
private static int |
READ_BUFFER_MAX |
private char[] |
readBuffer |
private int |
readBufferLength |
private int |
readBufferOverflow |
private int |
readBufferPos |
private Reader |
reader |
private boolean |
sawCR |
private int |
sourceType |
private static int |
SYMBOL_TABLE_LENGTH |
private Object[][] |
symbolTable |
private int |
tagAttributePos |
private String[] |
tagAttributes |
private static boolean |
USE_CHEATS |
Constructor and Description |
---|
XmlParser()
Construct a new parser with no associated handler.
|
Modifier and Type | Method and Description |
---|---|
private void |
cleanupVariables()
Clean up after the parse to allow some garbage collection.
|
private void |
copyIso8859_1ReadBuffer(int count,
char mask)
Convert a buffer of US-ASCII or ISO-8859-1-encoded bytes into
UTF-16 characters.
|
private void |
copyUcs2ReadBuffer(int count,
int shift1,
int shift2)
Convert a buffer of UCS-2-encoded bytes into UTF-16 characters
(as used in Java string manipulation).
|
private void |
copyUcs4ReadBuffer(int count,
int shift1,
int shift2,
int shift3,
int shift4)
Convert a buffer of UCS-4-encoded bytes into UTF-16 characters.
|
private void |
copyUtf8ReadBuffer(int count)
Convert a buffer of UTF-8-encoded bytes into UTF-16 characters.
|
private void |
dataBufferAppend(char c)
Add a character to the data buffer.
|
private void |
dataBufferAppend(char[] ch,
int start,
int length)
Append (part of) a character array to the data buffer.
|
private void |
dataBufferAppend(String s)
Add a string to the data buffer.
|
private void |
dataBufferFlush()
Flush the contents of the data buffer to the handler, as
appropriate, and reset the buffer for new input.
|
private void |
dataBufferNormalize()
Normalise whitespace in the data buffer.
|
private String |
dataBufferToString()
Convert the data buffer to a string.
|
private Iterator |
declaredAttributes(Object[] element)
Get the declared attributes for an element type.
|
Iterator |
declaredAttributes(String elname)
Get the declared attributes for an element type.
|
Iterator |
declaredElements()
Get the declared elements for an XML document.
|
Iterator |
declaredEntities()
Get declared entities.
|
Iterator |
declaredNotations()
Get declared notations.
|
private void |
detectEncoding()
Attempt to detect the encoding of an entity.
|
(package private) void |
doParse(String systemId,
String publicId,
Reader reader,
InputStream stream,
String encoding)
Parse an XML document from the character stream, byte stream, or URI
that you provide (in that order of preference).
|
private void |
encodingError(String message,
int value,
int offset)
Report a character encoding error.
|
private void |
error(String message)
Report typical case fatal errors.
|
private void |
error(String message,
char textFound,
String textExpected)
Report a serious error.
|
private void |
error(String message,
String textFound,
String textExpected)
Report an error.
|
private Object |
extendArray(Object array,
int currentSize,
int requiredSize)
Ensure the capacity of an array, allocating a new one if
necessary.
|
private void |
filterCR(boolean moreData)
Filter carriage returns in the read buffer.
|
private Object[] |
getAttribute(String elName,
String name)
Retrieve the three-member array representing an
attribute declaration.
|
String |
getAttributeDefaultValue(String name,
String aname)
Retrieve the default value of a declared attribute.
|
int |
getAttributeDefaultValueType(String name,
String aname)
Retrieve the default value type of a declared attribute.
|
String |
getAttributeExpandedValue(String name,
String aname)
Retrieve the expanded value of a declared attribute.
|
String |
getAttributeIterator(String name,
String aname)
Retrieve the allowed values for an enumerated attribute type.
|
int |
getAttributeType(String name,
String aname)
Retrieve the declared type of an attribute.
|
int |
getColumnNumber()
Return the current column number.
|
private int |
getContentType(Object[] element,
int defaultType)
Look up the content type of an element.
|
private HashMap |
getElementAttributes(String name)
Look up the attribute hash table for an element.
|
String |
getElementContentModel(String name)
Look up the content model of an element.
|
int |
getElementContentType(String name)
Look up the content type of an element.
|
String |
getEntityNotationName(String eName)
Get the notation name associated with an NDATA entity.
|
String |
getEntityPublicId(String ename)
Return an external entity's public identifier, if any.
|
String |
getEntitySystemId(String ename)
Return an external entity's system identifier.
|
int |
getEntityType(String ename)
Find the type of an entity.
|
String |
getEntityValue(String ename)
Return the value of an internal entity.
|
int |
getLineNumber()
Return the current line number.
|
private int |
getNextUtf8Byte(int pos,
int count)
Return the next byte value in a UTF-8 sequence.
|
String |
getNotationPublicId(String nname)
Look up the public identifier for a notation.
|
String |
getNotationSystemId(String nname)
Look up the system identifier for a notation.
|
private void |
initializeVariables()
Re-initialize the variables for each parse.
|
String |
intern(char[] ch,
int start,
int length)
Create an interned string from a character array.
|
private static boolean |
isExtender(char c) |
private boolean |
isWhitespace(char c)
Test if a character is whitespace.
|
private void |
parseAttDef(String elementName)
Parse a single attribute definition.
|
private void |
parseAttlistDecl()
Parse an attribute list declaration.
|
private void |
parseAttribute(String name)
Parse an attribute assignment.
|
private void |
parseCDSect()
Parse a CDATA section.
|
private void |
parseCharData()
Parse character data.
|
private void |
parseCharRef()
Read and interpret a character reference.
|
private void |
parseComment()
Skip a comment.
|
private void |
parseConditionalSect()
Parse a conditional section.
|
private void |
parseContent()
Parse the content of an element.
|
private void |
parseContentspec(String name)
Content specification.
|
private void |
parseCp()
Parse a content particle.
|
private void |
parseDefault(String elementName,
String name,
int type,
String myEnum)
Parse the default value for an attribute.
|
private void |
parseDoctypedecl()
Parse a document type declaration.
|
private void |
parseDocument()
Parse an XML document.
|
private void |
parseElement()
Parse an element, with its tags.
|
private void |
parseElementdecl()
Parse an element type declaration.
|
private void |
parseElements()
Parse an element-content model.
|
private void |
parseEntityDecl()
Parse an entity declaration.
|
private void |
parseEntityRef(boolean externalAllowed)
Parse and expand an entity reference.
|
private void |
parseEnumeration(boolean isNames)
Parse an enumeration.
|
private void |
parseEq()
Parse an equals sign surrounded by optional whitespace.
|
private void |
parseETag()
Parse an end tag.
|
private void |
parseMarkupdecl()
Parse a markup declaration in the internal or external DTD subset.
|
private void |
parseMisc()
Parse miscellaneous markup outside the document element and DOCTYPE
declaration.
|
private void |
parseMixed()
Parse mixed content.
|
private void |
parseNotationDecl()
Parse a notation declaration.
|
private void |
parseNotationType()
Parse a notation type for an attribute.
|
private void |
parsePEReference()
Parse and expand a parameter entity reference.
|
private void |
parsePI()
Parse a processing instruction and do a call-back.
|
private void |
parseProlog()
Parse the prolog of an XML document.
|
private String |
parseTextDecl(boolean ignoreEncoding)
Parse a text declaration.
|
private void |
parseUntil(String delim)
Read all data until we find the specified string.
|
private void |
parseWhitespace()
Parse whitespace characters, and leave them in the data buffer.
|
private String |
parseXMLDecl(boolean ignoreEncoding)
Parse the XML declaration.
|
private void |
popInput()
Restore a previous input source.
|
private void |
pushCharArray(String ename,
char[] ch,
int start,
int length)
Push a new internal input source.
|
private void |
pushInput(String ename)
Save the current input source onto the stack.
|
private void |
pushString(String ename,
String s)
This method pushes a string back onto input.
|
private void |
pushURL(String ename,
String publicId,
String systemId,
Reader reader,
InputStream stream,
String encoding)
Push a new external input source.
|
private void |
read8bitEncodingDeclaration()
Read just the encoding declaration (or XML declaration) at the
start of an external entity.
|
private int |
readAttType()
Parse the attribute type.
|
private char |
readCh()
Read a single character from the readBuffer.
|
private void |
readDataChunk()
Read a chunk of data from an external input source.
|
private String[] |
readExternalIds(boolean inNotation)
Try reading external identifiers.
|
private String |
readLiteral(int flags)
Read a literal.
|
private String |
readNmtoken(boolean isName)
Read a name or (when parsing an enumeration) name token.
|
private void |
require(char delim)
Require a character to appear, or throw an exception.
|
private void |
require(String delim)
Require a string to appear, or throw an exception.
|
private void |
requireWhitespace()
Require whitespace characters.
|
private void |
setAttribute(String elName,
String name,
int type,
String myEnum,
String value,
int valueType)
Register an attribute declaration for later retrieval.
|
private void |
setElement(String name,
int contentType,
String contentModel,
HashMap attributes)
Register an element.
|
private void |
setEntity(String eName,
int eClass,
String pubid,
String sysid,
String value,
String nName)
Register an entity declaration for later retrieval.
|
private void |
setExternalDataEntity(String eName,
String pubid,
String sysid,
String nName)
Register an external data entity.
|
private void |
setExternalTextEntity(String eName,
String pubid,
String sysid)
Register an external text entity.
|
(package private) void |
setHandler(SAXDriver handler)
Set the handler that will receive parsing events.
|
private void |
setInternalEntity(String eName,
String value)
Register an entity declaration for later retrieval.
|
private void |
setNotation(String nname,
String pubid,
String sysid)
Register a notation declaration for later retrieval.
|
private void |
setupDecoding(String encodingName)
Sets up internal state so that we can decode an entity using the
specified encoding.
|
private void |
skipWhitespace()
Skip whitespace characters.
|
private static boolean |
tryEncoding(byte[] sig,
byte b1,
byte b2)
Check for a two-byte signature.
|
private static boolean |
tryEncoding(byte[] sig,
byte b1,
byte b2,
byte b3,
byte b4)
Check for a four-byte signature.
|
private String |
tryEncodingDecl(boolean ignoreEncoding)
Check for an encoding declaration.
|
private boolean |
tryRead(char delim)
Return true if we can read the expected character.
|
private boolean |
tryRead(String delim)
Return true if we can read the expected string.
|
private boolean |
tryWhitespace()
Return true if we can read some whitespace.
|
private void |
unread(char c)
Push a single character back onto the current input stream.
|
private void |
unread(char[] ch,
int length)
Push a char array back onto the current input stream.
|
private static final boolean USE_CHEATS
public static final int CONTENT_UNDECLARED
public static final int CONTENT_ANY
public static final int CONTENT_EMPTY
public static final int CONTENT_MIXED
public static final int CONTENT_ELEMENTS
public static final int ENTITY_UNDECLARED
public static final int ENTITY_INTERNAL
public static final int ENTITY_NDATA
public static final int ENTITY_TEXT
public static final int ATTRIBUTE_UNDECLARED
public static final int ATTRIBUTE_CDATA
public static final int ATTRIBUTE_ID
public static final int ATTRIBUTE_IDREF
public static final int ATTRIBUTE_IDREFS
public static final int ATTRIBUTE_ENTITY
public static final int ATTRIBUTE_ENTITIES
public static final int ATTRIBUTE_NMTOKEN
public static final int ATTRIBUTE_NMTOKENS
public static final int ATTRIBUTE_ENUMERATED
public static final int ATTRIBUTE_NOTATION
private static HashMap attributeTypeHash
private static final int ENCODING_EXTERNAL
private static final int ENCODING_UTF_8
private static final int ENCODING_ISO_8859_1
private static final int ENCODING_UCS_2_12
private static final int ENCODING_UCS_2_21
private static final int ENCODING_UCS_4_1234
private static final int ENCODING_UCS_4_4321
private static final int ENCODING_UCS_4_2143
private static final int ENCODING_UCS_4_3412
private static final int ENCODING_ASCII
public static final int ATTRIBUTE_DEFAULT_UNDECLARED
public static final int ATTRIBUTE_DEFAULT_SPECIFIED
public static final int ATTRIBUTE_DEFAULT_IMPLIED
public static final int ATTRIBUTE_DEFAULT_REQUIRED
public static final int ATTRIBUTE_DEFAULT_FIXED
private static final int INPUT_NONE
private static final int INPUT_INTERNAL
private static final int INPUT_EXTERNAL
private static final int INPUT_STREAM
private static final int INPUT_BUFFER
private static final int INPUT_READER
private static final int LIT_ENTITY_REF
private static final int LIT_NORMALIZE
private static final int LIT_ATTRIBUTE
private static final int LIT_DISABLE_PE
private static final int LIT_DISABLE_CREF
private static final int LIT_DISABLE_EREF
private static final int LIT_ENTITY_CHECK
private static final int CONTEXT_NORMAL
private static final int CONTEXT_LITERAL
private InputStream is
private int line
private int column
private int sourceType
private ArrayList inputStack
private URLConnection externalEntity
private int encoding
private int currentByteCount
private char[] readBuffer
private int readBufferPos
private int readBufferLength
private int readBufferOverflow
private static final int READ_BUFFER_MAX
private byte[] rawReadBuffer
private static int DATA_BUFFER_INITIAL
private char[] dataBuffer
private int dataBufferPos
private static int NAME_BUFFER_INITIAL
private char[] nameBuffer
private int nameBufferPos
private HashMap elementInfo
private HashMap entityInfo
private HashMap notationInfo
private String currentElement
private int currentElementContent
private String basePublicId
private int baseEncoding
private Reader baseReader
private InputStream baseInputStream
private char[] baseInputBuffer
private int baseInputBufferStart
private int baseInputBufferLength
private ArrayList entityStack
private boolean inLiteral
private boolean expandPE
private boolean peIsError
private static final int SYMBOL_TABLE_LENGTH
private Object[][] symbolTable
private String[] tagAttributes
private int tagAttributePos
private boolean sawCR
private boolean inCDATA
XmlParser()
setHandler(org.dom4j.io.aelfred.SAXDriver)
,
#parse
void setHandler(SAXDriver handler)
handler
- The handler to receive callback events.#parse
void doParse(String systemId, String publicId, Reader reader, InputStream stream, String encoding) throws Exception
You may parse more than one document, but that must be done sequentially. Only one thread at a time may use this parser.
systemId
- The URI of the document; should never be null,
but may be so iff a reader or a stream is provided.
publicId
- The public identifier of the document, or null.
reader
- A character stream; must be null if stream isn't.
stream
- A byte input stream; must be null if reader isn't.
encoding
- The suggested encoding, or null if unknown.
Exception
- Basically SAXException or IOException
private void error(String message, String textFound, String textExpected) throws SAXException
message
- The error message.textFound
- The text that caused the error (or null).SAXException
SAXDriver.error(java.lang.String, java.lang.String, int, int)
,
line
private void error(String message, char textFound, String textExpected) throws SAXException
message
- The error message.textFound
- The text that caused the error (or null).SAXException
private void error(String message) throws SAXException
SAXException
private void parseDocument() throws Exception
[1] document ::= prolog element Misc*
This is the top-level parsing function for a single XML document. As a minimum, a well-formed document must have a document element, and a valid document must have a prolog (one with doctype) as well.
Exception
private void parseComment() throws Exception
[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* "-->"
(The <!--
has already been read.)
Exception
private void parsePI() throws SAXException, IOException
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' [17] PITarget ::= Name - ( ('X'|'x') ('M'|'m') ('L'|'l') )
(The <?
has already been read.)
SAXException
IOException
private void parseCDSect() throws Exception
[18] CDSect ::= CDStart CData CDEnd [19] CDStart ::= '<![CDATA[' [20] CData ::= (Char* - (Char* ']]>' Char*)) [21] CDEnd ::= ']]>'
(The '<![CDATA[' has already been read.)
Exception
private void parseProlog() throws Exception
[22] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)?
There are a couple of tricks here. First, it is necessary to declare the XML default attributes after the DTD (if present) has been read. [??] Second, it is not possible to expand general references in attribute value literals until after the entire DTD (if present) has been parsed.
We do not look for the XML declaration here, because it was handled by pushURL ().
Exception
pushURL
private String parseXMLDecl(boolean ignoreEncoding) throws SAXException, IOException
[23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"' ) [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')* [32] SDDecl ::= S 'standalone' Eq ( "'" ('yes' | 'no') "'" | '"' ('yes' | 'no') '"' ) [80] EncodingDecl ::= S 'encoding' Eq ( '"' EncName '"' | "'" EncName "'" ) [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
(The <?xml
and whitespace have already been read.)
SAXException
IOException
parseTextDecl(boolean)
,
setupDecoding(java.lang.String)
private String parseTextDecl(boolean ignoreEncoding) throws SAXException, IOException
[79] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' [80] EncodingDecl ::= S 'encoding' Eq ( '"' EncName '"' | "'" EncName "'" ) [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
(The <?xml
and whitespace have already been read.)
SAXException
IOException
parseXMLDecl(boolean)
,
setupDecoding(java.lang.String)
private void setupDecoding(String encodingName) throws SAXException, IOException
It is also used after autodetection, at which point only very limited adjustments to the encoding may be used (switching between related builtin decoders).
encodingName
- The name of the encoding specified by the user.IOException
- if the encoding isn't supported either
internally to this parser, or by the hosting JVM.SAXException
parseXMLDecl(boolean)
,
parseTextDecl(boolean)
private void parseMisc() throws Exception
[27] Misc ::= Comment | PI | S
Exception
private void parseDoctypedecl() throws Exception
[28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
(The <!DOCTYPE
has already been read.)
Exception
private void parseMarkupdecl() throws Exception
[29] markupdecl ::= elementdecl | Attlistdecl | EntityDecl | NotationDecl | PI | Comment [30] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
Reading toplevel PE references is handled as a lexical issue by the caller, as is whitespace.
Exception
private void parseElement() throws Exception
[39] element ::= EmptyElementTag | STag content ETag [40] STag ::= '<' Name (S Attribute)* S? '>' [44] EmptyElementTag ::= '<' Name (S Attribute)* S? '/>'
(The '<' has already been read.)
NOTE: this method actually chains onto parseContent (), if necessary, and parseContent () will take care of calling parseETag ().
Exception
private void parseAttribute(String name) throws Exception
[41] Attribute ::= Name Eq AttValue
name
- The name of the attribute's element.Exception
SAXDriver.attribute(java.lang.String, java.lang.String, boolean)
private void parseEq() throws SAXException, IOException
[25] Eq ::= S? '=' S?
SAXException
IOException
private void parseETag() throws Exception
[42] ETag ::= '</' Name S? '>'
NOTE: parseContent () chains to here, we already read the "</".
Exception
private void parseContent() throws Exception
[43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* [67] Reference ::= EntityRef | CharRef
NOTE: consumes ETag.
Exception
private void parseElementdecl() throws Exception
[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
NOTE: the '<!ELEMENT' has already been read.
Exception
private void parseContentspec(String name) throws Exception
[46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements
Exception
private void parseElements() throws Exception
[47] elements ::= (choice | seq) ('?' | '*' | '+')? [49] choice ::= '(' S? cp (S? '|' S? cp)+ S? ')' [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')'
NOTE: the opening '(' and S have already been read.
Exception
private void parseCp() throws Exception
[48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
Exception
private void parseMixed() throws Exception
[51] Mixed ::= '(' S? ( '#PCDATA' (S? '|' S? Name)*) S? ')*' | '(' S? ('#PCDATA') S? ')'
Exception
private void parseAttlistDecl() throws Exception
[52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
NOTE: the '<!ATTLIST' has already been read.
Exception
private void parseAttDef(String elementName) throws Exception
[53] AttDef ::= S Name S AttType S DefaultDecl
Exception
private int readAttType() throws Exception
[54] AttType ::= StringType | TokenizedType | EnumeratedType [55] StringType ::= 'CDATA' [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' [57] EnumeratedType ::= NotationType | Enumeration
Exception
private void parseEnumeration(boolean isNames) throws Exception
[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
NOTE: the '(' has already been read.
Exception
private void parseNotationType() throws Exception
[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
NOTE: the 'NOTATION' has already been read
Exception
private void parseDefault(String elementName, String name, int type, String myEnum) throws Exception
[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
Exception
private void parseConditionalSect() throws Exception
[61] conditionalSect ::= includeSect | ignoreSect [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>' [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>' [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents* ']]>' Ignore )* [65] Ignore ::= Char* - (Char* ( '<![' | ']]>') Char* )
NOTE: the '<![' has already been read.
Exception
private void parseCharRef() throws SAXException, IOException
[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
NOTE: the '&#' has already been read.
SAXException
IOException
private void parseEntityRef(boolean externalAllowed) throws SAXException, IOException
[68] EntityRef ::= '&' Name ';'
NOTE: the '&' has already been read.
externalAllowed
- External entities are allowed here.SAXException
IOException
private void parsePEReference() throws SAXException, IOException
[69] PEReference ::= '%' Name ';'
NOTE: the '%' has already been read.
SAXException
IOException
private void parseEntityDecl() throws Exception
[70] EntityDecl ::= GEDecl | PEDecl [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) [74] PEDef ::= EntityValue | ExternalID [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral [76] NDataDecl ::= S 'NDATA' S Name
NOTE: the '<!ENTITY' has already been read.
Exception
private void parseNotationDecl() throws Exception
[82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' [83] PublicID ::= 'PUBLIC' S PubidLiteral
NOTE: the '<!NOTATION' has already been read.
Exception
private void parseCharData() throws Exception
[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
Exception
private void requireWhitespace() throws SAXException, IOException
SAXException
IOException
private void parseWhitespace() throws Exception
Exception
private void skipWhitespace() throws SAXException, IOException
[3] S ::= (#x20 | #x9 | #xd | #xa)+
SAXException
IOException
private String readNmtoken(boolean isName) throws SAXException, IOException
[5] Name ::= (Letter | '_' | ':') (NameChar)* [7] Nmtoken ::= (NameChar)+
SAXException
IOException
private static boolean isExtender(char c)
private String readLiteral(int flags) throws SAXException, IOException
[9] EntityValue ::= ... ([^%&] | PEReference | Reference)* ... [10] AttValue ::= ... ([^<&] | Reference)* ... [11] SystemLiteral ::= ... (URLchar - "'")* ... [12] PubidLiteral ::= ... (PubidChar - "'")* ...
as well as the quoted strings in XML and text declarations (for version, encoding, and standalone) which have their own constraints.
SAXException
IOException
private String[] readExternalIds(boolean inNotation) throws Exception
inNotation
- Are we in a notation?Exception
private final boolean isWhitespace(char c)
[3] S ::= (#x20 | #x9 | #xd | #xa)+
c
- The character to test.
private void dataBufferAppend(char c)
private void dataBufferAppend(String s)
private void dataBufferAppend(char[] ch, int start, int length)
private void dataBufferNormalize()
private String dataBufferToString()
private void dataBufferFlush() throws SAXException
SAXException
private void require(String delim) throws SAXException, IOException
Precondition: Entity expansion is not required.
Precondition: data buffer has no characters that will get sent to the application.
SAXException
IOException
private void require(char delim) throws SAXException, IOException
SAXException
IOException
public String intern(char[] ch, int start, int length)
Interning lets callers test string equality with == instead of String.equals ().
This is much more efficient than constructing a non-interned string first, and then interning it.
ch
- an array of characters for building the string.start
- the starting position in the array.length
- the number of characters to place in the string.(String)
,
String.intern()
private Object extendArray(Object array, int currentSize, int requiredSize)
public Iterator declaredElements()
The results will be valid only after the DTD (if any) has been parsed.
getElementContentType(java.lang.String)
,
getElementContentModel(java.lang.String)
private int getContentType(Object[] element, int defaultType)
element
- element info vectordefaultType
- value for null vectorCONTENT_UNDECLARED
,
CONTENT_ANY
,
CONTENT_EMPTY
,
CONTENT_MIXED
,
CONTENT_ELEMENTS
public int getElementContentType(String name)
name
- The element type name.getElementContentModel(java.lang.String)
,
CONTENT_UNDECLARED
,
CONTENT_ANY
,
CONTENT_EMPTY
,
CONTENT_MIXED
,
CONTENT_ELEMENTS
public String getElementContentModel(String name)
The result will always be null unless the content type is CONTENT_ELEMENTS or CONTENT_MIXED.
name
- The element type name.getElementContentType(java.lang.String)
private void setElement(String name, int contentType, String contentModel, HashMap attributes) throws Exception
Exception
private HashMap getElementAttributes(String name)
private Iterator declaredAttributes(Object[] element)
element
- The element info array for the element type.
getAttributeType(java.lang.String, java.lang.String)
,
getAttributeIterator(java.lang.String, java.lang.String)
,
getAttributeDefaultValueType(java.lang.String, java.lang.String)
,
getAttributeDefaultValue(java.lang.String, java.lang.String)
,
getAttributeExpandedValue(java.lang.String, java.lang.String)
public Iterator declaredAttributes(String elname)
elname
- The name of the element type.getAttributeType(java.lang.String, java.lang.String)
,
getAttributeIterator(java.lang.String, java.lang.String)
,
getAttributeDefaultValueType(java.lang.String, java.lang.String)
,
getAttributeDefaultValue(java.lang.String, java.lang.String)
,
getAttributeExpandedValue(java.lang.String, java.lang.String)
public int getAttributeType(String name, String aname)
name
- The name of the associated element.aname
- The name of the attribute.ATTRIBUTE_UNDECLARED
,
ATTRIBUTE_CDATA
,
ATTRIBUTE_ID
,
ATTRIBUTE_IDREF
,
ATTRIBUTE_IDREFS
,
ATTRIBUTE_ENTITY
,
ATTRIBUTE_ENTITIES
,
ATTRIBUTE_NMTOKEN
,
ATTRIBUTE_NMTOKENS
,
ATTRIBUTE_ENUMERATED
,
ATTRIBUTE_NOTATION
public String getAttributeIterator(String name, String aname)
name
- The name of the associated element.aname
- The name of the attribute.ATTRIBUTE_ENUMERATED
,
ATTRIBUTE_NOTATION
public String getAttributeDefaultValue(String name, String aname)
name
- The name of the associated element.aname
- The name of the attribute.getAttributeExpandedValue(java.lang.String, java.lang.String)
public String getAttributeExpandedValue(String name, String aname) throws Exception
General entities will be expanded (once).
name
- The name of the associated element.aname
- The name of the attribute.Exception
getAttributeDefaultValue(java.lang.String, java.lang.String)
public int getAttributeDefaultValueType(String name, String aname)
private void setAttribute(String elName, String name, int type, String myEnum, String value, int valueType) throws Exception
Exception
private Object[] getAttribute(String elName, String name)
public Iterator declaredEntities()
getEntityType(java.lang.String)
,
getEntityPublicId(java.lang.String)
,
getEntitySystemId(java.lang.String)
,
getEntityValue(java.lang.String)
,
getEntityNotationName(java.lang.String)
public int getEntityType(String ename)
ENTITY_UNDECLARED
,
ENTITY_INTERNAL
,
ENTITY_NDATA
,
ENTITY_TEXT
public String getEntityPublicId(String ename)
ename
- The name of the external entity.getEntityType(java.lang.String)
public String getEntitySystemId(String ename)
ename
- The name of the external entity.getEntityType(java.lang.String)
public String getEntityValue(String ename)
ename
- The name of the internal entity.getEntityType(java.lang.String)
public String getEntityNotationName(String eName)
eName
- The NDATA entity name.getEntityType(java.lang.String)
private void setInternalEntity(String eName, String value)
private void setExternalDataEntity(String eName, String pubid, String sysid, String nName)
private void setExternalTextEntity(String eName, String pubid, String sysid)
private void setEntity(String eName, int eClass, String pubid, String sysid, String value, String nName)
public Iterator declaredNotations()
getNotationPublicId(java.lang.String)
,
getNotationSystemId(java.lang.String)
public String getNotationPublicId(String nname)
nname
- The name of the notation.getNotationSystemId(java.lang.String)
public String getNotationSystemId(String nname)
nname
- The name of the notation.getNotationPublicId(java.lang.String)
private void setNotation(String nname, String pubid, String sysid) throws Exception
Exception
public int getLineNumber()
public int getColumnNumber()
private char readCh() throws SAXException, IOException
The readDataChunk () method maintains the buffer.
If we hit the end of an entity, try to pop the stack and keep going.
(This approach doesn't really enforce XML's rules about entity boundaries, but this is not currently a validating parser).
This routine also attempts to keep track of the current position in external entities, but it's not entirely accurate.
SAXException
IOException
(char)
,
(String)
,
readDataChunk()
,
readBuffer
,
line
private void unread(char c) throws SAXException
This method usually pushes the character back onto the readBuffer, while the unread (String) method treats the string as a new internal entity.
I don't think that this would ever be called with readBufferPos = 0, because the methods always read a character before unreading it, but just in case, I've added a boundary condition.
c
- The character to push back.SAXException
readCh()
,
(String)
,
(char[])
,
readBuffer
private void unread(char[] ch, int length) throws SAXException
NOTE: you must never push back characters that you haven't actually read: use pushString () instead.
SAXException
readCh()
,
(char)
,
(String)
,
readBuffer
,
pushString(java.lang.String, java.lang.String)
private void pushURL(String ename, String publicId, String systemId, Reader reader, InputStream stream, String encoding) throws SAXException, IOException
TODO: Right now, this method always attempts to autodetect the encoding; in the future, it should allow the caller to request an encoding explicitly, and it should also look at the headers with an HTTP connection.
url
- The java.net.URL object for the entity.SAXException
IOException
SAXDriver.resolveEntity(java.lang.String, java.lang.String)
,
pushString(java.lang.String, java.lang.String)
,
sourceType
,
pushInput(java.lang.String)
,
detectEncoding()
,
sourceType
,
readBuffer
private String tryEncodingDecl(boolean ignoreEncoding) throws SAXException, IOException
Because this part starts to fill parser buffers with this data, it's tricky to switch to a reader so that Java's built-in decoders can be used for the character encodings that aren't built in to this parser (such as EUC-JP, KOI8-R, Big5, etc).
SAXException
IOException
detectEncoding
private void detectEncoding() throws SAXException, IOException
The trick here (as suggested in the XML standard) is that any entity not in UTF-8, or in UCS-2 with a byte-order mark, must begin with an XML declaration or an encoding declaration; we simply have to look for "<?xml" in various encodings.
This method has no way to distinguish among 8-bit encodings. Instead, it sets up for UTF-8, then (possibly) revises its assumption later in setupDecoding (). Any ASCII-derived 8-bit encoding should work, but most will be rejected later by setupDecoding ().
I don't currently detect EBCDIC, since I'm concerned that it could also be a valid UTF-8 sequence; I'll have to do more checking later.
private static boolean tryEncoding(byte[] sig, byte b1, byte b2, byte b3, byte b4)
Utility routine for detectEncoding ().
Always looks for some part of "<?xml" in a specific encoding.
sig
- The first four bytes read.b1
- The first byte of the signatureb2
- The second byte of the signatureb3
- The third byte of the signatureb4
- The fourth byte of the signaturedetectEncoding()
private static boolean tryEncoding(byte[] sig, byte b1, byte b2)
Looks for a UCS-2 byte-order mark.
Utility routine for detectEncoding ().
sig
- The first four bytes read.b1
- The first byte of the signatureb2
- The second byte of the signaturedetectEncoding()
private void pushString(String ename, String s) throws SAXException
It is useful either as the expansion of an internal entity, or for backtracking during the parse.
Call pushCharArray () to do the actual work.
s
- The string to push back onto input.SAXException
pushCharArray(java.lang.String, char[], int, int)
private void pushCharArray(String ename, char[] ch, int start, int length) throws SAXException
This method is useful for expanding an internal entity, or for unreading a string of characters. It creates a new readBuffer containing the characters in the array, instead of characters converted from an input byte stream.
ch
- The char array to push.SAXException
pushString(java.lang.String, java.lang.String)
,
pushURL(java.lang.String, java.lang.String, java.lang.String, java.io.Reader, java.io.InputStream, java.lang.String)
,
readBuffer
,
sourceType
,
pushInput(java.lang.String)
private void pushInput(String ename) throws SAXException
This method saves all of the global variables associated with the current input source, so that they can be restored when a new input source has finished. It also tests for entity recursion.
The method saves the following global variables onto a stack using a fixed-length array:
ename
- The name of the entity (if any) causing the new input.SAXException
popInput()
,
sourceType
,
externalEntity
,
readBuffer
,
readBufferPos
,
readBufferLength
,
line
,
encoding
private void popInput() throws SAXException, IOException
This method restores all of the global variables associated with the current input source.
EOFException
- If there are no more entries on the input stack.SAXException
IOException
pushInput(java.lang.String)
,
sourceType
,
externalEntity
,
readBuffer
,
readBufferPos
,
readBufferLength
,
line
,
encoding
private boolean tryRead(char delim) throws SAXException, IOException
Note that the character will be removed from the input stream on success, but will be put back on failure. Do not attempt to read the character again if the method succeeds.
delim
- The character that should appear next. For a case-insensitive match, you must supply this in upper-case.SAXException
IOException
(String)
private boolean tryRead(String delim) throws SAXException, IOException
This is simply a convenience method.
Note that the string will be removed from the input stream on success, but will be put back on failure. Do not attempt to read the string again if the method succeeds.
This method will push back a character rather than an array whenever possible (probably the majority of cases).
NOTE: This method currently has a hard-coded limit of 100 characters for the delimiter.
delim
- The string that should appear next.SAXException
IOException
(char)
private boolean tryWhitespace() throws SAXException, IOException
This is simply a convenience method.
This method will push back a character rather than an array whenever possible (probably the majority of cases).
SAXException
IOException
private void parseUntil(String delim) throws SAXException, IOException
This is inefficient right now, since it calls tryRead () for every character.
delim
- The string delimiterSAXException
IOException
(String, boolean)
,
readCh()
private void read8bitEncodingDeclaration() throws SAXException, IOException
SAXException
IOException
private void readDataChunk() throws SAXException, IOException
This is simply a front-end that fills the rawReadBuffer with bytes, then calls the appropriate encoding handler.
SAXException
IOException
encoding
,
rawReadBuffer
,
readBuffer
,
filterCR(boolean)
,
copyUtf8ReadBuffer(int)
,
copyIso8859_1ReadBuffer(int, char)
,
#copyUcs_2ReadBuffer
,
#copyUcs_4ReadBuffer
private void filterCR(boolean moreData)
moreData
- true iff more data might come from the same sourcereadDataChunk()
,
readBuffer
,
readBufferOverflow
private void copyUtf8ReadBuffer(int count) throws SAXException, IOException
When readDataChunk () calls this method, the raw bytes are in rawReadBuffer, and the final characters will appear in readBuffer.
count
- The number of bytes to convert.SAXException
IOException
readDataChunk()
,
rawReadBuffer
,
readBuffer
,
getNextUtf8Byte(int, int)
private int getNextUtf8Byte(int pos, int count) throws SAXException, IOException
pos
- The current position in the rawReadBuffer.count
- The number of bytes in the rawReadBufferEOFException
- If the sequence is incomplete.SAXException
IOException
private void copyIso8859_1ReadBuffer(int count, char mask) throws IOException
When readDataChunk () calls this method, the raw bytes are in rawReadBuffer, and the final characters will appear in readBuffer.
count
- The number of bytes to convert.mask
- For ASCII conversion, 0x7f; else, 0xff.IOException
readDataChunk()
,
rawReadBuffer
,
readBuffer
private void copyUcs2ReadBuffer(int count, int shift1, int shift2) throws SAXException
When readDataChunk () calls this method, the raw bytes are in rawReadBuffer, and the final characters will appear in readBuffer.
count
- The number of bytes to convert.shift1
- The number of bits to shift byte 1.shift2
- The number of bits to shift byte 2SAXException
readDataChunk()
,
rawReadBuffer
,
readBuffer
private void copyUcs4ReadBuffer(int count, int shift1, int shift2, int shift3, int shift4) throws SAXException
When readDataChunk () calls this method, the raw bytes are in rawReadBuffer, and the final characters will appear in readBuffer.
Java has Unicode chars, and this routine uses surrogate pairs for ISO-10646 values between 0x00010000 and 0x000fffff. An exception is thrown if the ISO-10646 character has no Unicode representation.
count
- The number of bytes to convert.shift1
- The number of bits to shift byte 1.shift2
- The number of bits to shift byte 2shift3
- The number of bits to shift byte 3shift4
- The number of bits to shift byte 4SAXException
readDataChunk()
,
rawReadBuffer
,
readBuffer
private void encodingError(String message, int value, int offset) throws SAXException
SAXException
private void initializeVariables()
private void cleanupVariables()
WebARTS Library Licensed Under the GNU - General Public License. Other Libraries licensed under their respective Open Source Licenses