| /* XMLParser.java -- |
| Copyright (C) 2005 Free Software Foundation, Inc. |
| |
| This file is part of GNU Classpath. |
| |
| GNU Classpath is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2, or (at your option) |
| any later version. |
| |
| GNU Classpath is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GNU Classpath; see the file COPYING. If not, write to the |
| Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
| 02110-1301 USA. |
| |
| Linking this library statically or dynamically with other modules is |
| making a combined work based on this library. Thus, the terms and |
| conditions of the GNU General Public License cover the whole |
| combination. |
| |
| As a special exception, the copyright holders of this library give you |
| permission to link this library with independent modules to produce an |
| executable, regardless of the license terms of these independent |
| modules, and to copy and distribute the resulting executable under |
| terms of your choice, provided that you also meet, for each linked |
| independent module, the terms and conditions of the license of that |
| module. An independent module is a module which is not derived from |
| or based on this library. If you modify this library, you may extend |
| this exception to your version of the library, but you are not |
| obligated to do so. If you do not wish to do so, delete this |
| exception statement from your version. |
| |
| Partly derived from code which carried the following notice: |
| |
| Copyright (c) 1997, 1998 by Microstar Software Ltd. |
| |
| AElfred is free for both commercial and non-commercial use and |
| redistribution, provided that Microstar's copyright and disclaimer are |
| retained intact. You are free to modify AElfred for your own use and |
| to redistribute AElfred with your modifications, provided that the |
| modifications are clearly documented. |
| |
| This program is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| merchantability or fitness for a particular purpose. Please use it AT |
| YOUR OWN RISK. |
| */ |
| |
| package gnu.xml.stream; |
| |
| import java.io.BufferedInputStream; |
| import java.io.EOFException; |
| import java.io.File; |
| import java.io.FileOutputStream; |
| import java.io.FileWriter; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.io.IOException; |
| import java.io.Reader; |
| import java.io.StringReader; |
| import java.io.UnsupportedEncodingException; |
| import java.net.MalformedURLException; |
| import java.net.URL; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.LinkedHashMap; |
| import java.util.LinkedList; |
| import java.util.Map; |
| import java.util.NoSuchElementException; |
| import java.util.StringTokenizer; |
| |
| import javax.xml.XMLConstants; |
| import javax.xml.namespace.NamespaceContext; |
| import javax.xml.namespace.QName; |
| import javax.xml.stream.Location; |
| import javax.xml.stream.XMLInputFactory; |
| import javax.xml.stream.XMLReporter; |
| import javax.xml.stream.XMLResolver; |
| import javax.xml.stream.XMLStreamConstants; |
| import javax.xml.stream.XMLStreamException; |
| import javax.xml.stream.XMLStreamReader; |
| |
| import gnu.java.net.CRLFInputStream; |
| import gnu.classpath.debug.TeeInputStream; |
| import gnu.classpath.debug.TeeReader; |
| |
| /** |
| * An XML parser. |
| * This parser supports the following additional StAX properties: |
| * <table> |
| * <tr><td>gnu.xml.stream.stringInterning</td> |
| * <td>Boolean</td> |
| * <td>Indicates whether markup strings will be interned</td></tr> |
| * <tr><td>gnu.xml.stream.xmlBase</td> |
| * <td>Boolean</td> |
| * <td>Indicates whether XML Base processing will be performed</td></tr> |
| * <tr><td>gnu.xml.stream.baseURI</td> |
| * <td>String</td> |
| * <td>Returns the base URI of the current event</td></tr> |
| * </table> |
| * |
| * @see http://www.w3.org/TR/REC-xml/ |
| * @see http://www.w3.org/TR/xml11/ |
| * @see http://www.w3.org/TR/REC-xml-names |
| * @see http://www.w3.org/TR/xml-names11 |
| * @see http://www.w3.org/TR/xmlbase/ |
| * |
| * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a> |
| */ |
| public class XMLParser |
| implements XMLStreamReader, NamespaceContext |
| { |
| |
| // -- parser state machine states -- |
| private static final int INIT = 0; // start state |
| private static final int PROLOG = 1; // in prolog |
| private static final int CONTENT = 2; // in content |
| private static final int EMPTY_ELEMENT = 3; // empty element state |
| private static final int MISC = 4; // in Misc (after root element) |
| |
| // -- parameters for parsing literals -- |
| private final static int LIT_ENTITY_REF = 2; |
| private final static int LIT_NORMALIZE = 4; |
| private final static int LIT_ATTRIBUTE = 8; |
| private final static int LIT_DISABLE_PE = 16; |
| private final static int LIT_DISABLE_CREF = 32; |
| private final static int LIT_DISABLE_EREF = 64; |
| private final static int LIT_PUBID = 256; |
| |
| // -- types of attribute values -- |
| final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30; |
| final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31; |
| final static int ATTRIBUTE_DEFAULT_IMPLIED = 32; |
| final static int ATTRIBUTE_DEFAULT_REQUIRED = 33; |
| final static int ATTRIBUTE_DEFAULT_FIXED = 34; |
| |
| // -- additional event types -- |
| final static int START_ENTITY = 50; |
| final static int END_ENTITY = 51; |
| |
| /** |
| * The current input. |
| */ |
| private Input input; |
| |
| /** |
| * Stack of inputs representing XML general entities. |
| * The input representing the XML input stream or reader is always the |
| * first element in this stack. |
| */ |
| private LinkedList inputStack = new LinkedList(); |
| |
| /** |
| * Stack of start-entity events to be reported. |
| */ |
| private LinkedList startEntityStack = new LinkedList(); |
| |
| /** |
| * Stack of end-entity events to be reported. |
| */ |
| private LinkedList endEntityStack = new LinkedList(); |
| |
| /** |
| * Current parser state within the main state machine. |
| */ |
| private int state = INIT; |
| |
| /** |
| * The (type of the) current event. |
| */ |
| private int event; |
| |
| /** |
| * Whether we are looking ahead. Used by hasNext. |
| */ |
| private boolean lookahead; |
| |
| /** |
| * The element name stack. The first element in this stack will be the |
| * root element. |
| */ |
| private LinkedList stack = new LinkedList(); |
| |
| /** |
| * Stack of namespace contexts. These are maps specifying prefix-to-URI |
| * mappings. The first element in this stack is the most recent namespace |
| * context (i.e. the other way around from the element name stack). |
| */ |
| private LinkedList namespaces = new LinkedList(); |
| |
| /** |
| * The base-URI stack. This holds the base URI context for each element. |
| * The first element in this stack is the most recent context (i.e. the |
| * other way around from the element name stack). |
| */ |
| private LinkedList bases = new LinkedList(); |
| |
| /** |
| * The list of attributes for the current element, in the order defined in |
| * the XML stream. |
| */ |
| private ArrayList attrs = new ArrayList(); |
| |
| /** |
| * Buffer for text and character data. |
| */ |
| private StringBuffer buf = new StringBuffer(); |
| |
| /** |
| * Buffer for NMTOKEN strings (markup). |
| */ |
| private StringBuffer nmtokenBuf = new StringBuffer(); |
| |
| /** |
| * Buffer for string literals. (e.g. attribute values) |
| */ |
| private StringBuffer literalBuf = new StringBuffer(); |
| |
| /** |
| * Temporary Unicode character buffer used during character data reads. |
| */ |
| private int[] tmpBuf = new int[1024]; |
| |
| /** |
| * The element content model for the current element. |
| */ |
| private ContentModel currentContentModel; |
| |
| /** |
| * The validation stack. This holds lists of the elements seen for each |
| * element, in order to determine whether the names and order of these |
| * elements match the content model for the element. The last entry in |
| * this stack represents the current element. |
| */ |
| private LinkedList validationStack; |
| |
| /** |
| * These sets contain the IDs and the IDREFs seen in the document, to |
| * ensure that IDs are unique and that each IDREF refers to an ID in the |
| * document. |
| */ |
| private HashSet ids, idrefs; |
| |
| /** |
| * The target and data associated with the current processing instruction |
| * event. |
| */ |
| private String piTarget, piData; |
| |
| /** |
| * The XML version declared in the XML declaration. |
| */ |
| private String xmlVersion; |
| |
| /** |
| * The encoding declared in the XML declaration. |
| */ |
| private String xmlEncoding; |
| |
| /** |
| * The standalone value declared in the XML declaration. |
| */ |
| private Boolean xmlStandalone; |
| |
| /** |
| * The document type definition. |
| */ |
| Doctype doctype; |
| |
| /** |
| * State variables for determining parameter-entity expansion. |
| */ |
| private boolean expandPE, peIsError; |
| |
| /** |
| * Whether this is a validating parser. |
| */ |
| private final boolean validating; |
| |
| /** |
| * Whether strings representing markup will be interned. |
| */ |
| private final boolean stringInterning; |
| |
| /** |
| * If true, CDATA sections will be merged with adjacent text nodes into a |
| * single event. |
| */ |
| private final boolean coalescing; |
| |
| /** |
| * Whether to replace general entity references with their replacement |
| * text automatically during parsing. |
| * Otherwise entity-reference events will be issued. |
| */ |
| private final boolean replaceERefs; |
| |
| /** |
| * Whether to support external entities. |
| */ |
| private final boolean externalEntities; |
| |
| /** |
| * Whether to support DTDs. |
| */ |
| private final boolean supportDTD; |
| |
| /** |
| * Whether to support XML namespaces. If true, namespace information will |
| * be available. Otherwise namespaces will simply be reported as ordinary |
| * attributes. |
| */ |
| private final boolean namespaceAware; |
| |
| /** |
| * Whether to support XML Base. If true, URIs specified in xml:base |
| * attributes will be honoured when resolving external entities. |
| */ |
| private final boolean baseAware; |
| |
| /** |
| * Whether to report extended event types (START_ENTITY and END_ENTITY) |
| * in addition to the standard event types. Used by the SAX parser. |
| */ |
| private final boolean extendedEventTypes; |
| |
| /** |
| * The reporter to receive parsing warnings. |
| */ |
| final XMLReporter reporter; |
| |
| /** |
| * Callback interface for resolving external entities. |
| */ |
| final XMLResolver resolver; |
| |
| // -- Constants for testing the next kind of markup event -- |
| private static final String TEST_START_ELEMENT = "<"; |
| private static final String TEST_END_ELEMENT = "</"; |
| private static final String TEST_COMMENT = "<!--"; |
| private static final String TEST_PI = "<?"; |
| private static final String TEST_CDATA = "<![CDATA["; |
| private static final String TEST_XML_DECL = "<?xml"; |
| private static final String TEST_DOCTYPE_DECL = "<!DOCTYPE"; |
| private static final String TEST_ELEMENT_DECL = "<!ELEMENT"; |
| private static final String TEST_ATTLIST_DECL = "<!ATTLIST"; |
| private static final String TEST_ENTITY_DECL = "<!ENTITY"; |
| private static final String TEST_NOTATION_DECL = "<!NOTATION"; |
| private static final String TEST_KET = ">"; |
| private static final String TEST_END_COMMENT = "--"; |
| private static final String TEST_END_PI = "?>"; |
| private static final String TEST_END_CDATA = "]]>"; |
| |
| /** |
| * The general entities predefined by the XML specification. |
| */ |
| private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap(); |
| static |
| { |
| PREDEFINED_ENTITIES.put("amp", "&"); |
| PREDEFINED_ENTITIES.put("lt", "<"); |
| PREDEFINED_ENTITIES.put("gt", ">"); |
| PREDEFINED_ENTITIES.put("apos", "'"); |
| PREDEFINED_ENTITIES.put("quot", "\""); |
| } |
| |
| /** |
| * Creates a new XML parser for the given input stream. |
| * This constructor should be used where possible, as it allows the |
| * encoding of the XML data to be correctly determined from the stream. |
| * @param in the input stream |
| * @param systemId the URL from which the input stream was retrieved |
| * (necessary if there are external entities to be resolved) |
| * @param validating if the parser is to be a validating parser |
| * @param namespaceAware if the parser should support XML Namespaces |
| * @param coalescing if CDATA sections should be merged into adjacent text |
| * nodes |
| * @param replaceERefs if entity references should be automatically |
| * replaced by their replacement text (otherwise they will be reported as |
| * entity-reference events) |
| * @param externalEntities if external entities should be loaded |
| * @param supportDTD if support for the XML DTD should be enabled |
| * @param baseAware if the parser should support XML Base to resolve |
| * external entities |
| * @param stringInterning whether strings will be interned during parsing |
| * @param reporter the reporter to receive warnings during processing |
| * @param resolver the callback interface used to resolve external |
| * entities |
| */ |
| public XMLParser(InputStream in, String systemId, |
| boolean validating, |
| boolean namespaceAware, |
| boolean coalescing, |
| boolean replaceERefs, |
| boolean externalEntities, |
| boolean supportDTD, |
| boolean baseAware, |
| boolean stringInterning, |
| boolean extendedEventTypes, |
| XMLReporter reporter, |
| XMLResolver resolver) |
| { |
| this.validating = validating; |
| this.namespaceAware = namespaceAware; |
| this.coalescing = coalescing; |
| this.replaceERefs = replaceERefs; |
| this.externalEntities = externalEntities; |
| this.supportDTD = supportDTD; |
| this.baseAware = baseAware; |
| this.stringInterning = stringInterning; |
| this.extendedEventTypes = extendedEventTypes; |
| this.reporter = reporter; |
| this.resolver = resolver; |
| if (validating) |
| { |
| validationStack = new LinkedList(); |
| ids = new HashSet(); |
| idrefs = new HashSet(); |
| } |
| String debug = System.getProperty("gnu.xml.debug.input"); |
| if (debug != null) |
| { |
| try |
| { |
| File file = File.createTempFile(debug, ".xml"); |
| in = new TeeInputStream(in, new FileOutputStream(file)); |
| } |
| catch (IOException e) |
| { |
| RuntimeException e2 = new RuntimeException(); |
| e2.initCause(e); |
| throw e2; |
| } |
| } |
| pushInput(new Input(in, null, null, systemId, null, null, false, true)); |
| } |
| |
| /** |
| * Creates a new XML parser for the given character stream. |
| * This constructor is only available for compatibility with the JAXP |
| * APIs, which permit XML to be parsed from a character stream. Because |
| * the encoding specified by the character stream may conflict with that |
| * specified in the XML declaration, this method should be avoided where |
| * possible. |
| * @param in the input stream |
| * @param systemId the URL from which the input stream was retrieved |
| * (necessary if there are external entities to be resolved) |
| * @param validating if the parser is to be a validating parser |
| * @param namespaceAware if the parser should support XML Namespaces |
| * @param coalescing if CDATA sections should be merged into adjacent text |
| * nodes |
| * @param replaceERefs if entity references should be automatically |
| * replaced by their replacement text (otherwise they will be reported as |
| * entity-reference events) |
| * @param externalEntities if external entities should be loaded |
| * @param supportDTD if support for the XML DTD should be enabled |
| * @param baseAware if the parser should support XML Base to resolve |
| * external entities |
| * @param stringInterning whether strings will be interned during parsing |
| * @param reporter the reporter to receive warnings during processing |
| * @param resolver the callback interface used to resolve external |
| * entities |
| */ |
| public XMLParser(Reader reader, String systemId, |
| boolean validating, |
| boolean namespaceAware, |
| boolean coalescing, |
| boolean replaceERefs, |
| boolean externalEntities, |
| boolean supportDTD, |
| boolean baseAware, |
| boolean stringInterning, |
| boolean extendedEventTypes, |
| XMLReporter reporter, |
| XMLResolver resolver) |
| { |
| this.validating = validating; |
| this.namespaceAware = namespaceAware; |
| this.coalescing = coalescing; |
| this.replaceERefs = replaceERefs; |
| this.externalEntities = externalEntities; |
| this.supportDTD = supportDTD; |
| this.baseAware = baseAware; |
| this.stringInterning = stringInterning; |
| this.extendedEventTypes = extendedEventTypes; |
| this.reporter = reporter; |
| this.resolver = resolver; |
| if (validating) |
| { |
| validationStack = new LinkedList(); |
| ids = new HashSet(); |
| idrefs = new HashSet(); |
| } |
| String debug = System.getProperty("gnu.xml.debug.input"); |
| if (debug != null) |
| { |
| try |
| { |
| File file = File.createTempFile(debug, ".xml"); |
| reader = new TeeReader(reader, new FileWriter(file)); |
| } |
| catch (IOException e) |
| { |
| RuntimeException e2 = new RuntimeException(); |
| e2.initCause(e); |
| throw e2; |
| } |
| } |
| pushInput(new Input(null, reader, null, systemId, null, null, false, true)); |
| } |
| |
| // -- NamespaceContext -- |
| |
| public String getNamespaceURI(String prefix) |
| { |
| if (XMLConstants.XML_NS_PREFIX.equals(prefix)) |
| return XMLConstants.XML_NS_URI; |
| if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) |
| return XMLConstants.XMLNS_ATTRIBUTE_NS_URI; |
| for (Iterator i = namespaces.iterator(); i.hasNext(); ) |
| { |
| LinkedHashMap ctx = (LinkedHashMap) i.next(); |
| String namespaceURI = (String) ctx.get(prefix); |
| if (namespaceURI != null) |
| return namespaceURI; |
| } |
| return null; |
| } |
| |
| public String getPrefix(String namespaceURI) |
| { |
| if (XMLConstants.XML_NS_URI.equals(namespaceURI)) |
| return XMLConstants.XML_NS_PREFIX; |
| if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) |
| return XMLConstants.XMLNS_ATTRIBUTE; |
| for (Iterator i = namespaces.iterator(); i.hasNext(); ) |
| { |
| LinkedHashMap ctx = (LinkedHashMap) i.next(); |
| if (ctx.containsValue(namespaceURI)) |
| { |
| for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); ) |
| { |
| Map.Entry entry = (Map.Entry) i.next(); |
| String uri = (String) entry.getValue(); |
| if (uri.equals(namespaceURI)) |
| return (String) entry.getKey(); |
| } |
| } |
| } |
| return null; |
| } |
| |
| public Iterator getPrefixes(String namespaceURI) |
| { |
| if (XMLConstants.XML_NS_URI.equals(namespaceURI)) |
| return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator(); |
| if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) |
| return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator(); |
| LinkedList acc = new LinkedList(); |
| for (Iterator i = namespaces.iterator(); i.hasNext(); ) |
| { |
| LinkedHashMap ctx = (LinkedHashMap) i.next(); |
| if (ctx.containsValue(namespaceURI)) |
| { |
| for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); ) |
| { |
| Map.Entry entry = (Map.Entry) i.next(); |
| String uri = (String) entry.getValue(); |
| if (uri.equals(namespaceURI)) |
| acc.add(entry.getKey()); |
| } |
| } |
| } |
| return acc.iterator(); |
| } |
| |
| // -- XMLStreamReader -- |
| |
| public void close() |
| throws XMLStreamException |
| { |
| stack = null; |
| namespaces = null; |
| bases = null; |
| buf = null; |
| attrs = null; |
| doctype = null; |
| |
| inputStack = null; |
| validationStack = null; |
| ids = null; |
| idrefs = null; |
| } |
| |
| public NamespaceContext getNamespaceContext() |
| { |
| return this; |
| } |
| |
| public int getAttributeCount() |
| { |
| return attrs.size(); |
| } |
| |
| public String getAttributeLocalName(int index) |
| { |
| Attribute a = (Attribute) attrs.get(index); |
| return a.localName; |
| } |
| |
| public String getAttributeNamespace(int index) |
| { |
| String prefix = getAttributePrefix(index); |
| return getNamespaceURI(prefix); |
| } |
| |
| public String getAttributePrefix(int index) |
| { |
| Attribute a = (Attribute) attrs.get(index); |
| return a.prefix; |
| } |
| |
| public QName getAttributeName(int index) |
| { |
| Attribute a = (Attribute) attrs.get(index); |
| String namespaceURI = getNamespaceURI(a.prefix); |
| return new QName(namespaceURI, a.localName, a.prefix); |
| } |
| |
| public String getAttributeType(int index) |
| { |
| Attribute a = (Attribute) attrs.get(index); |
| return a.type; |
| } |
| |
| private String getAttributeType(String elementName, String attName) |
| { |
| if (doctype != null) |
| { |
| AttributeDecl att = doctype.getAttributeDecl(elementName, attName); |
| if (att != null) |
| return att.type; |
| } |
| return "CDATA"; |
| } |
| |
| public String getAttributeValue(int index) |
| { |
| Attribute a = (Attribute) attrs.get(index); |
| return a.value; |
| } |
| |
| public String getAttributeValue(String namespaceURI, String localName) |
| { |
| for (Iterator i = attrs.iterator(); i.hasNext(); ) |
| { |
| Attribute a = (Attribute) i.next(); |
| if (a.localName.equals(localName)) |
| { |
| String uri = getNamespaceURI(a.prefix); |
| if ((uri == null && namespaceURI == null) || |
| (uri != null && uri.equals(namespaceURI))) |
| return a.value; |
| } |
| } |
| return null; |
| } |
| |
| boolean isAttributeDeclared(int index) |
| { |
| if (doctype == null) |
| return false; |
| Attribute a = (Attribute) attrs.get(index); |
| String qn = ("".equals(a.prefix)) ? a.localName : |
| a.prefix + ":" + a.localName; |
| String elementName = buf.toString(); |
| return doctype.isAttributeDeclared(elementName, qn); |
| } |
| |
| public String getCharacterEncodingScheme() |
| { |
| return xmlEncoding; |
| } |
| |
| public String getElementText() |
| throws XMLStreamException |
| { |
| if (event != XMLStreamConstants.START_ELEMENT) |
| throw new XMLStreamException("current event must be START_ELEMENT"); |
| StringBuffer elementText = new StringBuffer(); |
| int depth = stack.size(); |
| while (event != XMLStreamConstants.END_ELEMENT || stack.size() > depth) |
| { |
| switch (next()) |
| { |
| case XMLStreamConstants.CHARACTERS: |
| case XMLStreamConstants.SPACE: |
| elementText.append(buf.toString()); |
| } |
| } |
| return elementText.toString(); |
| } |
| |
| public String getEncoding() |
| { |
| return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding; |
| } |
| |
| public int getEventType() |
| { |
| return event; |
| } |
| |
| public String getLocalName() |
| { |
| switch (event) |
| { |
| case XMLStreamConstants.START_ELEMENT: |
| case XMLStreamConstants.END_ELEMENT: |
| String qName = buf.toString(); |
| int ci = qName.indexOf(':'); |
| return (ci == -1) ? qName : qName.substring(ci + 1); |
| default: |
| return null; |
| } |
| } |
| |
| public Location getLocation() |
| { |
| return input; |
| } |
| |
| public QName getName() |
| { |
| switch (event) |
| { |
| case XMLStreamConstants.START_ELEMENT: |
| case XMLStreamConstants.END_ELEMENT: |
| String qName = buf.toString(); |
| int ci = qName.indexOf(':'); |
| String localName = (ci == -1) ? qName : qName.substring(ci + 1); |
| String prefix = (ci == -1) ? |
| (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) : |
| qName.substring(0, ci); |
| String namespaceURI = getNamespaceURI(prefix); |
| return new QName(namespaceURI, localName, prefix); |
| default: |
| return null; |
| } |
| } |
| |
| public int getNamespaceCount() |
| { |
| if (!namespaceAware || namespaces.isEmpty()) |
| return 0; |
| switch (event) |
| { |
| case XMLStreamConstants.START_ELEMENT: |
| case XMLStreamConstants.END_ELEMENT: |
| LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); |
| return ctx.size(); |
| default: |
| return 0; |
| } |
| } |
| |
| public String getNamespacePrefix(int index) |
| { |
| LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); |
| int count = 0; |
| for (Iterator i = ctx.keySet().iterator(); i.hasNext(); ) |
| { |
| String prefix = (String) i.next(); |
| if (count++ == index) |
| return prefix; |
| } |
| return null; |
| } |
| |
| public String getNamespaceURI() |
| { |
| switch (event) |
| { |
| case XMLStreamConstants.START_ELEMENT: |
| case XMLStreamConstants.END_ELEMENT: |
| String qName = buf.toString(); |
| int ci = qName.indexOf(':'); |
| if (ci == -1) |
| return null; |
| String prefix = qName.substring(0, ci); |
| return getNamespaceURI(prefix); |
| default: |
| return null; |
| } |
| } |
| |
| public String getNamespaceURI(int index) |
| { |
| LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); |
| int count = 0; |
| for (Iterator i = ctx.values().iterator(); i.hasNext(); ) |
| { |
| String uri = (String) i.next(); |
| if (count++ == index) |
| return uri; |
| } |
| return null; |
| } |
| |
| public String getPIData() |
| { |
| return piData; |
| } |
| |
| public String getPITarget() |
| { |
| return piTarget; |
| } |
| |
| public String getPrefix() |
| { |
| switch (event) |
| { |
| case XMLStreamConstants.START_ELEMENT: |
| case XMLStreamConstants.END_ELEMENT: |
| String qName = buf.toString(); |
| int ci = qName.indexOf(':'); |
| return (ci == -1) ? |
| (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) : |
| qName.substring(0, ci); |
| default: |
| return null; |
| } |
| } |
| |
| public Object getProperty(String name) |
| throws IllegalArgumentException |
| { |
| if (name == null) |
| throw new IllegalArgumentException("name is null"); |
| if (XMLInputFactory.ALLOCATOR.equals(name)) |
| return null; |
| if (XMLInputFactory.IS_COALESCING.equals(name)) |
| return coalescing ? Boolean.TRUE : Boolean.FALSE; |
| if (XMLInputFactory.IS_NAMESPACE_AWARE.equals(name)) |
| return namespaceAware ? Boolean.TRUE : Boolean.FALSE; |
| if (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES.equals(name)) |
| return replaceERefs ? Boolean.TRUE : Boolean.FALSE; |
| if (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES.equals(name)) |
| return externalEntities ? Boolean.TRUE : Boolean.FALSE; |
| if (XMLInputFactory.IS_VALIDATING.equals(name)) |
| return Boolean.FALSE; |
| if (XMLInputFactory.REPORTER.equals(name)) |
| return reporter; |
| if (XMLInputFactory.RESOLVER.equals(name)) |
| return resolver; |
| if (XMLInputFactory.SUPPORT_DTD.equals(name)) |
| return supportDTD ? Boolean.TRUE : Boolean.FALSE; |
| if ("gnu.xml.stream.stringInterning".equals(name)) |
| return stringInterning ? Boolean.TRUE : Boolean.FALSE; |
| if ("gnu.xml.stream.xmlBase".equals(name)) |
| return baseAware ? Boolean.TRUE : Boolean.FALSE; |
| if ("gnu.xml.stream.baseURI".equals(name)) |
| return getXMLBase(); |
| return null; |
| } |
| |
| public String getText() |
| { |
| return buf.toString(); |
| } |
| |
| public char[] getTextCharacters() |
| { |
| return buf.toString().toCharArray(); |
| } |
| |
| public int getTextCharacters(int sourceStart, char[] target, |
| int targetStart, int length) |
| throws XMLStreamException |
| { |
| length = Math.min(sourceStart + buf.length(), length); |
| int sourceEnd = sourceStart + length; |
| buf.getChars(sourceStart, sourceEnd, target, targetStart); |
| return length; |
| } |
| |
| public int getTextLength() |
| { |
| return buf.length(); |
| } |
| |
| public int getTextStart() |
| { |
| return 0; |
| } |
| |
| public String getVersion() |
| { |
| return (xmlVersion == null) ? "1.0" : xmlVersion; |
| } |
| |
| public boolean hasName() |
| { |
| switch (event) |
| { |
| case XMLStreamConstants.START_ELEMENT: |
| case XMLStreamConstants.END_ELEMENT: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| public boolean hasText() |
| { |
| switch (event) |
| { |
| case XMLStreamConstants.CHARACTERS: |
| case XMLStreamConstants.SPACE: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| public boolean isAttributeSpecified(int index) |
| { |
| Attribute a = (Attribute) attrs.get(index); |
| return a.specified; |
| } |
| |
| public boolean isCharacters() |
| { |
| return (event == XMLStreamConstants.CHARACTERS); |
| } |
| |
| public boolean isEndElement() |
| { |
| return (event == XMLStreamConstants.END_ELEMENT); |
| } |
| |
| public boolean isStandalone() |
| { |
| return Boolean.TRUE.equals(xmlStandalone); |
| } |
| |
| public boolean isStartElement() |
| { |
| return (event == XMLStreamConstants.START_ELEMENT); |
| } |
| |
| public boolean isWhiteSpace() |
| { |
| return (event == XMLStreamConstants.SPACE); |
| } |
| |
| public int nextTag() |
| throws XMLStreamException |
| { |
| do |
| { |
| switch (next()) |
| { |
| case XMLStreamConstants.START_ELEMENT: |
| case XMLStreamConstants.END_ELEMENT: |
| case XMLStreamConstants.CHARACTERS: |
| case XMLStreamConstants.SPACE: |
| case XMLStreamConstants.COMMENT: |
| case XMLStreamConstants.PROCESSING_INSTRUCTION: |
| break; |
| default: |
| throw new XMLStreamException("Unexpected event type: " + event); |
| } |
| } |
| while (event != XMLStreamConstants.START_ELEMENT && |
| event != XMLStreamConstants.END_ELEMENT); |
| return event; |
| } |
| |
| public void require(int type, String namespaceURI, String localName) |
| throws XMLStreamException |
| { |
| if (event != type) |
| throw new XMLStreamException("Current event type is " + event); |
| if (event == XMLStreamConstants.START_ELEMENT || |
| event == XMLStreamConstants.END_ELEMENT) |
| { |
| String ln = getLocalName(); |
| if (!ln.equals(localName)) |
| throw new XMLStreamException("Current local-name is " + ln); |
| String uri = getNamespaceURI(); |
| if ((uri == null && namespaceURI != null) || |
| (uri != null && !uri.equals(namespaceURI))) |
| throw new XMLStreamException("Current namespace URI is " + uri); |
| } |
| } |
| |
| public boolean standaloneSet() |
| { |
| return (xmlStandalone != null); |
| } |
| |
| public boolean hasNext() |
| throws XMLStreamException |
| { |
| if (event == XMLStreamConstants.END_DOCUMENT) |
| return false; |
| if (!lookahead) |
| { |
| next(); |
| lookahead = true; |
| } |
| return event != -1; |
| } |
| |
| public int next() |
| throws XMLStreamException |
| { |
| if (lookahead) |
| { |
| lookahead = false; |
| return event; |
| } |
| if (event == XMLStreamConstants.END_ELEMENT) |
| { |
| // Pop namespace context |
| if (namespaceAware && !namespaces.isEmpty()) |
| namespaces.removeFirst(); |
| // Pop base context |
| if (baseAware && !bases.isEmpty()) |
| bases.removeFirst(); |
| } |
| if (!startEntityStack.isEmpty()) |
| { |
| String entityName = (String) startEntityStack.removeFirst(); |
| buf.setLength(0); |
| buf.append(entityName); |
| event = START_ENTITY; |
| return extendedEventTypes ? event : next(); |
| } |
| else if (!endEntityStack.isEmpty()) |
| { |
| String entityName = (String) endEntityStack.removeFirst(); |
| buf.setLength(0); |
| buf.append(entityName); |
| event = END_ENTITY; |
| return extendedEventTypes ? event : next(); |
| } |
| try |
| { |
| if (!input.initialized) |
| input.init(); |
| switch (state) |
| { |
| case CONTENT: |
| if (tryRead(TEST_END_ELEMENT)) |
| { |
| readEndElement(); |
| if (stack.isEmpty()) |
| state = MISC; |
| event = XMLStreamConstants.END_ELEMENT; |
| } |
| else if (tryRead(TEST_COMMENT)) |
| { |
| readComment(false); |
| event = XMLStreamConstants.COMMENT; |
| } |
| else if (tryRead(TEST_PI)) |
| { |
| readPI(false); |
| event = XMLStreamConstants.PROCESSING_INSTRUCTION; |
| } |
| else if (tryRead(TEST_CDATA)) |
| { |
| readCDSect(); |
| event = XMLStreamConstants.CDATA; |
| } |
| else if (tryRead(TEST_START_ELEMENT)) |
| { |
| state = readStartElement(); |
| event = XMLStreamConstants.START_ELEMENT; |
| } |
| else |
| { |
| // Check for character reference or predefined entity |
| mark(8); |
| int c = readCh(); |
| if (c == 0x26) // '&' |
| { |
| c = readCh(); |
| if (c == 0x23) // '#' |
| { |
| reset(); |
| event = readCharData(null); |
| } |
| else |
| { |
| // entity reference |
| reset(); |
| readCh(); // & |
| readReference(); |
| String ref = buf.toString(); |
| String text = (String) PREDEFINED_ENTITIES.get(ref); |
| if (text != null) |
| { |
| event = readCharData(text); |
| } |
| else if (replaceERefs && !isUnparsedEntity(ref)) |
| { |
| // this will report a start-entity event |
| boolean external = false; |
| if (doctype != null) |
| { |
| Object entity = doctype.getEntity(ref); |
| if (entity instanceof ExternalIds) |
| external = true; |
| } |
| expandEntity(ref, false, external); |
| event = next(); |
| } |
| else |
| { |
| event = XMLStreamConstants.ENTITY_REFERENCE; |
| } |
| } |
| } |
| else |
| { |
| reset(); |
| event = readCharData(null); |
| if (validating && doctype != null) |
| validatePCData(buf.toString()); |
| } |
| } |
| break; |
| case EMPTY_ELEMENT: |
| String elementName = (String) stack.removeLast(); |
| buf.setLength(0); |
| buf.append(elementName); |
| state = stack.isEmpty() ? MISC : CONTENT; |
| event = XMLStreamConstants.END_ELEMENT; |
| if (validating && doctype != null) |
| endElementValidationHook(); |
| break; |
| case INIT: // XMLDecl? |
| if (tryRead(TEST_XML_DECL)) |
| readXMLDecl(); |
| input.finalizeEncoding(); |
| event = XMLStreamConstants.START_DOCUMENT; |
| state = PROLOG; |
| break; |
| case PROLOG: // Misc* (doctypedecl Misc*)? |
| skipWhitespace(); |
| if (doctype == null && tryRead(TEST_DOCTYPE_DECL)) |
| { |
| readDoctypeDecl(); |
| event = XMLStreamConstants.DTD; |
| } |
| else if (tryRead(TEST_COMMENT)) |
| { |
| readComment(false); |
| event = XMLStreamConstants.COMMENT; |
| } |
| else if (tryRead(TEST_PI)) |
| { |
| readPI(false); |
| event = XMLStreamConstants.PROCESSING_INSTRUCTION; |
| } |
| else if (tryRead(TEST_START_ELEMENT)) |
| { |
| state = readStartElement(); |
| event = XMLStreamConstants.START_ELEMENT; |
| } |
| else |
| { |
| int c = readCh(); |
| error("no root element: U+" + Integer.toHexString(c)); |
| } |
| break; |
| case MISC: // Comment | PI | S |
| skipWhitespace(); |
| if (tryRead(TEST_COMMENT)) |
| { |
| readComment(false); |
| event = XMLStreamConstants.COMMENT; |
| } |
| else if (tryRead(TEST_PI)) |
| { |
| readPI(false); |
| event = XMLStreamConstants.PROCESSING_INSTRUCTION; |
| } |
| else |
| { |
| if (event == XMLStreamConstants.END_DOCUMENT) |
| throw new NoSuchElementException(); |
| int c = readCh(); |
| if (c != -1) |
| error("Only comments and PIs may appear after " + |
| "the root element"); |
| event = XMLStreamConstants.END_DOCUMENT; |
| } |
| break; |
| default: |
| event = -1; |
| } |
| return event; |
| } |
| catch (IOException e) |
| { |
| XMLStreamException e2 = new XMLStreamException(); |
| e2.initCause(e); |
| throw e2; |
| } |
| } |
| |
| // package private |
| |
| /** |
| * Returns the current element name. |
| */ |
| String getCurrentElement() |
| { |
| return (String) stack.getLast(); |
| } |
| |
| // private |
| |
| private void mark(int limit) |
| throws IOException |
| { |
| input.mark(limit); |
| } |
| |
| private void reset() |
| throws IOException |
| { |
| input.reset(); |
| } |
| |
| private int read() |
| throws IOException |
| { |
| return input.read(); |
| } |
| |
| private int read(int[] b, int off, int len) |
| throws IOException |
| { |
| return input.read(b, off, len); |
| } |
| |
| /** |
| * Parsed character read. |
| */ |
| private int readCh() |
| throws IOException, XMLStreamException |
| { |
| int c = read(); |
| if (expandPE && c == 0x25) // '%' |
| { |
| if (peIsError) |
| error("PE reference within decl in internal subset."); |
| expandPEReference(); |
| return readCh(); |
| } |
| return c; |
| } |
| |
| /** |
| * Reads the next character, ensuring it is the character specified. |
| * @param delim the character to match |
| * @exception XMLStreamException if the next character is not the |
| * specified one |
| */ |
| private void require(char delim) |
| throws IOException, XMLStreamException |
| { |
| mark(1); |
| int c = readCh(); |
| if (delim != c) |
| { |
| reset(); |
| error("required character (got U+" + Integer.toHexString(c) + ")", |
| new Character(delim)); |
| } |
| } |
| |
| /** |
| * Reads the next few characters, ensuring they match the string specified. |
| * @param delim the string to match |
| * @exception XMLStreamException if the next characters do not match the |
| * specified string |
| */ |
| private void require(String delim) |
| throws IOException, XMLStreamException |
| { |
| char[] chars = delim.toCharArray(); |
| int len = chars.length; |
| mark(len); |
| int off = 0; |
| do |
| { |
| int l2 = read(tmpBuf, off, len - off); |
| if (l2 == -1) |
| { |
| reset(); |
| error("EOF before required string", delim); |
| } |
| off += l2; |
| } |
| while (off < len); |
| for (int i = 0; i < chars.length; i++) |
| { |
| if (chars[i] != tmpBuf[i]) |
| { |
| reset(); |
| error("required string", delim); |
| } |
| } |
| } |
| |
| /** |
| * Try to read a single character. On failure, reset the stream. |
| * @param delim the character to test |
| * @return true if the character matched delim, false otherwise. |
| */ |
| private boolean tryRead(char delim) |
| throws IOException, XMLStreamException |
| { |
| mark(1); |
| int c = readCh(); |
| if (delim != c) |
| { |
| reset(); |
| return false; |
| } |
| return true; |
| } |
| |
| /** |
| * Tries to read the specified characters. |
| * If successful, the stream is positioned after the last character, |
| * otherwise it is reset. |
| * @param test the string to test |
| * @return true if the characters matched the test string, false otherwise. |
| */ |
| private boolean tryRead(String test) |
| throws IOException |
| { |
| char[] chars = test.toCharArray(); |
| int len = chars.length; |
| mark(len); |
| int count = 0; |
| int l2 = read(tmpBuf, 0, len); |
| if (l2 == -1) |
| { |
| reset(); |
| return false; |
| } |
| count += l2; |
| // check the characters we received first before doing additional reads |
| for (int i = 0; i < count; i++) |
| { |
| if (chars[i] != tmpBuf[i]) |
| { |
| reset(); |
| return false; |
| } |
| } |
| while (count < len) |
| { |
| // force read |
| int c = read(); |
| if (c == -1) |
| { |
| reset(); |
| return false; |
| } |
| tmpBuf[count] = (char) c; |
| // check each character as it is read |
| if (chars[count] != tmpBuf[count]) |
| { |
| reset(); |
| return false; |
| } |
| count++; |
| } |
| return true; |
| } |
| |
| /** |
| * Reads characters until the specified test string is encountered. |
| * @param delim the string delimiting the end of the characters |
| */ |
| private void readUntil(String delim) |
| throws IOException, XMLStreamException |
| { |
| int startLine = input.line; |
| try |
| { |
| while (!tryRead(delim)) |
| { |
| int c = readCh(); |
| if (c == -1) |
| throw new EOFException(); |
| else if (input.xml11) |
| { |
| if (!isXML11Char(c) || isXML11RestrictedChar(c)) |
| error("illegal XML 1.1 character", |
| "U+" + Integer.toHexString(c)); |
| } |
| else if (!isChar(c)) |
| error("illegal XML character", |
| "U+" + Integer.toHexString(c)); |
| buf.append(Character.toChars(c)); |
| } |
| } |
| catch (EOFException e) |
| { |
| error("end of input while looking for delimiter "+ |
| "(started on line " + startLine + ')', delim); |
| } |
| } |
| |
| /** |
| * Reads any whitespace characters. |
| * @return true if whitespace characters were read, false otherwise |
| */ |
| private boolean tryWhitespace() |
| throws IOException, XMLStreamException |
| { |
| boolean white; |
| boolean ret = false; |
| do |
| { |
| mark(1); |
| int c = readCh(); |
| while (c == -1 && inputStack.size() > 1) |
| { |
| popInput(); |
| c = readCh(); |
| } |
| white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d); |
| if (white) |
| ret = true; |
| } |
| while (white); |
| reset(); |
| return ret; |
| } |
| |
| /** |
| * Skip over any whitespace characters. |
| */ |
| private void skipWhitespace() |
| throws IOException, XMLStreamException |
| { |
| boolean white; |
| do |
| { |
| mark(1); |
| int c = readCh(); |
| while (c == -1 && inputStack.size() > 1) |
| { |
| popInput(); |
| c = readCh(); |
| } |
| white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d); |
| } |
| while (white); |
| reset(); |
| } |
| |
| /** |
| * Try to read as many whitespace characters as are available. |
| * @exception XMLStreamException if no whitespace characters were seen |
| */ |
| private void requireWhitespace() |
| throws IOException, XMLStreamException |
| { |
| if (!tryWhitespace()) |
| error("whitespace required"); |
| } |
| |
| /** |
| * Returns the current base URI for resolving external entities. |
| */ |
| String getXMLBase() |
| { |
| if (baseAware) |
| { |
| for (Iterator i = bases.iterator(); i.hasNext(); ) |
| { |
| String base = (String) i.next(); |
| if (base != null) |
| return base; |
| } |
| } |
| return input.systemId; |
| } |
| |
| /** |
| * Push the specified text input source. |
| */ |
| private void pushInput(String name, String text, boolean report, |
| boolean normalize) |
| throws IOException, XMLStreamException |
| { |
| // Check for recursion |
| if (name != null && !"".equals(name)) |
| { |
| for (Iterator i = inputStack.iterator(); i.hasNext(); ) |
| { |
| Input ctx = (Input) i.next(); |
| if (name.equals(ctx.name)) |
| error("entities may not be self-recursive", name); |
| } |
| } |
| else |
| report = false; |
| pushInput(new Input(null, new StringReader(text), input.publicId, |
| input.systemId, name, input.inputEncoding, report, |
| normalize)); |
| } |
| |
| /** |
| * Push the specified external input source. |
| */ |
| private void pushInput(String name, ExternalIds ids, boolean report, |
| boolean normalize) |
| throws IOException, XMLStreamException |
| { |
| if (!externalEntities) |
| return; |
| String url = absolutize(input.systemId, ids.systemId); |
| // Check for recursion |
| for (Iterator i = inputStack.iterator(); i.hasNext(); ) |
| { |
| Input ctx = (Input) i.next(); |
| if (url.equals(ctx.systemId)) |
| error("entities may not be self-recursive", url); |
| if (name != null && !"".equals(name) && name.equals(ctx.name)) |
| error("entities may not be self-recursive", name); |
| } |
| if (name == null || "".equals(name)) |
| report = false; |
| InputStream in = null; |
| if (resolver != null) |
| { |
| Object obj = resolver.resolveEntity(ids.publicId, url, getXMLBase(), |
| null); |
| if (obj instanceof InputStream) |
| in = (InputStream) obj; |
| } |
| if (in == null) |
| in = resolve(url); |
| if (in == null) |
| error("unable to resolve external entity", |
| (ids.systemId != null) ? ids.systemId : ids.publicId); |
| pushInput(new Input(in, null, ids.publicId, url, name, null, report, |
| normalize)); |
| input.init(); |
| if (tryRead(TEST_XML_DECL)) |
| readTextDecl(); |
| input.finalizeEncoding(); |
| } |
| |
| /** |
| * Push the specified input source (general entity) onto the input stack. |
| */ |
| private void pushInput(Input input) |
| { |
| if (input.report) |
| startEntityStack.addFirst(input.name); |
| inputStack.addLast(input); |
| if (this.input != null) |
| input.xml11 = this.input.xml11; |
| this.input = input; |
| } |
| |
| /** |
| * "Absolutize" a URL. This resolves a relative URL into an absolute one. |
| * @param base the current base URL |
| * @param href the (absolute or relative) URL to resolve |
| */ |
| public static String absolutize(String base, String href) |
| throws MalformedURLException |
| { |
| if (href == null) |
| return null; |
| int ci = href.indexOf(':'); |
| if (ci > 1 && isURLScheme(href.substring(0, ci))) |
| { |
| // href is absolute already |
| return href; |
| } |
| if (base == null) |
| base = ""; |
| else |
| { |
| int i = base.lastIndexOf('/'); |
| if (i != -1) |
| base = base.substring(0, i + 1); |
| else |
| base = ""; |
| } |
| if ("".equals(base)) |
| { |
| // assume file URL relative to current directory |
| base = System.getProperty("user.dir"); |
| if (base.charAt(0) == '/') |
| base = base.substring(1); |
| base = "file:///" + base.replace(File.separatorChar, '/'); |
| if (!base.endsWith("/")) |
| base += "/"; |
| } |
| return new URL(new URL(base), href).toString(); |
| } |
| |
| /** |
| * Indicates whether the specified characters match the scheme portion of |
| * a URL. |
| * @see RFC 1738 section 2.1 |
| */ |
| private static boolean isURLScheme(String text) |
| { |
| int len = text.length(); |
| for (int i = 0; i < len; i++) |
| { |
| char c = text.charAt(i); |
| if (c == '+' || c == '.' || c == '-') |
| continue; |
| if (c < 65 || (c > 90 && c < 97) || c > 122) |
| return false; |
| } |
| return true; |
| } |
| |
| /** |
| * Returns an input stream for the given URL. |
| */ |
| static InputStream resolve(String url) |
| throws IOException |
| { |
| try |
| { |
| return new URL(url).openStream(); |
| } |
| catch (MalformedURLException e) |
| { |
| return null; |
| } |
| catch (IOException e) |
| { |
| IOException e2 = new IOException("error resolving " + url); |
| e2.initCause(e); |
| throw e2; |
| } |
| } |
| |
| /** |
| * Pops the current input source (general entity) off the stack. |
| */ |
| private void popInput() |
| { |
| Input old = (Input) inputStack.removeLast(); |
| if (old.report) |
| endEntityStack.addFirst(old.name); |
| input = (Input) inputStack.getLast(); |
| } |
| |
| /** |
| * Parse an entity text declaration. |
| */ |
| private void readTextDecl() |
| throws IOException, XMLStreamException |
| { |
| final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; |
| requireWhitespace(); |
| if (tryRead("version")) |
| { |
| readEq(); |
| String v = readLiteral(flags, false); |
| if ("1.0".equals(v)) |
| input.xml11 = false; |
| else if ("1.1".equals(v)) |
| { |
| Input i1 = (Input) inputStack.getFirst(); |
| if (!i1.xml11) |
| error("external entity specifies later version number"); |
| input.xml11 = true; |
| } |
| else |
| throw new XMLStreamException("illegal XML version: " + v); |
| requireWhitespace(); |
| } |
| require("encoding"); |
| readEq(); |
| String enc = readLiteral(flags, false); |
| skipWhitespace(); |
| require("?>"); |
| input.setInputEncoding(enc); |
| } |
| |
| /** |
| * Parse the XML declaration. |
| */ |
| private void readXMLDecl() |
| throws IOException, XMLStreamException |
| { |
| final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; |
| |
| requireWhitespace(); |
| require("version"); |
| readEq(); |
| xmlVersion = readLiteral(flags, false); |
| if ("1.0".equals(xmlVersion)) |
| input.xml11 = false; |
| else if ("1.1".equals(xmlVersion)) |
| input.xml11 = true; |
| else |
| throw new XMLStreamException("illegal XML version: " + xmlVersion); |
| |
| boolean white = tryWhitespace(); |
| |
| if (tryRead("encoding")) |
| { |
| if (!white) |
| error("whitespace required before 'encoding='"); |
| readEq(); |
| xmlEncoding = readLiteral(flags, false); |
| white = tryWhitespace(); |
| } |
| |
| if (tryRead("standalone")) |
| { |
| if (!white) |
| error("whitespace required before 'standalone='"); |
| readEq(); |
| String standalone = readLiteral(flags, false); |
| if ("yes".equals(standalone)) |
| xmlStandalone = Boolean.TRUE; |
| else if ("no".equals(standalone)) |
| xmlStandalone = Boolean.FALSE; |
| else |
| error("standalone flag must be 'yes' or 'no'", standalone); |
| } |
| |
| skipWhitespace(); |
| require("?>"); |
| if (xmlEncoding != null) |
| input.setInputEncoding(xmlEncoding); |
| } |
| |
| /** |
| * Parse the DOCTYPE declaration. |
| */ |
| private void readDoctypeDecl() |
| throws IOException, XMLStreamException |
| { |
| if (!supportDTD) |
| error("parser was configured not to support DTDs"); |
| requireWhitespace(); |
| String rootName = readNmtoken(true); |
| skipWhitespace(); |
| ExternalIds ids = readExternalIds(false, true); |
| doctype = |
| this.new Doctype(rootName, ids.publicId, ids.systemId); |
| |
| // Parse internal subset first |
| skipWhitespace(); |
| if (tryRead('[')) |
| { |
| while (true) |
| { |
| expandPE = true; |
| skipWhitespace(); |
| expandPE = false; |
| if (tryRead(']')) |
| break; |
| else |
| readMarkupdecl(false); |
| } |
| } |
| skipWhitespace(); |
| require('>'); |
| |
| // Parse external subset |
| if (ids.systemId != null && externalEntities) |
| { |
| pushInput("", ">", false, false); |
| pushInput("[dtd]", ids, true, true); |
| // loop until we get back to ">" |
| while (true) |
| { |
| expandPE = true; |
| skipWhitespace(); |
| expandPE = false; |
| mark(1); |
| int c = readCh(); |
| if (c == 0x3e) // '>' |
| break; |
| else if (c == -1) |
| popInput(); |
| else |
| { |
| reset(); |
| expandPE = true; |
| readMarkupdecl(true); |
| expandPE = true; |
| } |
| } |
| if (inputStack.size() != 2) |
| error("external subset has unmatched '>'"); |
| popInput(); |
| } |
| checkDoctype(); |
| if (validating) |
| validateDoctype(); |
| |
| // Make rootName available for reading |
| buf.setLength(0); |
| buf.append(rootName); |
| } |
| |
| /** |
| * Checks the well-formedness of the DTD. |
| */ |
| private void checkDoctype() |
| throws XMLStreamException |
| { |
| // TODO check entity recursion |
| } |
| |
| /** |
| * Parse the markupdecl production. |
| */ |
| private void readMarkupdecl(boolean inExternalSubset) |
| throws IOException, XMLStreamException |
| { |
| boolean saved = expandPE; |
| mark(1); |
| require('<'); |
| reset(); |
| expandPE = false; |
| if (tryRead(TEST_ELEMENT_DECL)) |
| { |
| expandPE = saved; |
| readElementDecl(); |
| } |
| else if (tryRead(TEST_ATTLIST_DECL)) |
| { |
| expandPE = saved; |
| readAttlistDecl(); |
| } |
| else if (tryRead(TEST_ENTITY_DECL)) |
| { |
| expandPE = saved; |
| readEntityDecl(inExternalSubset); |
| } |
| else if (tryRead(TEST_NOTATION_DECL)) |
| { |
| expandPE = saved; |
| readNotationDecl(inExternalSubset); |
| } |
| else if (tryRead(TEST_PI)) |
| { |
| readPI(true); |
| expandPE = saved; |
| } |
| else if (tryRead(TEST_COMMENT)) |
| { |
| readComment(true); |
| expandPE = saved; |
| } |
| else if (tryRead("<![")) |
| { |
| // conditional section |
| expandPE = saved; |
| if (inputStack.size() < 2) |
| error("conditional sections illegal in internal subset"); |
| skipWhitespace(); |
| if (tryRead("INCLUDE")) |
| { |
| skipWhitespace(); |
| require('['); |
| skipWhitespace(); |
| while (!tryRead("]]>")) |
| { |
| readMarkupdecl(inExternalSubset); |
| skipWhitespace(); |
| } |
| } |
| else if (tryRead("IGNORE")) |
| { |
| skipWhitespace(); |
| require('['); |
| expandPE = false; |
| for (int nesting = 1; nesting > 0; ) |
| { |
| int c = readCh(); |
| switch (c) |
| { |
| case 0x3c: // '<' |
| if (tryRead("![")) |
| nesting++; |
| break; |
| case 0x5d: // ']' |
| if (tryRead("]>")) |
| nesting--; |
| break; |
| case -1: |
| throw new EOFException(); |
| } |
| } |
| expandPE = saved; |
| } |
| else |
| error("conditional section must begin with INCLUDE or IGNORE"); |
| } |
| else |
| error("expected markup declaration"); |
| } |
| |
| /** |
| * Parse the elementdecl production. |
| */ |
| private void readElementDecl() |
| throws IOException, XMLStreamException |
| { |
| requireWhitespace(); |
| boolean saved = expandPE; |
| expandPE = (inputStack.size() > 1); |
| String name = readNmtoken(true); |
| expandPE = saved; |
| requireWhitespace(); |
| readContentspec(name); |
| skipWhitespace(); |
| require('>'); |
| } |
| |
| /** |
| * Parse the contentspec production. |
| */ |
| private void readContentspec(String elementName) |
| throws IOException, XMLStreamException |
| { |
| if (tryRead("EMPTY")) |
| doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel()); |
| else if (tryRead("ANY")) |
| doctype.addElementDecl(elementName, "ANY", new AnyContentModel()); |
| else |
| { |
| ContentModel model; |
| StringBuffer acc = new StringBuffer(); |
| require('('); |
| acc.append('('); |
| skipWhitespace(); |
| if (tryRead("#PCDATA")) |
| { |
| // mixed content |
| acc.append("#PCDATA"); |
| MixedContentModel mm = new MixedContentModel(); |
| model = mm; |
| skipWhitespace(); |
| if (tryRead(')')) |
| { |
| acc.append(")"); |
| if (tryRead('*')) |
| { |
| mm.min = 0; |
| mm.max = -1; |
| } |
| } |
| else |
| { |
| while (!tryRead(")")) |
| { |
| require('|'); |
| acc.append('|'); |
| skipWhitespace(); |
| String name = readNmtoken(true); |
| acc.append(name); |
| mm.addName(name); |
| skipWhitespace(); |
| } |
| require('*'); |
| acc.append(")*"); |
| mm.min = 0; |
| mm.max = -1; |
| } |
| } |
| else |
| model = readElements(acc); |
| doctype.addElementDecl(elementName, acc.toString(), model); |
| } |
| } |
| |
| /** |
| * Parses an element content model. |
| */ |
| private ElementContentModel readElements(StringBuffer acc) |
| throws IOException, XMLStreamException |
| { |
| int separator; |
| ElementContentModel model = new ElementContentModel(); |
| |
| // Parse first content particle |
| skipWhitespace(); |
| model.addContentParticle(readContentParticle(acc)); |
| // End or separator |
| skipWhitespace(); |
| int c = readCh(); |
| switch (c) |
| { |
| case 0x29: // ')' |
| acc.append(')'); |
| mark(1); |
| c = readCh(); |
| switch (c) |
| { |
| case 0x3f: // '?' |
| acc.append('?'); |
| model.min = 0; |
| model.max = 1; |
| break; |
| case 0x2a: // '*' |
| acc.append('*'); |
| model.min = 0; |
| model.max = -1; |
| break; |
| case 0x2b: // '+' |
| acc.append('+'); |
| model.min = 1; |
| model.max = -1; |
| break; |
| default: |
| reset(); |
| } |
| return model; // done |
| case 0x7c: // '|' |
| model.or = true; |
| // fall through |
| case 0x2c: // ',' |
| separator = c; |
| acc.append(Character.toChars(c)); |
| break; |
| default: |
| error("bad separator in content model", |
| "U+" + Integer.toHexString(c)); |
| return model; |
| } |
| // Parse subsequent content particles |
| while (true) |
| { |
| skipWhitespace(); |
| model.addContentParticle(readContentParticle(acc)); |
| skipWhitespace(); |
| c = readCh(); |
| if (c == 0x29) // ')' |
| { |
| acc.append(')'); |
| break; |
| } |
| else if (c != separator) |
| { |
| error("bad separator in content model", |
| "U+" + Integer.toHexString(c)); |
| return model; |
| } |
| else |
| acc.append(c); |
| } |
| // Check for occurrence indicator |
| mark(1); |
| c = readCh(); |
| switch (c) |
| { |
| case 0x3f: // '?' |
| acc.append('?'); |
| model.min = 0; |
| model.max = 1; |
| break; |
| case 0x2a: // '*' |
| acc.append('*'); |
| model.min = 0; |
| model.max = -1; |
| break; |
| case 0x2b: // '+' |
| acc.append('+'); |
| model.min = 1; |
| model.max = -1; |
| break; |
| default: |
| reset(); |
| } |
| return model; |
| } |
| |
| /** |
| * Parse a cp production. |
| */ |
| private ContentParticle readContentParticle(StringBuffer acc) |
| throws IOException, XMLStreamException |
| { |
| ContentParticle cp = new ContentParticle(); |
| if (tryRead('(')) |
| { |
| acc.append('('); |
| cp.content = readElements(acc); |
| } |
| else |
| { |
| String name = readNmtoken(true); |
| acc.append(name); |
| cp.content = name; |
| mark(1); |
| int c = readCh(); |
| switch (c) |
| { |
| case 0x3f: // '?' |
| acc.append('?'); |
| cp.min = 0; |
| cp.max = 1; |
| break; |
| case 0x2a: // '*' |
| acc.append('*'); |
| cp.min = 0; |
| cp.max = -1; |
| break; |
| case 0x2b: // '+' |
| acc.append('+'); |
| cp.min = 1; |
| cp.max = -1; |
| break; |
| default: |
| reset(); |
| } |
| } |
| return cp; |
| } |
| |
| /** |
| * Parse an attribute-list definition. |
| */ |
| private void readAttlistDecl() |
| throws IOException, XMLStreamException |
| { |
| requireWhitespace(); |
| boolean saved = expandPE; |
| expandPE = (inputStack.size() > 1); |
| String elementName = readNmtoken(true); |
| expandPE = saved; |
| boolean white = tryWhitespace(); |
| while (!tryRead('>')) |
| { |
| if (!white) |
| error("whitespace required before attribute definition"); |
| readAttDef(elementName); |
| white = tryWhitespace(); |
| } |
| } |
| |
| /** |
| * Parse a single attribute definition. |
| */ |
| private void readAttDef(String elementName) |
| throws IOException, XMLStreamException |
| { |
| String name = readNmtoken(true); |
| requireWhitespace(); |
| StringBuffer acc = new StringBuffer(); |
| HashSet values = new HashSet(); |
| String type = readAttType(acc, values); |
| if (validating) |
| { |
| if ("ID".equals(type)) |
| { |
| // VC: One ID per Element Type |
| for (Iterator i = doctype.attlistIterator(elementName); |
| i.hasNext(); ) |
| { |
| Map.Entry entry = (Map.Entry) i.next(); |
| AttributeDecl decl = (AttributeDecl) entry.getValue(); |
| if ("ID".equals(decl.type)) |
| error("element types must not have more than one ID " + |
| "attribute"); |
| } |
| } |
| else if ("NOTATION".equals(type)) |
| { |
| // VC: One Notation Per Element Type |
| for (Iterator i = doctype.attlistIterator(elementName); |
| i.hasNext(); ) |
| { |
| Map.Entry entry = (Map.Entry) i.next(); |
| AttributeDecl decl = (AttributeDecl) entry.getValue(); |
| if ("NOTATION".equals(decl.type)) |
| error("element types must not have more than one NOTATION " + |
| "attribute"); |
| } |
| // VC: No Notation on Empty Element |
| ContentModel model = doctype.getElementModel(elementName); |
| if (model != null && model.type == ContentModel.EMPTY) |
| error("attributes of type NOTATION must not be declared on an " + |
| "element declared EMPTY"); |
| } |
| } |
| String enumer = null; |
| if ("ENUMERATION".equals(type) || "NOTATION".equals(type)) |
| enumer = acc.toString(); |
| else |
| values = null; |
| requireWhitespace(); |
| readDefault(elementName, name, type, enumer, values); |
| } |
| |
| /** |
| * Parse an attribute type. |
| */ |
| private String readAttType(StringBuffer acc, HashSet values) |
| throws IOException, XMLStreamException |
| { |
| if (tryRead('(')) |
| { |
| readEnumeration(false, acc, values); |
| return "ENUMERATION"; |
| } |
| else |
| { |
| String typeString = readNmtoken(true); |
| if ("NOTATION".equals(typeString)) |
| { |
| readNotationType(acc, values); |
| return typeString; |
| } |
| else if ("CDATA".equals(typeString) || |
| "ID".equals(typeString) || |
| "IDREF".equals(typeString) || |
| "IDREFS".equals(typeString) || |
| "ENTITY".equals(typeString) || |
| "ENTITIES".equals(typeString) || |
| "NMTOKEN".equals(typeString) || |
| "NMTOKENS".equals(typeString)) |
| return typeString; |
| else |
| { |
| error("illegal attribute type", typeString); |
| return null; |
| } |
| } |
| } |
| |
| /** |
| * Parse an enumeration. |
| */ |
| private void readEnumeration(boolean isNames, StringBuffer acc, |
| HashSet values) |
| throws IOException, XMLStreamException |
| { |
| acc.append('('); |
| // first token |
| skipWhitespace(); |
| String token = readNmtoken(isNames); |
| acc.append(token); |
| values.add(token); |
| // subsequent tokens |
| skipWhitespace(); |
| while (!tryRead(')')) |
| { |
| require('|'); |
| acc.append('|'); |
| skipWhitespace(); |
| token = readNmtoken(isNames); |
| // VC: No Duplicate Tokens |
| if (validating && values.contains(token)) |
| error("duplicate token", token); |
| acc.append(token); |
| values.add(token); |
| skipWhitespace(); |
| } |
| acc.append(')'); |
| } |
| |
| /** |
| * Parse a notation type for an attribute. |
| */ |
| private void readNotationType(StringBuffer acc, HashSet values) |
| throws IOException, XMLStreamException |
| { |
| requireWhitespace(); |
| require('('); |
| readEnumeration(true, acc, values); |
| } |
| |
| /** |
| * Parse the default value for an attribute. |
| */ |
| private void readDefault(String elementName, String name, |
| String type, String enumeration, HashSet values) |
| throws IOException, XMLStreamException |
| { |
| int valueType = ATTRIBUTE_DEFAULT_SPECIFIED; |
| int flags = LIT_ATTRIBUTE; |
| String value = null, defaultType = null; |
| boolean saved = expandPE; |
| |
| if (!"CDATA".equals(type)) |
| flags |= LIT_NORMALIZE; |
| |
| expandPE = false; |
| if (tryRead('#')) |
| { |
| if (tryRead("FIXED")) |
| { |
| defaultType = "#FIXED"; |
| valueType = ATTRIBUTE_DEFAULT_FIXED; |
| requireWhitespace(); |
| value = readLiteral(flags, false); |
| } |
| else if (tryRead("REQUIRED")) |
| { |
| defaultType = "#REQUIRED"; |
| valueType = ATTRIBUTE_DEFAULT_REQUIRED; |
| } |
| else if (tryRead("IMPLIED")) |
| { |
| defaultType = "#IMPLIED"; |
| valueType = ATTRIBUTE_DEFAULT_IMPLIED; |
| } |
| else |
| error("illegal keyword for attribute default value"); |
| } |
| else |
| value = readLiteral(flags, false); |
| expandPE = saved; |
| if (validating) |
| { |
| if ("ID".equals(type)) |
| { |
| // VC: Attribute Default Value Syntactically Correct |
| if (value != null && !isNmtoken(value, true)) |
| error("default value must match Name production", value); |
| // VC: ID Attribute Default |
| if (valueType != ATTRIBUTE_DEFAULT_REQUIRED && |
| valueType != ATTRIBUTE_DEFAULT_IMPLIED) |
| error("ID attributes must have a declared default of " + |
| "#IMPLIED or #REQUIRED"); |
| } |
| else if (value != null) |
| { |
| // VC: Attribute Default Value Syntactically Correct |
| if ("IDREF".equals(type) || "ENTITY".equals(type)) |
| { |
| if (!isNmtoken(value, true)) |
| error("default value must match Name production", value); |
| } |
| else if ("IDREFS".equals(type) || "ENTITIES".equals(type)) |
| { |
| StringTokenizer st = new StringTokenizer(value); |
| while (st.hasMoreTokens()) |
| { |
| String token = st.nextToken(); |
| if (!isNmtoken(token, true)) |
| error("default value must match Name production", token); |
| } |
| } |
| else if ("NMTOKEN".equals(type) || "ENUMERATION".equals(type)) |
| { |
| if (!isNmtoken(value, false)) |
| error("default value must match Nmtoken production", value); |
| } |
| else if ("NMTOKENS".equals(type)) |
| { |
| StringTokenizer st = new StringTokenizer(value); |
| while (st.hasMoreTokens()) |
| { |
| String token = st.nextToken(); |
| if (!isNmtoken(token, false)) |
| error("default value must match Nmtoken production", |
| token); |
| } |
| } |
| } |
| } |
| // Register attribute def |
| AttributeDecl attribute = |
| new AttributeDecl(type, value, valueType, enumeration, values, |
| inputStack.size() != 1); |
| doctype.addAttributeDecl(elementName, name, attribute); |
| } |
| |
| /** |
| * Parse the EntityDecl production. |
| */ |
| private void readEntityDecl(boolean inExternalSubset) |
| throws IOException, XMLStreamException |
| { |
| int flags = 0; |
| // Check if parameter entity |
| boolean peFlag = false; |
| expandPE = false; |
| requireWhitespace(); |
| if (tryRead('%')) |
| { |
| peFlag = true; |
| requireWhitespace(); |
| } |
| expandPE = true; |
| // Read entity name |
| String name = readNmtoken(true); |
| if (name.indexOf(':') != -1) |
| error("illegal character ':' in entity name", name); |
| if (peFlag) |
| name = "%" + name; |
| requireWhitespace(); |
| mark(1); |
| int c = readCh(); |
| reset(); |
| if (c == 0x22 || c == 0x27) // " | ' |
| { |
| // Internal entity replacement text |
| String value = readLiteral(flags | LIT_DISABLE_EREF, true); |
| int ai = value.indexOf('&'); |
| while (ai != -1) |
| { |
| int sci = value.indexOf(';', ai); |
| if (sci == -1) |
| error("malformed reference in entity value", value); |
| String ref = value.substring(ai + 1, sci); |
| int[] cp = UnicodeReader.toCodePointArray(ref); |
| if (cp.length == 0) |
| error("malformed reference in entity value", value); |
| if (cp[0] == 0x23) // # |
| { |
| if (cp.length == 1) |
| error("malformed reference in entity value", value); |
| if (cp[1] == 0x78) // 'x' |
| { |
| if (cp.length == 2) |
| error("malformed reference in entity value", value); |
| for (int i = 2; i < cp.length; i++) |
| { |
| int x = cp[i]; |
| if (x < 0x30 || |
| (x > 0x39 && x < 0x41) || |
| (x > 0x46 && x < 0x61) || |
| x > 0x66) |
| error("malformed character reference in entity value", |
| value); |
| } |
| } |
| else |
| { |
| for (int i = 1; i < cp.length; i++) |
| { |
| int x = cp[i]; |
| if (x < 0x30 || x > 0x39) |
| error("malformed character reference in entity value", |
| value); |
| } |
| } |
| } |
| else |
| { |
| if (!isNameStartCharacter(cp[0], input.xml11)) |
| error("malformed reference in entity value", value); |
| for (int i = 1; i < cp.length; i++) |
| { |
| if (!isNameCharacter(cp[i], input.xml11)) |
| error("malformed reference in entity value", value); |
| } |
| } |
| ai = value.indexOf('&', sci); |
| } |
| doctype.addEntityDecl(name, value, inExternalSubset); |
| } |
| else |
| { |
| ExternalIds ids = readExternalIds(false, false); |
| // Check for NDATA |
| boolean white = tryWhitespace(); |
| if (!peFlag && tryRead("NDATA")) |
| { |
| if (!white) |
| error("whitespace required before NDATA"); |
| requireWhitespace(); |
| ids.notationName = readNmtoken(true); |
| } |
| doctype.addEntityDecl(name, ids, inExternalSubset); |
| } |
| // finish |
| skipWhitespace(); |
| require('>'); |
| } |
| |
| /** |
| * Parse the NotationDecl production. |
| */ |
| private void readNotationDecl(boolean inExternalSubset) |
| throws IOException, XMLStreamException |
| { |
| requireWhitespace(); |
| String notationName = readNmtoken(true); |
| if (notationName.indexOf(':') != -1) |
| error("illegal character ':' in notation name", notationName); |
| if (validating) |
| { |
| // VC: Unique Notation Name |
| ExternalIds notation = doctype.getNotation(notationName); |
| if (notation != null) |
| error("duplicate notation name", notationName); |
| } |
| requireWhitespace(); |
| ExternalIds ids = readExternalIds(true, false); |
| ids.notationName = notationName; |
| doctype.addNotationDecl(notationName, ids, inExternalSubset); |
| skipWhitespace(); |
| require('>'); |
| } |
| |
| /** |
| * Returns a tuple {publicId, systemId}. |
| */ |
| private ExternalIds readExternalIds(boolean inNotation, boolean isSubset) |
| throws IOException, XMLStreamException |
| { |
| int c; |
| int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; |
| ExternalIds ids = new ExternalIds(); |
| |
| if (tryRead("PUBLIC")) |
| { |
| requireWhitespace(); |
| ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags, false); |
| if (inNotation) |
| { |
| skipWhitespace(); |
| mark(1); |
| c = readCh(); |
| reset(); |
| if (c == 0x22 || c == 0x27) // " | ' |
| { |
| String href = readLiteral(flags, false); |
| ids.systemId = absolutize(input.systemId, href); |
| } |
| } |
| else |
| { |
| requireWhitespace(); |
| String href = readLiteral(flags, false); |
| ids.systemId = absolutize(input.systemId, href); |
| } |
| // Check valid URI characters |
| for (int i = 0; i < ids.publicId.length(); i++) |
| { |
| char d = ids.publicId.charAt(i); |
| if (d >= 'a' && d <= 'z') |
| continue; |
| if (d >= 'A' && d <= 'Z') |
| continue; |
| if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1) |
| continue; |
| error("illegal PUBLIC id character", |
| "U+" + Integer.toHexString(d)); |
| } |
| } |
| else if (tryRead("SYSTEM")) |
| { |
| requireWhitespace(); |
| String href = readLiteral(flags, false); |
| ids.systemId = absolutize(input.systemId, href); |
| } |
| else if (!isSubset) |
| { |
| error("missing SYSTEM or PUBLIC keyword"); |
| } |
| if (ids.systemId != null && !inNotation) |
| { |
| if (ids.systemId.indexOf('#') != -1) |
| error("SYSTEM id has a URI fragment", ids.systemId); |
| } |
| return ids; |
| } |
| |
| /** |
| * Parse the start of an element. |
| * @return the state of the parser afterwards (EMPTY_ELEMENT or CONTENT) |
| */ |
| private int readStartElement() |
| throws IOException, XMLStreamException |
| { |
| // Read element name |
| String elementName = readNmtoken(true); |
| attrs.clear(); |
| // Push namespace context |
| if (namespaceAware) |
| { |
| if (elementName.charAt(0) == ':' || |
| elementName.charAt(elementName.length() - 1) == ':') |
| error("not a QName", elementName); |
| namespaces.addFirst(new LinkedHashMap()); |
| } |
| // Read element content |
| boolean white = tryWhitespace(); |
| mark(1); |
| int c = readCh(); |
| while (c != 0x2f && c != 0x3e) // '/' | '>' |
| { |
| // Read attribute |
| reset(); |
| if (!white) |
| error("need whitespace between attributes"); |
| readAttribute(elementName); |
| white = tryWhitespace(); |
| mark(1); |
| c = readCh(); |
| } |
| // supply defaulted attributes |
| if (doctype != null) |
| { |
| for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); ) |
| { |
| Map.Entry entry = (Map.Entry) i.next(); |
| String attName = (String) entry.getKey(); |
| AttributeDecl decl = (AttributeDecl) entry.getValue(); |
| if (validating) |
| { |
| switch (decl.valueType) |
| { |
| case ATTRIBUTE_DEFAULT_REQUIRED: |
| // VC: Required Attribute |
| if (decl.value == null && !attributeSpecified(attName)) |
| error("value for " + attName + " attribute is required"); |
| break; |
| case ATTRIBUTE_DEFAULT_FIXED: |
| // VC: Fixed Attribute Default |
| for (Iterator j = attrs.iterator(); j.hasNext(); ) |
| { |
| Attribute a = (Attribute) j.next(); |
| if (attName.equals(a.name) && |
| !decl.value.equals(a.value)) |
| error("value for " + attName + " attribute must be " + |
| decl.value); |
| } |
| break; |
| } |
| } |
| if (namespaceAware && attName.equals("xmlns")) |
| { |
| LinkedHashMap ctx = |
| (LinkedHashMap) namespaces.getFirst(); |
| if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX)) |
| continue; // namespace was specified |
| } |
| else if (namespaceAware && attName.startsWith("xmlns:")) |
| { |
| LinkedHashMap ctx = |
| (LinkedHashMap) namespaces.getFirst(); |
| if (ctx.containsKey(attName.substring(6))) |
| continue; // namespace was specified |
| } |
| else if (attributeSpecified(attName)) |
| continue; |
| if (decl.value == null) |
| continue; |
| // VC: Standalone Document Declaration |
| if (validating && decl.external && xmlStandalone == Boolean.TRUE) |
| error("standalone must be 'no' if attributes inherit values " + |
| "from externally declared markup declarations"); |
| Attribute attr = |
| new Attribute(attName, decl.type, false, decl.value); |
| if (namespaceAware) |
| { |
| if (!addNamespace(attr)) |
| attrs.add(attr); |
| } |
| else |
| attrs.add(attr); |
| } |
| } |
| if (baseAware) |
| { |
| String uri = getAttributeValue(XMLConstants.XML_NS_URI, "base"); |
| String base = getXMLBase(); |
| bases.addFirst(absolutize(base, uri)); |
| } |
| if (namespaceAware) |
| { |
| // check prefix bindings |
| int ci = elementName.indexOf(':'); |
| if (ci != -1) |
| { |
| String prefix = elementName.substring(0, ci); |
| String uri = getNamespaceURI(prefix); |
| if (uri == null) |
| error("unbound element prefix", prefix); |
| else if (input.xml11 && "".equals(uri)) |
| error("XML 1.1 unbound element prefix", prefix); |
| } |
| for (Iterator i = attrs.iterator(); i.hasNext(); ) |
| { |
| Attribute attr = (Attribute) i.next(); |
| if (attr.prefix != null && |
| !XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix)) |
| { |
| String uri = getNamespaceURI(attr.prefix); |
| if (uri == null) |
| error("unbound attribute prefix", attr.prefix); |
| else if (input.xml11 && "".equals(uri)) |
| error("XML 1.1 unbound attribute prefix", attr.prefix); |
| } |
| } |
| } |
| if (validating && doctype != null) |
| { |
| validateStartElement(elementName); |
| currentContentModel = doctype.getElementModel(elementName); |
| if (currentContentModel == null) |
| error("no element declaration", elementName); |
| validationStack.add(new LinkedList()); |
| } |
| // make element name available for read |
| buf.setLength(0); |
| buf.append(elementName); |
| // push element onto stack |
| stack.addLast(elementName); |
| switch (c) |
| { |
| case 0x3e: // '>' |
| return CONTENT; |
| case 0x2f: // '/' |
| require('>'); |
| return EMPTY_ELEMENT; |
| } |
| return -1; // to satisfy compiler |
| } |
| |
| /** |
| * Indicates whether the specified attribute name was specified for the |
| * current element. |
| */ |
| private boolean attributeSpecified(String attName) |
| { |
| for (Iterator j = attrs.iterator(); j.hasNext(); ) |
| { |
| Attribute a = (Attribute) j.next(); |
| if (attName.equals(a.name)) |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Parse an attribute. |
| */ |
| private void readAttribute(String elementName) |
| throws IOException, XMLStreamException |
| { |
| // Read attribute name |
| String attributeName = readNmtoken(true); |
| String type = getAttributeType(elementName, attributeName); |
| readEq(); |
| // Read literal |
| final int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF; |
| String value = (type == null || "CDATA".equals(type)) ? |
| readLiteral(flags, false) : readLiteral(flags | LIT_NORMALIZE, false); |
| // add attribute event |
| Attribute attr = this.new Attribute(attributeName, type, true, value); |
| if (namespaceAware) |
| { |
| if (attributeName.charAt(0) == ':' || |
| attributeName.charAt(attributeName.length() - 1) == ':') |
| error("not a QName", attributeName); |
| else if (attributeName.equals("xmlns")) |
| { |
| LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); |
| if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX)) |
| error("duplicate default namespace"); |
| } |
| else if (attributeName.startsWith("xmlns:")) |
| { |
| LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); |
| if (ctx.containsKey(attributeName.substring(6))) |
| error("duplicate namespace", attributeName.substring(6)); |
| } |
| else if (attrs.contains(attr)) |
| error("duplicate attribute", attributeName); |
| } |
| else if (attrs.contains(attr)) |
| error("duplicate attribute", attributeName); |
| if (validating && doctype != null) |
| { |
| // VC: Attribute Value Type |
| AttributeDecl decl = |
| doctype.getAttributeDecl(elementName, attributeName); |
| if (decl == null) |
| error("attribute must be declared", attributeName); |
| if ("ENUMERATION".equals(decl.type)) |
| { |
| // VC: Enumeration |
| if (!decl.values.contains(value)) |
| error("value does not match enumeration " + decl.enumeration, |
| value); |
| } |
| else if ("ID".equals(decl.type)) |
| { |
| // VC: ID |
| if (!isNmtoken(value, true)) |
| error("ID values must match the Name production"); |
| if (ids.contains(value)) |
| error("Duplicate ID", value); |
| ids.add(value); |
| } |
| else if ("IDREF".equals(decl.type) || "IDREFS".equals(decl.type)) |
| { |
| StringTokenizer st = new StringTokenizer(value); |
| while (st.hasMoreTokens()) |
| { |
| String token = st.nextToken(); |
| // VC: IDREF |
| if (!isNmtoken(token, true)) |
| error("IDREF values must match the Name production"); |
| idrefs.add(token); |
| } |
| } |
| else if ("NMTOKEN".equals(decl.type) || "NMTOKENS".equals(decl.type)) |
| { |
| StringTokenizer st = new StringTokenizer(value); |
| while (st.hasMoreTokens()) |
| { |
| String token = st.nextToken(); |
| // VC: Name Token |
| if (!isNmtoken(token, false)) |
| error("NMTOKEN values must match the Nmtoken production"); |
| } |
| } |
| else if ("ENTITY".equals(decl.type)) |
| { |
| // VC: Entity Name |
| if (!isNmtoken(value, true)) |
| error("ENTITY values must match the Name production"); |
| Object entity = doctype.getEntity(value); |
| if (entity == null || !(entity instanceof ExternalIds) || |
| ((ExternalIds) entity).notationName == null) |
| error("ENTITY values must match the name of an unparsed " + |
| "entity declared in the DTD"); |
| } |
| else if ("NOTATION".equals(decl.type)) |
| { |
| if (!decl.values.contains(value)) |
| error("NOTATION values must match a declared notation name", |
| value); |
| // VC: Notation Attributes |
| ExternalIds notation = doctype.getNotation(value); |
| if (notation == null) |
| error("NOTATION values must match the name of a notation " + |
| "declared in the DTD", value); |
| } |
| } |
| if (namespaceAware) |
| { |
| if (!addNamespace(attr)) |
| attrs.add(attr); |
| } |
| else |
| attrs.add(attr); |
| } |
| |
| /** |
| * Determines whether the specified attribute is a namespace declaration, |
| * and adds it to the current namespace context if so. Returns false if |
| * the attribute is an ordinary attribute. |
| */ |
| private boolean addNamespace(Attribute attr) |
| throws XMLStreamException |
| { |
| if ("xmlns".equals(attr.name)) |
| { |
| LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); |
| if (ctx.get(XMLConstants.DEFAULT_NS_PREFIX) != null) |
| error("Duplicate default namespace declaration"); |
| if (XMLConstants.XML_NS_URI.equals(attr.value)) |
| error("can't bind XML namespace"); |
| ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value); |
| return true; |
| } |
| else if ("xmlns".equals(attr.prefix)) |
| { |
| LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); |
| if (ctx.get(attr.localName) != null) |
| error("Duplicate namespace declaration for prefix", |
| attr.localName); |
| if (XMLConstants.XML_NS_PREFIX.equals(attr.localName)) |
| { |
| if (!XMLConstants.XML_NS_URI.equals(attr.value)) |
| error("can't redeclare xml prefix"); |
| else |
| return false; // treat as attribute |
| } |
| if (XMLConstants.XML_NS_URI.equals(attr.value)) |
| error("can't bind non-xml prefix to XML namespace"); |
| if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.localName)) |
| error("can't redeclare xmlns prefix"); |
| if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value)) |
| error("can't bind non-xmlns prefix to XML Namespace namespace"); |
| if ("".equals(attr.value) && !input.xml11) |
| error("illegal use of 1.1-style prefix unbinding in 1.0 document"); |
| ctx.put(attr.localName, attr.value); |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Parse a closing tag. |
| */ |
| private void readEndElement() |
| throws IOException, XMLStreamException |
| { |
| // pop element off stack |
| String expected = (String) stack.removeLast(); |
| require(expected); |
| skipWhitespace(); |
| require('>'); |
| // Make element name available |
| buf.setLength(0); |
| buf.append(expected); |
| if (validating && doctype != null) |
| endElementValidationHook(); |
| } |
| |
| /** |
| * Validate the end of an element. |
| * Called on an end-element or empty element if validating. |
| */ |
| private void endElementValidationHook() |
| throws XMLStreamException |
| { |
| validateEndElement(); |
| validationStack.removeLast(); |
| if (stack.isEmpty()) |
| currentContentModel = null; |
| else |
| { |
| String parent = (String) stack.getLast(); |
| currentContentModel = doctype.getElementModel(parent); |
| } |
| } |
| |
| /** |
| * Parse a comment. |
| */ |
| private void readComment(boolean inDTD) |
| throws IOException, XMLStreamException |
| { |
| boolean saved = expandPE; |
| expandPE = false; |
| buf.setLength(0); |
| readUntil(TEST_END_COMMENT); |
| require('>'); |
| expandPE = saved; |
| if (inDTD) |
| doctype.addComment(buf.toString()); |
| } |
| |
| /** |
| * Parse a processing instruction. |
| */ |
| private void readPI(boolean inDTD) |
| throws IOException, XMLStreamException |
| { |
| boolean saved = expandPE; |
| expandPE = false; |
| piTarget = readNmtoken(true); |
| if (piTarget.indexOf(':') != -1) |
| error("illegal character in PI target", new Character(':')); |
| if ("xml".equalsIgnoreCase(piTarget)) |
| error("illegal PI target", piTarget); |
| if (tryRead(TEST_END_PI)) |
| piData = null; |
| else |
| { |
| if (!tryWhitespace()) |
| error("whitespace required between PI target and data"); |
| buf.setLength(0); |
| readUntil(TEST_END_PI); |
| piData = buf.toString(); |
| } |
| expandPE = saved; |
| if (inDTD) |
| doctype.addPI(piTarget, piData); |
| } |
| |
| /** |
| * Parse an entity reference. |
| */ |
| private void readReference() |
| throws IOException, XMLStreamException |
| { |
| buf.setLength(0); |
| String entityName = readNmtoken(true); |
| require(';'); |
| buf.setLength(0); |
| buf.append(entityName); |
| } |
| |
| /** |
| * Read an CDATA section. |
| */ |
| private void readCDSect() |
| throws IOException, XMLStreamException |
| { |
| buf.setLength(0); |
| readUntil(TEST_END_CDATA); |
| } |
| |
| /** |
| * Read character data. |
| * @return the type of text read (CHARACTERS or SPACE) |
| */ |
| private int readCharData(String prefix) |
| throws IOException, XMLStreamException |
| { |
| boolean white = true; |
| buf.setLength(0); |
| if (prefix != null) |
| buf.append(prefix); |
| boolean done = false; |
| boolean entities = false; |
| while (!done) |
| { |
| // Block read |
| mark(tmpBuf.length); |
| int len = read(tmpBuf, 0, tmpBuf.length); |
| if (len == -1) |
| { |
| if (inputStack.size() > 1) |
| { |
| popInput(); |
| // report end-entity |
| done = true; |
| } |
| else |
| throw new EOFException(); |
| } |
| for (int i = 0; i < len && !done; i++) |
| { |
| int c = tmpBuf[i]; |
| switch (c) |
| { |
| case 0x20: |
| case 0x09: |
| case 0x0a: |
| case 0x0d: |
| buf.append(Character.toChars(c)); |
| break; // whitespace |
| case 0x26: // '&' |
| reset(); |
| read(tmpBuf, 0, i); |
| // character reference? |
| mark(3); |
| c = readCh(); // & |
| c = readCh(); |
| if (c == 0x23) // '#' |
| { |
| mark(1); |
| c = readCh(); |
| boolean hex = (c == 0x78); // 'x' |
| if (!hex) |
| reset(); |
| char[] ch = readCharacterRef(hex ? 16 : 10); |
| buf.append(ch, 0, ch.length); |
| for (int j = 0; j < ch.length; j++) |
| { |
| switch (ch[j]) |
| { |
| case 0x20: |
| case 0x09: |
| case 0x0a: |
| case 0x0d: |
| break; // whitespace |
| default: |
| white = false; |
| } |
| } |
| } |
| else |
| { |
| // entity reference |
| reset(); |
| c = readCh(); // & |
| String entityName = readNmtoken(true); |
| require(';'); |
| String text = |
| (String) PREDEFINED_ENTITIES.get(entityName); |
| if (text != null) |
| buf.append(text); |
| else |
| { |
| pushInput("", "&" + entityName + ";", false, false); |
| done = true; |
| break; |
| } |
| } |
| // continue processing |
| i = -1; |
| mark(tmpBuf.length); |
| len = read(tmpBuf, 0, tmpBuf.length); |
| if (len == -1) |
| { |
| if (inputStack.size() > 1) |
| { |
| popInput(); |
| done = true; |
| } |
| else |
| throw new EOFException(); |
| } |
| entities = true; |
| break; // end of text sequence |
| case 0x3e: // '>' |
| int l = buf.length(); |
| if (l > 1 && |
| buf.charAt(l - 1) == ']' && |
| buf.charAt(l - 2) == ']') |
| error("Character data may not contain unescaped ']]>'"); |
| buf.append(Character.toChars(c)); |
| break; |
| case 0x3c: // '<' |
| reset(); |
| // read i characters |
| int count = 0, remaining = i; |
| do |
| { |
| int r = read(tmpBuf, 0, remaining); |
| count += r; |
| remaining -= r; |
| } |
| while (count < i); |
| i = len; |
| if (coalescing && tryRead(TEST_CDATA)) |
| readUntil(TEST_END_CDATA); // read CDATA section into buf |
| else |
| done = true; // end of text sequence |
| break; |
| default: |
| if (input.xml11) |
| { |
| if (!isXML11Char(c) || isXML11RestrictedChar(c)) |
| error("illegal XML 1.1 character", |
| "U+" + Integer.toHexString(c)); |
| } |
| else if (!isChar(c)) |
| error("illegal XML character", |
| "U+" + Integer.toHexString(c)); |
| white = false; |
| buf.append(Character.toChars(c)); |
| } |
| } |
| // if text buffer >= 2MB, return it as a chunk |
| // to avoid excessive memory use |
| if (buf.length() >= 2097152) |
| done = true; |
| } |
| if (entities) |
| normalizeCRLF(buf); |
| return white ? XMLStreamConstants.SPACE : XMLStreamConstants.CHARACTERS; |
| } |
| |
| /** |
| * Expands the specified entity. |
| */ |
| private void expandEntity(String name, boolean inAttr, boolean normalize) |
| throws IOException, XMLStreamException |
| { |
| if (doctype != null) |
| { |
| Object value = doctype.getEntity(name); |
| if (value != null) |
| { |
| if (xmlStandalone == Boolean.TRUE) |
| { |
| // VC: Standalone Document Declaration |
| if (doctype.isEntityExternal(name)) |
| error("reference to external entity in standalone document"); |
| else if (value instanceof ExternalIds) |
| { |
| ExternalIds ids = (ExternalIds) value; |
| if (ids.notationName != null && |
| doctype.isNotationExternal(ids.notationName)) |
| error("reference to external notation in " + |
| "standalone document"); |
| } |
| } |
| if (value instanceof String) |
| { |
| String text = (String) value; |
| if (inAttr && text.indexOf('<') != -1) |
| error("< in attribute value"); |
| pushInput(name, text, !inAttr, normalize); |
| } |
| else if (inAttr) |
| error("reference to external entity in attribute value", name); |
| else |
| pushInput(name, (ExternalIds) value, !inAttr, normalize); |
| return; |
| } |
| } |
| error("reference to undeclared entity", name); |
| } |
| |
| /** |
| * Indicates whether the specified entity is unparsed. |
| */ |
| private boolean isUnparsedEntity(String name) |
| { |
| if (doctype != null) |
| { |
| Object value = doctype.getEntity(name); |
| if (value != null && value instanceof ExternalIds) |
| return ((ExternalIds) value).notationName != null; |
| } |
| return false; |
| } |
| |
| /** |
| * Read an equals sign. |
| */ |
| private void readEq() |
| throws IOException, XMLStreamException |
| { |
| skipWhitespace(); |
| require('='); |
| skipWhitespace(); |
| } |
| |
| /** |
| * Character read for reading literals. |
| * @param recognizePEs whether to recognize parameter-entity references |
| */ |
| private int literalReadCh(boolean recognizePEs) |
| throws IOException, XMLStreamException |
| { |
| int c = recognizePEs ? readCh() : read(); |
| while (c == -1) |
| { |
| if (inputStack.size() > 1) |
| { |
| inputStack.removeLast(); |
| input = (Input) inputStack.getLast(); |
| // Don't issue end-entity |
| c = recognizePEs ? readCh() : read(); |
| } |
| else |
| throw new EOFException(); |
| } |
| return c; |
| } |
| |
| /** |
| * Read a string literal. |
| */ |
| private String readLiteral(int flags, boolean recognizePEs) |
| throws IOException, XMLStreamException |
| { |
| boolean saved = expandPE; |
| int delim = readCh(); |
| if (delim != 0x27 && delim != 0x22) |
| error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim)); |
| literalBuf.setLength(0); |
| if ((flags & LIT_DISABLE_PE) != 0) |
| expandPE = false; |
| boolean entities = false; |
| int inputStackSize = inputStack.size(); |
| do |
| { |
| int c = literalReadCh(recognizePEs); |
| if (c == delim && inputStackSize == inputStack.size()) |
| break; |
| switch (c) |
| { |
| case 0x0a: |
| case 0x0d: |
| if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0) |
| c = 0x20; // normalize to space |
| break; |
| case 0x09: |
| if ((flags & LIT_ATTRIBUTE) != 0) |
| c = 0x20; // normalize to space |
| break; |
| case 0x26: // '&' |
| mark(2); |
| c = readCh(); |
| if (c == 0x23) // '#' |
| { |
| if ((flags & LIT_DISABLE_CREF) != 0) |
| { |
| reset(); |
| c = 0x26; // '&' |
| } |
| else |
| { |
| mark(1); |
| c = readCh(); |
| boolean hex = (c == 0x78); // 'x' |
| if (!hex) |
| reset(); |
| char[] ref = readCharacterRef(hex ? 16 : 10); |
| for (int i = 0; i < ref.length; i++) |
| literalBuf.append(ref[i]); |
| entities = true; |
| continue; |
| } |
| } |
| else |
| { |
| if ((flags & LIT_DISABLE_EREF) != 0) |
| { |
| reset(); |
| c = 0x26; // '&' |
| } |
| else |
| { |
| reset(); |
| String entityName = readNmtoken(true); |
| require(';'); |
| String text = |
| (String) PREDEFINED_ENTITIES.get(entityName); |
| if (text != null) |
| literalBuf.append(text); |
| else |
| expandEntity(entityName, |
| (flags & LIT_ATTRIBUTE) != 0, |
| true); |
| entities = true; |
| continue; |
| } |
| } |
| break; |
| case 0x3c: // '<' |
| if ((flags & LIT_ATTRIBUTE) != 0) |
| error("attribute values may not contain '<'"); |
| break; |
| case -1: |
| if (inputStack.size() > 1) |
| { |
| popInput(); |
| continue; |
| } |
| throw new EOFException(); |
| default: |
| if ((c < 0x0020 || c > 0xfffd) || |
| (c >= 0xd800 && c < 0xdc00) || |
| (input.xml11 && (c >= 0x007f) && |
| (c <= 0x009f) && (c != 0x0085))) |
| error("illegal character", "U+" + Integer.toHexString(c)); |
| } |
| literalBuf.append(Character.toChars(c)); |
| } |
| while (true); |
| expandPE = saved; |
| if (entities) |
| normalizeCRLF(literalBuf); |
| if ((flags & LIT_NORMALIZE) > 0) |
| literalBuf = normalize(literalBuf); |
| return literalBuf.toString(); |
| } |
| |
| /** |
| * Performs attribute-value normalization of the text buffer. |
| * This discards leading and trailing whitespace, and replaces sequences |
| * of whitespace with a single space. |
| */ |
| private StringBuffer normalize(StringBuffer buf) |
| { |
| StringBuffer acc = new StringBuffer(); |
| int len = buf.length(); |
| int avState = 0; |
| for (int i = 0; i < len; i++) |
| { |
| char c = buf.charAt(i); |
| if (c == ' ') |
| avState = (avState == 0) ? 0 : 1; |
| else |
| { |
| if (avState == 1) |
| acc.append(' '); |
| acc.append(c); |
| avState = 2; |
| } |
| } |
| return acc; |
| } |
| |
| /** |
| * Replace any CR/LF pairs in the buffer with LF. |
| * This may be necessary if combinations of CR or LF were declared as |
| * (character) entity references in the input. |
| */ |
| private void normalizeCRLF(StringBuffer buf) |
| { |
| int len = buf.length() - 1; |
| for (int i = 0; i < len; i++) |
| { |
| char c = buf.charAt(i); |
| if (c == '\r' && buf.charAt(i + 1) == '\n') |
| { |
| buf.deleteCharAt(i--); |
| len--; |
| } |
| } |
| } |
| |
| /** |
| * Parse and expand a parameter entity reference. |
| */ |
| private void expandPEReference() |
| throws IOException, XMLStreamException |
| { |
| String name = readNmtoken(true, new StringBuffer()); |
| require(';'); |
| mark(1); // ensure we don't reset to before the semicolon |
| if (doctype != null) |
| { |
| String entityName = "%" + name; |
| Object entity = doctype.getEntity(entityName); |
| if (entity != null) |
| { |
| if (xmlStandalone == Boolean.TRUE) |
| { |
| if (doctype.isEntityExternal(entityName)) |
| error("reference to external parameter entity in " + |
| "standalone document"); |
| } |
| if (entity instanceof String) |
| { |
| pushInput(name, (String) entity, false, input.normalize); |
| //pushInput(name, " " + (String) entity + " "); |
| } |
| else |
| { |
| //pushInput("", " "); |
| pushInput(name, (ExternalIds) entity, false, input.normalize); |
| //pushInput("", " "); |
| } |
| } |
| else |
| error("reference to undeclared parameter entity", name); |
| } |
| else |
| error("reference to parameter entity without doctype", name); |
| } |
| |
| /** |
| * Parse the digits in a character reference. |
| * @param base the base of the digits (10 or 16) |
| */ |
| private char[] readCharacterRef(int base) |
| throws IOException, XMLStreamException |
| { |
| StringBuffer b = new StringBuffer(); |
| for (int c = readCh(); c != 0x3b && c != -1; c = readCh()) |
| b.append(Character.toChars(c)); |
| try |
| { |
| int ord = Integer.parseInt(b.toString(), base); |
| if (input.xml11) |
| { |
| if (!isXML11Char(ord)) |
| error("illegal XML 1.1 character reference " + |
| "U+" + Integer.toHexString(ord)); |
| } |
| else |
| { |
| if ((ord < 0x20 && !(ord == 0x0a || ord == 0x09 || ord == 0x0d)) |
| || (ord >= 0xd800 && ord <= 0xdfff) |
| || ord == 0xfffe || ord == 0xffff |
| || ord > 0x0010ffff) |
| error("illegal XML character reference " + |
| "U+" + Integer.toHexString(ord)); |
| } |
| return Character.toChars(ord); |
| } |
| catch (NumberFormatException e) |
| { |
| error("illegal characters in character reference", b.toString()); |
| return null; |
| } |
| } |
| |
| /** |
| * Parses an NMTOKEN or Name production. |
| * @param isName if a Name, otherwise an NMTOKEN |
| */ |
| private String readNmtoken(boolean isName) |
| throws IOException, XMLStreamException |
| { |
| return readNmtoken(isName, nmtokenBuf); |
| } |
| |
| /** |
| * Parses an NMTOKEN or Name production using the specified buffer. |
| * @param isName if a Name, otherwise an NMTOKEN |
| * @param buf the character buffer to use |
| */ |
| private String readNmtoken(boolean isName, StringBuffer buf) |
| throws IOException, XMLStreamException |
| { |
| buf.setLength(0); |
| int c = readCh(); |
| if (isName) |
| { |
| if (!isNameStartCharacter(c, input.xml11)) |
| error("not a name start character", |
| "U+" + Integer.toHexString(c)); |
| } |
| else |
| { |
| if (!isNameCharacter(c, input.xml11)) |
| error("not a name character", |
| "U+" + Integer.toHexString(c)); |
| } |
| buf.append(Character.toChars(c)); |
| do |
| { |
| mark(1); |
| c = readCh(); |
| switch (c) |
| { |
| case 0x25: // '%' |
| case 0x3c: // '<' |
| case 0x3e: // '>' |
| case 0x26: // '&' |
| case 0x2c: // ',' |
| case 0x7c: // '|' |
| case 0x2a: // '*' |
| case 0x2b: // '+' |
| case 0x3f: // '?' |
| case 0x29: // ')' |
| case 0x3d: // '=' |
| case 0x27: // '\'' |
| case 0x22: // '"' |
| case 0x5b: // '[' |
| case 0x20: // ' ' |
| case 0x09: // '\t' |
| case 0x0a: // '\n' |
| case 0x0d: // '\r' |
| case 0x3b: // ';' |
| case 0x2f: // '/' |
| case -1: |
| reset(); |
| return intern(buf.toString()); |
| default: |
| if (!isNameCharacter(c, input.xml11)) |
| error("not a name character", |
| "U+" + Integer.toHexString(c)); |
| else |
| buf.append(Character.toChars(c)); |
| } |
| } |
| while (true); |
| } |
| |
| /** |
| * Indicates whether the specified Unicode character is an XML 1.1 Char. |
| */ |
| public static boolean isXML11Char(int c) |
| { |
| return ((c >= 0x0001 && c <= 0xD7FF) || |
| (c >= 0xE000 && c < 0xFFFE) || |
| (c >= 0x10000 && c <= 0x10FFFF)); |
| } |
| |
| /** |
| * Indicates whether the specified Unicode character is an XML 1.1 |
| * RestrictedChar. |
| */ |
| public static boolean isXML11RestrictedChar(int c) |
| { |
| return ((c >= 0x0001 && c <= 0x0008) || |
| (c >= 0x000B && c <= 0x000C) || |
| (c >= 0x000E && c <= 0x001F) || |
| (c >= 0x007F && c <= 0x0084) || |
| (c >= 0x0086 && c <= 0x009F)); |
| } |
| |
| /** |
| * Indicates whether the specified text matches the Name or Nmtoken |
| * production. |
| */ |
| private boolean isNmtoken(String text, boolean isName) |
| { |
| try |
| { |
| int[] cp = UnicodeReader.toCodePointArray(text); |
| if (cp.length == 0) |
| return false; |
| if (isName) |
| { |
| if (!isNameStartCharacter(cp[0], input.xml11)) |
| return false; |
| } |
| else |
| { |
| if (!isNameCharacter(cp[0], input.xml11)) |
| return false; |
| } |
| for (int i = 1; i < cp.length; i++) |
| { |
| if (!isNameCharacter(cp[i], input.xml11)) |
| return false; |
| } |
| return true; |
| } |
| catch (IOException e) |
| { |
| return false; |
| } |
| } |
| |
| /** |
| * Indicates whether the specified Unicode character is a Name start |
| * character. |
| */ |
| public static boolean isNameStartCharacter(int c, boolean xml11) |
| { |
| if (xml11) |
| return ((c >= 0x0041 && c <= 0x005a) || |
| (c >= 0x0061 && c <= 0x007a) || |
| c == 0x3a | |
| c == 0x5f | |
| (c >= 0xC0 && c <= 0xD6) || |
| (c >= 0xD8 && c <= 0xF6) || |
| (c >= 0xF8 && c <= 0x2FF) || |
| (c >= 0x370 && c <= 0x37D) || |
| (c >= 0x37F && c <= 0x1FFF) || |
| (c >= 0x200C && c <= 0x200D) || |
| (c >= 0x2070 && c <= 0x218F) || |
| (c >= 0x2C00 && c <= 0x2FEF) || |
| (c >= 0x3001 && c <= 0xD7FF) || |
| (c >= 0xF900 && c <= 0xFDCF) || |
| (c >= 0xFDF0 && c <= 0xFFFD) || |
| (c >= 0x10000 && c <= 0xEFFFF)); |
| else |
| return (c == 0x5f || c == 0x3a || isLetter(c)); |
| } |
| |
| /** |
| * Indicates whether the specified Unicode character is a Name non-initial |
| * character. |
| */ |
| public static boolean isNameCharacter(int c, boolean xml11) |
| { |
| if (xml11) |
| return ((c >= 0x0041 && c <= 0x005a) || |
| (c >= 0x0061 && c <= 0x007a) || |
| (c >= 0x0030 && c <= 0x0039) || |
| c == 0x3a | |
| c == 0x5f | |
| c == 0x2d | |
| c == 0x2e | |
| c == 0xB7 | |
| (c >= 0xC0 && c <= 0xD6) || |
| (c >= 0xD8 && c <= 0xF6) || |
| (c >= 0xF8 && c <= 0x2FF) || |
| (c >= 0x300 && c <= 0x37D) || |
| (c >= 0x37F && c <= 0x1FFF) || |
| (c >= 0x200C && c <= 0x200D) || |
| (c >= 0x203F && c <= 0x2040) || |
| (c >= 0x2070 && c <= 0x218F) || |
| (c >= 0x2C00 && c <= 0x2FEF) || |
| (c >= 0x3001 && c <= 0xD7FF) || |
| (c >= 0xF900 && c <= 0xFDCF) || |
| (c >= 0xFDF0 && c <= 0xFFFD) || |
| (c >= 0x10000 && c <= 0xEFFFF)); |
| else |
| return (c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a || |
| isLetter(c) || isDigit(c) || |
| isCombiningChar(c) || isExtender(c)); |
| } |
| |
| /** |
| * Indicates whether the specified Unicode character matches the Letter |
| * production. |
| */ |
| public static boolean isLetter(int c) |
| { |
| if ((c >= 0x0041 && c <= 0x005A) || |
| (c >= 0x0061 && c <= 0x007A) || |
| (c >= 0x00C0 && c <= 0x00D6) || |
| (c >= 0x00D8 && c <= 0x00F6) || |
| (c >= 0x00F8 && c <= 0x00FF) || |
| (c >= 0x0100 && c <= 0x0131) || |
| (c >= 0x0134 && c <= 0x013E) || |
| (c >= 0x0141 && c <= 0x0148) || |
| (c >= 0x014A && c <= 0x017E) || |
| (c >= 0x0180 && c <= 0x01C3) || |
| (c >= 0x01CD && c <= 0x01F0) || |
| (c >= 0x01F4 && c <= 0x01F5) || |
| (c >= 0x01FA && c <= 0x0217) || |
| (c >= 0x0250 && c <= 0x02A8) || |
| (c >= 0x02BB && c <= 0x02C1) || |
| c == 0x0386 || |
| (c >= 0x0388 && c <= 0x038A) || |
| c == 0x038C || |
| (c >= 0x038E && c <= 0x03A1) || |
| (c >= 0x03A3 && c <= 0x03CE) || |
| (c >= 0x03D0 && c <= 0x03D6) || |
| c == 0x03DA || |
| c == 0x03DC || |
| c == 0x03DE || |
| c == 0x03E0 || |
| (c >= 0x03E2 && c <= 0x03F3) || |
| (c >= 0x0401 && c <= 0x040C) || |
| (c >= 0x040E && c <= 0x044F) || |
| (c >= 0x0451 && c <= 0x045C) || |
| (c >= 0x045E && c <= 0x0481) || |
| (c >= 0x0490 && c <= 0x04C4) || |
| (c >= 0x04C7 && c <= 0x04C8) || |
| (c >= 0x04CB && c <= 0x04CC) || |
| (c >= 0x04D0 && c <= 0x04EB) || |
| (c >= 0x04EE && c <= 0x04F5) || |
| (c >= 0x04F8 && c <= 0x04F9) || |
| (c >= 0x0531 && c <= 0x0556) || |
| c == 0x0559 || |
| (c >= 0x0561 && c <= 0x0586) || |
| (c >= 0x05D0 && c <= 0x05EA) || |
| (c >= 0x05F0 && c <= 0x05F2) || |
| (c >= 0x0621 && c <= 0x063A) || |
| (c >= 0x0641 && c <= 0x064A) || |
| (c >= 0x0671 && c <= 0x06B7) || |
| (c >= 0x06BA && c <= 0x06BE) || |
| (c >= 0x06C0 && c <= 0x06CE) || |
| (c >= 0x06D0 && c <= 0x06D3) || |
| c == 0x06D5 || |
| (c >= 0x06E5 && c <= 0x06E6) || |
| (c >= 0x0905 && c <= 0x0939) || |
| c == 0x093D || |
| (c >= 0x0958 && c <= 0x0961) || |
| (c >= 0x0985 && c <= 0x098C) || |
| (c >= 0x098F && c <= 0x0990) || |
| (c >= 0x0993 && c <= 0x09A8) || |
| (c >= 0x09AA && c <= 0x09B0) || |
| c == 0x09B2 || |
| (c >= 0x09B6 && c <= 0x09B9) || |
| (c >= 0x09DC && c <= 0x09DD) || |
| (c >= 0x09DF && c <= 0x09E1) || |
| (c >= 0x09F0 && c <= 0x09F1) || |
| (c >= 0x0A05 && c <= 0x0A0A) || |
| (c >= 0x0A0F && c <= 0x0A10) || |
| (c >= 0x0A13 && c <= 0x0A28) || |
| (c >= 0x0A2A && c <= 0x0A30) || |
| (c >= 0x0A32 && c <= 0x0A33) || |
| (c >= 0x0A35 && c <= 0x0A36) || |
| (c >= 0x0A38 && c <= 0x0A39) || |
| (c >= 0x0A59 && c <= 0x0A5C) || |
| c == 0x0A5E || |
| (c >= 0x0A72 && c <= 0x0A74) || |
| (c >= 0x0A85 && c <= 0x0A8B) || |
| c == 0x0A8D || |
| (c >= 0x0A8F && c <= 0x0A91) || |
| (c >= 0x0A93 && c <= 0x0AA8) || |
| (c >= 0x0AAA && c <= 0x0AB0) || |
| (c >= 0x0AB2 && c <= 0x0AB3) || |
| (c >= 0x0AB5 && c <= 0x0AB9) || |
| c == 0x0ABD || |
| c == 0x0AE0 || |
| (c >= 0x0B05 && c <= 0x0B0C) || |
| (c >= 0x0B0F && c <= 0x0B10) || |
| (c >= 0x0B13 && c <= 0x0B28) || |
| (c >= 0x0B2A && c <= 0x0B30) || |
| (c >= 0x0B32 && c <= 0x0B33) || |
| (c >= 0x0B36 && c <= 0x0B39) || |
| c == 0x0B3D || |
| (c >= 0x0B5C && c <= 0x0B5D) || |
| (c >= 0x0B5F && c <= 0x0B61) || |
| (c >= 0x0B85 && c <= 0x0B8A) || |
| (c >= 0x0B8E && c <= 0x0B90) || |
| (c >= 0x0B92 && c <= 0x0B95) || |
| (c >= 0x0B99 && c <= 0x0B9A) || |
| c == 0x0B9C || |
| (c >= 0x0B9E && c <= 0x0B9F) || |
| (c >= 0x0BA3 && c <= 0x0BA4) || |
| (c >= 0x0BA8 && c <= 0x0BAA) || |
| (c >= 0x0BAE && c <= 0x0BB5) || |
| (c >= 0x0BB7 && c <= 0x0BB9) || |
| (c >= 0x0C05 && c <= 0x0C0C) || |
| (c >= 0x0C0E && c <= 0x0C10) || |
| (c >= 0x0C12 && c <= 0x0C28) || |
| (c >= 0x0C2A && c <= 0x0C33) || |
| (c >= 0x0C35 && c <= 0x0C39) || |
| (c >= 0x0C60 && c <= 0x0C61) || |
| (c >= 0x0C85 && c <= 0x0C8C) || |
| (c >= 0x0C8E && c <= 0x0C90) || |
| (c >= 0x0C92 && c <= 0x0CA8) || |
| (c >= 0x0CAA && c <= 0x0CB3) || |
| (c >= 0x0CB5 && c <= 0x0CB9) || |
| c == 0x0CDE || |
| (c >= 0x0CE0 && c <= 0x0CE1) || |
| (c >= 0x0D05 && c <= 0x0D0C) || |
| (c >= 0x0D0E && c <= 0x0D10) || |
| (c >= 0x0D12 && c <= 0x0D28) || |
| (c >= 0x0D2A && c <= 0x0D39) || |
| (c >= 0x0D60 && c <= 0x0D61) || |
| (c >= 0x0E01 && c <= 0x0E2E) || |
| c == 0x0E30 || |
| (c >= 0x0E32 && c <= 0x0E33) || |
| (c >= 0x0E40 && c <= 0x0E45) || |
| (c >= 0x0E81 && c <= 0x0E82) || |
| c == 0x0E84 || |
| (c >= 0x0E87 && c <= 0x0E88) || |
| c == 0x0E8A || |
| c == 0x0E8D || |
| (c >= 0x0E94 && c <= 0x0E97) || |
| (c >= 0x0E99 && c <= 0x0E9F) || |
| (c >= 0x0EA1 && c <= 0x0EA3) || |
| c == 0x0EA5 || |
| c == 0x0EA7 || |
| (c >= 0x0EAA && c <= 0x0EAB) || |
| (c >= 0x0EAD && c <= 0x0EAE) || |
| c == 0x0EB0 || |
| (c >= 0x0EB2 && c <= 0x0EB3) || |
| c == 0x0EBD || |
| (c >= 0x0EC0 && c <= 0x0EC4) || |
| (c >= 0x0F40 && c <= 0x0F47) || |
| (c >= 0x0F49 && c <= 0x0F69) || |
| (c >= 0x10A0 && c <= 0x10C5) || |
| (c >= 0x10D0 && c <= 0x10F6) || |
| c == 0x1100 || |
| (c >= 0x1102 && c <= 0x1103) || |
| (c >= 0x1105 && c <= 0x1107) || |
| c == 0x1109 || |
| (c >= 0x110B && c <= 0x110C) || |
| (c >= 0x110E && c <= 0x1112) || |
| c == 0x113C || |
| c == 0x113E || |
| c == 0x1140 || |
| c == 0x114C || |
| c == 0x114E || |
| c == 0x1150 || |
| (c >= 0x1154 && c <= 0x1155) || |
| c == 0x1159 || |
| (c >= 0x115F && c <= 0x1161) || |
| c == 0x1163 || |
| c == 0x1165 || |
| c == 0x1167 || |
| c == 0x1169 || |
| (c >= 0x116D && c <= 0x116E) || |
| (c >= 0x1172 && c <= 0x1173) || |
| c == 0x1175 || |
| c == 0x119E || |
| c == 0x11A8 || |
| c == 0x11AB || |
| (c >= 0x11AE && c <= 0x11AF) || |
| (c >= 0x11B7 && c <= 0x11B8) || |
| c == 0x11BA || |
| (c >= 0x11BC && c <= 0x11C2) || |
| c == 0x11EB || |
| c == 0x11F0 || |
| c == 0x11F9 || |
| (c >= 0x1E00 && c <= 0x1E9B) || |
| (c >= 0x1EA0 && c <= 0x1EF9) || |
| (c >= 0x1F00 && c <= 0x1F15) || |
| (c >= 0x1F18 && c <= 0x1F1D) || |
| (c >= 0x1F20 && c <= 0x1F45) || |
| (c >= 0x1F48 && c <= 0x1F4D) || |
| (c >= 0x1F50 && c <= 0x1F57) || |
| c == 0x1F59 || |
| c == 0x1F5B || |
| c == 0x1F5D || |
| (c >= 0x1F5F && c <= 0x1F7D) || |
| (c >= 0x1F80 && c <= 0x1FB4) || |
| (c >= 0x1FB6 && c <= 0x1FBC) || |
| c == 0x1FBE || |
| (c >= 0x1FC2 && c <= 0x1FC4) || |
| (c >= 0x1FC6 && c <= 0x1FCC) || |
| (c >= 0x1FD0 && c <= 0x1FD3) || |
| (c >= 0x1FD6 && c <= 0x1FDB) || |
| (c >= 0x1FE0 && c <= 0x1FEC) || |
| (c >= 0x1FF2 && c <= 0x1FF4) || |
| (c >= 0x1FF6 && c <= 0x1FFC) || |
| c == 0x2126 || |
| (c >= 0x212A && c <= 0x212B) || |
| c == 0x212E || |
| (c >= 0x2180 && c <= 0x2182) || |
| (c >= 0x3041 && c <= 0x3094) || |
| (c >= 0x30A1 && c <= 0x30FA) || |
| (c >= 0x3105 && c <= 0x312C) || |
| (c >= 0xAC00 && c <= 0xD7A3)) |
| return true; // BaseChar |
| if ((c >= 0x4e00 && c <= 0x9fa5) || |
| c == 0x3007 || |
| (c >= 0x3021 && c <= 0x3029)) |
| return true; // Ideographic |
| return false; |
| } |
| |
| /** |
| * Indicates whether the specified Unicode character matches the Digit |
| * production. |
| */ |
| public static boolean isDigit(int c) |
| { |
| return ((c >= 0x0030 && c <= 0x0039) || |
| (c >= 0x0660 && c <= 0x0669) || |
| (c >= 0x06F0 && c <= 0x06F9) || |
| (c >= 0x0966 && c <= 0x096F) || |
| (c >= 0x09E6 && c <= 0x09EF) || |
| (c >= 0x0A66 && c <= 0x0A6F) || |
| (c >= 0x0AE6 && c <= 0x0AEF) || |
| (c >= 0x0B66 && c <= 0x0B6F) || |
| (c >= 0x0BE7 && c <= 0x0BEF) || |
| (c >= 0x0C66 && c <= 0x0C6F) || |
| (c >= 0x0CE6 && c <= 0x0CEF) || |
| (c >= 0x0D66 && c <= 0x0D6F) || |
| (c >= 0x0E50 && c <= 0x0E59) || |
| (c >= 0x0ED0 && c <= 0x0ED9) || |
| (c >= 0x0F20 && c <= 0x0F29)); |
| } |
| |
| /** |
| * Indicates whether the specified Unicode character matches the |
| * CombiningChar production. |
| */ |
| public static boolean isCombiningChar(int c) |
| { |
| return ((c >= 0x0300 && c <= 0x0345) || |
| (c >= 0x0360 && c <= 0x0361) || |
| (c >= 0x0483 && c <= 0x0486) || |
| (c >= 0x0591 && c <= 0x05A1) || |
| (c >= 0x05A3 && c <= 0x05B9) || |
| (c >= 0x05BB && c <= 0x05BD) || |
| c == 0x05BF || |
| (c >= 0x05C1 && c <= 0x05C2) || |
| c == 0x05C4 || |
| (c >= 0x064B && c <= 0x0652) || |
| c == 0x0670 || |
| (c >= 0x06D6 && c <= 0x06DC) || |
| (c >= 0x06DD && c <= 0x06DF) || |
| (c >= 0x06E0 && c <= 0x06E4) || |
| (c >= 0x06E7 && c <= 0x06E8) || |
| (c >= 0x06EA && c <= 0x06ED) || |
| (c >= 0x0901 && c <= 0x0903) || |
| c == 0x093C || |
| (c >= 0x093E && c <= 0x094C) || |
| c == 0x094D || |
| (c >= 0x0951 && c <= 0x0954) || |
| (c >= 0x0962 && c <= 0x0963) || |
| (c >= 0x0981 && c <= 0x0983) || |
| c == 0x09BC || |
| c == 0x09BE || |
| c == 0x09BF || |
| (c >= 0x09C0 && c <= 0x09C4) || |
| (c >= 0x09C7 && c <= 0x09C8) || |
| (c >= 0x09CB && c <= 0x09CD) || |
| c == 0x09D7 || |
| (c >= 0x09E2 && c <= 0x09E3) || |
| c == 0x0A02 || |
| c == 0x0A3C || |
| c == 0x0A3E || |
| c == 0x0A3F || |
| (c >= 0x0A40 && c <= 0x0A42) || |
| (c >= 0x0A47 && c <= 0x0A48) || |
| (c >= 0x0A4B && c <= 0x0A4D) || |
| (c >= 0x0A70 && c <= 0x0A71) || |
| (c >= 0x0A81 && c <= 0x0A83) || |
| c == 0x0ABC || |
| (c >= 0x0ABE && c <= 0x0AC5) || |
| (c >= 0x0AC7 && c <= 0x0AC9) || |
| (c >= 0x0ACB && c <= 0x0ACD) || |
| (c >= 0x0B01 && c <= 0x0B03) || |
| c == 0x0B3C || |
| (c >= 0x0B3E && c <= 0x0B43) || |
| (c >= 0x0B47 && c <= 0x0B48) || |
| (c >= 0x0B4B && c <= 0x0B4D) || |
| (c >= 0x0B56 && c <= 0x0B57) || |
| (c >= 0x0B82 && c <= 0x0B83) || |
| (c >= 0x0BBE && c <= 0x0BC2) || |
| (c >= 0x0BC6 && c <= 0x0BC8) || |
| (c >= 0x0BCA && c <= 0x0BCD) || |
| c == 0x0BD7 || |
| (c >= 0x0C01 && c <= 0x0C03) || |
| (c >= 0x0C3E && c <= 0x0C44) || |
| (c >= 0x0C46 && c <= 0x0C48) || |
| (c >= 0x0C4A && c <= 0x0C4D) || |
| (c >= 0x0C55 && c <= 0x0C56) || |
| (c >= 0x0C82 && c <= 0x0C83) || |
| (c >= 0x0CBE && c <= 0x0CC4) || |
| (c >= 0x0CC6 && c <= 0x0CC8) || |
| (c >= 0x0CCA && c <= 0x0CCD) || |
| (c >= 0x0CD5 && c <= 0x0CD6) || |
| (c >= 0x0D02 && c <= 0x0D03) || |
| (c >= 0x0D3E && c <= 0x0D43) || |
| (c >= 0x0D46 && c <= 0x0D48) || |
| (c >= 0x0D4A && c <= 0x0D4D) || |
| c == 0x0D57 || |
| c == 0x0E31 || |
| (c >= 0x0E34 && c <= 0x0E3A) || |
| (c >= 0x0E47 && c <= 0x0E4E) || |
| c == 0x0EB1 || |
| (c >= 0x0EB4 && c <= 0x0EB9) || |
| (c >= 0x0EBB && c <= 0x0EBC) || |
| (c >= 0x0EC8 && c <= 0x0ECD) || |
| (c >= 0x0F18 && c <= 0x0F19) || |
| c == 0x0F35 || |
| c == 0x0F37 || |
| c == 0x0F39 || |
| c == 0x0F3E || |
| c == 0x0F3F || |
| (c >= 0x0F71 && c <= 0x0F84) || |
| (c >= 0x0F86 && c <= 0x0F8B) || |
| (c >= 0x0F90 && c <= 0x0F95) || |
| c == 0x0F97 || |
| (c >= 0x0F99 && c <= 0x0FAD) || |
| (c >= 0x0FB1 && c <= 0x0FB7) || |
| c == 0x0FB9 || |
| (c >= 0x20D0 && c <= 0x20DC) || |
| c == 0x20E1 || |
| (c >= 0x302A && c <= 0x302F) || |
| c == 0x3099 || |
| c == 0x309A); |
| } |
| |
| /** |
| * Indicates whether the specified Unicode character matches the Extender |
| * production. |
| */ |
| public static boolean isExtender(int c) |
| { |
| return (c == 0x00B7 || |
| c == 0x02D0 || |
| c == 0x02D1 || |
| c == 0x0387 || |
| c == 0x0640 || |
| c == 0x0E46 || |
| c == 0x0EC6 || |
| c == 0x3005 || |
| (c >= 0x3031 && c <= 0x3035) || |
| (c >= 0x309D && c <= 0x309E) || |
| (c >= 0x30FC && c <= 0x30FE)); |
| } |
| |
| /** |
| * Indicates whether the specified Unicode character matches the Char |
| * production. |
| */ |
| public static boolean isChar(int c) |
| { |
| return (c >= 0x20 && c < 0xd800) || |
| (c >= 0xe00 && c < 0xfffe) || |
| (c >= 0x10000 && c < 0x110000) || |
| c == 0xa || c == 0x9 || c == 0xd; |
| } |
| |
| /** |
| * Interns the specified text or not, depending on the value of |
| * stringInterning. |
| */ |
| private String intern(String text) |
| { |
| return stringInterning ? text.intern() : text; |
| } |
| |
| /** |
| * Report a parsing error. |
| */ |
| private void error(String message) |
| throws XMLStreamException |
| { |
| error(message, null); |
| } |
| |
| /** |
| * Report a parsing error. |
| */ |
| private void error(String message, Object info) |
| throws XMLStreamException |
| { |
| if (info != null) |
| { |
| if (info instanceof String) |
| message += ": \"" + ((String) info) + "\""; |
| else if (info instanceof Character) |
| message += ": '" + ((Character) info) + "'"; |
| } |
| throw new XMLStreamException(message); |
| } |
| |
| /** |
| * Perform validation of a start-element event. |
| */ |
| private void validateStartElement(String elementName) |
| throws XMLStreamException |
| { |
| if (currentContentModel == null) |
| { |
| // root element |
| // VC: Root Element Type |
| if (!elementName.equals(doctype.rootName)) |
| error("root element name must match name in DTD"); |
| return; |
| } |
| // VC: Element Valid |
| switch (currentContentModel.type) |
| { |
| case ContentModel.EMPTY: |
| error("child element found in empty element", elementName); |
| break; |
| case ContentModel.ELEMENT: |
| LinkedList ctx = (LinkedList) validationStack.getLast(); |
| ctx.add(elementName); |
| break; |
| case ContentModel.MIXED: |
| MixedContentModel mm = (MixedContentModel) currentContentModel; |
| if (!mm.containsName(elementName)) |
| error("illegal element for content model", elementName); |
| break; |
| } |
| } |
| |
| /** |
| * Perform validation of an end-element event. |
| */ |
| private void validateEndElement() |
| throws XMLStreamException |
| { |
| if (currentContentModel == null) |
| { |
| // root element |
| // VC: IDREF |
| if (!idrefs.containsAll(ids)) |
| error("IDREF values must match the value of some ID attribute"); |
| return; |
| } |
| // VC: Element Valid |
| switch (currentContentModel.type) |
| { |
| case ContentModel.ELEMENT: |
| LinkedList ctx = (LinkedList) validationStack.getLast(); |
| ElementContentModel ecm = (ElementContentModel) currentContentModel; |
| validateElementContent(ecm, ctx); |
| break; |
| } |
| } |
| |
| /** |
| * Perform validation of character data. |
| */ |
| private void validatePCData(String text) |
| throws XMLStreamException |
| { |
| // VC: Element Valid |
| switch (currentContentModel.type) |
| { |
| case ContentModel.EMPTY: |
| error("character data found in empty element", text); |
| break; |
| case ContentModel.ELEMENT: |
| boolean white = true; |
| int len = text.length(); |
| for (int i = 0; i < len; i++) |
| { |
| char c = text.charAt(i); |
| if (c != ' ' && c != '\t' && c != '\n' && c != '\r') |
| { |
| white = false; |
| break; |
| } |
| } |
| if (!white) |
| error("character data found in element with element content", text); |
| else if (xmlStandalone == Boolean.TRUE && currentContentModel.external) |
| // VC: Standalone Document Declaration |
| error("whitespace in element content of externally declared " + |
| "element in standalone document"); |
| break; |
| } |
| } |
| |
| /** |
| * Validates the specified validation context (list of child elements) |
| * against the element content model for the current element. |
| */ |
| private void validateElementContent(ElementContentModel model, |
| LinkedList children) |
| throws XMLStreamException |
| { |
| // Use regular expression |
| StringBuffer buf = new StringBuffer(); |
| for (Iterator i = children.iterator(); i.hasNext(); ) |
| { |
| buf.append((String) i.next()); |
| buf.append(' '); |
| } |
| String c = buf.toString(); |
| String regex = createRegularExpression(model); |
| if (!c.matches(regex)) |
| error("element content "+model.text+" does not match expression "+regex, c); |
| } |
| |
| /** |
| * Creates the regular expression used to validate an element content |
| * model. |
| */ |
| private String createRegularExpression(ElementContentModel model) |
| { |
| if (model.regex == null) |
| { |
| StringBuffer buf = new StringBuffer(); |
| buf.append('('); |
| for (Iterator i = model.contentParticles.iterator(); i.hasNext(); ) |
| { |
| ContentParticle cp = (ContentParticle) i.next(); |
| if (cp.content instanceof String) |
| { |
| buf.append('('); |
| buf.append((String) cp.content); |
| buf.append(' '); |
| buf.append(')'); |
| if (cp.max == -1) |
| { |
| if (cp.min == 0) |
| buf.append('*'); |
| else |
| buf.append('+'); |
| } |
| else if (cp.min == 0) |
| buf.append('?'); |
| } |
| else |
| { |
| ElementContentModel ecm = (ElementContentModel) cp.content; |
| buf.append(createRegularExpression(ecm)); |
| } |
| if (model.or && i.hasNext()) |
| buf.append('|'); |
| } |
| buf.append(')'); |
| if (model.max == -1) |
| { |
| if (model.min == 0) |
| buf.append('*'); |
| else |
| buf.append('+'); |
| } |
| else if (model.min == 0) |
| buf.append('?'); |
| model.regex = buf.toString(); |
| } |
| return model.regex; |
| } |
| |
| /** |
| * Performs validation of a document type declaration event. |
| */ |
| void validateDoctype() |
| throws XMLStreamException |
| { |
| for (Iterator i = doctype.entityIterator(); i.hasNext(); ) |
| { |
| Map.Entry entry = (Map.Entry) i.next(); |
| Object entity = entry.getValue(); |
| if (entity instanceof ExternalIds) |
| { |
| ExternalIds ids = (ExternalIds) entity; |
| if (ids.notationName != null) |
| { |
| // VC: Notation Declared |
| ExternalIds notation = doctype.getNotation(ids.notationName); |
| if (notation == null) |
| error("Notation name must match the declared name of a " + |
| "notation", ids.notationName); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Simple test harness for reading an XML file. |
| * args[0] is the filename of the XML file |
| * If args[1] is "-x", enable XInclude processing |
| */ |
| public static void main(String[] args) |
| throws Exception |
| { |
| boolean validating = false; |
| boolean namespaceAware = false; |
| boolean xIncludeAware = false; |
| int pos = 0; |
| while (pos < args.length && args[pos].startsWith("-")) |
| { |
| if ("-x".equals(args[pos])) |
| xIncludeAware = true; |
| else if ("-v".equals(args[pos])) |
| validating = true; |
| else if ("-n".equals(args[pos])) |
| namespaceAware = true; |
| pos++; |
| } |
| if (pos >= args.length) |
| { |
| System.out.println("Syntax: XMLParser [-n] [-v] [-x] <file> [<file2> [...]]"); |
| System.out.println("\t-n: use namespace aware mode"); |
| System.out.println("\t-v: use validating parser"); |
| System.out.println("\t-x: use XInclude aware mode"); |
| System.exit(2); |
| } |
| while (pos < args.length) |
| { |
| XMLParser p = new XMLParser(new java.io.FileInputStream(args[pos]), |
| absolutize(null, args[pos]), |
| validating, // validating |
| namespaceAware, // namespaceAware |
| true, // coalescing, |
| true, // replaceERefs |
| true, // externalEntities |
| true, // supportDTD |
| true, // baseAware |
| true, // stringInterning |
| true, // extendedEventTypes |
| null, |
| null); |
| XMLStreamReader reader = p; |
| if (xIncludeAware) |
| reader = new XIncludeFilter(p, args[pos], true, true, true); |
| try |
| { |
| int event; |
| //do |
| while (reader.hasNext()) |
| { |
| event = reader.next(); |
| Location loc = reader.getLocation(); |
| System.out.print(loc.getLineNumber() + ":" + |
| loc.getColumnNumber() + " "); |
| switch (event) |
| { |
| case XMLStreamConstants.START_DOCUMENT: |
| System.out.println("START_DOCUMENT version=" + |
| reader.getVersion() + |
| " encoding=" + |
| reader.getEncoding()); |
| break; |
| case XMLStreamConstants.END_DOCUMENT: |
| System.out.println("END_DOCUMENT"); |
| break; |
| case XMLStreamConstants.START_ELEMENT: |
| System.out.println("START_ELEMENT " + |
| reader.getName()); |
| int l = reader.getNamespaceCount(); |
| for (int i = 0; i < l; i++) |
| System.out.println("\tnamespace " + |
| reader.getNamespacePrefix(i) + "='" + |
| reader.getNamespaceURI(i)+"'"); |
| l = reader.getAttributeCount(); |
| for (int i = 0; i < l; i++) |
| System.out.println("\tattribute " + |
| reader.getAttributeName(i) + "='" + |
| reader.getAttributeValue(i) + "'"); |
| break; |
| case XMLStreamConstants.END_ELEMENT: |
| System.out.println("END_ELEMENT " + reader.getName()); |
| break; |
| case XMLStreamConstants.CHARACTERS: |
| System.out.println("CHARACTERS '" + |
| encodeText(reader.getText()) + "'"); |
| break; |
| case XMLStreamConstants.CDATA: |
| System.out.println("CDATA '" + |
| encodeText(reader.getText()) + "'"); |
| break; |
| case XMLStreamConstants.SPACE: |
| System.out.println("SPACE '" + |
| encodeText(reader.getText()) + "'"); |
| break; |
| case XMLStreamConstants.DTD: |
| System.out.println("DTD " + reader.getText()); |
| break; |
| case XMLStreamConstants.ENTITY_REFERENCE: |
| System.out.println("ENTITY_REFERENCE " + reader.getText()); |
| break; |
| case XMLStreamConstants.COMMENT: |
| System.out.println("COMMENT '" + |
| encodeText(reader.getText()) + "'"); |
| break; |
| case XMLStreamConstants.PROCESSING_INSTRUCTION: |
| System.out.println("PROCESSING_INSTRUCTION " + |
| reader.getPITarget() + " " + |
| reader.getPIData()); |
| break; |
| case START_ENTITY: |
| System.out.println("START_ENTITY " + reader.getText()); |
| break; |
| case END_ENTITY: |
| System.out.println("END_ENTITY " + reader.getText()); |
| break; |
| default: |
| System.out.println("Unknown event: " + event); |
| } |
| } |
| } |
| catch (XMLStreamException e) |
| { |
| Location l = reader.getLocation(); |
| System.out.println("At line "+l.getLineNumber()+ |
| ", column "+l.getColumnNumber()+ |
| " of "+l.getSystemId()); |
| throw e; |
| } |
| pos++; |
| } |
| } |
| |
| /** |
| * Escapes control characters in the specified text. For debugging. |
| */ |
| private static String encodeText(String text) |
| { |
| StringBuffer b = new StringBuffer(); |
| int len = text.length(); |
| for (int i = 0; i < len; i++) |
| { |
| char c = text.charAt(i); |
| switch (c) |
| { |
| case '\t': |
| b.append("\\t"); |
| break; |
| case '\n': |
| b.append("\\n"); |
| break; |
| case '\r': |
| b.append("\\r"); |
| break; |
| default: |
| b.append(c); |
| } |
| } |
| return b.toString(); |
| } |
| |
| /** |
| * An attribute instance. |
| */ |
| class Attribute |
| { |
| |
| /** |
| * Attribute name. |
| */ |
| final String name; |
| |
| /** |
| * Attribute type as declared in the DTD, or CDATA otherwise. |
| */ |
| final String type; |
| |
| /** |
| * Whether the attribute was specified or defaulted. |
| */ |
| final boolean specified; |
| |
| /** |
| * The attribute value. |
| */ |
| final String value; |
| |
| /** |
| * The namespace prefix. |
| */ |
| final String prefix; |
| |
| /** |
| * The namespace local-name. |
| */ |
| final String localName; |
| |
| Attribute(String name, String type, boolean specified, String value) |
| { |
| this.name = name; |
| this.type = type; |
| this.specified = specified; |
| this.value = value; |
| int ci = name.indexOf(':'); |
| if (ci == -1) |
| { |
| prefix = null; |
| localName = intern(name); |
| } |
| else |
| { |
| prefix = intern(name.substring(0, ci)); |
| localName = intern(name.substring(ci + 1)); |
| } |
| } |
| |
| public boolean equals(Object other) |
| { |
| if (other instanceof Attribute) |
| { |
| Attribute a = (Attribute) other; |
| if (namespaceAware) |
| { |
| if (!a.localName.equals(localName)) |
| return false; |
| String auri = getNamespaceURI(a.prefix); |
| String uri = getNamespaceURI(prefix); |
| if (uri == null && (auri == null || |
| (input.xml11 && "".equals(auri)))) |
| return true; |
| if (uri != null) |
| { |
| if ("".equals(uri) && input.xml11 && "".equals(auri)) |
| return true; |
| return uri.equals(auri); |
| } |
| return false; |
| } |
| else |
| return a.name.equals(name); |
| } |
| return false; |
| } |
| |
| } |
| |
| /** |
| * Representation of a DTD. |
| */ |
| class Doctype |
| { |
| |
| /** |
| * Name of the root element. |
| */ |
| final String rootName; |
| |
| /** |
| * Public ID, if any, of external subset. |
| */ |
| final String publicId; |
| |
| /** |
| * System ID (URL), if any, of external subset. |
| */ |
| final String systemId; |
| |
| /** |
| * Map of element names to content models. |
| */ |
| private final LinkedHashMap elements = new LinkedHashMap(); |
| |
| /** |
| * Map of element names to maps of attribute declarations. |
| */ |
| private final LinkedHashMap attlists = new LinkedHashMap(); |
| |
| /** |
| * Map of entity names to entities (String or ExternalIds). |
| */ |
| private final LinkedHashMap entities = new LinkedHashMap(); |
| |
| /** |
| * Map of notation names to ExternalIds. |
| */ |
| private final LinkedHashMap notations = new LinkedHashMap(); |
| |
| /** |
| * Map of anonymous keys to comments. |
| */ |
| private final LinkedHashMap comments = new LinkedHashMap(); |
| |
| /** |
| * Map of anonymous keys to processing instructions (String[2] |
| * containing {target, data}). |
| */ |
| private final LinkedHashMap pis = new LinkedHashMap(); |
| |
| /** |
| * List of keys to all markup entries in the DTD. |
| */ |
| private final LinkedList entries = new LinkedList(); |
| |
| /** |
| * Set of the entities defined in the external subset. |
| */ |
| private final HashSet externalEntities = new HashSet(); |
| |
| /** |
| * Set of the notations defined in the external subset. |
| */ |
| private final HashSet externalNotations = new HashSet(); |
| |
| /** |
| * Counter for making anonymous keys. |
| */ |
| private int anon = 1; |
| |
| /** |
| * Constructor. |
| */ |
| Doctype(String rootName, String publicId, String systemId) |
| { |
| this.rootName = rootName; |
| this.publicId = publicId; |
| this.systemId = systemId; |
| } |
| |
| /** |
| * Adds an element declaration. |
| * @param name the element name |
| * @param text the content model text |
| * @param model the parsed content model |
| */ |
| void addElementDecl(String name, String text, ContentModel model) |
| { |
| if (elements.containsKey(name)) |
| return; |
| model.text = text; |
| model.external = (inputStack.size() != 1); |
| elements.put(name, model); |
| entries.add("E" + name); |
| } |
| |
| /** |
| * Adds an attribute declaration. |
| * @param ename the element name |
| * @param aname the attribute name |
| * @param decl the attribute declaration details |
| */ |
| void addAttributeDecl(String ename, String aname, AttributeDecl decl) |
| { |
| LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename); |
| if (attlist == null) |
| { |
| attlist = new LinkedHashMap(); |
| attlists.put(ename, attlist); |
| } |
| else if (attlist.containsKey(aname)) |
| return; |
| attlist.put(aname, decl); |
| String key = "A" + ename; |
| if (!entries.contains(key)) |
| entries.add(key); |
| } |
| |
| /** |
| * Adds an entity declaration. |
| * @param name the entity name |
| * @param text the entity replacement text |
| * @param inExternalSubset if we are in the exernal subset |
| */ |
| void addEntityDecl(String name, String text, boolean inExternalSubset) |
| { |
| if (entities.containsKey(name)) |
| return; |
| entities.put(name, text); |
| entries.add("e" + name); |
| if (inExternalSubset) |
| externalEntities.add(name); |
| } |
| |
| /** |
| * Adds an entity declaration. |
| * @param name the entity name |
| * @param ids the external IDs |
| * @param inExternalSubset if we are in the exernal subset |
| */ |
| void addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset) |
| { |
| if (entities.containsKey(name)) |
| return; |
| entities.put(name, ids); |
| entries.add("e" + name); |
| if (inExternalSubset) |
| externalEntities.add(name); |
| } |
| |
| /** |
| * Adds a notation declaration. |
| * @param name the notation name |
| * @param ids the external IDs |
| * @param inExternalSubset if we are in the exernal subset |
| */ |
| void addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset) |
| { |
| if (notations.containsKey(name)) |
| return; |
| notations.put(name, ids); |
| entries.add("n" + name); |
| if (inExternalSubset) |
| externalNotations.add(name); |
| } |
| |
| /** |
| * Adds a comment. |
| */ |
| void addComment(String text) |
| { |
| String key = Integer.toString(anon++); |
| comments.put(key, text); |
| entries.add("c" + key); |
| } |
| |
| /** |
| * Adds a processing instruction. |
| */ |
| void addPI(String target, String data) |
| { |
| String key = Integer.toString(anon++); |
| pis.put(key, new String[] {target, data}); |
| entries.add("p" + key); |
| } |
| |
| /** |
| * Returns the content model for the specified element. |
| * @param name the element name |
| */ |
| ContentModel getElementModel(String name) |
| { |
| return (ContentModel) elements.get(name); |
| } |
| |
| /** |
| * Returns the attribute definition for the given attribute |
| * @param ename the element name |
| * @param aname the attribute name |
| */ |
| AttributeDecl getAttributeDecl(String ename, String aname) |
| { |
| LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename); |
| return (attlist == null) ? null : (AttributeDecl) attlist.get(aname); |
| } |
| |
| /** |
| * Indicates whether the specified attribute was declared in the DTD. |
| * @param ename the element name |
| * @param aname the attribute name |
| */ |
| boolean isAttributeDeclared(String ename, String aname) |
| { |
| LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename); |
| return (attlist == null) ? false : attlist.containsKey(aname); |
| } |
| |
| /** |
| * Returns an iterator over the entries in the attribute list for the |
| * given element. |
| * @param ename the element name |
| */ |
| Iterator attlistIterator(String ename) |
| { |
| LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename); |
| return (attlist == null) ? Collections.EMPTY_LIST.iterator() : |
| attlist.entrySet().iterator(); |
| } |
| |
| /** |
| * Returns the entity (String or ExternalIds) for the given entity name. |
| */ |
| Object getEntity(String name) |
| { |
| return entities.get(name); |
| } |
| |
| /** |
| * Indicates whether the specified entity was declared in the external |
| * subset. |
| */ |
| boolean isEntityExternal(String name) |
| { |
| return externalEntities.contains(name); |
| } |
| |
| /** |
| * Returns an iterator over the entity map entries. |
| */ |
| Iterator entityIterator() |
| { |
| return entities.entrySet().iterator(); |
| } |
| |
| /** |
| * Returns the notation IDs for the given notation name. |
| */ |
| ExternalIds getNotation(String name) |
| { |
| return (ExternalIds) notations.get(name); |
| } |
| |
| /** |
| * Indicates whether the specified notation was declared in the external |
| * subset. |
| */ |
| boolean isNotationExternal(String name) |
| { |
| return externalNotations.contains(name); |
| } |
| |
| /** |
| * Returns the comment associated with the specified (anonymous) key. |
| */ |
| String getComment(String key) |
| { |
| return (String) comments.get(key); |
| } |
| |
| /** |
| * Returns the processing instruction associated with the specified |
| * (anonymous) key. |
| */ |
| String[] getPI(String key) |
| { |
| return (String[]) pis.get(key); |
| } |
| |
| /** |
| * Returns an iterator over the keys of the markup entries in this DTD, |
| * in the order declared. |
| */ |
| Iterator entryIterator() |
| { |
| return entries.iterator(); |
| } |
| |
| } |
| |
| /** |
| * Combination of an ExternalID and an optional NDataDecl. |
| */ |
| class ExternalIds |
| { |
| |
| /** |
| * The public ID. |
| */ |
| String publicId; |
| |
| /** |
| * The system ID. |
| */ |
| String systemId; |
| |
| /** |
| * The notation name declared with the NDATA keyword. |
| */ |
| String notationName; |
| } |
| |
| /** |
| * A content model. |
| */ |
| abstract class ContentModel |
| { |
| static final int EMPTY = 0; |
| static final int ANY = 1; |
| static final int ELEMENT = 2; |
| static final int MIXED = 3; |
| |
| int min; |
| int max; |
| final int type; |
| String text; |
| boolean external; |
| |
| ContentModel(int type) |
| { |
| this.type = type; |
| min = 1; |
| max = 1; |
| } |
| |
| } |
| |
| /** |
| * The EMPTY content model. |
| */ |
| class EmptyContentModel |
| extends ContentModel |
| { |
| |
| EmptyContentModel() |
| { |
| super(ContentModel.EMPTY); |
| min = 0; |
| max = 0; |
| } |
| |
| } |
| |
| /** |
| * The ANY content model. |
| */ |
| class AnyContentModel |
| extends ContentModel |
| { |
| |
| AnyContentModel() |
| { |
| super(ContentModel.ANY); |
| min = 0; |
| max = -1; |
| } |
| |
| } |
| |
| /** |
| * An element content model. |
| */ |
| class ElementContentModel |
| extends ContentModel |
| { |
| |
| LinkedList contentParticles; |
| boolean or; |
| String regex; // regular expression cache |
| |
| ElementContentModel() |
| { |
| super(ContentModel.ELEMENT); |
| contentParticles = new LinkedList(); |
| } |
| |
| void addContentParticle(ContentParticle cp) |
| { |
| contentParticles.add(cp); |
| } |
| |
| } |
| |
| class ContentParticle |
| { |
| |
| int min = 1; |
| int max = 1; |
| Object content; // Name (String) or ElementContentModel |
| |
| } |
| |
| /** |
| * A mixed content model. |
| */ |
| class MixedContentModel |
| extends ContentModel |
| { |
| |
| private HashSet names; |
| |
| MixedContentModel() |
| { |
| super(ContentModel.MIXED); |
| names = new HashSet(); |
| } |
| |
| void addName(String name) |
| { |
| names.add(name); |
| } |
| |
| boolean containsName(String name) |
| { |
| return names.contains(name); |
| } |
| |
| } |
| |
| /** |
| * An attribute definition. |
| */ |
| class AttributeDecl |
| { |
| |
| /** |
| * The attribute type (CDATA, ID, etc). |
| */ |
| final String type; |
| |
| /** |
| * The default value. |
| */ |
| final String value; |
| |
| /** |
| * The value type (#FIXED, #IMPLIED, etc). |
| */ |
| final int valueType; |
| |
| /** |
| * The enumeration text. |
| */ |
| final String enumeration; |
| |
| /** |
| * The enumeration tokens. |
| */ |
| final HashSet values; |
| |
| /** |
| * Whether this attribute declaration occurred in the external subset. |
| */ |
| final boolean external; |
| |
| AttributeDecl(String type, String value, |
| int valueType, String enumeration, |
| HashSet values, boolean external) |
| { |
| this.type = type; |
| this.value = value; |
| this.valueType = valueType; |
| this.enumeration = enumeration; |
| this.values = values; |
| this.external = external; |
| } |
| |
| } |
| |
| /** |
| * An XML input source. |
| */ |
| static class Input |
| implements Location |
| { |
| |
| int line = 1, markLine; |
| int column, markColumn; |
| int offset, markOffset; |
| final String publicId, systemId, name; |
| final boolean report; // report start- and end-entity |
| final boolean normalize; // normalize CR, etc to LF |
| |
| InputStream in; |
| Reader reader; |
| UnicodeReader unicodeReader; |
| boolean initialized; |
| boolean encodingDetected; |
| String inputEncoding; |
| boolean xml11; |
| |
| Input(InputStream in, Reader reader, String publicId, String systemId, |
| String name, String inputEncoding, boolean report, |
| boolean normalize) |
| { |
| if (inputEncoding == null) |
| inputEncoding = "UTF-8"; |
| this.inputEncoding = inputEncoding; |
| this.publicId = publicId; |
| this.systemId = systemId; |
| this.name = name; |
| this.report = report; |
| this.normalize = normalize; |
| if (in != null) |
| { |
| if (reader != null) |
| throw new IllegalStateException("both byte and char streams "+ |
| "specified"); |
| if (normalize) |
| in = new CRLFInputStream(in); |
| in = new BufferedInputStream(in); |
| this.in = in; |
| } |
| else |
| { |
| this.reader = normalize ? new CRLFReader(reader) : reader; |
| unicodeReader = new UnicodeReader(this.reader); |
| } |
| initialized = false; |
| } |
| |
| // -- Location -- |
| |
| public int getCharacterOffset() |
| { |
| return offset; |
| } |
| |
| public int getColumnNumber() |
| { |
| return column; |
| } |
| |
| public int getLineNumber() |
| { |
| return line; |
| } |
| |
| public String getPublicId() |
| { |
| return publicId; |
| } |
| |
| public String getSystemId() |
| { |
| return systemId; |
| } |
| |
| void init() |
| throws IOException |
| { |
| if (initialized) |
| return; |
| if (in != null) |
| detectEncoding(); |
| initialized = true; |
| } |
| |
| void mark(int len) |
| throws IOException |
| { |
| markOffset = offset; |
| markLine = line; |
| markColumn = column; |
| if (unicodeReader != null) |
| unicodeReader.mark(len); |
| else |
| in.mark(len); |
| } |
| |
| /** |
| * Character read. |
| */ |
| int read() |
| throws IOException |
| { |
| offset++; |
| int ret = (unicodeReader != null) ? unicodeReader.read() : in.read(); |
| if (normalize && |
| (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028)))) |
| { |
| // Normalize CR etc to LF |
| ret = 0x0a; |
| } |
| // Locator handling |
| if (ret == 0x0a) |
| { |
| line++; |
| column = 0; |
| } |
| else |
| column++; |
| return ret; |
| } |
| |
| /** |
| * Block read. |
| */ |
| int read(int[] b, int off, int len) |
| throws IOException |
| { |
| int ret; |
| if (unicodeReader != null) |
| { |
| ret = unicodeReader.read(b, off, len); |
| } |
| else |
| { |
| byte[] b2 = new byte[len]; |
| ret = in.read(b2, 0, len); |
| if (ret != -1) |
| { |
| String s = new String(b2, 0, ret, inputEncoding); |
| int[] c = UnicodeReader.toCodePointArray(s); |
| ret = c.length; |
| System.arraycopy(c, 0, b, off, ret); |
| } |
| } |
| if (ret != -1) |
| { |
| // Locator handling |
| for (int i = 0; i < ret; i++) |
| { |
| int c = b[off + i]; |
| if (normalize && |
| (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028)))) |
| { |
| // Normalize CR etc to LF |
| c = 0x0a; |
| b[off + i] = c; |
| } |
| if (c == 0x0a) |
| { |
| line++; |
| column = 0; |
| } |
| else |
| column++; |
| } |
| } |
| return ret; |
| } |
| |
| void reset() |
| throws IOException |
| { |
| if (unicodeReader != null) |
| unicodeReader.reset(); |
| else |
| in.reset(); |
| offset = markOffset; |
| line = markLine; |
| column = markColumn; |
| } |
| |
| // Detection of input encoding |
| |
| private static final int[] SIGNATURE_UCS_4_1234 = |
| new int[] { 0x00, 0x00, 0x00, 0x3c }; |
| private static final int[] SIGNATURE_UCS_4_4321 = |
| new int[] { 0x3c, 0x00, 0x00, 0x00 }; |
| private static final int[] SIGNATURE_UCS_4_2143 = |
| new int[] { 0x00, 0x00, 0x3c, 0x00 }; |
| private static final int[] SIGNATURE_UCS_4_3412 = |
| new int[] { 0x00, 0x3c, 0x00, 0x00 }; |
| private static final int[] SIGNATURE_UCS_2_12 = |
| new int[] { 0xfe, 0xff }; |
| private static final int[] SIGNATURE_UCS_2_21 = |
| new int[] { 0xff, 0xfe }; |
| private static final int[] SIGNATURE_UCS_2_12_NOBOM = |
| new int[] { 0x00, 0x3c, 0x00, 0x3f }; |
| private static final int[] SIGNATURE_UCS_2_21_NOBOM = |
| new int[] { 0x3c, 0x00, 0x3f, 0x00 }; |
| private static final int[] SIGNATURE_UTF_8 = |
| new int[] { 0x3c, 0x3f, 0x78, 0x6d }; |
| private static final int[] SIGNATURE_UTF_8_BOM = |
| new int[] { 0xef, 0xbb, 0xbf }; |
| |
| /** |
| * Detect the input encoding. |
| */ |
| private void detectEncoding() |
| throws IOException |
| { |
| int[] signature = new int[4]; |
| in.mark(4); |
| for (int i = 0; i < 4; i++) |
| signature[i] = in.read(); |
| in.reset(); |
| |
| // 4-byte encodings |
| if (equals(SIGNATURE_UCS_4_1234, signature)) |
| { |
| in.read(); |
| in.read(); |
| in.read(); |
| in.read(); |
| setInputEncoding("UTF-32BE"); |
| encodingDetected = true; |
| } |
| else if (equals(SIGNATURE_UCS_4_4321, signature)) |
| { |
| in.read(); |
| in.read(); |
| in.read(); |
| in.read(); |
| setInputEncoding("UTF-32LE"); |
| encodingDetected = true; |
| } |
| else if (equals(SIGNATURE_UCS_4_2143, signature) || |
| equals(SIGNATURE_UCS_4_3412, signature)) |
| throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering"); |
| |
| // 2-byte encodings |
| else if (equals(SIGNATURE_UCS_2_12, signature)) |
| { |
| in.read(); |
| in.read(); |
| setInputEncoding("UTF-16BE"); |
| encodingDetected = true; |
| } |
| else if (equals(SIGNATURE_UCS_2_21, signature)) |
| { |
| in.read(); |
| in.read(); |
| setInputEncoding("UTF-16LE"); |
| encodingDetected = true; |
| } |
| else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature)) |
| { |
| //setInputEncoding("UTF-16BE"); |
| throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity"); |
| } |
| else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature)) |
| { |
| //setInputEncoding("UTF-16LE"); |
| throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity"); |
| } |
| // ASCII-derived encodings |
| else if (equals(SIGNATURE_UTF_8, signature)) |
| { |
| // UTF-8 input encoding implied, TextDecl |
| } |
| else if (equals(SIGNATURE_UTF_8_BOM, signature)) |
| { |
| in.read(); |
| in.read(); |
| in.read(); |
| setInputEncoding("UTF-8"); |
| encodingDetected = true; |
| } |
| } |
| |
| private static boolean equals(int[] b1, int[] b2) |
| { |
| for (int i = 0; i < b1.length; i++) |
| { |
| if (b1[i] != b2[i]) |
| return false; |
| } |
| return true; |
| } |
| |
| void setInputEncoding(String encoding) |
| throws IOException |
| { |
| if (encoding.equals(inputEncoding)) |
| return; |
| if ("UTF-16".equalsIgnoreCase(encoding) && |
| inputEncoding.startsWith("UTF-16")) |
| return; |
| if (encodingDetected) |
| throw new UnsupportedEncodingException("document is not in its " + |
| "declared encoding " + |
| inputEncoding + |
| ": " + encoding); |
| inputEncoding = encoding; |
| finalizeEncoding(); |
| } |
| |
| void finalizeEncoding() |
| throws IOException |
| { |
| if (reader != null) |
| return; |
| reader = new BufferedReader(new InputStreamReader(in, inputEncoding)); |
| unicodeReader = new UnicodeReader(reader); |
| mark(1); |
| } |
| |
| } |
| |
| } |
| |