blob: 2a6833737bbf42de97a88a61a1f38fda9549522c [file] [log] [blame]
/* FullSyntaxBuilder.java --
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.xml.validation.relaxng;
import java.io.InputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.xml.XMLConstants;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.relaxng.datatype.DatatypeException;
import org.relaxng.datatype.DatatypeLibrary;
import org.relaxng.datatype.helpers.DatatypeLibraryLoader;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
import gnu.xml.stream.XMLParser;
/**
* Parses a RELAX NG XML DOM tree, constructing a compiled internal
* representation.
*
* @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
*/
class FullSyntaxBuilder
{
/**
* Complete vocabulary (elements and attributes) of the full syntax.
*/
static final Map VOCABULARY = new HashMap();
static final Set STRIPPED_ATTRIBUTES = new HashSet();
static final Set PATTERN_ELEMENTS = new HashSet();
static
{
Set elementAttrs = Collections.singleton("name");
Set dataAttrs = new HashSet();
dataAttrs.add("type");
dataAttrs.add("datatypeLibrary");
Set valueAttrs = new HashSet();
valueAttrs.add("type");
valueAttrs.add("datatypeLibrary");
valueAttrs.add("ns");
Set externalAttrs = Collections.singleton("href");
Set startAttrs = Collections.singleton("combine");
Set defineAttrs = new HashSet();
defineAttrs.add("name");
defineAttrs.add("combine");
Set nsAttrs = Collections.singleton("ns");
VOCABULARY.put("element", elementAttrs);
VOCABULARY.put("attribute", elementAttrs);
VOCABULARY.put("group", Collections.EMPTY_SET);
VOCABULARY.put("interleave", Collections.EMPTY_SET);
VOCABULARY.put("choice", Collections.EMPTY_SET);
VOCABULARY.put("optional", Collections.EMPTY_SET);
VOCABULARY.put("zeroOrMore", Collections.EMPTY_SET);
VOCABULARY.put("oneOrMore", Collections.EMPTY_SET);
VOCABULARY.put("list", Collections.EMPTY_SET);
VOCABULARY.put("mixed", Collections.EMPTY_SET);
VOCABULARY.put("ref", elementAttrs);
VOCABULARY.put("parentRef", elementAttrs);
VOCABULARY.put("empty", Collections.EMPTY_SET);
VOCABULARY.put("text", Collections.EMPTY_SET);
VOCABULARY.put("value", valueAttrs);
VOCABULARY.put("data", dataAttrs);
VOCABULARY.put("notAllowed", Collections.EMPTY_SET);
VOCABULARY.put("externalRef", externalAttrs);
VOCABULARY.put("grammar", Collections.EMPTY_SET);
VOCABULARY.put("param", elementAttrs);
VOCABULARY.put("except", Collections.EMPTY_SET);
VOCABULARY.put("div", Collections.EMPTY_SET);
VOCABULARY.put("include", externalAttrs);
VOCABULARY.put("start", startAttrs);
VOCABULARY.put("define", defineAttrs);
VOCABULARY.put("name", nsAttrs);
VOCABULARY.put("anyName", Collections.EMPTY_SET);
VOCABULARY.put("nsName", nsAttrs);
STRIPPED_ATTRIBUTES.add("name");
STRIPPED_ATTRIBUTES.add("type");
STRIPPED_ATTRIBUTES.add("combine");
PATTERN_ELEMENTS.add("element");
PATTERN_ELEMENTS.add("attribute");
PATTERN_ELEMENTS.add("group");
PATTERN_ELEMENTS.add("interleave");
PATTERN_ELEMENTS.add("choice");
PATTERN_ELEMENTS.add("optional");
PATTERN_ELEMENTS.add("zeroOrMore");
PATTERN_ELEMENTS.add("oneOrMore");
PATTERN_ELEMENTS.add("list");
PATTERN_ELEMENTS.add("mixed");
PATTERN_ELEMENTS.add("ref");
PATTERN_ELEMENTS.add("parentRef");
PATTERN_ELEMENTS.add("empty");
PATTERN_ELEMENTS.add("text");
PATTERN_ELEMENTS.add("value");
PATTERN_ELEMENTS.add("data");
PATTERN_ELEMENTS.add("notAllowed");
PATTERN_ELEMENTS.add("externalRef");
PATTERN_ELEMENTS.add("grammar");
}
private Set urls; // recursion checking
private int refCount; // creation of ref names
private Map datatypeLibraries;
/**
* Parse the specified document into a grammar.
*/
synchronized Grammar parse(Document doc)
throws IOException
{
urls = new HashSet();
refCount = 1;
doc.normalizeDocument(); // Normalize XML document
transform(doc); // Apply transformation rules to provide simple syntax
// 4.18. grammar element
Element p = doc.getDocumentElement();
Element grammar =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI, "grammar");
Element start =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI, "start");
doc.removeChild(p);
doc.appendChild(grammar);
grammar.appendChild(start);
start.appendChild(p);
transformGrammar(grammar, p);
Element define = getNextSiblingElement(start);
while (define != null)
{
Element next = getNextSiblingElement(define);
String name = define.getAttribute("new-name");
if (name != null)
{
define.setAttribute("name", name);
define.removeAttribute("new-name");
}
else
grammar.removeChild(define); // unreferenced
define = next;
}
// 4.19. define and ref elements
Set allDefines = new HashSet(), reachableDefines = new HashSet();
getDefines(allDefines, grammar, grammar, false);
getDefines(reachableDefines, grammar, start, true);
allDefines.removeAll(reachableDefines);
for (Iterator i = allDefines.iterator(); i.hasNext(); )
{
// remove unreachable defines
Element d = (Element) i.next();
Node parent = d.getParentNode();
parent.removeChild(d);
}
// replace all elements that are not children of defines by refs to new
// defines
Set elements = new HashSet();
getElements(elements, grammar, grammar);
for (Iterator i = elements.iterator(); i.hasNext(); )
{
Element element = (Element) i.next();
Node parent = element.getParentNode();
if (!reachableDefines.contains(parent))
{
define =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI, "define");
Element ref =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI, "ref");
String name = createRefName();
define.setAttribute("name", name);
ref.setAttribute("name", name);
parent.insertBefore(ref, element);
define.appendChild(element);
grammar.appendChild(define);
reachableDefines.add(define);
}
}
// Get defines that don't have element children
for (Iterator i = reachableDefines.iterator(); i.hasNext(); )
{
Element d = (Element) i.next();
Element child = getFirstChildElement(d);
if (child != null && "element".equals(child.getLocalName()))
i.remove();
}
// Expand refs that refer to these defines
expandRefs(reachableDefines, grammar);
// Remove any defines that don't have element children
for (Iterator i = reachableDefines.iterator(); i.hasNext(); )
{
Element d = (Element) i.next();
Node parent = d.getParentNode();
parent.removeChild(d);
}
transform2(p); // Apply second stage transformation rules
Grammar ret = parseGrammar(grammar);
datatypeLibraries = null; // free datatype libraries cache
return ret;
}
private void getDefines(Set defines, Element grammar, Element node,
boolean followRefs)
{
String elementName = node.getLocalName();
if ("define".equals(elementName))
defines.add(node);
else if ("ref".equals(elementName) && followRefs)
{
String rname = node.getAttribute("name");
Element define = getFirstChildElement(grammar);
define = getNextSiblingElement(define);
while (define != null)
{
String dname = define.getAttribute("name");
if (rname.equals(dname))
{
getDefines(defines, grammar, node, followRefs);
break;
}
define = getNextSiblingElement(define);
}
}
for (Element child = getFirstChildElement(node); child != null;
child = getNextSiblingElement(child))
getDefines(defines, grammar, child, followRefs);
}
private void getElements(Set elements, Element grammar, Element node)
{
String elementName = node.getLocalName();
if ("element".equals(elementName))
elements.add(node);
for (Element child = getFirstChildElement(node); child != null;
child = getNextSiblingElement(child))
getElements(elements, grammar, child);
}
private void expandRefs(Set defines, Element node)
throws GrammarException
{
String elementName = node.getLocalName();
if ("ref".equals(elementName))
{
String rname = node.getAttribute("name");
for (Iterator i = defines.iterator(); i.hasNext(); )
{
Element define = (Element) i.next();
String dname = define.getAttribute("name");
if (rname.equals(dname))
{
Element child = getFirstChildElement(define);
forbidRefs(child, rname);
Element refChild = (Element) child.cloneNode(true);
Node parent = node.getParentNode();
parent.insertBefore(refChild, node);
parent.removeChild(node);
node = refChild;
break;
}
}
}
for (Element child = getFirstChildElement(node); child != null;
child = getNextSiblingElement(child))
expandRefs(defines, child);
}
private void forbidRefs(Element node, String name)
throws GrammarException
{
String elementName = node.getLocalName();
if ("ref".equals(elementName))
{
String rname = node.getAttribute("name");
if (name.equals(rname))
throw new GrammarException("cannot expand ref with name '" + name +
"' due to circularity");
}
for (Element child = getFirstChildElement(node); child != null;
child = getNextSiblingElement(child))
forbidRefs(child, name);
}
private void transform(Node node)
throws IOException
{
Node parent = node.getParentNode();
switch (node.getNodeType())
{
case Node.ELEMENT_NODE:
// 4.1 Annotations
String elementNs = node.getNamespaceURI();
String elementName = node.getLocalName();
if (!XMLConstants.RELAXNG_NS_URI.equals(elementNs) ||
!VOCABULARY.containsKey(elementName))
parent.removeChild(node);
else
{
Set allowedAttrs = (Set) VOCABULARY.get(elementName);
NamedNodeMap attrs = node.getAttributes();
int len = attrs.getLength();
for (int i = len - 1; i >= 0; i--)
{
Node attr = attrs.item(i);
String attrNs = attr.getNamespaceURI();
String attrName = attr.getLocalName();
if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attrNs))
continue; // ignore namespace nodes
if (!(XMLConstants.RELAXNG_NS_URI.equals(attrNs) ||
attrNs == null) ||
!allowedAttrs.contains(attrName))
attrs.removeNamedItemNS(attrNs, attrName);
else
{
// 4.2 Whitespace
if (STRIPPED_ATTRIBUTES.contains(attrName))
attr.setNodeValue(attr.getNodeValue().trim());
// 4.3 datatypeLibrary attribute
else if ("datatypeLibrary".equals(attrName))
{
String dl = attr.getNodeValue();
attr.setNodeValue(escapeURL(dl));
}
// 4.5. href attribute
else if ("href".equals(attrName))
{
String href = attr.getNodeValue();
href = XMLParser.absolutize(node.getBaseURI(),
escapeURL(href));
attr.setNodeValue(href);
}
}
}
// 4.3 datatypeLibrary attribute
if ("data".equals(elementName) || "value".equals(elementName))
{
Element element = (Element) node;
String dl = element.getAttribute("datatypeLibrary");
if (dl == null)
{
Node p = parent;
while (dl == null && p != null &&
p.getNodeType() == Node.ELEMENT_NODE)
{
dl = ((Element) p)
.getAttribute("datatypeLibrary");
p = p.getParentNode();
}
if (dl == null)
dl = "";
element.setAttribute("datatypeLibrary", dl);
}
// 4.4. type attribute of value element
if ("value".equals(elementName))
{
String type = element.getAttribute("type");
if (type == null)
{
element.setAttribute("type", "token");
element.setAttribute("datatypeLibrary", "");
}
}
// 4.16. Constraints
// TODO validate type
}
// 4.6. externalRef element
else if ("externalRef".equals(elementName))
{
Element externalRef = (Element) node;
String href = externalRef.getAttribute("href");
// check for recursion
if (urls.contains(href))
throw new GrammarException("recursive href");
urls.add(href);
Element element = resolve(href);
String eNs = element.getNamespaceURI();
String eName = element.getLocalName();
if (!(XMLConstants.RELAXNG_NS_URI.equals(eNs) ||
eNs == null) ||
!PATTERN_ELEMENTS.contains(eName))
throw new GrammarException("externally referenced element " +
"is not a pattern");
transform(element);
urls.remove(href);
String ns = element.getAttribute("ns");
if (ns != null)
element.setAttribute("ns",
externalRef.getAttribute("ns"));
element = (Element) externalRef.getOwnerDocument()
.importNode(element, true);
parent.replaceChild(element, externalRef);
return;
}
// 4.7 include element
else if ("include".equals(elementName))
{
Element include = (Element) node;
String href = include.getAttribute("href");
// check for recursion
if (urls.contains(href))
throw new GrammarException("recursive href");
urls.add(href);
Element element = resolve(href);
String eNs = element.getNamespaceURI();
String eName = element.getLocalName();
if (!(XMLConstants.RELAXNG_NS_URI.equals(eNs) ||
eNs == null) ||
!"grammar".equals(eName))
throw new GrammarException("included element is not " +
"a grammar");
transform(element);
urls.remove(href);
// handle components
List includeComponents = getComponents(include);
List grammarComponents = getComponents(element);
for (Iterator i = includeComponents.iterator(); i.hasNext(); )
{
Element comp = (Element) i.next();
String compName = comp.getLocalName();
if ("start".equals(compName))
{
boolean found = false;
for (Iterator j = grammarComponents.iterator();
j.hasNext(); )
{
Element c2 = (Element) j.next();
if ("start".equals(c2.getLocalName()))
{
c2.getParentNode().removeChild(c2);
found = true;
}
}
if (!found)
throw new GrammarException("no start component in " +
"included grammar");
}
else if ("define".equals(compName))
{
String name = comp.getAttribute("name");
boolean found = false;
for (Iterator j = grammarComponents.iterator();
j.hasNext(); )
{
Element c2 = (Element) j.next();
if ("define".equals(c2.getLocalName()) &&
name.equals(c2.getAttribute("name")))
{
c2.getParentNode().removeChild(c2);
found = true;
}
}
if (!found)
throw new GrammarException("no define component " +
"with name '" + name +
"' in included grammar");
}
}
// transform to div element
Document doc = include.getOwnerDocument();
Element includeDiv =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI, "div");
Element grammarDiv =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI, "div");
// XXX copy include non-href attributes (none defined?)
element = (Element) doc.importNode(element, true);
Node ctx = element.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
grammarDiv.appendChild(ctx);
ctx = next;
}
includeDiv.appendChild(grammarDiv);
ctx = include.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
includeDiv.appendChild(ctx);
ctx = next;
}
parent.replaceChild(includeDiv, include);
transform(includeDiv);
return;
}
// 4.8. name attribute of element and attribute elements
else if ("attribute".equals(elementName) ||
"element".equals(elementName))
{
Element element = (Element) node;
String name = element.getAttribute("name");
if (name != null)
{
Document doc = element.getOwnerDocument();
Element n =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI, "name");
n.appendChild(doc.createTextNode(name));
Node first = element.getFirstChild();
if (first != null)
element.insertBefore(n, first);
else
element.appendChild(n);
if ("attribute".equals(elementName))
{
String ns = element.getAttribute("ns");
if (ns != null)
{
n.setAttribute("ns", ns);
element.removeAttribute("ns");
}
}
element.removeAttribute("name");
}
// 4.12. Number of child elements
if ("attribute".equals(elementName))
{
if (getComponents(node).size() == 1)
{
Document doc = node.getOwnerDocument();
Element text =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI,
"text");
node.appendChild(text);
}
}
else // element
{
if (node.getChildNodes().getLength() > 2)
{
// transform to 2 child elements
Document doc = node.getOwnerDocument();
Element child =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI,
"group");
Node ctx = getFirstChildElement(node);
ctx = getNextSiblingElement(ctx); // skip 1
while (ctx != null)
{
Node next = getNextSiblingElement(ctx);
child.appendChild(ctx);
ctx = next;
}
node.appendChild(child);
}
}
}
// 4.11. div element
else if ("div".equals(elementName))
{
Node ctx = node.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
parent.insertBefore(ctx, node);
transform(ctx);
ctx = next;
}
parent.removeChild(node);
return;
}
else if ("mixed".equals(elementName))
{
// 4.12. Number of child elements
transformToOneChildElement(node, "group");
// 4.13. mixed element
Document doc = node.getOwnerDocument();
Node interleave =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI,
"interleave");
Node ctx = node.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
interleave.appendChild(ctx);
ctx = next;
}
Node text =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI,
"text");
interleave.appendChild(text);
parent.insertBefore(interleave, node);
parent.removeChild(node);
node = interleave;
}
else if ("optional".equals(elementName))
{
// 4.12. Number of child elements
transformToOneChildElement(node, "group");
// 4.14. optional element
Document doc = node.getOwnerDocument();
Node choice =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI,
"choice");
Node ctx = node.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
choice.appendChild(ctx);
ctx = next;
}
Node empty =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI,
"empty");
choice.appendChild(empty);
parent.insertBefore(choice, node);
parent.removeChild(node);
node = choice;
}
else if ("zeroOrMore".equals(elementName))
{
// 4.12. Number of child elements
transformToOneChildElement(node, "group");
// 4.15. zeroOrMore element
Document doc = node.getOwnerDocument();
Node choice =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI,
"choice");
Node oneOrMore =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI,
"oneOrMore");
Node ctx = node.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
oneOrMore.appendChild(ctx);
ctx = next;
}
Node empty =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI,
"empty");
choice.appendChild(oneOrMore);
choice.appendChild(empty);
parent.insertBefore(choice, node);
parent.removeChild(node);
node = choice;
}
else if ("list".equals(elementName) ||
"oneOrMore".equals(elementName) ||
"define".equals(elementName))
{
// 4.12. Number of child elements
transformToOneChildElement(node, "group");
}
else if ("except".equals(elementName))
{
// 4.12. Number of child elements
transformToOneChildElement(node, "choice");
// 4.16. Constraints
String parentName = parent.getLocalName();
if ("anyName".equals(parentName))
forbidDescendants(node, Collections.singleton("anyName"));
else if ("nsName".equals(parentName))
{
Set names = new HashSet();
names.add("nsName");
names.add("anyName");
forbidDescendants(node, names);
}
}
else if ("choice".equals(elementName) ||
"group".equals(elementName) ||
"interleave".equals(elementName))
{
// 4.12. Number of child elements
Node ctx = getFirstChildElement(node);
Node next = getNextSiblingElement(ctx);
if (next == null)
{
// replace
parent.insertBefore(ctx, node);
parent.removeChild(node);
transform(ctx);
return;
}
else
{
// transform to 2 child elements
Node next2 = getNextSiblingElement(next);
if (next2 != null)
{
Document doc = node.getOwnerDocument();
Node child =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI,
elementName);
child.appendChild(ctx);
child.appendChild(next);
node.insertBefore(next2, child);
transform(node); // recurse
}
}
}
// 4.17. combine attribute
else if ("grammar".equals(elementName))
{
String combine = null;
List nodes = new LinkedList();
Node ctx = node.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
if ("start".equals(ctx.getLocalName()))
{
String c = ((Element) ctx).getAttribute("combine");
if (combine != null && !combine.equals(c))
throw new GrammarException("multiple start elements "+
"but no combine attribute");
combine = c;
nodes.add(ctx);
}
ctx = next;
}
if (!nodes.isEmpty())
combineNodes(node, combine, "start", nodes);
// defines
Map defines = new HashMap();
Map defineCombines = new HashMap();
ctx = node.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
if ("define".equals(ctx.getLocalName()))
{
String name = ((Element) ctx).getAttribute("name");
combine = (String) defineCombines.get(name);
String c = ((Element) ctx).getAttribute("combine");
if (combine != null && !combine.equals(c))
throw new GrammarException("multiple define " +
"elements with name '"+
name + "' but no " +
"combine attribute");
defineCombines.put(name, c);
nodes = (List) defines.get(name);
if (nodes == null)
{
nodes = new LinkedList();
defines.put(name, nodes);
}
nodes.add(ctx);
}
ctx = next;
}
for (Iterator i = defines.keySet().iterator(); i.hasNext(); )
{
String name = (String) i.next();
combine = (String) defineCombines.get(name);
nodes = (List) defines.get(name);
if (!nodes.isEmpty())
combineNodes(node, combine, "define", nodes);
}
}
// 4.9. ns attribute
if ("name".equals(elementName) ||
"nsName".equals(elementName) ||
"value".equals(elementName))
{
Element element = (Element) node;
String ns = element.getAttribute("ns");
if (ns == null)
{
Node ctx = parent;
while (ns == null && ctx != null &&
ctx.getNodeType() == Node.ELEMENT_NODE)
{
ns = ((Element) ctx).getAttribute("ns");
ctx = ctx.getParentNode();
}
element.setAttribute("ns", (ns == null) ? "" : ns);
}
if ("name".equals(elementName))
{
// 4.10. QNames
String name = element.getTextContent();
int ci = name.indexOf(':');
if (ci != -1)
{
String prefix = name.substring(0, ci);
element.setTextContent(name.substring(ci + 1));
ns = element.lookupNamespaceURI(prefix);
element.setAttribute("ns", (ns == null) ? "" : ns);
}
// 4.16. Constraints
if (isDescendantOfFirstChildOfAttribute(element) &&
"".equals(element.getAttribute("ns")) &&
"xmlns".equals(element.getTextContent()))
throw new GrammarException("name cannot be xmlns");
}
else if ("nsName".equals(elementName))
{
// 4.16. Constraints
if (isDescendantOfFirstChildOfAttribute(element) &&
"http://www.w3.org/2000/xmlns"
.equals(element.getAttribute("ns")))
throw new GrammarException("nsName cannot be XMLNS URI");
}
}
}
break;
case Node.TEXT_NODE:
case Node.CDATA_SECTION_NODE:
// 4.2 Whitespace
String parentName = parent.getLocalName();
if ("name".equals(parentName))
node.setNodeValue(node.getNodeValue().trim());
if (!"param".equals(parentName) &&
!"value".equals(parentName) &&
isWhitespace(node.getNodeValue()))
parent.removeChild(node);
break;
case Node.DOCUMENT_NODE:
break;
default:
parent.removeChild(node);
}
// Transform children
Node ctx = node.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
transform(ctx);
ctx = next;
}
}
/**
* Transforms the schema to place all defines under the top-level grammar
* element and replace all other grammar elements by their start child.
*/
private void transformGrammar(Node grammar, Node node)
throws GrammarException
{
if (node.getNodeType() == Node.ELEMENT_NODE)
{
String elementName = node.getLocalName();
if ("grammar".equals(elementName))
{
handleRefs(grammar, node, node);
Node start = null;
Node ctx = node.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
String childName = ctx.getLocalName();
if ("define".equals(childName))
grammar.appendChild(ctx);
else if ("start".equals(childName))
start = ctx;
ctx = next;
}
if (start == null)
throw new GrammarException("no start element for grammar");
Node p = getFirstChildElement(start);
Node parent = node.getParentNode();
parent.insertBefore(p, node);
parent.removeChild(node);
node = p;
}
Node ctx = node.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
transformGrammar(grammar, ctx);
ctx = next;
}
}
}
/**
* Checks that all references in the specified grammar match a define in
* the grammar.
*/
private void handleRefs(Node grammar1, Node grammar2, Node node)
throws GrammarException
{
if (node.getNodeType() == Node.ELEMENT_NODE)
{
String elementName = node.getLocalName();
if ("ref".equals(elementName) || "parentRef".equals(elementName))
{
Node grammar = grammar2;
if ("parentRef".equals(elementName))
grammar = grammar1;
String name = ((Element) node).getAttribute("name");
if (name != null)
throw new GrammarException("no name attribute on " +
elementName);
Node define = null;
for (Node ctx = grammar.getFirstChild();
define == null && ctx != null;
ctx = ctx.getNextSibling())
{
if ("define".equals(ctx.getLocalName()))
{
String dname = ((Element) ctx).getAttribute("name");
if (name.equals(dname))
define = ctx;
}
}
if (define == null)
throw new GrammarException("no define for '" + name + "'");
name = ((Element) define).getAttribute("new-name");
if (name == null)
{
name = createRefName();
((Element) define).setAttribute("new-name", name);
}
if ("parentRef".equals(elementName))
{
Document doc = node.getOwnerDocument();
Node ref = doc.createElementNS(XMLConstants.RELAXNG_NS_URI,
"ref");
Node ctx = node.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
ref.appendChild(ctx);
ctx = next;
}
Node parent = node.getParentNode();
parent.insertBefore(ref, node);
parent.removeChild(node);
node = ref;
}
((Element) node).setAttribute("name", name);
}
else if ("grammar".equals(elementName))
{
grammar1 = grammar2;
grammar2 = node;
}
Node ctx = node.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
handleRefs(grammar1, grammar2, ctx);
ctx = next;
}
}
}
private String createRefName()
{
return "ref" + Integer.toString(refCount++);
}
private void transform2(Node node)
throws GrammarException
{
Node parent = node.getParentNode();
if (node.getNodeType() == Node.ELEMENT_NODE)
{
String elementName = node.getLocalName();
// 4.20. notAllowed element
if ("notAllowed".equals(elementName))
{
String parentName = parent.getLocalName();
if ("attribute".equals(parentName) ||
"list".equals(parentName) ||
"group".equals(parentName) ||
"interleave".equals(parentName) ||
"oneOrMore".equals(parentName))
{
Node pp = parent.getParentNode();
pp.insertBefore(node, parent);
pp.removeChild(parent);
transform2(node); // apply recursively
return;
}
else if ("choice".equals(parentName))
{
Node p1 = getFirstChildElement(parent);
Node p2 = getNextSiblingElement(p1);
if (p1 == null || p2 == null)
throw new GrammarException("choice does not have two " +
"children");
String p1Name = p1.getLocalName();
String p2Name = p2.getLocalName();
Node pp = parent.getParentNode();
if ("notAllowed".equals(p1Name) &&
"notAllowed".equals(p2Name))
{
pp.insertBefore(p1, parent);
pp.removeChild(parent);
transform2(p1); //apply recursively
return;
}
else if ("notAllowed".equals(p1Name))
{
pp.insertBefore(p2, parent);
pp.removeChild(parent);
transform2(p2);
return;
}
else
{
pp.insertBefore(p1, parent);
pp.removeChild(parent);
transform2(p1);
return;
}
}
else if ("except".equals(parentName))
{
Node pp = parent.getParentNode();
pp.removeChild(parent);
return;
}
}
// 4.21. empty element
else if ("empty".equals(elementName))
{
String parentName = parent.getLocalName();
if ("group".equals(parentName) ||
"interleave".equals(parentName))
{
Node p1 = getFirstChildElement(parent);
Node p2 = getNextSiblingElement(p1);
if (p1 == null || p2 == null)
throw new GrammarException(parentName + " does not have " +
"two children");
String p1Name = p1.getLocalName();
String p2Name = p2.getLocalName();
Node pp = parent.getParentNode();
if ("empty".equals(p1Name) &&
"empty".equals(p2Name))
{
pp.insertBefore(p1, parent);
pp.removeChild(parent);
transform2(p1);
return;
}
else if ("empty".equals(p1Name))
{
pp.insertBefore(p2, parent);
pp.removeChild(parent);
transform2(p2);
return;
}
else
{
pp.insertBefore(p1, parent);
pp.removeChild(parent);
transform2(p1);
return;
}
}
else if ("choice".equals(parentName))
{
Node p1 = getFirstChildElement(parent);
Node p2 = getNextSiblingElement(p1);
if (p1 == null || p2 == null)
throw new GrammarException(parentName + " does not have " +
"two children");
String p1Name = p1.getLocalName();
String p2Name = p2.getLocalName();
Node pp = parent.getParentNode();
if ("empty".equals(p1Name) &&
"empty".equals(p2Name))
{
pp.insertBefore(p1, parent);
pp.removeChild(parent);
transform2(p1);
return;
}
}
else if ("oneOrMore".equals(parentName))
{
Node pp = parent.getParentNode();
pp.insertBefore(node, parent);
pp.removeChild(parent);
transform2(node);
return;
}
}
Node ctx = node.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
transform2(ctx);
ctx = next;
}
}
}
private static boolean isWhitespace(String text)
{
int len = text.length();
for (int i = 0; i < len; i++)
{
char c = text.charAt(i);
if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
return false;
}
return true;
}
private static String escapeURL(String url)
{
try
{
return URLEncoder.encode(url, "UTF-8");
}
catch (UnsupportedEncodingException e)
{
RuntimeException e2 = new RuntimeException("UTF-8 is unsupported");
e2.initCause(e);
throw e2;
}
}
/**
* Resolve a URL to an element, as described in section 4.5.
*/
private static Element resolve(String url)
throws IOException
{
try
{
URL u = new URL(url);
InputStream in = u.openStream();
DocumentBuilderFactory f = DocumentBuilderFactory.newInstance();
f.setNamespaceAware(true);
f.setCoalescing(true);
f.setExpandEntityReferences(true);
f.setIgnoringComments(true);
f.setIgnoringElementContentWhitespace(true);
DocumentBuilder b = f.newDocumentBuilder();
Document doc = b.parse(in, url);
in.close();
String fragment = u.getRef();
if (fragment != null)
return doc.getElementById(fragment);
return doc.getDocumentElement();
}
catch (SAXException e)
{
IOException e2 = new IOException("error parsing included element");
e2.initCause(e);
throw e2;
}
catch (ParserConfigurationException e)
{
IOException e2 = new IOException("error parsing included element");
e2.initCause(e);
throw e2;
}
}
/**
* Returns the "components" of an element, as described in section 4.7.
*/
private List getComponents(Node node)
{
List ret = new LinkedList();
for (Node ctx = node.getFirstChild(); ctx != null;
ctx = ctx.getNextSibling())
{
if (ctx.getNodeType() != Node.ELEMENT_NODE)
continue;
String ns = ctx.getNamespaceURI();
if (ns != null && !ns.equals(XMLConstants.RELAXNG_NS_URI))
continue;
String name = ctx.getLocalName();
if ("div".equals(name))
ret.addAll(getComponents(ctx));
else if (VOCABULARY.containsKey(name))
ret.add(ctx);
}
return ret;
}
private static void transformToOneChildElement(Node node, String name)
{
if (node.getChildNodes().getLength() < 2)
return;
Document doc = node.getOwnerDocument();
Element child = doc.createElementNS(XMLConstants.RELAXNG_NS_URI, name);
Node ctx = getFirstChildElement(node);
while (ctx != null)
{
Node next = getNextSiblingElement(ctx);
child.appendChild(ctx);
ctx = next;
}
node.appendChild(child);
}
private static Element getFirstChildElement(Node node)
{
Node ctx = node.getFirstChild();
while (ctx != null && ctx.getNodeType() != Node.ELEMENT_NODE)
ctx = ctx.getNextSibling();
return (Element) ctx;
}
private static Element getNextSiblingElement(Node node)
{
Node ctx = node.getNextSibling();
while (ctx != null && ctx.getNodeType() != Node.ELEMENT_NODE)
ctx = ctx.getNextSibling();
return (Element) ctx;
}
private static void forbidDescendants(Node node, Set names)
throws GrammarException
{
for (Node ctx = node.getFirstChild(); ctx != null;
ctx = ctx.getNextSibling())
{
String ns = ctx.getNamespaceURI();
if (!XMLConstants.RELAXNG_NS_URI.equals(ns))
continue;
String name = ctx.getLocalName();
if (names.contains(name))
throw new GrammarException("name not allowed: " + name);
forbidDescendants(ctx, names);
}
}
private static boolean isDescendantOfFirstChildOfAttribute(Node node)
{
Node child = node;
Node parent = node.getParentNode();
while (parent != null && !"attribute".equals(parent.getLocalName()))
{
child = parent;
parent = child.getParentNode();
}
if (parent == null)
return false;
Node firstChild = getFirstChildElement(parent);
return firstChild == child;
}
private static void combineNodes(Node node, String combine, String name,
List nodes)
{
Document doc = node.getOwnerDocument();
Node child =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI, name);
Node combineNode =
doc.createElementNS(XMLConstants.RELAXNG_NS_URI, combine);
child.appendChild(combineNode);
boolean inserted = false;
for (Iterator i = nodes.iterator(); i.hasNext(); )
{
Node startNode = (Node) i.next();
if (!inserted)
{
node.insertBefore(child, startNode);
inserted = true;
}
Node ctx = startNode.getFirstChild();
while (ctx != null)
{
Node next = ctx.getNextSibling();
combineNode.appendChild(ctx);
ctx = next;
}
node.removeChild(startNode);
}
}
Grammar parseGrammar(Element node)
throws GrammarException
{
checkName(node, "grammar");
Grammar grammar = new Grammar();
Element start = getFirstChildElement(node);
grammar.start = parsePattern(getFirstChildElement(start));
for (Element define = getNextSiblingElement(start); define != null;
define = getNextSiblingElement(define))
grammar.defines.add(parseDefine(define));
return grammar;
}
Define parseDefine(Element node)
throws GrammarException
{
checkName(node, "define");
Define define = new Define();
define.name = node.getAttribute("name");
define.element = parseElement(getFirstChildElement(node));
return define;
}
Pattern parseTop(Element node)
throws GrammarException
{
String name = node.getLocalName();
if ("notAllowed".equals(name))
return parseNotAllowed(node);
return parsePattern(node);
}
Pattern parsePattern(Element node)
throws GrammarException
{
String name = node.getLocalName();
if ("empty".equals(name))
return parseEmpty(node);
return parseNonEmptyPattern(node);
}
Pattern parseNonEmptyPattern(Element node)
throws GrammarException
{
String name = node.getLocalName();
if ("text".equals(name))
return parseText(node);
else if ("data".equals(name))
return parseData(node);
else if ("value".equals(name))
return parseValue(node);
else if ("list".equals(name))
return parseList(node);
else if ("attribute".equals(name))
return parseAttribute(node);
else if ("ref".equals(name))
return parseRef(node);
else if ("oneOrMore".equals(name))
return parseOneOrMore(node);
else if ("choice".equals(name))
return parseChoice(node);
else if ("group".equals(name))
return parseGroup(node);
else if ("interleave".equals(name))
return parseInterleave(node);
throw new GrammarException("invalid pattern: " + name);
}
ElementPattern parseElement(Element node)
throws GrammarException
{
checkName(node, "element");
ElementPattern element = new ElementPattern();
Element nameClass = getFirstChildElement(node);
element.nameClass = parseNameClass(nameClass);
element.pattern = parseTop(getNextSiblingElement(nameClass));
return element;
}
NotAllowedPattern parseNotAllowed(Element node)
throws GrammarException
{
checkName(node, "notAllowed");
return NotAllowedPattern.INSTANCE;
}
EmptyPattern parseEmpty(Element node)
throws GrammarException
{
checkName(node, "empty");
return EmptyPattern.INSTANCE;
}
TextPattern parseText(Element node)
throws GrammarException
{
checkName(node, "text");
return TextPattern.INSTANCE;
}
DataPattern parseData(Element node)
throws GrammarException
{
checkName(node, "data");
DataPattern data = new DataPattern();
DatatypeLibrary dl =
getDatatypeLibrary(node.getAttribute("datatypeLibrary"));
String type = node.getAttribute("type");
try
{
data.type = dl.createDatatype(type);
data.datatypeLibrary = dl;
}
catch (DatatypeException e)
{
GrammarException e2 = new GrammarException(type);
e2.initCause(e);
throw e2;
}
Element ctx = getFirstChildElement(node);
while (ctx != null)
{
Element next = getNextSiblingElement(ctx);
String name = ctx.getLocalName();
if ("param".equals(name))
data.params.add(parseParam(ctx));
else if ("except".equals(name) && next == null)
data.exceptPattern = parsePattern(getFirstChildElement(ctx));
else
throw new GrammarException("invalid element: " + name);
ctx = next;
}
return data;
}
Param parseParam(Element node)
throws GrammarException
{
checkName(node, "param");
Param param = new Param();
param.name = node.getAttribute("name");
param.value = node.getTextContent();
return param;
}
ValuePattern parseValue(Element node)
throws GrammarException
{
checkName(node, "value");
ValuePattern value = new ValuePattern();
DatatypeLibrary dl =
getDatatypeLibrary(node.getAttribute("datatypeLibrary"));
String type = node.getAttribute("type");
try
{
value.type = dl.createDatatype(type);
value.datatypeLibrary = dl;
}
catch (DatatypeException e)
{
GrammarException e2 = new GrammarException(type);
e2.initCause(e);
throw e2;
}
value.ns = node.getAttribute("ns");
value.value = node.getTextContent();
return value;
}
ListPattern parseList(Element node)
throws GrammarException
{
checkName(node, "list");
ListPattern list = new ListPattern();
list.pattern = parsePattern(getFirstChildElement(node));
return list;
}
AttributePattern parseAttribute(Element node)
throws GrammarException
{
checkName(node, "attribute");
AttributePattern attribute = new AttributePattern();
Element nameClass = getFirstChildElement(node);
attribute.nameClass = parseNameClass(nameClass);
attribute.pattern = parsePattern(getNextSiblingElement(nameClass));
return attribute;
}
RefPattern parseRef(Element node)
throws GrammarException
{
checkName(node, "ref");
RefPattern ref = new RefPattern();
ref.name = node.getAttribute("name");
return ref;
}
OneOrMorePattern parseOneOrMore(Element node)
throws GrammarException
{
checkName(node, "oneOrMore");
OneOrMorePattern oneOrMore = new OneOrMorePattern();
oneOrMore.pattern = parseNonEmptyPattern(getFirstChildElement(node));
return oneOrMore;
}
ChoicePattern parseChoice(Element node)
throws GrammarException
{
checkName(node, "choice");
ChoicePattern choice = new ChoicePattern();
Element p1 = getFirstChildElement(node);
Element p2 = getNextSiblingElement(p1);
choice.pattern1 = parsePattern(p1);
choice.pattern2 = parseNonEmptyPattern(p2);
return choice;
}
GroupPattern parseGroup(Element node)
throws GrammarException
{
checkName(node, "group");
GroupPattern group = new GroupPattern();
Element p1 = getFirstChildElement(node);
Element p2 = getNextSiblingElement(p1);
group.pattern1 = parseNonEmptyPattern(p1);
group.pattern2 = parseNonEmptyPattern(p2);
return group;
}
InterleavePattern parseInterleave(Element node)
throws GrammarException
{
checkName(node, "interleave");
InterleavePattern interleave = new InterleavePattern();
Element p1 = getFirstChildElement(node);
Element p2 = getNextSiblingElement(p1);
interleave.pattern1 = parseNonEmptyPattern(p1);
interleave.pattern2 = parseNonEmptyPattern(p2);
return interleave;
}
NameClass parseNameClass(Element node)
throws GrammarException
{
String name = node.getLocalName();
if ("anyName".equals(name))
return parseAnyName(node);
else if ("name".equals(name))
return parseName(node);
else if ("nsName".equals(name))
return parseNsName(node);
else if ("choice".equals(name))
return parseChoiceNameClass(node);
throw new GrammarException("invalid name class: " + name);
}
AnyNameNameClass parseAnyName(Element node)
throws GrammarException
{
checkName(node, "anyName");
AnyNameNameClass anyName = new AnyNameNameClass();
Element except = getFirstChildElement(node);
if (except != null) {
checkName(except, "except");
anyName.exceptNameClass = parseNameClass(getFirstChildElement(except));
}
return anyName;
}
NameNameClass parseName(Element node)
throws GrammarException
{
checkName(node, "name");
NameNameClass name = new NameNameClass();
name.ns = node.getAttribute("ns");
name.name = node.getTextContent();
return name;
}
NSNameNameClass parseNsName(Element node)
throws GrammarException
{
checkName(node, "nsName");
NSNameNameClass nsName = new NSNameNameClass();
nsName.ns = node.getAttribute("ns");
Element except = getFirstChildElement(node);
if (except != null) {
checkName(except, "except");
nsName.exceptNameClass = parseNameClass(getFirstChildElement(except));
}
return nsName;
}
ChoiceNameClass parseChoiceNameClass(Element node)
throws GrammarException
{
checkName(node, "choice");
ChoiceNameClass choice = new ChoiceNameClass();
Element c1 = getFirstChildElement(node);
Element c2 = getNextSiblingElement(c1);
choice.name1 = parseNameClass(c1);
choice.name2 = parseNameClass(c2);
return choice;
}
void checkName(Element node, String name)
throws GrammarException
{
if (!name.equals(node.getLocalName()))
throw new GrammarException("expecting " + name);
}
DatatypeLibrary getDatatypeLibrary(String uri)
throws GrammarException
{
if (datatypeLibraries == null)
datatypeLibraries = new HashMap();
DatatypeLibrary library = (DatatypeLibrary) datatypeLibraries.get(uri);
if (library == null)
{
library = new DatatypeLibraryLoader().createDatatypeLibrary(uri);
if (library == null)
throw new GrammarException("Datatype library not supported: " + uri);
datatypeLibraries.put(uri, library);
}
return library;
}
}