// blob: 02607a4e06d27cab5110dd8d31311f2239326b74 [file] [log] [blame]
/* XIncludeFilter.java --
Copyright (C) 2001,2002 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.xml.pipeline;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Hashtable;
import java.util.Stack;
import java.util.Vector;
import org.xml.sax.Attributes;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import gnu.xml.util.Resolver;
/**
* Filter to process an XPointer-free subset of
* <a href="http://www.w3.org/TR/xinclude">XInclude</a>, supporting its
* use as a kind of replacement for parsed general entities.
* XInclude works much like the <code>#include</code> of C/C++ but
* works for XML documents as well as unparsed text files.
* Restrictions from the 17-Sept-2002 CR draft of XInclude are as follows:
*
* <ul>
*
* <li> URIs must not include fragment identifiers.
* The CR specifies support for XPointer <em>element()</em> fragment IDs,
* which is not currently implemented here.
*
* <li> <em>xi:fallback</em> handling of resource errors is not
* currently supported.
*
* <li> DTDs are not supported in included files, since the SAX DTD events
* must have completely preceded any included file.
* The CR explicitly allows the DTD related portions of the infoset to
* grow as an effect of including XML documents.
*
* <li> <em>xml:base</em> fixup isn't done.
*
* </ul>
*
* <p> XML documents that are included will normally be processed using
* the default SAX namespace rules, meaning that prefix information may
* be discarded. This may be changed with {@link #setSavingPrefixes
* setSavingPrefixes()}. <em>You are strongly advised to do this.</em>
*
* <p> Note that XInclude allows highly incompatible implementations, which
* are specialized to handle application-specific infoset extensions. Some
* such implementations can be implemented by subclassing this one, but
* they may only be substituted in applications at "user option".
*
* <p>TBD: "IURI" handling.
*
* @author David Brownell
*/
public class XIncludeFilter extends EventFilter implements Locator
{
// external general entities seen in externalEntityDecl():
// entity name (String) -> system ID (String)
private Hashtable extEntities = new Hashtable (5, 5);
// nonzero while events are being discarded; it is set when an
// xi:include element has been processed and is then bumped for each
// element or entity that starts before that element ends
private int ignoreCount;
// stack of base URIs (java.net.URL); entries are pushed for entity
// markers, for each reported element, and for xincluded documents
private Stack uris = new Stack ();
// locator behind the proxy Locator methods of this class; may be null
private Locator locator;
// URIs (String) of the entities currently being processed; consulted
// to detect circular inclusions
private Vector inclusions = new Vector (5, 5);
// value managed by setSavingPrefixes() and isSavingPrefixes()
private boolean savingPrefixes;
/**
* Constructs a filter that hands its output events to the specified
* consumer.  The new filter registers itself as its own content
* handler, declaration handler, and lexical handler.
*
* @param next the consumer passed to the superclass constructor
* @throws SAXException declared by this constructor; presumably raised
*	when a handler can't be registered -- TODO confirm against
*	EventFilter
*/
public XIncludeFilter (EventConsumer next)
throws SAXException
{
super (next);
setContentHandler (this);
// DTDHandler callbacks pass straight through
setProperty (DECL_HANDLER, this);
setProperty (LEXICAL_HANDLER, this);
}
/**
 * Reports a fatal error to the registered error handler, when there
 * is one, and then throws it.  This method never returns normally.
 *
 * @param e the error to report and throw
 * @throws SAXException always; this is <code>e</code> itself unless
 *	the handler's <code>fatalError()</code> throws first
 */
private void fatal (SAXParseException e) throws SAXException
{
    final ErrorHandler handler = getErrorHandler ();

    if (handler != null) {
        handler.fatalError (e);
    }
    throw e;
}
/**
* Passes "this" down the filter chain as a proxy locator.
* The locator supplied by the caller is kept in a field, and the
* <code>Locator</code> methods of this class delegate to it.
*
* @param locator the locator to delegate to
*/
public void setDocumentLocator (Locator locator)
{
// remember the real locator for the proxy methods
this.locator = locator;
// downstream components are handed this filter, not the real locator
super.setDocumentLocator (this);
}
/**
 * Used for proxy locator; do not call directly.
 *
 * @return the system ID reported by the current locator, or null
 *	when no locator is set
 */
public String getSystemId ()
{
    if (locator == null) {
        return null;
    }
    return locator.getSystemId ();
}
/**
 * Used for proxy locator; do not call directly.
 *
 * @return the public ID reported by the current locator, or null
 *	when no locator is set
 */
public String getPublicId ()
{
    if (locator == null) {
        return null;
    }
    return locator.getPublicId ();
}
/**
 * Used for proxy locator; do not call directly.
 *
 * @return the line number reported by the current locator, or -1
 *	when no locator is set
 */
public int getLineNumber ()
{
    if (locator == null) {
        return -1;
    }
    return locator.getLineNumber ();
}
/**
 * Used for proxy locator; do not call directly.
 *
 * @return the column number reported by the current locator, or -1
 *	when no locator is set
 */
public int getColumnNumber ()
{
    if (locator == null) {
        return -1;
    }
    return locator.getColumnNumber ();
}
/**
 * Assigns the flag controlling the setting of the SAX2
 * <em>namespace-prefixes</em> flag.
 *
 * @param flag the new value, as later returned by
 *	{@link #isSavingPrefixes isSavingPrefixes()}
 */
public void setSavingPrefixes (boolean flag)
{
    this.savingPrefixes = flag;
}
/**
 * Returns the flag that is meant to control the SAX2
 * <em>namespace-prefixes</em> flag when included documents are parsed.
 * Until {@link #setSavingPrefixes setSavingPrefixes()} is called the
 * value is the SAX2 default (false), which discards information that
 * can be useful.
 *
 * @return the value most recently assigned, or false if none was
 */
public boolean isSavingPrefixes ()
{
    return this.savingPrefixes;
}
//
// Two mechanisms are interacting here.
//
// - XML Base implies a stack of base URIs, updated both by
// "real entity" boundaries and element boundaries.
//
// - Active "Real Entities" (for document and general entities,
// and by xincluded files) are tracked to prevent circular
// inclusions.
//
/**
 * Records the start of a "real entity": its URI is added to the list
 * of active inclusions and pushed as the current base URI.
 *
 * @param uri the entity's URI; used only when the current locator
 *	can't supply a system ID
 * @return the URI string that was recorded
 * @throws SAXException if no URI is known, if the URI can't be
 *	turned into a URL, or if the entity is already being processed
 */
private String addMarker (String uri)
    throws SAXException
{
    // a system ID from the locator takes precedence over the argument
    final String reported = (locator == null) ? null : locator.getSystemId ();
    if (reported != null) {
        uri = reported;
    }

    // guard against InputSource objects without system IDs
    if (uri == null) {
        fatal (new SAXParseException ("Entity URI is unknown", locator));
    }

    try {
        final URL marker = new URL (uri);

        uri = marker.toString ();
        if (inclusions.indexOf (uri) >= 0) {
            fatal (new SAXParseException (
                "XInclude, circular inclusion", locator));
        }
        inclusions.add (uri);
        uris.push (marker);
    } catch (IOException e) {
        // guard against illegal relative URIs (Xerces)
        fatal (new SAXParseException ("parser bug: relative URI",
            locator, e));
    }
    return uri;
}
/**
 * Undoes one marker: removes the URI from the list of active
 * inclusions and pops the top of the base URI stack.
 *
 * @param uri the URI string to remove from the inclusion list
 */
private void pop (String uri)
{
    // the argument is a String, so this is Vector.remove(Object)
    inclusions.remove (uri);
    uris.pop ();
}
//
// Document entity boundaries get both treatments.
//
/**
 * Starts a new document: resets all per-document state, records the
 * document entity as the first active inclusion and base URI, and
 * then reports the event downstream.
 *
 * @throws SAXException if the document's URI is unknown or unusable,
 *	or if a downstream handler fails
 */
public void startDocument () throws SAXException
{
    // A previous parse that stopped on a fatal error never reached
    // endDocument(), so its markers may still be here.  Without this
    // cleanup, parsing the same document again with this filter would
    // be reported as a circular inclusion.
    inclusions.removeAllElements ();
    uris.removeAllElements ();
    extEntities.clear ();
    ignoreCount = 0;

    addMarker (null);
    super.startDocument ();
}
/**
 * Ends the document: empties the inclusion list, the table of
 * external entities, and the base URI stack, then reports the event
 * downstream.
 *
 * @throws SAXException if a downstream handler fails
 */
public void endDocument () throws SAXException
{
    inclusions.removeAllElements ();
    extEntities.clear ();
    uris.removeAllElements ();

    super.endDocument ();
}
//
// External general entity boundaries get both treatments.
//
/**
 * Remembers the system ID of each external general entity, so that
 * the entity's boundaries can later be tracked by
 * <code>startEntity()</code> and <code>endEntity()</code>.
 * Parameter entities (names starting with '%') are ignored.
 *
 * <p> NOTE(review): this override does not pass the declaration on
 * to the superclass; confirm that dropping it is intended.
 *
 * @param name the entity name
 * @param publicId the public ID; not used here
 * @param systemId the system ID; resolved against the system ID of
 *	the current locator when that is possible
 */
public void externalEntityDecl (String name,
    String publicId, String systemId)
    throws SAXException
{
    if (name.startsWith ("%"))
        return;
    // Hashtable rejects null values; nothing can be tracked anyway
    if (systemId == null)
        return;

    // not every parser provides a locator, so don't dereference blindly
    String base = (locator == null) ? null : locator.getSystemId ();

    if (base != null) {
        try {
            systemId = new URL (new URL (base), systemId).toString ();
        } catch (IOException e) {
            // best effort: keep the system ID exactly as declared
        }
    }
    extEntities.put (name, systemId);
}
/**
 * Reports the start of an entity.  For external general entities
 * declared in the DTD, a marker is added first so that circular
 * inclusions are detected and the base URI is updated.  While
 * content is being discarded the event is only counted.
 *
 * @param name the entity name
 * @throws SAXException on circular inclusion, on an unusable entity
 *	URI, or if a downstream handler fails
 */
public void startEntity (String name)
    throws SAXException
{
    if (ignoreCount != 0) {
        ignoreCount++;
        return;
    }

    String uri = (String) extEntities.get (name);

    if (uri != null) {
        // addMarker() prefers the locator's system ID over the declared
        // one, so the string it records can differ from "uri".  Store
        // what was really recorded: endEntity() looks the entity up in
        // this table and must remove exactly that string, or a stale
        // marker would make the next reference look circular.
        extEntities.put (name, addMarker (uri));
    }
    super.startEntity (name);
}
/**
 * Reports the end of an entity, removing the marker of an external
 * general entity declared in the DTD.  While content is being
 * discarded the event is only counted.
 *
 * @param name the entity name
 * @throws SAXException if a downstream handler fails
 */
public void endEntity (String name)
    throws SAXException
{
    if (ignoreCount != 0) {
        ignoreCount--;
        if (ignoreCount != 0) {
            return;
        }
    }

    final Object marker = extEntities.get (name);

    if (marker != null) {
        pop ((String) marker);
    }
    super.endEntity (name);
}
//
// element boundaries only affect the base URI stack,
// unless they're XInclude elements.
//
/**
 * Handles the start of an element.  Every element that is not being
 * discarded pushes a base URI (changed by <em>xml:base</em> if that
 * attribute is present).  Elements outside the XInclude namespace are
 * passed on; <em>xi:include</em> is replaced by the resource it names
 * and its own content is then discarded.
 *
 * @param uri the element's namespace URI
 * @param localName the element's local name
 * @param qName the element's qualified name
 * @param atts the element's attributes
 * @throws SAXException on an illegal <em>xml:base</em>, on an illegal
 *	or failed inclusion, or if a downstream handler fails
 */
public void
startElement (String uri, String localName, String qName, Attributes atts)
    throws SAXException
{
    // content of an xi:include element: discard, but count the nesting
    if (ignoreCount != 0) {
        ignoreCount++;
        return;
    }

    // one base URI is pushed per element; endElement() pops it
    URL baseURI = (URL) uris.peek ();
    String base = atts.getValue ("http://www.w3.org/XML/1998/namespace", "base");

    if (base != null) {
        if (base.indexOf ('#') != -1)
            fatal (new SAXParseException (
                "xml:base with fragment: " + base,
                locator));
        try {
            baseURI = new URL (baseURI, base);
        } catch (IOException e) {
            fatal (new SAXParseException (
                "xml:base with illegal uri: " + base,
                locator, e));
        }
    }
    uris.push (baseURI);

    if (!"http://www.w3.org/2001/XInclude".equals (uri)) {
        super.startElement (uri, localName, qName, atts);
        return;
    }

    if ("include".equals (localName)) {
        handleXInclude (baseURI, atts);
    } else if ("fallback".equals (localName)) {
        fatal (new SAXParseException (
            "illegal top level XInclude 'fallback' element",
            locator));
    } else {
        ErrorHandler eh = getErrorHandler ();

        // CR doesn't say this is an error
        if (eh != null)
            eh.warning (new SAXParseException (
                "unrecognized toplevel XInclude element: " + localName,
                locator));
        super.startElement (uri, localName, qName, atts);
    }
}

/**
 * Performs the inclusion requested by one xi:include element, then
 * arranges for the element's own content to be discarded.
 *
 * @param base the base URI in effect for the element
 * @param atts the attributes of the xi:include element
 * @throws SAXException if the element is malformed or if the
 *	resource can't be included
 */
private void handleXInclude (URL base, Attributes atts)
    throws SAXException
{
    String href = atts.getValue ("href");
    String parse = atts.getValue ("parse");
    String encoding = atts.getValue ("encoding");
    SAXParseException x = null;

    if (href == null)
        fatal (new SAXParseException (
            "XInclude missing href",
            locator));
    if (href.indexOf ('#') != -1)
        fatal (new SAXParseException (
            "XInclude with fragment: " + href,
            locator));

    if (parse == null || "xml".equals (parse))
        x = xinclude (base, href);
    else if ("text".equals (parse))
        x = readText (base, href, encoding);
    else
        fatal (new SAXParseException (
            "unknown XInclude parsing mode: " + parse,
            locator));

    // FIXME the 17-Sept-2002 CR of XInclude says we "must"
    // use xi:fallback elements to handle resource errors,
    // if they exist.
    if (x != null)
        fatal (x);

    // strip out all child content
    ignoreCount++;
}
/**
 * Handles the end of an element: pops the base URI pushed when the
 * element started and passes the event on, except for xi:include
 * elements, which are never reported.  Elements being discarded are
 * only counted.
 *
 * @param uri the element's namespace URI
 * @param localName the element's local name
 * @param qName the element's qualified name
 * @throws SAXException if a downstream handler fails
 */
public void endElement (String uri, String localName, String qName)
    throws SAXException
{
    // still inside discarded content after counting this one?
    if (ignoreCount != 0 && --ignoreCount != 0) {
        return;
    }
    uris.pop ();

    final boolean isInclude =
        "http://www.w3.org/2001/XInclude".equals (uri)
        && "include".equals (localName);

    if (!isInclude) {
        super.endElement (uri, localName, qName);
    }
}
//
// ignore all content within non-empty xi:include elements
//
/**
 * Passes character data on, unless content is currently being
 * discarded.
 */
public void characters (char ch [], int start, int length)
    throws SAXException
{
    if (ignoreCount != 0) {
        return;
    }
    super.characters (ch, start, length);
}
/**
 * Passes a processing instruction on, unless content is currently
 * being discarded.
 */
public void processingInstruction (String target, String value)
    throws SAXException
{
    if (ignoreCount != 0) {
        return;
    }
    super.processingInstruction (target, value);
}
/**
 * Passes ignorable whitespace on, unless content is currently being
 * discarded.
 */
public void ignorableWhitespace (char ch [], int start, int length)
    throws SAXException
{
    if (ignoreCount != 0) {
        return;
    }
    super.ignorableWhitespace (ch, start, length);
}
/**
 * Passes a comment on, unless content is currently being discarded.
 */
public void comment (char ch [], int start, int length)
    throws SAXException
{
    if (ignoreCount != 0) {
        return;
    }
    super.comment (ch, start, length);
}
/**
 * Passes the start of a CDATA section on, unless content is
 * currently being discarded.
 */
public void startCDATA () throws SAXException
{
    if (ignoreCount != 0) {
        return;
    }
    super.startCDATA ();
}
/**
 * Passes the end of a CDATA section on, unless content is currently
 * being discarded.
 */
public void endCDATA () throws SAXException
{
    if (ignoreCount != 0) {
        return;
    }
    super.endCDATA ();
}
/**
 * Passes the start of a prefix mapping on, unless content is
 * currently being discarded.
 */
public void startPrefixMapping (String prefix, String uri)
    throws SAXException
{
    if (ignoreCount != 0) {
        return;
    }
    super.startPrefixMapping (prefix, uri);
}
/**
 * Passes the end of a prefix mapping on, unless content is currently
 * being discarded.
 */
public void endPrefixMapping (String prefix) throws SAXException
{
    if (ignoreCount != 0) {
        return;
    }
    super.endPrefixMapping (prefix);
}
/**
 * Passes a skipped entity report on, unless content is currently
 * being discarded.
 */
public void skippedEntity (String name) throws SAXException
{
    if (ignoreCount != 0) {
        return;
    }
    super.skippedEntity (name);
}
// JDK 1.1 seems to need it to be done this way, sigh
/** Replaces the locator behind the proxy; package-private accessor. */
void setLocator (Locator l)
{
    this.locator = l;
}
/** Returns the locator behind the proxy, which may be null. */
Locator getLocator ()
{
    return this.locator;
}
//
// for XIncluded entities, manage the current locator and
// filter out events that would be incorrect to report
//
/**
 * Filter through which the events of an xincluded document are fed
 * into the enclosing filter.  It hands the included document's
 * locator to the enclosing filter, swallows the document boundary
 * events, and rejects DTDs and skipped entities.
 */
private class Scrubber extends EventFilter
{
    Scrubber (EventFilter f)
        throws SAXException
    {
        // delegation passes to next in chain
        super (f);

        // process all content events
        super.setContentHandler (this);
        super.setProperty (LEXICAL_HANDLER, this);

        // drop all DTD events
        super.setDTDHandler (null);
        super.setProperty (DECL_HANDLER, null);
    }

    // maintain proxy locator
    public void setDocumentLocator (Locator l)
    {
        setLocator (l);
    }

    // only one startDocument()/endDocument() pair per event stream
    public void startDocument ()
    {
    }

    public void endDocument ()
    {
    }

    // only the DTD from the "base document" gets reported
    public void startDTD (String root, String publicId, String systemId)
        throws SAXException
    {
        fatal (new SAXParseException (
            "XIncluded DTD: " + systemId, getLocator ()));
    }

    public void endDTD ()
        throws SAXException
    {
        fatal (new SAXParseException ("XIncluded DTD", getLocator ()));
    }

    // since we rejected DTDs, only builtin entities can be reported
    // ... so this should never happen
    public void skippedEntity (String name) throws SAXException
    {
        fatal (new SAXParseException (
            "XInclude skipped entity: " + name, getLocator ()));
    }
}
/**
 * Handles <code>&lt;xi:include parse='xml' ...&gt;</code>: parses the
 * document named by <code>href</code>, relative to the base URI
 * passed, and merges its events (except its DTD, which is rejected)
 * into the current event stream.
 *
 * @param url the base URI used to resolve <code>href</code>
 * @param href the reference from the xi:include element
 * @return null if the document was merged; otherwise an exception,
 *	not yet reported, that describes the I/O problem
 * @throws SAXException on circular inclusion, or if parsing the
 *	included document fails for reasons other than I/O
 */
private SAXParseException xinclude (URL url, String href)
    throws SAXException
{
    final Locator savedLocator = locator;
    // true only once this call has added its own marker, so that the
    // cleanup below never removes an entry that belongs to a caller
    boolean marked = false;

    try {
        // start with a parser acting just like our input
        // modulo DTD-ish stuff (validation flag, entity resolver)
        XMLReader helper = XMLReaderFactory.createXMLReader ();

        helper.setErrorHandler (getErrorHandler ());
        // NOTE(review): this is always true, so the value assigned
        // with setSavingPrefixes() has no effect here.  Left as is,
        // since honoring the flag would change the default output.
        helper.setFeature (FEATURE_URI + "namespace-prefixes", true);

        // Set up the proxy locator and event filter.
        Scrubber scrubber = new Scrubber (this);

        locator = null;
        bind (helper, scrubber);

        // Merge the included document, except its DTD
        url = new URL (url, href);
        href = url.toString ();
        if (inclusions.contains (href))
            // the included document has no locator yet; blame the
            // xi:include element in the including document
            fatal (new SAXParseException (
                "XInclude, circular inclusion", savedLocator));
        inclusions.addElement (href);
        uris.push (url);
        marked = true;

        helper.parse (new InputSource (href));
        return null;

    } catch (IOException e) {
        // use the included document's position when one is known,
        // else the position of the xi:include element
        return new SAXParseException (href,
            (locator != null) ? locator : savedLocator, e);

    } finally {
        if (marked)
            pop (href);
        locator = savedLocator;
    }
}
/**
 * Handles <code>&lt;xi:include parse='text' ...&gt;</code>: reads the
 * resource named by <code>href</code>, relative to the base URI
 * passed, and reports its content as character data.
 *
 * @param url the base URI used to resolve <code>href</code>
 * @param href the reference from the xi:include element
 * @param encoding the character encoding to use; if null, it is
 *	derived from the resource's content type when possible, and
 *	otherwise the platform default is used after a warning
 * @return null if the text was included; otherwise an exception,
 *	not yet reported, that describes the I/O problem
 * @throws SAXException if an event handler fails
 */
private SAXParseException readText (URL url, String href, String encoding)
    throws SAXException
{
    InputStream in = null;

    try {
        URLConnection conn;
        InputStreamReader reader;
        char buf [] = new char [4096];
        int count;

        url = new URL (url, href);
        conn = url.openConnection ();
        in = conn.getInputStream ();

        if (encoding == null) {
            // the content type is null when it isn't known; the
            // helper isn't shown to accept that, so don't pass it on
            String contentType = conn.getContentType ();

            if (contentType != null)
                encoding = Resolver.getEncoding (contentType);
        }
        if (encoding == null) {
            ErrorHandler eh = getErrorHandler ();

            if (eh != null)
                eh.warning (new SAXParseException (
                    "guessing text encoding for URL: " + url,
                    locator));
            reader = new InputStreamReader (in);
        } else
            reader = new InputStreamReader (in, encoding);

        while ((count = reader.read (buf, 0, buf.length)) != -1)
            super.characters (buf, 0, count);

        // close here on the success path, so a failure to close is
        // still reported; clearing "in" first prevents a second close
        InputStream finished = in;

        in = null;
        finished.close ();
        return null;

    } catch (IOException e) {
        return new SAXParseException (
            "can't XInclude text",
            locator, e);

    } finally {
        // reached with an open stream only after a failure, including
        // a SAXException from the consumer: release it quietly so the
        // original problem is the one that gets reported
        if (in != null) {
            try {
                in.close ();
            } catch (IOException e) {
                // nothing useful can be done about this one
            }
        }
    }
}
}