| /* WellFormednessFilter.java -- |
| Copyright (C) 1999,2000,2001 Free Software Foundation, Inc. |
| |
| This file is part of GNU Classpath. |
| |
| GNU Classpath is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2, or (at your option) |
| any later version. |
| |
| GNU Classpath is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GNU Classpath; see the file COPYING. If not, write to the |
| Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
| 02110-1301 USA. |
| |
| Linking this library statically or dynamically with other modules is |
| making a combined work based on this library. Thus, the terms and |
| conditions of the GNU General Public License cover the whole |
| combination. |
| |
| As a special exception, the copyright holders of this library give you |
| permission to link this library with independent modules to produce an |
| executable, regardless of the license terms of these independent |
| modules, and to copy and distribute the resulting executable under |
| terms of your choice, provided that you also meet, for each linked |
| independent module, the terms and conditions of the license of that |
| module. An independent module is a module which is not derived from |
| or based on this library. If you modify this library, you may extend |
| this exception to your version of the library, but you are not |
| obligated to do so. If you do not wish to do so, delete this |
| exception statement from your version. */ |
| |
| package gnu.xml.pipeline; |
| |
| import java.util.EmptyStackException; |
| import java.util.Stack; |
| |
| import org.xml.sax.Attributes; |
| import org.xml.sax.ErrorHandler; |
| import org.xml.sax.Locator; |
| import org.xml.sax.SAXException; |
| import org.xml.sax.SAXParseException; |
| |
| /** |
| * This filter reports fatal exceptions in the case of event streams that |
| * are not well formed. The rules currently tested include: <ul> |
| * |
| * <li>setDocumentLocator ... may be called only before startDocument |
| * |
| * <li>startDocument/endDocument ... must be paired, and all other |
| * calls (except setDocumentLocator) must be nested within these. |
| * |
| * <li>startElement/endElement ... must be correctly paired, and |
| * may never appear within CDATA sections. |
| * |
| * <li>comment ... can't contain "--" |
| * |
| * <li>character data ... can't contain "]]>" |
| * |
| * <li>whitespace ... can't contain CR |
| * |
| * <li>whitespace and character data must be within an element |
| * |
| * <li>processing instruction ... can't contain "?>" or CR |
| * |
| * <li>startCDATA/endCDATA ... must be correctly paired. |
| * |
| * </ul> |
| * |
| * <p> Other checks for event stream correctness may be provided in |
| * the future. For example, insisting that |
| * entity boundaries nest correctly, |
| * namespace scopes nest correctly, |
| * namespace values never contain relative URIs, |
| * attributes don't have "<" characters; |
| * and more. |
| * |
| * @author David Brownell |
| */ |
| public final class WellFormednessFilter extends EventFilter |
| { |
| private boolean startedDoc; |
| private Stack elementStack = new Stack (); |
| private boolean startedCDATA; |
| private String dtdState = "before"; |
| |
| |
| /** |
| * Swallows all events after performing well formedness checks. |
| */ |
| // constructor used by PipelineFactory |
| public WellFormednessFilter () |
| { this (null); } |
| |
| |
| /** |
| * Passes events through to the specified consumer, after first |
| * processing them. |
| */ |
| // constructor used by PipelineFactory |
| public WellFormednessFilter (EventConsumer consumer) |
| { |
| super (consumer); |
| |
| setContentHandler (this); |
| setDTDHandler (this); |
| |
| try { |
| setProperty (LEXICAL_HANDLER, this); |
| } catch (SAXException e) { /* can't happen */ } |
| } |
| |
| /** |
| * Resets state as if any preceding event stream was well formed. |
| * Particularly useful if it ended through some sort of error, |
| * and the endDocument call wasn't made. |
| */ |
| public void reset () |
| { |
| startedDoc = false; |
| startedCDATA = false; |
| elementStack.removeAllElements (); |
| } |
| |
| |
| private SAXParseException getException (String message) |
| { |
| SAXParseException e; |
| Locator locator = getDocumentLocator (); |
| |
| if (locator == null) |
| return new SAXParseException (message, null, null, -1, -1); |
| else |
| return new SAXParseException (message, locator); |
| } |
| |
| private void fatalError (String message) |
| throws SAXException |
| { |
| SAXParseException e = getException (message); |
| ErrorHandler handler = getErrorHandler (); |
| |
| if (handler != null) |
| handler.fatalError (e); |
| throw e; |
| } |
| |
| /** |
| * Throws an exception when called after startDocument. |
| * |
| * @param locator the locator, to be used in error reporting or relative |
| * URI resolution. |
| * |
| * @exception IllegalStateException when called after the document |
| * has already been started |
| */ |
| public void setDocumentLocator (Locator locator) |
| { |
| if (startedDoc) |
| throw new IllegalStateException ( |
| "setDocumentLocator called after startDocument"); |
| super.setDocumentLocator (locator); |
| } |
| |
| public void startDocument () throws SAXException |
| { |
| if (startedDoc) |
| fatalError ("startDocument called more than once"); |
| startedDoc = true; |
| startedCDATA = false; |
| elementStack.removeAllElements (); |
| super.startDocument (); |
| } |
| |
| public void startElement ( |
| String uri, String localName, |
| String qName, Attributes atts |
| ) throws SAXException |
| { |
| if (!startedDoc) |
| fatalError ("callback outside of document?"); |
| if ("inside".equals (dtdState)) |
| fatalError ("element inside DTD?"); |
| else |
| dtdState = "after"; |
| if (startedCDATA) |
| fatalError ("element inside CDATA section"); |
| if (qName == null || "".equals (qName)) |
| fatalError ("startElement name missing"); |
| elementStack.push (qName); |
| super.startElement (uri, localName, qName, atts); |
| } |
| |
| public void endElement (String uri, String localName, String qName) |
| throws SAXException |
| { |
| if (!startedDoc) |
| fatalError ("callback outside of document?"); |
| if (startedCDATA) |
| fatalError ("element inside CDATA section"); |
| if (qName == null || "".equals (qName)) |
| fatalError ("endElement name missing"); |
| |
| try { |
| String top = (String) elementStack.pop (); |
| |
| if (!qName.equals (top)) |
| fatalError ("<" + top + " ...>...</" + qName + ">"); |
| // XXX could record/test namespace info |
| } catch (EmptyStackException e) { |
| fatalError ("endElement without startElement: </" + qName + ">"); |
| } |
| super.endElement (uri, localName, qName); |
| } |
| |
| public void endDocument () throws SAXException |
| { |
| if (!startedDoc) |
| fatalError ("callback outside of document?"); |
| dtdState = "before"; |
| startedDoc = false; |
| super.endDocument (); |
| } |
| |
| |
| public void startDTD (String root, String publicId, String systemId) |
| throws SAXException |
| { |
| if (!startedDoc) |
| fatalError ("callback outside of document?"); |
| if ("before" != dtdState) |
| fatalError ("two DTDs?"); |
| if (!elementStack.empty ()) |
| fatalError ("DTD must precede root element"); |
| dtdState = "inside"; |
| super.startDTD (root, publicId, systemId); |
| } |
| |
| public void notationDecl (String name, String publicId, String systemId) |
| throws SAXException |
| { |
| // FIXME: not all parsers will report startDTD() ... |
| // we'd rather insist we're "inside". |
| if ("after" == dtdState) |
| fatalError ("not inside DTD"); |
| super.notationDecl (name, publicId, systemId); |
| } |
| |
| public void unparsedEntityDecl (String name, |
| String publicId, String systemId, String notationName) |
| throws SAXException |
| { |
| // FIXME: not all parsers will report startDTD() ... |
| // we'd rather insist we're "inside". |
| if ("after" == dtdState) |
| fatalError ("not inside DTD"); |
| super.unparsedEntityDecl (name, publicId, systemId, notationName); |
| } |
| |
| // FIXME: add the four DeclHandler calls too |
| |
| public void endDTD () |
| throws SAXException |
| { |
| if (!startedDoc) |
| fatalError ("callback outside of document?"); |
| if ("inside" != dtdState) |
| fatalError ("DTD ends without start?"); |
| dtdState = "after"; |
| super.endDTD (); |
| } |
| |
| public void characters (char ch [], int start, int length) |
| throws SAXException |
| { |
| int here = start, end = start + length; |
| if (elementStack.empty ()) |
| fatalError ("characters must be in an element"); |
| while (here < end) { |
| if (ch [here++] != ']') |
| continue; |
| if (here == end) // potential problem ... |
| continue; |
| if (ch [here++] != ']') |
| continue; |
| if (here == end) // potential problem ... |
| continue; |
| if (ch [here++] == '>') |
| fatalError ("character data can't contain \"]]>\""); |
| } |
| super.characters (ch, start, length); |
| } |
| |
| public void ignorableWhitespace (char ch [], int start, int length) |
| throws SAXException |
| { |
| int here = start, end = start + length; |
| if (elementStack.empty ()) |
| fatalError ("characters must be in an element"); |
| while (here < end) { |
| if (ch [here++] == '\r') |
| fatalError ("whitespace can't contain CR"); |
| } |
| super.ignorableWhitespace (ch, start, length); |
| } |
| |
| public void processingInstruction (String target, String data) |
| throws SAXException |
| { |
| if (data.indexOf ('\r') > 0) |
| fatalError ("PIs can't contain CR"); |
| if (data.indexOf ("?>") > 0) |
| fatalError ("PIs can't contain \"?>\""); |
| } |
| |
| public void comment (char ch [], int start, int length) |
| throws SAXException |
| { |
| if (!startedDoc) |
| fatalError ("callback outside of document?"); |
| if (startedCDATA) |
| fatalError ("comments can't nest in CDATA"); |
| int here = start, end = start + length; |
| while (here < end) { |
| if (ch [here] == '\r') |
| fatalError ("comments can't contain CR"); |
| if (ch [here++] != '-') |
| continue; |
| if (here == end) |
| fatalError ("comments can't end with \"--->\""); |
| if (ch [here++] == '-') |
| fatalError ("comments can't contain \"--\""); |
| } |
| super.comment (ch, start, length); |
| } |
| |
| public void startCDATA () |
| throws SAXException |
| { |
| if (!startedDoc) |
| fatalError ("callback outside of document?"); |
| if (startedCDATA) |
| fatalError ("CDATA starts can't nest"); |
| startedCDATA = true; |
| super.startCDATA (); |
| } |
| |
| public void endCDATA () |
| throws SAXException |
| { |
| if (!startedDoc) |
| fatalError ("callback outside of document?"); |
| if (!startedCDATA) |
| fatalError ("CDATA end without start?"); |
| startedCDATA = false; |
| super.endCDATA (); |
| } |
| } |