Next page Previous page Start of chapter End of chapter

Validation

JAXP supports validation against both DTDs and W3C XML Schema. However, DTD validation is coupled with parsing and is defined in the javax.xml.parsers package, while Schema validation is separated from parsing and is defined in the javax.xml.validation package.

To validate a document against a DTD, include the DTD declaration in the XML document and turn on the validation feature by calling setValidating(true) on the parser factory before creating the parser, as the complete examples below show.

In any case, in order to track validation errors, you have to override the methods warning(), error() and fatalError() of the org.xml.sax.helpers.DefaultHandler class (this is the same class you used to respond to events produced by a SAX parser). The warning() method is called when a warning is issued, the error() method is invoked when a recoverable error (for instance, a validity error) is issued, and the fatalError() method is used when a non-recoverable error (for instance, a well-formedness error) is issued.

By default, the warning() and error() methods do nothing and fatalError() just throws a SAXParseException. This means that, by default, validity errors are silently ignored: only fatal (well-formedness) errors are reported. To catch validity errors, you have to override at least the error() method. In the overridden method, you can print information about the error and, maybe, exit the program.

When you have defined your error handler by extending the DefaultHandler class, you have to register the handler with the parser. You can use the setErrorHandler() method of the parser, or directly pass the handler to the parse() method of the parser in case you are using a SAXParser object.

As an example, the code for a validating SAX parser follows:

import java.io.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import javax.xml.parsers.*;

/**
 * Command-line tool that parses an XML document with a validating SAX parser,
 * checking it against the DTD declared in the document itself.
 *
 * <p>Usage: {@code java SAXValid document.xml}. The process exits with status 1
 * on a usage error, a parser configuration error, an I/O error or a parsing
 * error; otherwise it prints a confirmation on standard output.
 */
public class SAXValid {

  /**
   * Parses and validates the document named by the single command-line argument.
   *
   * @param args exactly one element: the path of the XML document to validate
   */
  public static void main(String[] args) {

    if (args.length != 1) {
      System.err.println("Usage: java SAXValid document.xml");
      System.exit(1);
    }
    // name of the XML document to parse
    String filename = args[0];

    // create a parser factory instance
    SAXParserFactory factory = null;
    try {
      factory = SAXParserFactory.newInstance();
    } catch (FactoryConfigurationError fce) {
      // The implementation is not available or cannot be instantiated
      fail(fce);
    }
    // set validation against the DTD linked in the XML document
    factory.setValidating(true);

    // use the factory to create a parser instance
    SAXParser parser = null;
    try {
      parser = factory.newSAXParser();
    } catch (ParserConfigurationException pce) {
      // a parser cannot be created which satisfies the requested configuration
      fail(pce);
    } catch (SAXException se) {
      // SAX general error
      fail(se);
    }

    // create a default handler (error handler)
    ValidHandler handler = new ValidHandler();
    // register the handler, parse and validate the document
    try {
      parser.parse(new File(filename), handler);
    } catch (SAXException se) {
      // ValidHandler prints errors and fatal errors and exits by itself, so an
      // exception arriving here has not been reported yet. Report it instead
      // of falling through to the "valid" message below.
      fail(se);
    } catch (IOException ioe) {
      // Some IO error occurred
      fail(ioe);
    }

    System.out.println("The document is valid.");
  }

  /** Prints the message of {@code problem} on standard error and exits with status 1. */
  private static void fail(Throwable problem) {
    System.err.println(problem.getMessage());
    System.exit(1);
  }

}

where the error handler ValidHandler subclasses DefaultHandler as follows:

import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * SAX error handler that reports every diagnostic on standard error.
 * Warnings are only printed; errors and fatal errors are printed and then
 * terminate the program with exit status 1.
 */
public class ValidHandler extends DefaultHandler {

  /**
   * Strips everything up to and including the last '/' of a system identifier.
   *
   * @param id the system identifier, may be null
   * @return the text after the last '/', or {@code id} itself when it is null
   *         or contains no '/'
   */
  public String relativeSystemID(String id) {
    if (id == null) {
      return null;
    }
    int lastSlash = id.lastIndexOf('/');
    return (lastSlash == -1) ? id : id.substring(lastSlash + 1);
  }

  /**
   * Writes one diagnostic line of the form
   * {@code [type] file:line:column: message} to standard error.
   *
   * @param type label printed between square brackets
   * @param spe  the exception describing the problem and its location
   */
  public void printError(String type, SAXParseException spe) {
    StringBuilder report = new StringBuilder();
    report.append('[').append(type).append("] ");
    report.append(relativeSystemID(spe.getSystemId()));
    report.append(':').append(spe.getLineNumber());
    report.append(':').append(spe.getColumnNumber());
    report.append(": ").append(spe.getMessage());
    System.err.println(report.toString());
    System.err.flush();
  }

  /** Reports a warning; processing continues afterwards. */
  @Override
  public void warning(SAXParseException spe) throws SAXException {
    printError("warning", spe);
  }

  /** Reports a recoverable (validation) error, then exits with status 1. */
  @Override
  public void error(SAXParseException spe) throws SAXException {
    printError("error", spe);
    System.exit(1);
  }

  /** Reports a non-recoverable (well-formedness) error, then exits with status 1. */
  @Override
  public void fatalError(SAXParseException spe) throws SAXException {
    printError("fatal error", spe);
    System.exit(1);
  }
}

The code for a validating DOM parser follows:

import java.io.*;
import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.helpers.*;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * Command-line tool that parses an XML document with a validating DOM parser,
 * checking it against the DTD declared in the document itself.
 *
 * <p>Usage: {@code java DOMValid document.xml}. The process exits with status 1
 * on a usage error, a parser configuration error, an I/O error or a parsing
 * error; otherwise it prints a confirmation on standard output.
 */
public class DOMValid {

  /**
   * Parses and validates the document named by the single command-line argument.
   *
   * @param args exactly one element: the path of the XML document to validate
   */
  public static void main(String[] args) {

    if (args.length != 1) {
      System.err.println("Usage: java DOMValid document.xml");
      System.exit(1);
    }
    // name of the XML document to parse
    String filename = args[0];

    // create a parser factory instance
    DocumentBuilderFactory factory = null;
    try {
      factory = DocumentBuilderFactory.newInstance();
    } catch (FactoryConfigurationError fce) {
      // The implementation is not available or cannot be instantiated
      fail(fce);
    }
    // set validation against the DTD linked in the XML document
    factory.setValidating(true);

    // use the factory to create a parser instance
    DocumentBuilder parser = null;
    try {
      parser = factory.newDocumentBuilder();
    } catch (ParserConfigurationException pce) {
      // a parser cannot be created which satisfies the requested configuration
      fail(pce);
    }
    // register an error handler with the parser
    parser.setErrorHandler(new ValidHandler());

    // parse the document; the resulting tree is not needed, only the
    // diagnostics raised while building it
    try {
      parser.parse(new File(filename));
    } catch (SAXException se) {
      // ValidHandler prints errors and fatal errors and exits by itself, so an
      // exception arriving here has not been reported yet. Report it instead
      // of falling through to the "valid" message below.
      fail(se);
    } catch (IOException ioe) {
      // Some IO error occurred
      fail(ioe);
    }

    System.out.println("The document is valid.");

  }

  /** Prints the message of {@code problem} on standard error and exits with status 1. */
  private static void fail(Throwable problem) {
    System.err.println(problem.getMessage());
    System.exit(1);
  }

}

where ValidHandler is as above.

Validation against W3C XML Schema is separated from parsing. To validate a document follow these steps:

  1. create a schema factory capable of understanding W3C XML Schema:
    SchemaFactory factory =
        SchemaFactory.newInstance(javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI);
    
  2. load a specific schema:
    schema = factory.newSchema(new File(schemaDocument))
    
    This method parses the schema document. You may also skip this step if the instance document links to the schema document. The schema document will be parsed during validation. In this case load an empty schema:
    schema = factory.newSchema()
    
  3. create a validator instance:
    Validator validator = schema.newValidator();
    
  4. possibly, set a custom error handler:
    validator.setErrorHandler(errorHandler);
    
  5. finally, perform validation of a validation source:
    validator.validate(validationSource);
    

The validation source is essentially an XML instance and a method to read it. There are 3 possibilities: a stream source, that reads the XML instance as a stream of characters or bytes, a SAX source, that reads the XML instance as a stream of events, and a DOM source, that reads the XML instance as a tree of nodes.

The following code illustrates the validation process:

import java.io.*;
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import org.w3c.dom.*;
import javax.xml.transform.stream.*;
import javax.xml.transform.sax.*;
import javax.xml.transform.dom.*;
import javax.xml.validation.*;

/**
 * Validates an XML document against a W3C XML Schema, reading the instance
 * document from one of three validation sources: a stream, a SAX source or a
 * DOM tree.
 *
 * <p>Syntax: {@code java XMLValid -s (stream|sax|dom) document.xml [schema.xsd]}.
 * When the optional schema file is omitted, an empty schema is loaded, so the
 * instance document is expected to link to its schema document itself.
 * The process exits with status 1 on a usage error or on any failure raised
 * while loading the schema, parsing or validating.
 */
public class XMLValid {

  static final String usage =
      "Syntax: java XMLValid -s (stream|sax|dom) document.xml [schema.xsd]";

  /**
   * Runs the validation described by the command line.
   *
   * @param args {@code -s}, the source kind ({@code stream}, {@code sax} or
   *             {@code dom}), the instance document and, optionally, the
   *             schema document
   */
  public static void main(String[] args) {

    // the first argument must be the -s flag announced in the usage text
    if ((args.length < 3) || (args.length > 4) || !"-s".equals(args[0])) {
      usageError();
    }

    // validation source
    String source = args[1];
    if (!(source.equals("stream") || source.equals("sax") || source.equals("dom"))) {
      usageError();
    }

    try {
      // create a schema factory capable of understanding W3C XML Schema
      SchemaFactory factory =
          SchemaFactory.newInstance(javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI);

      // load the given schema, or an empty one when none was named
      Schema schema;
      if (args.length == 4) {
        schema = factory.newSchema(new File(args[3]));
      } else {
        schema = factory.newSchema();
      }

      // create a validator instance
      Validator validator = schema.newValidator();

      // set a custom error handler
      validator.setErrorHandler(new ValidHandler());

      if (source.equals("stream")) {
        // perform validation of a stream source
        validator.validate(new StreamSource(args[2]));
      } else if (source.equals("sax")) {
        // perform validation of a SAX source
        validator.validate(new SAXSource(new InputSource(args[2])));
      } else {
        // perform validation of a DOM source
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        // XML Schema validation works on namespace-qualified names, so the
        // DOM handed to the validator must come from a namespace-aware parser
        docFactory.setNamespaceAware(true);
        DocumentBuilder parser = docFactory.newDocumentBuilder();
        Document document = parser.parse(new File(args[2]));
        validator.validate(new DOMSource(document, args[2]));
      }

    } catch (Exception e) {
      // some exceptions carry no message; fall back to their class name
      String message = e.getMessage();
      System.err.println(message != null ? message : e.toString());
      System.exit(1);
    }

    System.out.println("The document is valid (" + source + " input source)");

  }

  /** Prints the usage text on standard error and exits with status 1. */
  private static void usageError() {
    System.err.println(usage);
    System.exit(1);
  }
}

where ValidHandler is as above. Notice that, if no error handler is set, then both errors and fatal errors throw an exception and thus are reported by the validator. This is different from DTD validation where, by default, errors (and in particular validation errors) are not reported.

Next page Previous page Start of chapter End of chapter
Caffè XML - Massimo Franceschet