Windows Systems Programming: Spring 2004

[ Home | Syllabus | Course Notes | Assignments | Search]


XML(chap 13): emerging lingua franca of the Web?

XML documents must start with an XML declaration - for example

<?xml version="1.0"?>
<?xml version="1.0" encoding="ISO-8859-1"?>
<?xml version="1.0" encoding="UTF-16"?>
<?xml version="1.0"?>
<Guitars>
   ...
</Guitars>

XML Elements

<?xml version="1.0"?>
<Guitars> 
  <Guitar> 
    <Make>Gibson</Make>
    <Model>SG</Model>
    <Year>1977</Year>
    <Color>Tobacco Sunburst</Color>
    <Neck>Rosewood</Neck>
  </Guitar>
  <Guitar>
    <Make>Fender</Make>
    <Model>Stratocaster</Model>
    <Year></Year>
    <Color>Black</Color>
    <Neck>Maple</Neck>
  </Guitar>
</Guitars>


Attributes

<Guitar Year="1977">
  <Make>Gibson</Make>
  <Model>SG</Model>
  <Color>Tobacco Sunburst</Color>
  <Neck>Rosewood</Neck>
</Guitar>
<Guitar Image="MySG.jpeg">
  <Make>Gibson</Make>
  <Model>SG</Model>
  <Year>1977</Year>
  <Color>Tobacco Sunburst</Color>
  <Neck>Rosewood</Neck>
</Guitar>

CDATA, PCDATA, and Entity References

Symbol

Corresponding Entity

<

lt

>

gt

&

amp

'

apos

"

quot


NameSpaces

Namespaces avoid name conflicts in a global distributed information sharing world

For example

<?xml version="1.0"?>
<win:Guitars
  xmlns:win="http://www.wintellect.com/classic-guitars"
  xmlns:gibson="http://www.gibson.com/finishes" 
  xmlns:fender="http://www.fender.com/finishes">
  <win:Guitar>
    <win:Make>Gibson</win:Make>
    <win:Model>SG</win:Model>
    <win:Year>1977</win:Year>
    <gibson:Color>Tobacco Sunburst</gibson:Color>
    <win:Neck>Rosewood</win:Neck>
  </win:Guitar>
  <win:Guitar>
    <win:Make>Fender</win:Make>
    <win:Model>Stratocaster</win:Model>
    <win:Year>1990</win:Year>
    <fender:Color>Black</fender:Color>
    <win:Neck>Maple</win:Neck>
  </win:Guitar>
</win:Guitars>
<?xml version="1.0"?>
<Guitars
  xmlns="http://www.wintellect.com/classic-guitars"
  xmlns:gibson="http://www.gibson.com/finishes"
  xmlns:fender="http://www.fender.com/finishes">
  <Guitar>
    <Make>Gibson</Make>
    <Model>SG</Model>
    <Year>1977</Year>
    <gibson:Color>Tobacco Sunburst</gibson:Color>
    <Neck>Rosewood</Neck>
  </Guitar>
  <Guitar>
    <Make>Fender</Make>
    <Model>Stratocaster</Model>
    <Year></Year>
    <fender:Color>Black</fender:Color>
    <Neck>Maple</Neck>
  </Guitar>
</Guitars>

Document Validity and Schemas

<?xml version="1.0"?> 
<xsd:schema id="Guitars" xmlns=""
  xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="Guitars"> 
    <xsd:complexType>
      <xsd:choice maxOccurs="unbounded">
        <xsd:element name="Guitar"> 
          <xsd:complexType>
            <xsd:sequence>
              <xsd:element name="Make" type="xsd:string" /> 
              <xsd:element name="Model" type="xsd:string" /> 
              <xsd:element name="Year" type="xsd:gYear"
                minOccurs="0"  />
              <xsd:element name="Color" type="xsd:string"
                minOccurs="0" />
              <xsd:element name="Neck" type="xsd:string"
                minOccurs="0" />
            </xsd:sequence>
          </xsd:complexType>
        </xsd:element>
      </xsd:choice>
    </xsd:complexType>
  </xsd:element>
</xsd:schema>
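The following is well formed but invalid: the Guitar element is missing the required Make element and contains two Color elements, neither of which the schema above allows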
<?xml version="1.0"?>
<Guitars>
  <Guitar>
    <Model>SG</Model>
    <Year>1977</Year>
    <Color>Tobacco Sunburst</Color>
    <Color>Gun-Metal Gray</Color>
    <Neck>Rosewood</Neck>
  </Guitar>
</Guitars>

XML Parsers

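There are two common parsing models: DOM (the parser builds an in-memory tree of the whole document) and SAX (the parser streams through the document sequentially)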


Simple Parse Example

Given

<?xml version="1.0"?>
<Guitars>
  <Guitar Image="MySG.jpeg">
    <Make>Gibson</Make>
    <Model>SG</Model>
    <Year>1977</Year>
    <Color>Tobacco Sunburst</Color>
    <Neck>Rosewood</Neck>
  </Guitar>
</Guitars>

A DOM parser would produce the following tree structure

Simple Parse Program

using System;
using System.Xml;

// Lists the make and model of every Guitar element found in Guitars.xml.
class MyApp
{
    static void Main ()
    {
        // Parse the whole file into an in-memory DOM tree.
        XmlDocument guitarsDoc = new XmlDocument ();
        guitarsDoc.Load ("Guitars.xml");

        // One output line per Guitar, built from its Make and Model children.
        foreach (XmlNode guitar in guitarsDoc.GetElementsByTagName ("Guitar")) {
            string make = guitar["Make"].InnerText;
            string model = guitar["Model"].InnerText;
            Console.WriteLine ("{0} {1}", make, model);
        }
    }
}

When run against this file

<?xml version="1.0"?>
<Guitars>
  <Guitar Image="MySG.jpeg">
    <Make>Gibson</Make>
    <Model>SG</Model>
    <Year>1977</Year>
    <Color>Tobacco Sunburst</Color>
    <Neck>Rosewood</Neck>
  </Guitar>
  <Guitar Image="MyStrat.jpeg" PreviousOwner="Eric Clapton">
    <Make>Fender</Make>
    <Model>Stratocaster</Model>
    <Year>1990</Year>
    <Color>Black</Color>
    <Neck>Maple</Neck>
  </Guitar>
</Guitars>
Will produce the following output
Gibson SG
Fender Stratocaster

Reading and Writing XML

Again there are two models for writing XML

  • One based on the DOM tree model (build a DOM tree then serialize it) (shown above)

  • Use a stream oriented creation model


The XmlDocument Class

  • complies with the DOM Level 2 Core specification

  • Each node is an instance of XmlNode

  • Example of displaying all nodes in the parse tree

XmlDocument doc = new XmlDocument ();
doc.Load ("Guitars.xml");
OutputNode (doc.DocumentElement);
  .
  .
  .
// Prints the type, name and value of a node, then does the same for each of
// its descendants, visiting the tree depth-first in document order.
void OutputNode (XmlNode node)
{
    Console.WriteLine ("Type={0}\tName={1}\tValue={2}",
        node.NodeType, node.Name, node.Value);

    // Leaf nodes have nothing further to print.
    if (!node.HasChildNodes)
        return;

    // Walk the children by following sibling links and recurse into each one.
    for (XmlNode kid = node.FirstChild; kid != null; kid = kid.NextSibling)
        OutputNode (kid);
}
Would produce the following output on the above XML file
Type=Element    Name=Guitars    Value= 
Type=Element    Name=Guitar     Value=
Type=Element    Name=Make       Value=
Type=Text       Name=#text      Value=Gibson 
Type=Element    Name=Model      Value=
Type=Text       Name=#text      Value=SG
Type=Element    Name=Year       Value=
Type=Text       Name=#text      Value=1977
Type=Element    Name=Color      Value=
Type=Text       Name=#text      Value=Tobacco Sunburst
Type=Element    Name=Neck       Value=
Type=Text       Name=#text      Value=Rosewood
Type=Element    Name=Guitar     Value=
Type=Element    Name=Make       Value=
Type=Text       Name=#text      Value=Fender
Type=Element    Name=Model      Value=
Type=Text       Name=#text      Value=Stratocaster
Type=Element    Name=Year       Value=
Type=Text       Name=#text      Value=1990
Type=Element    Name=Color      Value=
Type=Text       Name=#text      Value=Black
Type=Element    Name=Neck       Value=
Type=Text       Name=#text      Value=Maple

Here is a list of allowable "Type"s

XmlNodeType

Example

Attribute

<Guitar Image="MySG.jpeg">

CDATA

<![CDATA["This is character data"]]>

Comment

<!-- This is a comment -->

Document

<Guitars>

DocumentType

<!DOCTYPE Guitars SYSTEM "Guitars.dtd">

Element

<Guitar>

Entity

<!ENTITY filename "Strats.xml">

EntityReference

&lt;

Notation

<!NOTATION GIF89a SYSTEM "gif">

ProcessingInstruction

<?xml-stylesheet type="text/xsl" href="Guitars.xsl"?>

Text

<Model>Stratocaster</Model>

Whitespace

<Make/>\r\n<Model/>

XmlDeclaration

<?xml version="1.0"?>

To get attributes printed as well use

// Prints a node, then its attributes (if it has any), then recursively every
// child node, all in the same Type/Name/Value layout.
void OutputNode (XmlNode node)
{
    const string rowFormat = "Type={0}\tName={1}\tValue={2}";

    Console.WriteLine (rowFormat, node.NodeType, node.Name, node.Value);

    // Attributes is null for node types that cannot carry attributes.
    XmlAttributeCollection attributes = node.Attributes;
    if (attributes != null) {
        for (int i = 0; i < attributes.Count; i++) {
            XmlAttribute attribute = attributes[i];
            Console.WriteLine (rowFormat,
                attribute.NodeType, attribute.Name, attribute.Value);
        }
    }

    // FirstChild is null when there are no children, so no separate check is needed.
    for (XmlNode child = node.FirstChild; child != null; child = child.NextSibling)
        OutputNode (child);
}

Writing XML documents

// Modify Guitars.xml through the DOM: remove the first child of the root
// element, append a newly built Guitar element, and save the result.
XmlDocument doc = new XmlDocument ();
doc.Load ("Guitars.xml");

// Delete the first Guitar element
// NOTE(review): this removes root.FirstChild, which is the first Guitar only
// if nothing else (e.g. a comment) precedes it inside the root element.
XmlNode root = doc.DocumentElement;
root.RemoveChild (root.FirstChild);

// Create element nodes
XmlNode guitar = doc.CreateElement ("Guitar");
XmlNode elem1 = doc.CreateElement ("Make");
XmlNode elem2 = doc.CreateElement ("Model");
XmlNode elem3 = doc.CreateElement ("Year");
XmlNode elem4 = doc.CreateElement ("Color");
XmlNode elem5 = doc.CreateElement ("Neck");

// Create text nodes
XmlNode text1 = doc.CreateTextNode ("Gibson");
XmlNode text2 = doc.CreateTextNode ("Les Paul");
XmlNode text3 = doc.CreateTextNode ("1959");
XmlNode text4 = doc.CreateTextNode ("Gold");
XmlNode text5 = doc.CreateTextNode ("Rosewood");

// Attach the text nodes to the element nodes
elem1.AppendChild (text1);
elem2.AppendChild (text2);
elem3.AppendChild (text3);
elem4.AppendChild (text4);
elem5.AppendChild (text5);

// Attach the element nodes to the Guitar node
guitar.AppendChild (elem1);
guitar.AppendChild (elem2);
guitar.AppendChild (elem3);
guitar.AppendChild (elem4);
guitar.AppendChild (elem5);

// Attach the Guitar node to the root (Guitars) element
root.AppendChild (guitar);

// Save the modified document
doc.Save ("Guitars.xml");

SAX based stream XMLreader

Fast sequential parser

// Stream through Guitars.xml one node at a time, printing the type, name and
// value of each node as it is read.
XmlTextReader reader = new XmlTextReader ("Guitars.xml");

try {
    // Do not report whitespace nodes.
    reader.WhitespaceHandling = WhitespaceHandling.None;

    bool more = reader.Read ();
    while (more) {
        Console.WriteLine ("Type={0}\tName={1}\tValue={2}",
            reader.NodeType, reader.Name, reader.Value);
        more = reader.Read ();
    }
}
finally {
    // Always release the underlying file, even if reading fails part-way.
    reader.Close ();
}
Unlike the DOM tree walk shown earlier, the reader also reports EndElement nodes

The XmlValidatingReader Class

// Wrap a plain (non-validating) text reader in a validating reader.
XmlTextReader nvr = new XmlTextReader ("Guitars.xml");
XmlValidatingReader reader = new XmlValidatingReader (nvr);

// Register Guitars.xsd as the schema for the empty ("") namespace.
reader.Schemas.Add ("", "Guitars.xsd");

// Have validation problems reported to OnValidationError.
reader.ValidationEventHandler += new ValidationEventHandler (OnValidationError);
//// example
using System;
using System.Xml;
using System.Xml.Schema;

// Command-line validator: reads an XML document and prints every violation
// of the supplied XSD schema.
//
// Usage: VALIDATE xmldoc schemadoc
class MyApp
{
    static void Main (string[] args)
    {
        if (args.Length < 2) {
            Console.WriteLine ("Syntax: VALIDATE xmldoc schemadoc");
            return;
        }

        XmlValidatingReader reader = null;

        try {
            XmlTextReader nvr = new XmlTextReader (args[0]);
            nvr.WhitespaceHandling = WhitespaceHandling.None;

            // Register the schema under the namespace it declares for itself.
            reader = new XmlValidatingReader (nvr);
            reader.Schemas.Add (GetTargetNamespace (args[1]), args[1]);
            reader.ValidationEventHandler +=
                new ValidationEventHandler (OnValidationError);

            // Validation happens as a side effect of reading; the nodes
            // themselves are not needed.
            while (reader.Read ()) {
            }
        }
        catch (Exception ex) {
            Console.WriteLine (ex.Message);
        }
        finally {
            if (reader != null)
                reader.Close ();
        }
    }

    // Validation callback: prints the message of each reported problem.
    static void OnValidationError (object sender, ValidationEventArgs e)
    {
        Console.WriteLine (e.Message);
    }

    // Returns the value of the targetNamespace attribute on the schema
    // document's "schema" element, or "" if the schema declares none.
    // src is the path or URL of the schema document.
    static string GetTargetNamespace (string src)
    {
        XmlTextReader reader = null;

        try {
            reader = new XmlTextReader (src);
            reader.WhitespaceHandling = WhitespaceHandling.None;

            while (reader.Read ()) {
                if (reader.NodeType == XmlNodeType.Element &&
                    reader.LocalName == "schema") {
                    while (reader.MoveToNextAttribute ()) {
                        if (reader.Name == "targetNamespace")
                            return reader.Value;
                    }
                    // The schema element has no targetNamespace attribute.
                    return "";
                }
            }
            return "";
        }
        finally {
            if (reader != null)
                reader.Close ();
        }
    }

}

The XmlTextWriter Class

// Write Guitars.xml (UTF-16, indented) containing a single Guitar element.
XmlTextWriter writer =
    new XmlTextWriter ("Guitars.xml", System.Text.Encoding.Unicode);

try {
    writer.Formatting = Formatting.Indented;

    // Child elements of the Guitar, as name/value pairs in output order.
    string[,] fields = {
        { "Make",  "Gibson" },
        { "Model", "SG" },
        { "Year",  "1977" },
        { "Color", "Tobacco Sunburst" },
        { "Neck",  "Rosewood" }
    };

    writer.WriteStartDocument ();
    writer.WriteStartElement ("Guitars");
    writer.WriteStartElement ("Guitar");
    writer.WriteAttributeString ("Image", "MySG.jpeg");

    for (int i = 0; i < fields.GetLength (0); i++)
        writer.WriteElementString (fields[i, 0], fields[i, 1]);

    writer.WriteEndElement ();   // closes Guitar
    writer.WriteEndElement ();   // closes Guitars
}
finally {
    // Close the output file whether or not writing succeeded.
    writer.Close ();
}
Here’s what the generated document looks like:

<?xml version="1.0" encoding="utf-16"?>
<Guitars>
  <Guitar Image="MySG.jpeg">
    <Make>Gibson</Make>
    <Model>SG</Model>
    <Year>1977</Year>
    <Color>Tobacco Sunburst</Color>
    <Neck>Rosewood</Neck>
  </Guitar>
</Guitars>

XPath

  • XPath is a way to query and search XML documents (like SQL)

  • Described here http://www.w3.org/TR/xpath.

  • Location paths - give location using path in the parse tree, e.g.
    /Guitars/Guitar
    Path describing all "Guitar" elements in the document
    /Guitars/Guitar/@Image
    Path describing all attributes named "Image" inside a "Guitar" element

  • Above are absolute paths - can also have anywhere paths
    //Guitar
    All "Guitar" elements regardless of position in the tree

  • This names all the children
    /Guitars/*

  • All attributes of all "Guitar" elements anywhere in the document
    //Guitar/@*

  • Can also have Relative Paths

    Can add filtering expressions
    //Guitar[@Image]
    //Guitar[@Image = "MyStrat.jpeg"]
    //Guitar[Year > 1980]
    //Guitar[Year > 1980][Make = "Fender"]
    //Guitar[Year > 1980 and Make = "Fender"]
    //Guitar[Year > 1980 or Make = "Fender"]
    //Guitar[starts-with (Make, "G")]

  • An XPath expression returns a node set

 

XPathNavigator and Friends

Several interesting classes:

  • XPathDocument, which represents XML documents that you want to query with XPath

  • XPathNavigator, which provides a mechanism for performing XPath queries

  • XPathNodeIterator, which represents node sets generated by XPath queries and lets you iterate over them.

// Load the document, run the XPath query, and report how many nodes matched.
XPathDocument doc = new XPathDocument ("Guitars.xml");
XPathNavigator nav = doc.CreateNavigator ();
XPathNodeIterator iterator = nav.Select ("//Guitar");
Console.WriteLine ("Select returned {0} nodes", iterator.Count);

// Print one line per matching node.
while (iterator.MoveNext ()) {
    XPathNavigator current = iterator.Current;
    Console.WriteLine ("Type={0}, Name={1}, Value={2}",
        current.NodeType, current.Name, current.Value);
}
//// Alternative, more detailed loop: the simple loop prints the value of each
//// selected node's entire subtree on one line, so walk the subtree with
//// OutputNode instead.
while (iterator.MoveNext ())
    OutputNode (iterator.Current);
      .
      .
      .
// Prints the node the navigator is positioned on, then its attributes, then
// (recursively) its children. The navigator is moved during the walk and is
// returned to the starting node before the method exits.
void OutputNode (XPathNavigator nav)
{
    Console.WriteLine ("Type={0}, Name={1}, Value={2}",
        nav.NodeType, nav.Name, nav.Value);

    // Attributes: step onto the first one (if any), visit each in turn, then
    // climb back to the owning element.
    if (nav.MoveToFirstAttribute ()) {
        bool moreAttributes = true;
        while (moreAttributes) {
            OutputNode (nav);
            moreAttributes = nav.MoveToNextAttribute ();
        }
        nav.MoveToParent ();
    }

    // Children: same pattern, following sibling links.
    if (nav.MoveToFirstChild ()) {
        bool moreChildren = true;
        while (moreChildren) {
            OutputNode (nav);
            moreChildren = nav.MoveToNext ();
        }
        nav.MoveToParent ();
    }
}


A Do-It-Yourself XPath Expression Evaluator

using System;
using System.Drawing;
using System.Windows.Forms;
using System.Xml.XPath;

// Windows Forms application that loads an XML document and displays the
// nodes selected by a user-entered XPath expression in a tree view.
// Parts of the constructor are elided ("...") in these notes.
class AnalyzerForm : Form
{
    // Controls. NOTE(review): creation of most controls and the wiring of
    // the event handlers below happen in the elided constructor code.
    GroupBox DocumentGB;
    TextBox Source;
    Button LoadButton;
    GroupBox ExpressionGB;
    TextBox Expression;
    Button ExecuteButton;
    ImageList NodeImages;
    TreeView XmlView;

    // Navigator over the currently loaded document; set by OnLoadDocument.
    XPathNavigator Navigator;

    AnalyzerForm ()
    {
        // Initialize the form's properties
        ...
        XmlView = new TreeView ();
...
        // Icons for the tree nodes, loaded as a strip of 12x12 images from
        // the "Buttons" bitmap resource.
        NodeImages = new ImageList ();
        NodeImages.ImageSize = new Size (12, 12);
        NodeImages.Images.AddStrip (new Bitmap (GetType (), "Buttons"));
        NodeImages.TransparentColor = Color.White;

        // Anchor the tree view to all four sides of the form.
        XmlView.Anchor = AnchorStyles.Top | AnchorStyles.Bottom |
            AnchorStyles.Left | AnchorStyles.Right;
        XmlView.Location = new System.Drawing.Point (16, 176);
        XmlView.Size = new System.Drawing.Size (456, 232);
        XmlView.ImageList = NodeImages;
        XmlView.TabIndex = 4;
        XmlView.Name = "XmlView";

       ...
    }

    // Loads the document named in the Source text box and, on success,
    // enables the Execute button. Any failure is shown in a message box.
    void OnLoadDocument (object sender, EventArgs e)
    {
        try {
            XPathDocument doc = new XPathDocument (Source.Text);
            Navigator = doc.CreateNavigator ();
            ExecuteButton.Enabled = true;
        }
        catch (Exception ex)  {
            MessageBox.Show (ex.Message);
        }
    }

    // Evaluates the expression in the Expression text box against the loaded
    // document and rebuilds the tree view from the selected nodes. Any
    // failure is shown in a message box.
    void OnExecuteExpression (object sender, EventArgs e)
    {
        try {
            XPathNodeIterator iterator =
                Navigator.Select (Expression.Text);
            XmlView.Nodes.Clear ();
            // Each selected node becomes a top-level tree node (null parent).
            while (iterator.MoveNext ())
                AddNodeAndChildren (iterator.Current, null);
        }
        catch (Exception ex) {
            MessageBox.Show (ex.Message);
        }
    }

    // Adds the navigator's current node under tnode (or at the top level when
    // tnode is null), followed by its attributes and, recursively, its
    // children. The navigator is moved back to the starting node afterwards.
    void AddNodeAndChildren (XPathNavigator nav, TreeNode tnode)
    {
        TreeNode child = AddNode (nav, tnode);

        if (nav.HasAttributes) {
            nav.MoveToFirstAttribute ();
            do {
                AddAttribute (nav, child);
            } while (nav.MoveToNextAttribute ());
            nav.MoveToParent ();
        }

        if (nav.HasChildren) {
            nav.MoveToFirstChild ();
            do {
                AddNodeAndChildren (nav, child);
            } while (nav.MoveToNext ());
            nav.MoveToParent ();
        }
    }

    // Creates a tree node for the navigator's current node and adds it under
    // tnode, or to the tree view's root collection when tnode is null.
    // Returns the new tree node, or null for node types the switch does not
    // handle. The numeric arguments to TreeNode are image indexes into
    // NodeImages. NOTE(review): which icon each index shows depends on the
    // "Buttons" bitmap, which is not visible here.
    TreeNode AddNode (XPathNavigator nav, TreeNode tnode)
    {
        string text = null;
        TreeNode child = null;

        TreeNodeCollection tnodes = (tnode == null) ?
            XmlView.Nodes : tnode.Nodes;

        switch (nav.NodeType) {

        case XPathNodeType.Root:
        case XPathNodeType.Element:
            tnodes.Add (child = new TreeNode (nav.Name, 0, 0));
            break;

        case XPathNodeType.Attribute:
            text = String.Format ("{0}={1}", nav.Name, nav.Value);
            tnodes.Add (child = new TreeNode (text, 1, 1));
            break;

        case XPathNodeType.Text:
            text = nav.Value;
            // Keep long text nodes readable: show the first 128 characters.
            if (text.Length > 128)
                text = text.Substring (0, 128) + "...";
            tnodes.Add (child = new TreeNode (text, 2, 2));
            break;

        case XPathNodeType.Comment:
            text = String.Format ("<!--{0}-->", nav.Value);
            tnodes.Add (child = new TreeNode (text, 4, 4));
            break;

        case XPathNodeType.ProcessingInstruction:
            text = String.Format ("<?{0} {1}?>", nav.Name, nav.Value);
            tnodes.Add (child = new TreeNode (text, 5, 5));
            break;
        }
        return child;
    }

    // Adds a "name=value" tree node for the attribute the navigator is
    // positioned on, as a child of tnode.
    void AddAttribute (XPathNavigator nav, TreeNode tnode)
    {
        string text = String.Format ("{0}={1}", nav.Name, nav.Value);
        tnode.Nodes.Add (new TreeNode (text, 1, 1));
    }

    // Application entry point: shows the form and runs the message loop.
    static void Main () 
    {
        Application.Run (new AnalyzerForm ());
    }
}

 

 

 

 

using

 


Copyright chris wild 1999-2004.
For problems or questions regarding this web contact [Dr. Wild].
Last updated: December 22, 2003.