2019 / Andreas Koenzen


<< Back

Algorithm for converting SAX to DOM


This pseudocode shows the steps needed to build a DOM document from the events of a SAX parser. It is a very simple approach to converting between SAX and DOM: it supports neither namespaces nor complex XML processing, but it should give you a starting point to build on.

Node  _nextSibling
Node  _lastSibling
Node  _root
Stack _stack

  _stack --> push(_root = new Node)

while (!EOF)
  if (startElement)
    _node --> create(namespace, qName) // Create the node.
    for (numberOfAttributes)
      _node --> setAttribute(key, value)
    Node _last = _stack --> peek() // Get the last node, but without taking it from the stack.
    _last --> appendChild(_node)
    _stack --> push(_node)
    _lastSibling = NIL
  if (characters)
    _last = _stack --> peek()
    if (_last != _root) // No Text node can be a child of root.
      _lastSibling = _last --> appendChild(text)
  if (endElement)
    _stack --> pop()
    _lastSibling = NIL
_stack --> pop() // End of document: runs once, after the loop, to pop _root.

Objective-C implementation:

This code is part of ESXP, my static library for Objective-C, which can be found here. There you can find all the supporting classes needed to make this code fully workable. If you decide to use this code in your project, I would appreciate it if you sent me some comments about what you think of it. This code is released under the BSD 2-Clause License.

#import <Foundation/Foundation.h>
#import "ESXPDocument.h"
#import "ESXPElement.h"
#import "ESXPNode.h"
#import "ESXPText.h"
#import "ObjectiveCToolbox.h"

/// NSXMLParser delegate that assembles an ESXPDocument (a DOM tree) from the
/// SAX-style events the parser reports.
@interface ESXPSAX2DOM : NSObject <NSXMLParserDelegate>
/// Declared for sibling tracking; none of the delegate callbacks assign it.
@property (nonatomic, strong) id<ESXPNode> nextSibling;
/// Text node most recently appended under the current element; reset to nil
/// whenever an element starts or ends.
@property (nonatomic, strong) id<ESXPNode> lastSibling;
/// The DOM document being built.
@property (nonatomic, strong) ESXPDocument *document;
/// Stack of currently open nodes; the top entry is the parent of incoming content.
@property (nonatomic, strong) AKStack      *stack;

/// Returns the XML file as a DOM representation.
/// \return The DOM object.
- (ESXPDocument *)getDOM;
@end

/// File-local switch: when YES, the element and character callbacks of the
/// parser delegate trace each SAX event with NSLog.
static BOOL const kDEBUG_MODE = YES;

@implementation ESXPSAX2DOM {
    /// Text node still receiving character data for the element on top of the stack.
    ESXPText        *_openText;
    /// Characters accumulated so far for `_openText`.
    NSMutableString *_openTextValue;
}

#pragma mark - Lifecycle

/// Creates a converter holding an empty document and an empty node stack.
/// \return The initialized converter, or nil if the superclass initializer fails.
- (instancetype)init {
    self = [super init];
    if (self) {
        self.document = [ESXPDocument newBuild:@"_root"];    // Create a new DOM document.
        // 1000 is handed to AKStack as its size — presumably the deepest element
        // nesting that can be tracked; confirm against AKStack.
        self.stack    = [[AKStack alloc] initWithSize:1000];
    }
    return self;
}

#pragma mark - NSXMLParserDelegate

/// Seeds the stack with the document's root node so the first element has a parent.
- (void)parserDidStartDocument:(NSXMLParser *)parser {
    [self.stack push:[self.document getRootNode]];
}

/// Creates an element node, copies its attributes, appends it to the node on
/// top of the stack and makes it the new top of the stack.
/// \param parser        The parser reporting the event.
/// \param elementName   Name used for the new element node.
/// \param namespaceURI  Unused; namespaces are not supported.
/// \param qName         Unused; namespaces are not supported.
/// \param attributeDict Attribute names mapped to their values.
- (void)parser:(NSXMLParser *)parser didStartElement:(NSString *)elementName
                                        namespaceURI:(NSString *)namespaceURI
                                       qualifiedName:(NSString *)qName
                                          attributes:(NSDictionary *)attributeDict {
    if (kDEBUG_MODE) {
        NSLog(@"didStartElement --> %@", elementName);
    }

    ESXPElement *element = [ESXPElement newBuild:elementName];
    for (id attributeName in attributeDict) {
        [element setAttribute:attributeName value:attributeDict[attributeName]];
    }

    // The node on top of the stack is the parent of the element being opened.
    ESXPElement *parent = (ESXPElement *)[self.stack peek];
    [parent appendChild:element];
    [self.stack push:element];

    // A new element ends any text run that was in progress.
    self.lastSibling = nil;
    _openText        = nil;
    _openTextValue   = nil;
}

/// Adds character data to the element on top of the stack.
///
/// NSXMLParser may report the text of one element in several consecutive
/// calls, so chunks that arrive without an element boundary in between are
/// merged into a single text node instead of producing one node per chunk.
/// \param parser The parser reporting the event.
/// \param string The next chunk of character data.
- (void)parser:(NSXMLParser *)parser foundCharacters:(NSString *)string {
    if (kDEBUG_MODE) {
        NSLog(@"foundCharacters --> %@", string);
    }

    id parent = [self.stack peek];
    // Text is never attached directly to the document's root node.
    if (parent == nil || parent == [self.document getRootNode]) {
        return;
    }

    if (_openText != nil) {
        // Continuation of the current text run: extend the existing node.
        [_openTextValue appendString:string];
        [_openText setNodeValue:[_openTextValue copy]];
        return;
    }

    _openTextValue = [string mutableCopy];
    _openText      = [ESXPText newBuild:nil];
    [_openText setNodeValue:string];
    self.lastSibling = (ESXPText *)[(ESXPElement *)parent appendChild:_openText];
}

/// Closes the current element by removing it from the stack.
/// \param parser       The parser reporting the event.
/// \param elementName  Name of the element being closed (only logged).
/// \param namespaceURI Unused; namespaces are not supported.
/// \param qName        Unused; namespaces are not supported.
- (void)parser:(NSXMLParser *)parser didEndElement:(NSString *)elementName
                                      namespaceURI:(NSString *)namespaceURI
                                     qualifiedName:(NSString *)qName {
    if (kDEBUG_MODE) {
        NSLog(@"didEndElement   --> %@", elementName);
    }

    [self.stack popLastObject];

    // Closing an element ends any text run that was in progress.
    self.lastSibling = nil;
    _openText        = nil;
    _openTextValue   = nil;
}

/// Removes the root node pushed by -parserDidStartDocument:.
- (void)parserDidEndDocument:(NSXMLParser *)parser {
    [self.stack popLastObject];
}

#pragma mark - Result

/// Returns the XML file as a DOM representation.
/// \return The DOM object.
- (ESXPDocument *)getDOM {
    return self.document;
}

@end