/* shakespearedoc.cpp
 An example class for Jon Bosak's Shakespeare play documents, specifically the
  subset we're interested in for our example application.
  Demonstrates the use of the expatParser class and docObjectIface interface.
  http://projects.zillabit.com/xml.html

Copyright (c) 2002, Earl Levine 
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted 
provided that the following conditions are met:

-Redistributions of source code must retain the above copyright notice, this list of conditions 
and the following disclaimer.

-The name of Earl Levine may not be used to endorse or promote products derived from this 
software without specific prior written permission. 

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS 
OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 
AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR 
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE. 
*/

#include "shakespearedoc.h"
#include <string.h>

/*
Because we can ignore some features of the documents (e.g. we don't care about the 
Dramatis Personae information or the Scene Description) we have a pretty simple 
view of the documents: 

<PLAY> 
  [we expect only one PLAY per document]
<TITLE>The Tragedy of Hamlet, Prince of Denmark</TITLE>
  [we only expect one TITLE tag per PLAY]

<ACT>
  [we expect multiple ACTs per PLAY]
<TITLE>ACT I</TITLE>
  [we expect only one TITLE tag per ACT]

<SCENE>
  [we expect multiple SCENEs per ACT]
<TITLE>SCENE I.  Elsinore. A platform before the castle.</TITLE>
  [we expect only one TITLE tag per SCENE]

<SPEECH> 
  [we expect multiple SPEECHes per SCENE]
<SPEAKER>BERNARDO</SPEAKER> 
  [we expect only one SPEAKER per SPEECH]
<LINE>Well, good night.</LINE> 
  [we expect multiple LINEs per SPEECH]
<LINE>If you do meet Horatio and Marcellus,</LINE>
<LINE>The rivals of my watch, bid them make haste.</LINE>
</SPEECH>

[more SPEECHes here]
</SCENE>

[more SCENEs here]
</ACT>

[more ACTs here]
</PLAY>

 
At any point, if we encounter any unrecognized tags we'll just ignore them. 
If we expected exactly one instance of a tag, but we encounter either none or 
more than one, we'll print a warning message but will deal with the 
situation gracefully.  (In fact, in the plays there are some SPEECHes 
with multiple SPEAKERs, but for our application noting only one of them 
is fine.)
 */


// Creates an empty document.  A fresh root element is allocated and parsing
//  starts positioned on it; warnlog is handed to the root element, which
//  passes it on to the elements it creates.
ShakespeareDoc::ShakespeareDoc(FILE *warnlog) {
  ShakespeareDocElementRoot *root = new ShakespeareDocElementRoot(warnlog);
  pRootElem = root;
  pParsingElem = root;
}

// Releases the root element (whose destructor deletes its own subelements).
ShakespeareDoc::~ShakespeareDoc() {
  if(pRootElem) {
    delete pRootElem;
  }
}

// Returns the PLAY element recorded under the document root, or NULL when
//  there is no root element or the root has no PLAY.
const ShakespeareDocElementPlay* ShakespeareDoc::getPlay() const {
  if(pRootElem == NULL) {
    return NULL;
  }
  return pRootElem->getPlay();
}

void ShakespeareDoc::StartElementHandler(const char *name, const char **atts) {
  pParsingElem = pParsingElem->ParseStart(name);
}

void ShakespeareDoc::EndElementHandler(const char *name) {
  pParsingElem = pParsingElem->ParseEnd();
}

// Character-data callback: hands the len characters starting at s to the
//  element currently being parsed.
void ShakespeareDoc::CharacterDataHandler(const XML_Char *s, int len) {
  const char *const text = reinterpret_cast<const char *>(s);
  pParsingElem->ParseCharData(text, len);
}


// Creates an element with no subelements and no character data.
//   _parentElement: the element ParseEnd() returns to once this element is
//                   finished (NULL for an element without a parent).
//   _warnlog:       stream for parse warnings; the warn* methods print
//                   nothing when it is NULL.
ShakespeareDocElement::ShakespeareDocElement(ShakespeareDocElement *_parentElement,
                                             FILE *_warnlog) {
  parentElement = _parentElement;
  warnlog = _warnlog;
  parseState_ignoringSubelement = false;
  // Give the depth counter a defined value even though it is reset whenever
  //  an ignored subelement is entered.
  parseState_ignoringSubelementDepth = 0;
  uniqueSubelement = NULL;
  listSubelements = NULL;
  // Likewise the list count: defined from the start rather than only once
  //  the first list subelement appears.
  numListSubelements = 0;
  characterData = NULL;
  characterDataLen = 0;
}

// Destroys this element together with everything it owns: each "list"
//  subelement, the array holding them, the "unique" subelement and the
//  accumulated character data.
ShakespeareDocElement::~ShakespeareDocElement() {
  if(listSubelements) {
    for(int i=0; i<numListSubelements; i++) {
      delete listSubelements[i];
    }
    // The pointer array itself comes from new[] in ParseStart().
    delete[] listSubelements;
  }
  delete uniqueSubelement;
  // characterData is allocated with new char[] in ParseCharData(), so it
  //  must be released with the array form of delete.
  delete[] characterData;
}

// Handles the start tag of a subelement called name.
// Returns the element that should receive the following parse events:
//  - a newly created "list" subelement, if name matches the list type;
//  - a newly created "unique" subelement, if name matches the unique type
//    (a previously parsed instance is discarded with a warning);
//  - this element itself, if the tag is unrecognized or nested inside an
//    unrecognized tag, in which case it is being ignored.
ShakespeareDocElement* ShakespeareDocElement::ParseStart(const char *name) {
  if(parseState_ignoringSubelement) {
    // If we are in a subelement of an element we're ignoring,
    //  simply note that we're descending further into the depths
    parseState_ignoringSubelementDepth++;
    return this;
  }

  if(matchesListSubelemName(name)) {
    // This is another (or the first) of the "list" type subelements
    ShakespeareDocElement** old = listSubelements;
    if(old == NULL) {
      numListSubelements = 0;
    }
    // Grow the list by one
    ShakespeareDocElement** grown = new ShakespeareDocElement*[1 + numListSubelements];
    if(numListSubelements > 0) {
      // Only copy when there is an old array; memcpy must not be handed a
      //  NULL source pointer, even for a zero-byte copy.
      memcpy(grown, old, numListSubelements*sizeof(ShakespeareDocElement*));
    }
    // Construct the appropriate type of docElement subclass
    grown[numListSubelements] = constructListSubelem();
    listSubelements = grown;
    numListSubelements++;
    // The element pointers now live in the new array, so only the old
    //  array's storage is released here.
    delete[] old;
    // continue parsing recursively
    return listSubelements[numListSubelements - 1];
  }

  if(matchesUniqueSubelemName(name)) {
    // This is the start of the "unique" subelement,
    //  if that exists for this element type
    if(uniqueSubelement) {
      // Oops, this element is supposed to be unique but we've already found one.
      // Discard the old one and print a warning.
      warnNonUnique();
      delete uniqueSubelement;
      uniqueSubelement = NULL;
    }
    // Construct the appropriate type of docElement subclass and
    //  continue parsing recursively
    uniqueSubelement = constructUniqueSubelem();
    return uniqueSubelement;
  }

  // This is an unknown subelement type, so ignore it (and its subelements)
  parseState_ignoringSubelement = true;
  parseState_ignoringSubelementDepth = 0;
  return this;
}

// Handles an end tag delivered to this element.
// Returns this element while an ignored subelement (or one of its children)
//  is being closed; otherwise this element itself is finished and its parent
//  is returned.
ShakespeareDocElement* ShakespeareDocElement::ParseEnd() {
  // Terminate our character data with a NUL.  ParseCharData() does nothing
  //  while a subelement is being ignored, so the terminator is only added
  //  when this element itself is closing.
  ParseCharData("\0", 1);

  if(!parseState_ignoringSubelement) {
    // This element is complete.  If its type expects a "unique" subelement
    //  and none was ever parsed, print a warning.
    if((uniqueSubelement == NULL) && (getUniqueSubelementName() != NULL)) {
      warnUniqueNeverFound();
    }
    // Pop up to my parent.
    return parentElement;
  }

  // We just finished an element we're ignoring, or one of its subelements.
  if(parseState_ignoringSubelementDepth != 0) {
    // Just ascending from a subelement of the element we're ignoring
    parseState_ignoringSubelementDepth--;
  } else {
    // All done with ignoring this subelement and its children.
    // Back to business after this.
    parseState_ignoringSubelement = false;
  }
  return this;
}

// Appends the len characters starting at s to this element's character data.
// Nothing is stored while a subelement is being ignored, or when there is
//  nothing to append (s is NULL or len is not positive).
// The stored data is a raw byte buffer: it only gains a terminating NUL when
//  ParseEnd() appends one.
void ShakespeareDocElement::ParseCharData(const char *s, int len) {
  if(parseState_ignoringSubelement) {
    return;
  }
  if(s == NULL || len <= 0) {
    return;
  }
  // Build a new buffer holding the old contents followed by the new ones
  char *newCharData = new char[characterDataLen + len];
  if(characterDataLen > 0) {
    // Skipped on the first append, when characterData is still NULL and
    //  must not be passed to memcpy.
    memcpy(newCharData, characterData, characterDataLen);
  }
  memcpy(newCharData + characterDataLen, s, len);
  // The old buffer came from new char[], so release it with delete[].
  delete[] characterData;
  characterData = newCharData;
  characterDataLen += len;
}

// Returns the character data accumulated so far, or NULL if none has been
//  stored.  The buffer remains owned by this element.
const char* ShakespeareDocElement::getCharacterData() const {
  const char* const data = characterData;
  return data;
}

// Returns the number of bytes of character data stored so far; this count
//  includes the NUL that ParseEnd() appends.
int ShakespeareDocElement::getCharacterDataLen() const {
  const int length = characterDataLen;
  return length;
}

// Returns the "unique" subelement, or NULL if none has been parsed.
const ShakespeareDocElement *ShakespeareDocElement::getUniqueSubelement() const {
  const ShakespeareDocElement *const unique = uniqueSubelement;
  return unique;
}

// Returns how many "list" subelements have been parsed; zero when no list
//  has been created yet.
int ShakespeareDocElement::getNumListSubelements() const {
  return (listSubelements != NULL) ? numListSubelements : 0;
}

// Returns the "list" subelement at zero-based position num, or NULL when
//  there is no list or num is out of range.
const ShakespeareDocElement *ShakespeareDocElement::getListSubelement (int num) const {
  // The list pointer is tested first so the count is only consulted once a
  //  list exists.
  const bool valid = (listSubelements != NULL)
                     && (num >= 0)
                     && (num < numListSubelements);
  if(valid) {
    return listSubelements[num];
  }
  return NULL;
}

const char* ShakespeareDocElement::getUniqueSubelementName() {
  const char* result;
  ShakespeareDocElement* elem = constructUniqueSubelem();
  if(!elem) return NULL;
  result = elem->getElementName();
  delete elem;
  return result;
}

// Default factory for the "unique" subelement: this base version returns
//  NULL, meaning the element type has no "unique" subelement.
ShakespeareDocElement* ShakespeareDocElement::constructUniqueSubelem() {
  ShakespeareDocElement* const noSubelement = NULL;
  return noSubelement;
}

const char* ShakespeareDocElement::getListSubelementName() {
  const char* result;
  ShakespeareDocElement* elem = constructListSubelem();
  if(!elem) return NULL;
  result = elem->getElementName();
  delete elem;
  return result;
}

// Default factory for a "list" subelement: this base version returns NULL,
//  meaning the element type has no "list" subelements.
ShakespeareDocElement* ShakespeareDocElement::constructListSubelem() {
  ShakespeareDocElement* const noSubelement = NULL;
  return noSubelement;
}

// Returns true when name is exactly the element name of this element type's
//  "unique" subelement.  Returns false when name is NULL or when the type
//  has no "unique" subelement.
bool ShakespeareDocElement::matchesUniqueSubelemName(const char* name) {
  if(name == NULL) {
    return false;
  }
  // Look the name up once: every getUniqueSubelementName() call constructs
  //  and deletes a temporary subelement.
  const char* const uniqueName = getUniqueSubelementName();
  if(uniqueName == NULL) {
    // No "unique" subelement type for this element type
    return false;
  }
  return (0 == strcmp(name, uniqueName));
}

// Returns true when name is exactly the element name of this element type's
//  "list" subelement.  Returns false when name is NULL or when the type has
//  no "list" subelement.
bool ShakespeareDocElement::matchesListSubelemName(const char* name) {
  if(name == NULL) {
    return false;
  }
  // Look the name up once: every getListSubelementName() call constructs
  //  and deletes a temporary subelement.
  const char* const listName = getListSubelementName();
  if(listName == NULL) {
    // No "list" subelement type for this element type
    return false;
  }
  return (0 == strcmp(name, listName));
}

// Warns that a second instance of the "unique" subelement was found in this
//  element, then reports where in the document that happened.
void ShakespeareDocElement::warnNonUnique() {
  if(warnlog != NULL) {
    const char* const duplicateName = getUniqueSubelementName();
    const char* const containerName = getElementName();
    fprintf(warnlog,
            "Parse warning: duplicate %s found in %s, ignoring previous instance.\n",
            duplicateName, containerName);
  }
  warnLocation();
}

// Warns that this element ended without its expected "unique" subelement,
//  then reports where in the document that happened.
void ShakespeareDocElement::warnUniqueNeverFound() {
  if(warnlog != NULL) {
    const char* const missingName = getUniqueSubelementName();
    const char* const containerName = getElementName();
    fprintf(warnlog,
            "Parse warning: no %s found in %s (was expecting one instance).\n",
            missingName, containerName);
  }
  warnLocation();
}

void ShakespeareDocElement::warnLocation() {
  if(warnlog) {
    fprintf(warnlog, "location: \n");
    ShakespeareDocElement *nextUp = parentElement;
    while(nextUp->parentElement) {
      const char* cdata;
      fprintf(warnlog, "%s = ", nextUp->getElementName());
      if(nextUp->getUniqueSubelement()
	 && (cdata = nextUp->getUniqueSubelement()->getCharacterData())) {
	fprintf(warnlog, "%s", cdata);
      } else {
	fprintf(warnlog, "?");
      }
      fprintf(warnlog, "\n");
      nextUp = nextUp->parentElement;
    }
  }
}


// Thanks to the ShakespeareDocElement base class it's trivial to implement all
//  the element types that define the document structure

// Title
// Name identifying TITLE elements: compared against tag names while parsing
//  and printed in warnings.
const char* ShakespeareDocElementTitle::getElementName() const {
  static const char kElementName[] = "TITLE";
  return kElementName;
}

// Line
// Name identifying LINE elements: compared against tag names while parsing
//  and printed in warnings.
const char* ShakespeareDocElementLine::getElementName() const {
  static const char kElementName[] = "LINE";
  return kElementName;
}

// Speaker
// Name identifying SPEAKER elements: compared against tag names while
//  parsing and printed in warnings.
const char* ShakespeareDocElementSpeaker::getElementName() const {
  static const char kElementName[] = "SPEAKER";
  return kElementName;
}

// Speech
// Name identifying SPEECH elements: compared against tag names while parsing
//  and printed in warnings.
const char* ShakespeareDocElementSpeech::getElementName() const {
  static const char kElementName[] = "SPEECH";
  return kElementName;
}
// Returns this speech's SPEAKER (its "unique" subelement), or NULL if none
//  was parsed.
const ShakespeareDocElementSpeaker* ShakespeareDocElementSpeech::getSpeaker() const {
  const ShakespeareDocElement* const unique = getUniqueSubelement();
  return static_cast<const ShakespeareDocElementSpeaker*>(unique);
}
// A speech's "unique" subelement is a SPEAKER; the new element gets this
//  speech as parent and shares its warning log.
ShakespeareDocElement* ShakespeareDocElementSpeech::constructUniqueSubelem() {
  ShakespeareDocElement* const speaker = new ShakespeareDocElementSpeaker(this, warnlog);
  return speaker;
}
// Returns the number of LINEs parsed for this speech.
int ShakespeareDocElementSpeech::getNumLines() const {
  const int lineCount = getNumListSubelements();
  return lineCount;
}
// Returns the LINE at zero-based position num, or NULL if num is out of
//  range.
const ShakespeareDocElementLine* ShakespeareDocElementSpeech::getLine (int num) const {
  const ShakespeareDocElement* const entry = getListSubelement(num);
  return static_cast<const ShakespeareDocElementLine*>(entry);
}
// A speech's "list" subelements are LINEs; the new element gets this speech
//  as parent and shares its warning log.
ShakespeareDocElement* ShakespeareDocElementSpeech::constructListSubelem() {
  ShakespeareDocElement* const line = new ShakespeareDocElementLine(this, warnlog);
  return line;
}

// Element With Title (useful base class)
// Returns this element's TITLE (its "unique" subelement), or NULL if none
//  was parsed.
const ShakespeareDocElementTitle* ShakespeareDocElementWithTitle::getTitle() const {
  const ShakespeareDocElement* const unique = getUniqueSubelement();
  return static_cast<const ShakespeareDocElementTitle*>(unique);
}
// The "unique" subelement of a titled element is a TITLE; the new element
//  gets this element as parent and shares its warning log.
ShakespeareDocElement* ShakespeareDocElementWithTitle::constructUniqueSubelem() {
  ShakespeareDocElement* const title = new ShakespeareDocElementTitle(this, warnlog);
  return title;
}

// Scene
// Name identifying SCENE elements: compared against tag names while parsing
//  and printed in warnings.
const char* ShakespeareDocElementScene::getElementName() const {
  static const char kElementName[] = "SCENE";
  return kElementName;
}
// A scene's "list" subelements are SPEECHes; the new element gets this scene
//  as parent and shares its warning log.
ShakespeareDocElement* ShakespeareDocElementScene::constructListSubelem() {
  ShakespeareDocElement* const speech = new ShakespeareDocElementSpeech(this, warnlog);
  return speech;
}
// Returns the number of SPEECHes parsed for this scene.
int ShakespeareDocElementScene::getNumSpeeches() const {
  const int speechCount = getNumListSubelements();
  return speechCount;
}
// Returns the SPEECH at zero-based position num, or NULL if num is out of
//  range.
const ShakespeareDocElementSpeech* ShakespeareDocElementScene::getSpeech (int num) const {
  const ShakespeareDocElement* const entry = getListSubelement(num);
  return static_cast<const ShakespeareDocElementSpeech*>(entry);
}

// Act
// Name identifying ACT elements: compared against tag names while parsing
//  and printed in warnings.
const char* ShakespeareDocElementAct::getElementName() const {
  static const char kElementName[] = "ACT";
  return kElementName;
}
// An act's "list" subelements are SCENEs; the new element gets this act as
//  parent and shares its warning log.
ShakespeareDocElement* ShakespeareDocElementAct::constructListSubelem() {
  ShakespeareDocElement* const scene = new ShakespeareDocElementScene(this, warnlog);
  return scene;
}
// Returns the number of SCENEs parsed for this act.
int ShakespeareDocElementAct::getNumScenes() const {
  const int sceneCount = getNumListSubelements();
  return sceneCount;
}
// Returns the SCENE at zero-based position num, or NULL if num is out of
//  range.
const ShakespeareDocElementScene* ShakespeareDocElementAct::getScene (int num) const {
  const ShakespeareDocElement* const entry = getListSubelement(num);
  return static_cast<const ShakespeareDocElementScene*>(entry);
}

// Play
// Name identifying PLAY elements: compared against tag names while parsing
//  and printed in warnings.
const char* ShakespeareDocElementPlay::getElementName() const {
  static const char kElementName[] = "PLAY";
  return kElementName;
}
// A play's "list" subelements are ACTs; the new element gets this play as
//  parent and shares its warning log.
ShakespeareDocElement* ShakespeareDocElementPlay::constructListSubelem() {
  ShakespeareDocElement* const act = new ShakespeareDocElementAct(this, warnlog);
  return act;
}
// Returns the number of ACTs parsed for this play.
int ShakespeareDocElementPlay::getNumActs() const {
  const int actCount = getNumListSubelements();
  return actCount;
}
// Returns the ACT at zero-based position num, or NULL if num is out of
//  range.
const ShakespeareDocElementAct* ShakespeareDocElementPlay::getAct (int num) const {
  const ShakespeareDocElement* const entry = getListSubelement(num);
  return static_cast<const ShakespeareDocElementAct*>(entry);
}

// Document root
// Label used for the document root wherever an element name is needed,
//  e.g. in warning messages.
const char* ShakespeareDocElementRoot::getElementName() const {
  static const char kElementName[] = "(document root)";
  return kElementName;
}
// The root's "unique" subelement is the PLAY; the new element gets the root
//  as parent and shares its warning log.
ShakespeareDocElement* ShakespeareDocElementRoot::constructUniqueSubelem() {
  ShakespeareDocElement* const play = new ShakespeareDocElementPlay(this, warnlog);
  return play;
}
// Returns the document's PLAY (the root's "unique" subelement), or NULL if
//  none was parsed.
const ShakespeareDocElementPlay* ShakespeareDocElementRoot::getPlay() const {
  const ShakespeareDocElement* const unique = getUniqueSubelement();
  return static_cast<const ShakespeareDocElementPlay*>(unique);
}
