/*

APPENDIX B Store.cpp

Index of HTML Docs

APPENDIX D QueryModifications >>

 

*/

 

//  AN ANALYSIS AND EVALUATION OF A NATIVE XML DATABASE

//        BY KEN WENKER, PH.D.

//

//       APPENDIX C:  EXTENSIBILITY DOCUMENT GENERATION TOOL

//

//       This is an HTML version for viewing only.

//       It compiles and runs in its original form on MS Visual Studio 6.0

//

// This program creates XML documents with widely varying tagset structures, so

// that you can test a database's ability to deal automatically, i.e., without

// manual intervention, with the extensibility of XML.

 

#include <stdio.h>

#include <stdlib.h>

#include <iostream.h>

#include <time.h>

#include <fstream.h>

#include <string.h>

 

// Number that will be given to the next node written to the current document.
// main() resets it to 2 before each document; make_new_node() post-increments it
// once for every node it creates.
int node_number = 2;          // root will be node 1; next node after root will be 2

 

// Reads the first document number from "next_file_number.dat" and advances the
// stored number by num_docs.
int get_initial_filenumber(int num_docs);

// Writes one element (and, recursively, a random subtree) to `of`.
//   max_l:  deepest level allowed in this document
//   level:  level of the node being written
//   parent: node number of the parent element
// NOTE(review): `of` is passed by value; standard iostreams do not allow copying a
// stream, so this relies on the legacy <fstream.h> implementation -- confirm
// before porting.
void make_new_node(int max_l, int level, int parent, fstream of);

// Fills `name` with a random lower-case name of 1 to 4 letters plus a terminator.
void make_node_name(char* name);

// Fills `optional_attribute_set` with 1 to 4 random attributes (name="number").
void make_optional_attribute_set(char* optional_attribute_set);

// Writes a randomly named inline element containing fixed text to `of`.
void make_embedded_node(fstream of);

 

void main(int argc, char *argv[]){

 

      int num_docs = 10;

      int min_level = 3;

      int max_level = 10;

      char oput_file_stem[15] = "neo";

 

      fstream of;

      char filename[15];

      char temp_filename[] = "extensibility_temp";

 

     

          /********************************************************************

            ***                                                               ***

            ***               PROCESS COMMAND-LINE ARGUMENTS                  ***

            ***                                                               ***

            ********************************************************************/        

      int j = 0;

      for ( int i = 1; i < argc; i++ ) {

            j = i;

            i++;

            switch(argv[j][1]) {

            case 'n':

                  num_docs = atoi(argv[i]);

                  break;

            case 'M':

                  max_level = atoi(argv[i]);

                  break;

            case 'm':

                  min_level = atoi(argv[i]);

                  break;

            case 'o':

                  oput_file_stem[0] = '\0';

                  strcat(oput_file_stem, argv[i]);

                  break;

            default:

                  cout << endl;

                  cout << "Unrecognized option: " << argv[j] << ".  Legal options are: " << endl;

                  cout << "     -n num_docs--nmbr of XML docs to create; default: 10 " << endl;

                  cout << "     -M maximum max_level--default: 10" << endl;

                  cout << "     -m minimum max_level--default:  3" << endl;

                  cout << "     -o oput file stem--default: neo  ; to which is appended the" << endl;

                  cout << "            number from next_file_number.dat (incrementing for each doc created)" << endl;

                  cout << "            and then '.xml'" << endl;

                  exit (1);

                  break;

            } // switch

      }  // for i

 

      int file_number = get_initial_filenumber(num_docs);

 

      //initialize random number generator

      int seed;

      seed = time(NULL);

      srand(seed);

     

      // set up loop to create num_docs XML documents

      for (int file_counter = 1; file_counter <= num_docs; file_counter++ )

      {

           

            filename[0] = '\0';

            sprintf( filename, "%s%d.xml", oput_file_stem, file_number );

            file_number++;

 

            // max_level:  for ALL documents, the deepest level permitted

            // this_max_level: for THIS document, the deepest level permitted

            int levels_possible = max_level - min_level;

            int this_max_level = (int) (rand() % levels_possible ) + min_level;

            node_number = 2;  // root is node nmbr 1; the one after that is node #2

           

            of.open(temp_filename, ios::app);

 

            of << "<root nn=\"1\" np=\"0\" l=\"1\">" << endl;

                  // nn:  node number

                  // np:  node parent

                  // l:   level

 

            for ( int four_counter = 1; four_counter <= 4; four_counter++ )

                  // The root node will always have four child nodes; beneath

                  // that, everything will be random.

                  make_new_node(this_max_level, 2, 1, of);

                 

            of << "</root>" << endl;

            of.close();

            rename(temp_filename, filename);

 

      }   // end "for" loop: file_counter <= 10

}  // end main

 

// Reserves a run of num_docs consecutive file numbers.
//
// Reads the decimal number at the start of "next_file_number.dat", writes
// that number plus num_docs back to the start of the same file, and returns
// the number that was read (the number for the first document of this run).
// Content that does not start with a number -- including an empty file --
// is treated as 0.
//
// If the file cannot be opened for reading and writing, a message is printed
// and the program exits with status 1.
int get_initial_filenumber(int num_docs)
{
      FILE *fp = fopen("next_file_number.dat", "r+");
      if ( fp == NULL ) {
            cout << "Need file 'next_file_number.dat'; exiting." << endl;
            exit (1);
      }

      // fscanf parses the number directly, so there is no fixed-size text
      // buffer to truncate long numbers and no NULL to handle on an empty file.
      int number = 0;
      if ( fscanf(fp, "%d", &number) != 1 )
            number = 0;

      // rewind() also makes it legal to switch from reading to writing on an
      // update ("r+") stream.
      rewind(fp);
      fprintf( fp, "%d", number + num_docs);
      fclose(fp);

      return number;
}  // end function get_initial_filenumber

 

void make_new_node(int max_l, int level, int parent, fstream of)

{

      char tabstring[25];

      tabstring[0] = '\0';

      for ( int tabcount = 1; tabcount <= level - 1; tabcount++ )

            strcat(tabstring, "   ");   // for human readability, indent 3 spaces per level

      int this_node = node_number++;

      int next_level = level + 1;

      char tag_name[5];

      char optional_attribute_set[50];

 

      make_node_name(tag_name);

      make_optional_attribute_set(optional_attribute_set);

 

      of << tabstring << "<" << tag_name;

      of << " nn=\"" << this_node << "\"";

      of << " np=\"" << parent << "\"";

      of << " l=\"" << level << "\"";

      of << optional_attribute_set;

      of << ">" << endl;

 

      if ( level == max_l ) {

            of << tabstring << "This is the text string for node number " << this_node << "." << endl;

            of << tabstring << "</" << tag_name << ">"  << endl;

            return;

      }

 

      // There are 18 types of nodes that can be generated; which

      // one is generated is totally random. This insures that the documents

      // generated are different not just in tag names and values, but also in

      // the basic structure of each document.

      int node_type = (int) (rand() % 18);

 

      switch (node_type) {

            case 0: of << tabstring << "This is the text string for node number " << this_node << "." << endl; break;

            case 1: make_new_node( max_l, next_level, this_node, of); break;

            case 2: of << tabstring << "This is the text string for node number " << this_node << "." << endl;

                        make_new_node( max_l, next_level, this_node, of); break;

            case 3: make_new_node( max_l, next_level, this_node, of);

                        of << tabstring << "This is the text string for node number " << this_node << "." << endl; break;

            case 4: make_new_node( max_l, next_level, this_node, of);

                        make_new_node( max_l, next_level, this_node, of); break;

            case 5: of << tabstring << "This is the first part of the text string for node number " << this_node << ", and ";

                        make_embedded_node(of);

                        of << "this is the second part of the text string for node number " << this_node << "." << endl;

                        break;

            case 6: make_new_node( max_l, next_level, this_node, of);

                        of << tabstring << "This is the text string for node number " << this_node << "." << endl;

                        make_new_node( max_l, next_level, this_node, of); break;

            case 7: make_new_node( max_l, next_level, this_node, of);

                        make_new_node( max_l, next_level, this_node, of);

                        of << tabstring << "this is the text string for node number " << this_node << "." << endl;

                        break;

            case 8: make_new_node( max_l, next_level, this_node, of);

                        make_new_node( max_l, next_level, this_node, of);

                        make_new_node( max_l, next_level, this_node, of);

                        break;

            case 9: of << tabstring << "This is the text string for node number " << this_node << "." << endl;

                        make_new_node( max_l, next_level, this_node, of);

                        make_new_node( max_l, next_level, this_node, of);

                        break;

            case 10: of << tabstring << "This is the first part of the text string for node number " << this_node << ", and ";

                         make_embedded_node(of);

                         of << "this is the second part of the text string for node number " << this_node << "." << endl;

                         make_new_node( max_l, next_level, this_node, of);

                         break;

            case 11: of << tabstring << "This is the first part of the text string for node number " << this_node << ", and ";

                         make_embedded_node(of);

                         of << "this is the second part of the text string for node number " << this_node << "." << endl;

                         make_new_node( max_l, next_level, this_node, of);

                         break;

            case 12: of << tabstring << "This is the text string for node number " << this_node << "." << endl;

                         make_new_node( max_l, next_level, this_node, of);

                         make_new_node( max_l, next_level, this_node, of);

                         make_new_node( max_l, next_level, this_node, of);

                         break;

            case 13: make_new_node( max_l, next_level, this_node, of);

                         of << tabstring << "This is the first part of the text string for node number " << this_node << ", and ";

                         make_embedded_node(of);

                         of << "this is the second part of the text string for node number " << this_node << "." << endl;

                         break;

            case 14: make_new_node( max_l, next_level, this_node, of);

                         of << tabstring << "This is the text string for node number " << this_node << "." << endl;

                         make_new_node( max_l, next_level, this_node, of);

                         make_new_node( max_l, next_level, this_node, of);

                         break;

            case 15: make_new_node( max_l, next_level, this_node, of);

                         make_new_node( max_l, next_level, this_node, of);

                         of << tabstring << "This is the text string for node number " << this_node << "." << endl;

                         make_new_node( max_l, next_level, this_node, of);

                         break;

            case 16: make_new_node( max_l, next_level, this_node, of);

                         make_new_node( max_l, next_level, this_node, of);

                         make_new_node( max_l, next_level, this_node, of);

                         of << tabstring << "This is the text string for node number " << this_node << "." << endl;

                         break;

            case 17: make_new_node( max_l, next_level, this_node, of);

                         make_new_node( max_l, next_level, this_node, of);

                         make_new_node( max_l, next_level, this_node, of);

                         make_new_node( max_l, next_level, this_node, of);

                         break;

      }  // end switch

      of << tabstring << "</" << tag_name << ">" << endl;

}  // end function make_new_node

 

// Fills `name` with a random name of 1 to 4 lower-case letters followed by a
// terminating '\0'.  `name` must have room for at least 5 characters.
// Draws one random number for the length and one for each letter.
void make_node_name(char* name)
{
      int name_len = (rand() % 4) + 1;

      // Declared outside the loop because it is needed afterwards to place
      // the terminator; standard C++ ends a for-init variable's scope with
      // the loop.
      int name_count;
      for ( name_count = 0; name_count < name_len; name_count++ )
            name[name_count] = static_cast<char>('a' + (rand() % 26));

      name[name_count] = '\0';
}  // end function make_node_name

 

void make_optional_attribute_set(char* optional_attribute_set)

{

      optional_attribute_set[0] = '\0';

      int nmbr_attributes = (rand() % 4) + 1;

      char aname[4][5];

      char attribute_string[50];

      bool node_is_duplicate = 1;

      int ac = 0;  //attribute count

 

      for (ac = 0; ac <= nmbr_attributes -1; ac++ ) {      

            do

            {

                  aname[ac][0] = '\0';

                  node_is_duplicate = 0;

                  make_node_name(aname[ac]);

                  if ( strcmp(aname[ac], "l") == 0 || strcmp(aname[ac], "np") == 0 || strcmp(aname[ac], "nn") == 0 ) {

                        node_is_duplicate = 1;

                  }

                  else

                  for ( int i = 0; i <= ac - 1; i++ ) {

                        if ( strcmp(aname[ac], aname[i]) == 0 ) {

                              node_is_duplicate = 1;

                              break;

                        }

                  }

            } while ( node_is_duplicate );

 

            sprintf( attribute_string, " %s=\"%d\"", aname[ac], rand());

            strcat(optional_attribute_set, attribute_string);

      }  // for ac

}  // end function make_optional_attribute_set

 

void make_embedded_node(fstream of)

{

      char emNodeName[5];

      make_node_name(emNodeName);

      of << "<" << emNodeName << ">This is embedded text string." << "</" << emNodeName << ">";

}

 

 

 

/*

APPENDIX B Store.cpp

Index of HTML Docs

APPENDIX D QueryModifications >>

 

*/