/*

APPENDIX_A_IconTables

Index of HTML Docs

APPENDIX C Extensibility.cpp >>

*/

 

 

//  AN ANALYSIS AND EVALUATION OF A NATIVE XML DATABASE

//        BY KEN WENKER, PH.D.

//

//       APPENDIX B:  STORE TOOL

//

//       This is an HTML version for viewing only.

//       It compiles and runs in its original form on MS Visual Studio 6.0

//

// This program takes suitably-named XML documents in the current directory and

// sends them to the NeoCore XMS for storage. It also times the store process and

// gathers a ton of statistics which are then output to a space-delimited flat

// file for separate analysis using a spreadsheet. You must have the httpclient4.dll

// and associated .h files from NeoCore.

 

#include <SessionManagedNeoConnection.h>

#include <iostream.h>
#include <fstream.h>

#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <sys/types.h>
#include <sys/timeb.h>

 

/*  THIS PROGRAM ASSUMES THE NEOSERVER IS RUNNING. The first thing it does is log in.

The program then looks for a file whose filename contains the pattern designated in the

variable "seekfile." THIS PROGRAM ASSUMES THAT ANY FILE TO BE STORED

CONTAINS ONLY VALID XML. The program sends the file to the neoserver for storage. Then the

program looks at the stored file to grab some statistics about the file, and it twice

asks the NeoServer for a bunch of statistics. It parses out the information that is

needed. Then it sends all the statistics to a single row of an output file. After

clean-up, it loops to look for another file.  HINT:  Do not "COPY" a file containing the

"seekfile" pattern in the filename into the active directory. This program can grab it

and process it before "copy" has completed, thus attempting to send an incomplete file

to the NeoServer. Instead, copy it to the active directory with some temporary name;

when the copy process has completed, then rename the file to the "seekfile" format. As

soon as the program sees the new name, the file is already complete and ready for

the NeoServer */

 

// Waits until a file matching the pattern "seekfile" exists, then copies its
// name into "filename" and its size in bytes into "*docsize". Always returns 0.
int get_file_name(char* seekfile, char* filename, long* docsize);

// Polls (every 5 seconds) until a file matching "seek_file_name" is found and
// returns the Win32 find data for the first match.
WIN32_FIND_DATA getNextFile (char seek_file_name[]);

// Fills filesizes[0..12] with the sizes of the 13 NeoCore database files.
void get_db_filesizes(int filesizes[]);

// Gathers the statistics for one stored document and appends them as one
// space-delimited row to "oput_file". Writes the header row first when
// "*log_header_needed" is non-zero, and then clears that flag.
// "storage_stats", "admin_stats" and "results" are modified in place.
void update_log(char* storage_stats,

                        char* admin_stats,

                        long start_time,

                        long stop_time,

                        char* file_name,

                        char* oput_file,

                        long docsize,

                        char* results,

                        char* sid,

                        int *log_header_needed,

                        int filesizes[]);

// Deletes "filename" and reports success or failure on cout.
void remove_file(char* filename);

// Returns the number that follows the first "<Percent-Full>" in "big_string"
// and cuts everything up to and including that tag off the front of the string.
double getPercentage(char *big_string);

// Returns the number that follows the first "<Thread-Contexts-In-Use>" in
// "admin_string"; the string is cut down in the same way as in getPercentage.
double getNmbrThreads(char *admin_string);

// Extracts the text following "Processed>" into "docsProcessed" and the text
// following the next "ID>" into "doc_id" from the server's store response.
// Always returns 0.
int processResults( char *res, char* docsProcessed, char* doc_id);

// Scans the file character by character and adds to the caller's counters:
// number of tags, number of data items, and the deepest tag nesting level.
void find_data_items(char* filename,

                               int *nmbr_tags,

                               int *data_items,

                               int *deepest_tag_level);

 

 

int main (int argc, char *argv[])

{

      // The first eight variables listed here can be overridden by command-line arguments.

      char login[30] = "Administrator";

      char password[30] = "abc123";

      char host[16] = "127.0.0.1";

      int port = 7700;

      char instance[5] = "";

      char seekfile[] = "neo*.xml";

      char outputfile[20] = "neoTestData.txt";   // to hold test data for later analysis

      int log_header_needed = 1;

 

      char filename[15];

      long doc_size;

      long start_time;

      long stop_time;

      char* results;

      enum SERVER_STATISTIC_TYPES { ALL = 0, ADMIN, STORAGE, ACCESS, BUFFER,

            TRANSACTION, WINDOW };

      SERVER_STATISTIC_TYPES myStatType;

      char* storage_stats;

      char* admin_stats;

      char* sid;

      struct _timeb tstruct1, tstruct2;

      int db_filesizes[13];

 

        /********************************************************************

            ***                                                               ***

            ***               PROCESS COMMAND-LINE ARGUMENTS                  ***

            ***                                                               ***

            ********************************************************************/        

      int j = 0;

      for ( int i = 1; i < argc; i++ ) {

            j = i;

            i++;

            switch(argv[j][1]) {

            case 'l':

                  login[0] = '\0';

                  strcat(login, argv[i]);

                  break;

            case 'w':

                  password[0] = '\0';

                  strcat(password, argv[i]);

                  break;

            case 'h':

                  host[0] = '\0';

                  strcat(host, argv[i]);

                  break;

            case 'p':

                  port = atoi(argv[i]);

                  break;

            case 'i':

                  instance[0] = '\0';

                  strcat(instance, argv[i]);

                  seekfile[0] = '\0';

                  strcat(seekfile, "neo");

                  strcat(seekfile, instance);

                  strcat(seekfile, "*.xml");

                  outputfile[0] = '\0';

                  strcat(outputfile, "neoTestData");

                  strcat(outputfile, instance);

                  strcat(outputfile, ".txt");

                  if ( strcmp(instance, "1") == 0 ) log_header_needed = 1;

                  else log_header_needed = 0;

                  break;

            case 's':

                  seekfile[0] = '\0';

                  strcat(seekfile, argv[i]);

                  break;

            case 'o':

                  outputfile[0] = '\0';

                  strcat(outputfile, argv[i]);

                  break;

            case 'n':

                  log_header_needed = atoi(argv[i]);

                  break;

            default:

                  cout << endl;

                  cout << "Unrecognized option: " << argv[j] << ".  Legal options are: " << endl;

                  cout << "     -l Login_id--for the NeoServer; default: Administrator " << endl;

                  cout << "     -w passWord--for the NeoServer; default: abc123" << endl;

                  cout << "     -h Host--on which the NeoServer is running; default: 127.0.0.1" << endl;

                  cout << "     -p Port--on which the NeoServer is listening; default: 7700" << endl;

                  cout << "     -i Instance--if you are running multiple instances of this" << endl;

                  cout << "            program, every instance must have an instance number," << endl;

                  cout << "            typically, 1, 2, 3, etc.  Default:  null" << endl;

                  cout << "     -s Seekfile--files which match this pattern will be processed by" << endl;

                  cout << "            this instance of the program; each instance must have a " << endl;

                  cout << "            unique seekfile pattern. Default: neo*xml. This default " << endl;

                  cout << "            is changed to neo3*xml, for example, if this instance of " << endl;

                  cout << "            the program is instance 3." << endl;

                  cout << "     -o Outputfile--filename for the space-delimited flat file for " << endl;

                  cout << "            the data generated by this program. Each instance of this" << endl;

                  cout << "            program must have its own file name. Default if there is" << endl;

                  cout << "            no instance number: neoTestData.txt. Default for" << endl;

                  cout << "            instance number 3: neoTestData3.txt." << endl;

                  cout << "     -n Need_log_header--'1' will provide a header for"<< endl;

                  cout << "            the output file; '0' leaves off the header row. Default is " << endl;

                  cout << "            1 for no instance number or if the instance number is 1." << endl;

                  cout << "            Default is '0' otherwise. " << endl << endl;

                  cout << "Note that the '-i' option resets seekfile, outputfile, and " << endl;

                  cout << "need_header to their default settings for instances. If you" << endl;

                  cout << "use an instance number and still want to force specific values" << endl;

                  cout << "for seekfile, outputfile, or need_header, then make sure the" << endl;

                  cout << "'-i' option comes before these other options on the command line." << endl;

                  exit (1);

                  break;

            } // switch

      }  // for i

      cout << endl << endl << "***********  STARTING AUTOMATED XML STORE TOOL   ****************" << endl << endl;

      cout << "You are running this program with the following options." << endl;

      cout << "These options can be changed from the command line. To see a list" << endl;

      cout << "    of options, run this program with the option: -?" << endl << endl;

      cout << "         Login:  " << login << endl;

    cout << "      PassWord:  " << password << endl;

      cout << "          Host:  " << host << endl;

      cout << "          Port:  " << port << endl;

      cout << "      Instance:  " << instance << endl;

      cout << "      Seekfile:  " << seekfile << endl;

      cout << "    Outputfile:  " << outputfile << endl;

      cout << "    NeedHeader:  " << log_header_needed << endl;

     

 

      //get filesizes for all the files in the neoxml/db directory.

      get_db_filesizes( db_filesizes );

 

      try

      {

            /********************************************************************

            ***                                                               ***

            ***               CONNECT TO SERVER AND LOGIN                     ***

            ***                                                               ***

            ********************************************************************/

            // If the next command crashes, the most likely reason is the NeoServer

            // is not running.

            SessionManagedNeoConnection neosession(host, port) ;

            sid = neosession.login((char*)login, (char*)password) ;

            cout << "           sid:  " << sid << endl << endl;

 

            while (1) {           // each loop processes one file

            /********************************************************************

            ***                                                               ***

            ***                      STORE ONE FILE                           ***

            ***                                                               ***

            ********************************************************************/

                 

                 

                  get_file_name(seekfile, filename, &doc_size);

                  cout << endl << "Processing File: " << filename << endl << "    ";

 

                  _ftime(&tstruct1);

                  results = neosession.storeFileXML(filename, NULL, NULL);

                  _ftime(&tstruct2);

                  start_time = (tstruct1.time * 1000) + tstruct1.millitm;

                  stop_time  = (tstruct2.time * 1000) + tstruct2.millitm;

 

 

            /********************************************************************

            ***                                                               ***

            ***                      LOG ONE FILE                             ***

            ***                                                               ***

            ********************************************************************/

 

                  // get some statistics from the neoServer

                  myStatType = STORAGE;

                  storage_stats = neosession.getServerStatistics((const enum NeoConnection::SERVER_STATISTIC_TYPES) myStatType);

                  myStatType = ADMIN;

                  admin_stats = neosession.getServerStatistics((const enum NeoConnection::SERVER_STATISTIC_TYPES) myStatType);

                 

                  update_log(storage_stats,

                                    admin_stats,

                                    start_time,

                                    stop_time,

                                    filename,

                                    outputfile,

                                    doc_size,

                                    results,

                                    sid,

                                    &log_header_needed,

                                    db_filesizes);

 

            /********************************************************************

            ***                                                               ***

            ***                   CLEAN UP FOR NEXT FILE                      ***

            ***                                                               ***

            ********************************************************************/

 

                  neosession.releaseBuffer(results);

                  results = NULL;

                  neosession.releaseBuffer(storage_stats);

                  storage_stats = NULL;

 

                  remove_file(filename);

 

            } // end  while loop

      }     // end  try

      catch (NeoException e)

      {

            const char* msg = e.getMessage() ;

            cout << e.getMessage() << endl ;

      }

     

      return 0 ;

}   // end main

 

int get_file_name(char* seekfile, char* filename, long *docsize)

{

      WIN32_FIND_DATA file_data;

 

      filename[0] = '\0';

      file_data = getNextFile(seekfile);

      *docsize = (MAXDWORD * file_data.nFileSizeHigh) + file_data.nFileSizeLow;

      strcpy(filename, file_data.cFileName);

      return 0;

}

 

WIN32_FIND_DATA getNextFile (char seek_file_name[])

{

      WIN32_FIND_DATA this_file_data;

      HANDLE h = NULL;

 

      while (1) {      

            h = FindFirstFile(seek_file_name, &this_file_data);

 

            if (h != INVALID_HANDLE_VALUE )

            break;

            // if there is no such file in the work directory, try again later

            cout << "Waiting for next file." << endl;

            Sleep(5000);

      }

      FindClose(h);

      return this_file_data;

}

 

void get_db_filesizes(int filesizes[])

{

      char path_and_filename[50];

      WIN32_FIND_DATA file_data;

      static char *dbfilename[13] = {"neoXDBD.dct", "neoXDBT.dct",

                                                   "neoXDB001.map", "neoXDB002.map",

                                                   "neoXDB003.map", "neoXDB004.map",

                                                   "neoData.inx", "neoData.dup",

                                                   "neoTag.inx", "neoTag.dup",

                                                   "neoTPD.inx", "neoTPD.dup",

                                                   "neoXDB.adm"};

 

 

      for ( int i=0; i <= 12; i++) {

            path_and_filename[0] = '\0';

            strcat(path_and_filename, "C:\\NeoCore\\neoxml\\db\\");

            strcat(path_and_filename, dbfilename[i]);

            file_data = getNextFile(path_and_filename);

            filesizes[i] = (MAXDWORD * file_data.nFileSizeHigh) + file_data.nFileSizeLow;

      }

}

 

void update_log(char* storage_stats,

                        char* admin_stats,

                        long start_time,

                        long stop_time,

                        char* filename,

                        char* oput,

                        long docsize,

                        char* results,

                        char* sid,

                        int *log_header_needed,

                        int filesizes[])

{

      int nmbr_tags = 0;

      int data_items = 0;

      int deepest_tag_level = 0;

      char docsProcessed[15];

      char doc_id[15];

      fstream iof;                        // neoTestData.txt

      double file_percents[13];

      double file_footprints[13];

      double total_footprint = 0;

      static int total_doc_size_this_inst = 0;

      long store_time = stop_time - start_time;

 

      total_doc_size_this_inst = total_doc_size_this_inst + docsize;

 

      // Extract some statistics about the stored file

      find_data_items(filename, &nmbr_tags, &data_items, &deepest_tag_level);

 

      // Get some information from the NeoServer's response to our store command.

      processResults(results, docsProcessed, doc_id);

     

      iof.open(oput, ios::app);

           

       if ( *log_header_needed ) {

            iof << "SID";

            iof << "       " << "FILE";

            iof << "     " << "#_DOCS";

            iof << " " << "DocID";

            iof << " " << "NMBR_TAGS";

            iof << " " << "DEPTH";

            iof << " " << "#_DATA_ITEMS";

            iof << " " << "DOC_SIZE";

            iof << " " << "TOTAL_DOC_SIZE_THIS_INST";

            iof << " " << "STORE_TIME";

            iof << " " << "START_TIME";

            iof << " " << "STOP_TIME";

            iof << " " << "NMBR_THREADS";

            iof << " " << "TOT_FTPRINT";

            iof << " " << "DD_USED";

            iof << " " << "DD_PCNT";

            iof << " " << "TD_USED";

            iof << " " << "TD_PCNT";

            iof << " " << "Mp1_USED";

            iof << " " << "Mp1_PCNT";

            iof << " " << "Mp2_USED";

            iof << " " << "Mp2_PCNT";

            iof << " " << "Mp3_USED";

            iof << " " << "Mp3_PCNT";

            iof << " " << "Mp4_USED";

            iof << " " << "Mp4_PCNT";

            iof << " " << "DIn_USED";

            iof << " " << "DIn_PCNT";

            iof << " " << "TIn_USED";

            iof << " " << "TIn_PCNT";

            iof << " " << "TDu_USED";

            iof << " " << "TDu_PCNT";

            iof << " " << "TDI_USED";

            iof << " " << "TDI_PCNT";

            iof << " " << "TDD_USED";

            iof << " " << "TDD_PCNT" << endl;

            *log_header_needed = 0;

      }

     

      double nmbr_threads   = getNmbrThreads((char *) admin_stats);

     

      // Note that the order of items in the 3 13-element arrays must all match the order

      // in which the values occur in "storage stats". It won't work if you change the

      // order around. Each time you send "storage_stats" to the getPercentage function,

      // it cuts the front end off of "storage stats", so storage_stats will not be

      // available later for any purpose.

      for ( int i = 0; i <= 11; i++ ) {

            file_percents[i] = getPercentage((char *) storage_stats);

            file_footprints[i] = filesizes[i] * file_percents[i];

            total_footprint = total_footprint + file_footprints[i];

      }

     

      total_footprint = total_footprint + filesizes[12];   // Admin file

      total_footprint = total_footprint + filesizes[8];    // For the null data dup file

      total_footprint = total_footprint + 2759182;         // Binaries

      total_footprint = total_footprint + 13738;           // Configs

 

      iof << sid;

      iof << " " << filename; 

      iof << " " << docsProcessed;

      iof << " " << doc_id;

      iof << " " << nmbr_tags;

      iof << " " << deepest_tag_level;

      iof << " " << data_items;

      iof << " " << docsize;

      iof << " " << total_doc_size_this_inst;

      iof << " " << store_time;

      iof << " " << start_time;

      iof << " " << stop_time;

      iof << " " << nmbr_threads;

      iof << " " << total_footprint;

      for ( int j=0; j <= 11; j++ ) {

            if ( j == 8 ) continue;  // don't process the data dup index

            iof << " " << file_footprints[j];

            iof << " " << file_percents[j];

      }

      iof << endl;

      iof.close();

}

 

void remove_file(char* filename) {

      if ( ( remove( filename ) ) == 0 )

            cout << "Done with " << filename << endl;

      else

            cout << "Unsuccessful removing:  " << filename << endl;

}

 

 

// Consumes the next "<Percent-Full>" value from "big_string".
//
// Finds the first "<Percent-Full>" tag, shifts the text that follows it to
// the front of big_string (so the next call finds the next tag) and returns
// the number at the start of that text.
//
// Returns 0.0 and leaves big_string untouched when big_string is NULL or
// contains no "<Percent-Full>" tag.
double getPercentage(char *big_string)
{
      static const char percent_tag[] = "<Percent-Full>";
      char *tag_start;
      char *value_start;

      if ( big_string == NULL )
            return 0.0;

      tag_start = strstr(big_string, percent_tag);
      if ( tag_start == NULL )
            return 0.0;

      // Source and destination overlap, so memmove (not strcpy) is required.
      // The "+ 1" carries the terminating NUL along.
      value_start = tag_start + (sizeof percent_tag - 1);
      memmove(big_string, value_start, strlen(value_start) + 1);

      // Grab the numeric digits from the front of big_string; to be returned
      return strtod(big_string, NULL);
}

 

// Extracts the thread count from the server's admin statistics.
//
// Finds the first "<Thread-Contexts-In-Use>" tag, shifts the text that
// follows it to the front of admin_string and returns the number at the
// start of that text.
//
// Returns 0.0 and leaves admin_string untouched when admin_string is NULL or
// does not contain the tag.
double getNmbrThreads(char *admin_string)
{
      static const char thread_tag[] = "<Thread-Contexts-In-Use>";
      char *tag_start;
      char *value_start;

      if ( admin_string == NULL )
            return 0.0;

      tag_start = strstr(admin_string, thread_tag);
      if ( tag_start == NULL )
            return 0.0;

      // Source and destination overlap, so memmove (not strcpy) is required.
      // The "+ 1" carries the terminating NUL along.
      value_start = tag_start + (sizeof thread_tag - 1);
      memmove(admin_string, value_start, strlen(value_start) + 1);

      return strtod(admin_string, NULL);
}

 

 

// Copies the value that follows the first occurrence of "tag" in "text" into
// "out": at most the first 9 characters after the tag are examined, spaces are
// dropped and copying stops at '<' or at the end of the text. "out" therefore
// receives at most 9 characters plus the terminator; it is set to "" when the
// tag is missing. Returns a pointer just past the tag, or NULL if not found.
static const char *neo_value_after_tag(const char *text, const char *tag, char *out)
{
      enum { MAX_SCAN = 9 };
      const char *cursor;
      int scanned;
      int stored = 0;

      out[0] = '\0';
      if ( text == NULL )
            return NULL;

      cursor = strstr(text, tag);
      if ( cursor == NULL )
            return NULL;
      cursor += strlen(tag);

      for ( scanned = 0;
              scanned < MAX_SCAN && cursor[scanned] != '\0' && cursor[scanned] != '<';
              scanned++ ) {
            if ( cursor[scanned] != ' ' )
                  out[stored++] = cursor[scanned];
      }
      out[stored] = '\0';
      return cursor;
}

// Parses the return string received from the NeoServer when the document
// was sent to the NeoServer for storage.
//
//   res            the server's response text; it is only read here
//   docsProcessed  receives the value following "Processed>"
//   doc_id         receives the value following the next "ID>"
//
// Each output needs room for 10 characters. An output is set to the empty
// string when its tag is not present; a value longer than 9 characters is
// truncated. Always returns 0.
int processResults( char *res, char* docsProcessed, char* doc_id)
{
      const char *after_processed = neo_value_after_tag(res, "Processed>", docsProcessed);

      // The document id is looked for after the "Processed>" tag; if that tag
      // was missing, fall back to searching the whole response.
      neo_value_after_tag(after_processed != NULL ? after_processed : res, "ID>", doc_id);

      return 0;
}  // end function

 

// Goes through the file character by character and adds to the caller's
// counters (which the caller must initialise):
//
//   *nmbr_tags          one per "<?...>" tag, per begin-element tag and per
//                       attribute ('=' inside a begin-element tag)
//   *data_items         one per run of text between tags and one per attribute
//   *deepest_tag_level  raised to the deepest nesting level seen; an attribute
//                       counts as one level below its element
//
// The scan is purely character based: '/' and '=' inside a begin-element tag
// are always treated as markup, even inside quoted attribute values.
//
// If the file cannot be opened, the reason is reported with perror and the
// counters are left unchanged.
void find_data_items(char* filename,
                               int *nmbr_tags,
                               int *data_items,
                               int *deepest_tag_level)
{
      int mode = 1;     //Modes are:
                              //  1. At beginning of document
                              //  2. Just found a left angle bracket '<'
                              //  3. Inside a "<?" tag or an "end element" tag
                              //  4. Just found a right angle bracket '>'
                              //  5. Inside a data field
                              //  6. Inside a "begin element" tag

      int tag_level = 0;
      int this_char;          // int, so EOF is distinguishable from every byte value
      FILE *wrk_file;

      wrk_file = fopen ( filename, "r" );
      if ( wrk_file == NULL ) {
            perror(filename);
            return;
      }

      // Testing getc's result directly (instead of feof before the read) keeps
      // the end-of-file marker from being processed as if it were a character.
      while ( (this_char = getc(wrk_file)) != EOF ) {
            switch ( mode )
            {
            case 1:
                  if (this_char == '<' )
                        mode = 2;
                  break;

            case 2:
                  if ( this_char == '?' ) {
                        mode = 3;
                        (*nmbr_tags)++;
                  }
                  else if ( this_char == '/' ) {
                        mode = 3;
                        tag_level--;
                  }
                  else {
                        mode = 6;
                        tag_level++;
                        (*nmbr_tags)++;
                        if ( *deepest_tag_level < tag_level )
                              *deepest_tag_level = tag_level;
                  }
                  break;

            case 3:
                  if ( this_char == '>' )
                        mode = 4;
                  break;

            case 4:
                  if ((this_char == ' ')  ||
                        (this_char == '\n') ||
                        (this_char == '\t'))
                        break;                  // white space between tags is not data
                  if (this_char == '<')
                        mode = 2;
                  else {
                        mode = 5;
                        (*data_items)++;
                  }
                  break;

            case 5:
                  if (this_char == '<')
                        mode = 2;
                  break;

            case 6:
                  if (this_char == '=') {
                        // An attribute counts as a tag and a data item one
                        // level below its element.
                        (*data_items)++;
                        (*nmbr_tags)++;
                        if ( *deepest_tag_level < tag_level + 1 )
                              *deepest_tag_level = tag_level + 1;
                  }
                  else if (this_char == '/') {
                        tag_level--;            // self-closing element
                  }
                  else if (this_char == '>')
                        mode = 4;
                  break;
            }  // end switch
      } // end "while" loop

      fclose(wrk_file);
}

 

 

 

/*

APPENDIX_A_IconTables

Index of HTML Docs

APPENDIX C Extensibility.cpp >>

*/