Contents 
 Index 
 "Perl Program Reference" 
 < Previous 
 Next > 

html_look_integrate.pl

Go to the documentation of this file.
00001 // This file has been modified on-the-fly with an input filter
00002 // to change it from Perl syntax to C++ strictly for the purposes
00003 // of faking out Doxygen. Modifications include:
00004 
00005 // - changing local() definitions to C++ #define statements.
00006 // - commenting out undef statements.
00007 // - changing $globe'... variable names to $globe_...
00008 // - changing sub statements to look like C++ functions.
00009 // - changing # comments to C++ comments.
00010 // - ...
00011 
00012 // If you see other strangeness in the HTML version of the Perl file,
00013 // it comes from getting it to look more C++ like.
00014 
00015 
00016 // #!/usr/#define/bin/perl
00017 package gen_nav;
00018 
00019 //#############################################################################
00020 /** @file
00021  ** @brief Looks into HTML files that have been provided by other sources
00022  ** and parses them for useful information.
00023  ** 
00024  ** It extracts the information needed for entries in an index
00025  ** and table of contents.
00026  **
00027  ** It starts at the files listed in @xscope::top_files array. For every
00028  ** HTML file there, it finds their hyperlinks. It successively traces through
00029  ** the hyperlinks creating a data structure. This is the data structure used
00030  ** in the TOC. The hyperlinks that it traces are also used directly in the 
00031  ** table of contents.
00032  **
00033  ** Certain files can be excluded from creating children or tracing further.
00034  ** All top files in particular are added to this list so that it does not
00035  ** loop continuously through things it already knows, or more realistically,
00036  ** so that it doesn't build a top-levels data structure under some other
00037  ** top-level data.
00038  **
00039  ** This has several routines (spider_trace, index_token_generation, script_generation
00040  ** etc.) that call themselves recursively. The key to stop the recursion is when
00041  ** the owning files for the children (hyperlinks) have already been visited.
00042  **
00043  ** @ingroup tp_tools tp_nav
00044  **
00045  ** @author Glenn C. Maxey
00046  **
00047  **/
00048 // #    $Id: html_look_integrate.pl,v 1.5 2002/11/20 15:23:02 gmaxe Exp $
00049 //#
00050 //# 2002 Created by Voyant Technologies, Inc., Westminster, Colorado, USA.
00051 //#
00052 //# Permission to use, copy, modify, and distribute this software and its 
00053 //# documentation under the terms of the GNU General Public License is hereby 
00054 //# granted. No representations are made about the suitability of this software 
00055 //# for any purpose. It is provided "as is" without express or implied warranty. 
00056 //# See the GNU General Public License (http://www.gnu.org/copyleft/gpl.html) 
00057 //# for more details.
00058 //# 
00059 //# Documents produced by this script are derivative works derived from the 
00060 //# input used in their production; they are not affected by this license.
00061 //#
00062 //#    Revision Information:
00063 //#
00064 //#    $Log: html_look_integrate.pl,v $
00065 //#    Revision 1.5  2002/11/20 15:23:02  gmaxe
00066 // //#    Added exit codes so that wrapper scripts can catch errors properly.
00067 //#
00068 //#    Revision 1.4  2002/07/26 18:56:10  gmaxe
00069 //#    Got rid of old definitions and migrated new structures into all;
00070 //#    enhanced xhelp output file names and format; now everything is
00071 //#    alphebetized.
00072 //#
00073 //#    Revision 1.3  2002/04/12 18:01:13  gmaxe
00074 //#    Various tweaks; work on tag extraction and change_nav;
00075 //#    more exclused special perl characters from indexer;
00076 //#    improved support for sapi group pm files in xhelp.
00077 //#    spider tool now works regularly and as expected.
00078 //#
00079 //#    Revision 1.2  2002/04/10 00:35:27  gmaxe
00080 //#    Tracing tool now works;
00081 //#    indexer ignores more special characters;
00082 //#    pm file is the first version; contains the proper data structures
00083 //#    to handle bsp/vxworks
00084 //#
00085 //#    Revision 1.1  2002/04/06 01:55:10  gmaxe
00086 //#    New files and new general constructs for handling tags. Uses globals better.
00087 //#    The html_look* files are designed to handle spider tracing of html systems.
00088 //#
00089 //#
00090 //#
00091 //#############################################################################
00092 
00093 
00094 //#############################################################################
00095 /** @fn int BEGIN
00096  ** @brief Code to execute when first entered: sets default names, loads globe.pm
00097  ** and the scope file, and opens the master file on IN_MASTER.
00098  ** @param None. Input comes from @ARGV: scope file, master file, optional start file.
00099  **
00100  ** @return None. Problems are queued on @file_errors.
00101  **
00102  ** @lim None
00103  ** @ingroup tp_nav
00104  **/
00105 // #############################################################################
00106 int BEGIN  ( ) {
00107 //    print "\n============  Starting html_look_integrate.pl ==================================\n";
00108    $_file_list = "_file_list";
00109    $_toc_file = "tree.html";
00110    $_index_file = "_index_list";  //  joined to $globe::path in the index-token step of the main block
00111    $_arg_inc = 0;  //  index of the next @ARGV entry to consume
00112    
00113    $no_scope_file = 0;
00114    $scope_pm = "globe.pm";  //  first time through; other scope stuff passed in.
00115    
00116    $in_file = "tree.js"; //  default
00117 
00118    push (@INC, `pwd`);  //  NOTE(review): the pwd output is not chomped; a trailing newline would end up in the @INC entry -- confirm
00119    push (@INC, '../perl');
00120    if (0){
00121 //       print (@INC, "\n");
00122    }
00123    // ####
00124    //  All global variables are defined in the following file
00125    // ####
00126    unless (open ( IN_LIST, $scope_pm)) {  //  existence probe; IN_LIST is not read in this function
00127       unless (open ( IN_LIST, "../perl/$scope_pm")) {
00128          push (@file_errors, "Cannot open file \"$scope_pm\" or \"../perl/$scope_pm\"\n");
00129          $no_scope_file++;
00130       }
00131    }
00132 //    close (IN_LIST);
00133    push (@INC, $scope_pm);  //  NOTE(review): this is a file name, while @INC normally holds directories -- confirm intent
00134    push (@INC, "../perl/$scope_pm");
00135 
00136    if (!@file_errors) {
00137       // ####
00138       //  All global variables are defined in the following file
00139       // ####
00140       require $scope_pm;
00141    
00142       if (&globe::declare_variables()) {
00143 //          print "Variables initialized from $scope_pm.\n";
00144       } else {
00145          push (@file_errors, "Could not initialize variables from $scope_pm.\n");
00146       }
00147    } //  if not @file_errors
00148    
00149    
00150 
00151    //  Get scope files if there is one.
00152    if (@ARGV > $_arg_inc) {
00153       $scope_pm = @ARGV[$_arg_inc];  //  one-element slice; NOTE(review): the scalar form is $ARGV[...]
00154       
00155       unless (open ( IN_LIST, $scope_pm)) {
00156          push (@file_errors, "Cannot open file \"$scope_pm\"\n");
00157          $no_scope_file++;
00158       }
00159 //       close (IN_LIST);
00160       push (@INC, $scope_pm);
00161       
00162       if (!@file_errors) {
00163          // ####
00164          //  All global variables are defined in the following file
00165          // ####
00166          require $scope_pm;
00167    
00168          if (&xscope::declare_variables()) {
00169 //             print "Variables initialized from $scope_pm.\n";
00170          } else {
00171             push (@file_errors, "Could not initialize variables from $scope_pm.\n");
00172          }
00173       } //  if not @file_errors
00174    } else {
00175       push (@file_errors, "ERROR: Need to provide a scope file as first argument.");
00176    } //  if 1 or more arguments
00177    $_arg_inc++;  //  advanced whether or not the scope argument was present
00178 
00179    //  Get path to code files
00180   if (0) { //  opt out: the root-path argument is disabled, so $root_path is not set here
00181    if (@ARGV > $_arg_inc) {
00182       $root_path = @ARGV[$_arg_inc];
00183       $globe::path = @ARGV[$_arg_inc];
00184       if ($root_path =~ /\/$/) {
00185 //          print "The path specified is $root_path\n";
00186       } else {
00187          push (@file_errors, "The input argument \"$root_path\" requires a forward slash (\/) at the end.\n");
00188       }
00189    } else {
00190       push (@file_errors, "ERROR: root path is required.");
00191    } //  if 1 or more arguments
00192    $_arg_inc++;
00193   } //  opt out
00194    //  Master file: second command-line argument, opened on IN_MASTER for the main block.
00195    if (@ARGV > $_arg_inc) {
00196       $in_file = @ARGV[$_arg_inc];
00197       $globe::master_nav_file = @ARGV[$_arg_inc];
00198 //       print "The master file is $in_file.\n";
00199       unless (open ( IN_MASTER, "$in_file")) {
00200          push (@file_errors, "Cannot open file \"$in_file\"\n");
00201       }
00202    } else {
00203       push (@file_errors, "ERROR: master HTML file is required.");
00204    } //  if 2 or more arguments
00205    $_arg_inc++;
00206 
00207    //  Optional: a fully qualified path of where to start.
00208    if (@ARGV > $_arg_inc) {
00209       // undef (@globe::top_files);
00210       push (@globe::top_files, @ARGV[$_arg_inc]);
00211    } //  if 3 or more arguments
00212    $_arg_inc++;
00213 
00214 
00215   if (0) { //  opt out: the output-prototype argument is disabled
00216    if (@ARGV > $_arg_inc) {
00217       $gen_proto = @ARGV[$_arg_inc];
00218       if ($gen_proto !~ /\//) {
00219          //  if the output file does not have a path, then
00220          //  put it in the root location
00221          $gen_list = "_$gen_proto";
00222          $gen_class = "c_$gen_proto";
00223          $gen_proto = "$root_path$gen_proto";
00224          $gen_list = "$root_path$gen_list";
00225          $gen_class = "$root_path$gen_class";
00226       } else {
00227          @chunk = split (/\//, $gen_proto);
00228          $chunk[$// chunk] = "_$chunk[$#chunk]";
00229          $gen_list = join ("\/", @chunk);
00230          @chunk = split (/\//, $gen_proto);
00231          $chunk[$// chunk] = "c_$chunk[$#chunk]";
00232          $gen_class = join ("\/", @chunk);
00233       }
00234 //       print "The output files are $gen_proto and $gen_list\n";
00235    } else {
00236       push (@file_errors, "Need to have the output prototype file.\n");
00237    } //  if 3 or more arguments
00238    $_arg_inc++;
00239   } //  opt out
00240 
00241    
00242 } //  BEGIN
00243 
00244 //#############################################################################
00245 /** @fn int main
00246  ** @brief The main program: handles queued errors, reads the master file, spider-traces
00247  ** each @xscope::top_files entry, calls the script-output routines, builds the index list
00248  ** and calls html_file_update for each top file.
00249  ** @param None.
00250  ** @return None.
00251  **
00252  ** @lim None
00253  ** @ingroup tp_nav
00254  **/
00255 // #############################################################################
00256 // sub main {
00257 {
00258    // #############################################################################
00259    // # Program start
00260    // #############################################################################
00261 
00262    if (0){
00263 //       print "=== Definitions 3 \n";
00264 //       exit(1);
00265    }
00266 
00267    if (@file_errors) {
00268       //  Makes no sense to go on if input parameters are off.
00269 //       print "\n============  Summary of errors =================================a\n";
00270       for ($i=0; $i<@file_errors; $i++){
00271 //          print "$i = $file_errors[$i]\n";
00272       }
00273       &using_voy_nav();
00274 //       exit(1);
00275    }
00276 
00277    // ####
00278    //  MASTER FILE DEFINITIONS
00279    //  Get the master definitions
00280    // ####
00281    while (<IN_MASTER>){    //  entire master file into memory.
00282       $globe::master_nav .= $_;
00283    }
00284 //    close (IN_MASTER);
00285    &globe::get_master_nav_info;  //  presumably fills %globe::m_info from $globe::master_nav -- confirm
00286    if (!exists ($globe::m_info{order})) {
00287       if (1) {
00288          push (@file_errors, "ERROR: The master file \"$globe::master_tree_file\" does not have the ordering.");  //  NOTE(review): BEGIN stores the name in $globe::master_nav_file; confirm master_tree_file is set
00289 //          print "Ordering begins with \"$globe::m_define{order}[0]\" \nand ends with \"$globe::m_define{order}[1]\"\n";
00290 //          print "Inbetween on a line by themselves is the directory doc_publish subdirectory name.\n";
00291       }
00292 //       exit(1);
00293    } else {
00294       if (0) {
00295 //          print "Got to here...\n$globe::m_info{order}\n======\n";
00296          for ($i = 0; $i <= $// {$globe::m_info{_array_order}}; $i++) {
00297             foreach $k (keys %{$globe::m_info{_array_order}[$i]}) {
00298 //                print "$i $k $globe::m_info{_array_order}[$i]{$k}\n";
00299             }
00300          }
00301 //          //  exit(1);
00302       }
00303    }
00304    // ####
00305 
00306    // ####
00307    //  This is the main loop that does a spider trace through the documentation.
00308    // ####
00309    if (1) { //  Turn on/off spider default 1 -- mandatory
00310       foreach $in_file (@xscope::top_files){
00311          @_name = split (/\//, $in_file);
00312          pop (@_name);  //  This is the filename, which we don't want.
00313          $globe::org{$in_file}{path} = join ("\/", @_name, "");  //  the empty last element makes the path end in "/"
00314          if (0){
00315 //             print "Starting file = $in_file\n";
00316          }
00317          if (0){
00318 //             print "root path is $globe::org{$in_file}{path}\n";
00319          }
00320          // ####
00321          //  Read in file and get its child hyperlinks
00322          // ####
00323          if (&get_input_file ($in_file)) {
00324             //  good
00325             //  Level is 2 for get_hyperlinks, because 1 is already the main guy.
00326             //  when using nested scripts, main guy is already handled; 
00327             //  start numbering with 1
00328             if (!&get_hyperlinks ($in_file, 1)) {
00329                //  bad; did not succeed getting hyperlinks from file
00330                push (@file_errors, "No hyperlinks found in $in_file.");
00331             }
00332          } else {
00333             //  bad
00334             //  Turned off because trapped elsewhere
00335             if (0) {
00336                push (@file_errors, "Could not find \"$in_file\". :)");
00337             }
00338          } //  get_input_file
00339          if (0) {
00340 //             print "Title is \"$globe::org{$in_file}{title}\"\n";
00341          }
00342          if (1) {
00343 //             print "Starting spider trace...\n";
00344             &spider_trace ($in_file, 1);  //  base level is 1, the starting point.
00345          }
00346          if (0) {
00347 //             print "\nEnding spider trace...\n";
00348          }
00349       } //  for each top_file
00350    }  //  Turn on/off spider
00351 
00352    // undef (%globe_all_files);
00353    $globe_file_cnt=0;
00354    // ####
00355 
00356    // ####
00357    //  Test routines that are recursively called.
00358    //  Turned off, but when on can help debug what is being collected.
00359    //  leave this off
00360    // ####
00361    if (0) { //  debug if default 0
00362       #define $globe::_last_level  1
00363       //  #define $globe::_last_level  0
00364 //       print "Testing the structure...\n";
00365 //       print "Start last level = $globe::_last_level\n";
00366       &testing_structure ($xscope::top_files[0]);
00367       $globe::_last_level = 1;
00368       //  $globe::_last_level = 0;
00369 //       print "======\nSecond time through...\n";
00370 //       print "Start last level = $globe::_last_level\n";
00371       &testing_structure ($xscope::top_files[0]);
00372       if (0){
00373          foreach $in_file (sort keys %{globe_all_files}) {
00374 //             print "input $in_file\n";
00375          }
00376       }
00377 //       exit(1);
00378    } //  debug if
00379    // ####
00380    
00381    // ####
00382    //  Used to see what's in the data structures.
00383    //  Leave this off
00384    // ####
00385    if (0) { //  debug if default 0
00386       foreach $in_file (sort keys %{globe::org}) {
00387 //          print "file $in_file\n";
00388 //          print "  g title $globe::org{$in_file}{title}\n";
00389          if (1){
00390 //             print "  g title $globe::org{$in_file}{title}\n";
00391 //             print "  g path $globe::org{$in_file}{path}\n";
00392          }
00393          if (0){
00394             foreach $k (sort keys %{$globe::org{$in_file}}) {
00395 //                print "  $k \$globe::org{\$in_file}{$k} = $globe::org{$in_file}{$k}\n";
00396             }
00397          }
00398       }
00399    } //  debug if
00400    // ####
00401 
00402    // undef (%globe_all_files);
00403    $globe_file_cnt=0;
00404    
00405    // ####
00406    //  Creates the output scripts for each top file.
00407    // ####
00408    if (1) { //  turn on/off default 1
00409       foreach $in_file (@xscope::top_files) {
00410          //  $globe::org{$in_file}{title}
00411          // ##
00412          //  Create a file name from the title.
00413          //  Replace characters: whitespace, '+' and the listed punctuation each become '_'.
00414          // ##
00415          $tree = $globe::org{$in_file}{title};
00416          $tree =~ s/[\s+]/_/g;
00417          $tree =~ s/[\'\"\`\~\!\@\// \$\%\^\&\*\(\)\=\+\{\}\[\]\?\/\\\,\.\?><\|]/_/g;
00418          $tree = "tree_$tree";
00419          $globe::org{$in_file}{script_#define}  $tree
00420          $m_tree = "m_$tree";
00421          $globe::org{$in_file}{script_master} = $m_tree;
00422          $tree = "$globe::path$tree";
00423          $m_tree = "$globe::path$m_tree";
00424          if (0) {
00425 //             print "Old title \"$globe::org{$in_file}{title}\"\n  new \"$tree\"\n\nmaster \"$m_tree\"\n";
00426          }
00427          
00428          //  $tree = "$globe::path";
00429          //  $tree .=  "tree_$q";
00430          if (0 && (&output_structure_script ($tree, $in_file, "#define"))) {
00431             //  assuming successful
00432          }
00433          if ((1) && (&output_structure_script ($m_tree, $in_file, "master"))) {
00434             //  assuming successful
00435          }
00436       } //  foreach $in_file
00437    } //  turn on/off
00438    // ####
00439 
00440    // ##################
00441    //  special case a script file for the Top Most master file.
00442    // ##################
00443    if (1) { //  turn on/off
00444       $tree = $globe::path;
00445       $tree .= "tree_start_here";
00446       if (0 && (&starting_point_script ($tree, "script_#define", "local"))){
00447       } //  starting_point_script
00448       $tree = $globe::path;
00449       $tree .= "m_tree_start_here";
00450       if ((1) && (&starting_point_script ($tree, "script_master", "master"))) {
00451       } //  starting_point_script
00452    } //  turn on/off
00453    // ####
00454 
00455 
00456 
00457    // undef (%globe_all_files);
00458    $globe_file_cnt=0;
00459    
00460 
00461    // ##################
00462    //  Take care of index tokens
00463    // ##################
00464    if (1) { //  turn on/off default 1
00465 //       print "Generating index tokens $_index_file...\n";
00466       $_in_index_file = "$globe::path$_index_file";
00467 
00468       $q = 0;
00469       foreach $in_file (@xscope::top_files) {
00470          if (&handle_index_tokens ($in_file)) {
00471             //  assuming successful
00472          }
00473          $q++;
00474       } //  foreach $in_file
00475       
00476       if (@globe::index_info) {
00477 //          print "Initial index token count: $// globe::index_info\n";
00478          if (1) { //  4/8/2002 Get rid of duplicate entries
00479             @sm_index = sort (@globe::index_info);
00480             @globe::index_info = @sm_index;
00481             // undef (@sm_index);
00482             $remember = $globe::index_info[$i];  //  NOTE(review): $i is not yet set for the loop below, and $remember is never updated inside it -- confirm the de-duplication works
00483             push (@sm_index, $globe::index_info[0]);
00484             // ####
00485             //  Loop to get rid of duplicates
00486             // ####
00487             for ($i=1; $i <= $// globe::index_info; $i++) { # not starting on first 
00488                if ($remember ne $globe::index_info[$i]) {
00489                   push (@sm_index, $globe::index_info[$i]);
00490                }
00491             }
00492             @globe::index_info = @sm_index;
00493             // undef (@sm_index);
00494             
00495          }
00496 //          print "Final index token count: $// globe::index_info\n";
00497          
00498          //  clean up memory
00499          
00500          
00501          unless (open ( OUT_INDEX_LIST, ">$_in_index_file")) {
00502             push (@file_errors, "Cannot open file \"$_in_index_file\"");
00503 //             exit(1);
00504          }
00505          if (1) { //  4/5/2002 one way of doing index tokens
00506             for ($i=0; $i<@globe::index_info; $i++){
00507 //                //  print "$i = $globe::index_info[$i]\n";
00508 //                print (OUT_INDEX_LIST "$globe::index_info[$i]\n");
00509             }
00510          }
00511 //          close (OUT_INDEX_LIST);
00512       }
00513    } //  turn on/off
00514    // ####
00515 
00516 
00517    // ##################
00518    //  Take care of navigation and copyright in the HTML file
00519    // ##################
00520    if (1) { //  turn on/off default 1
00521 //       print "Updating the individual HTML files...\n";
00522       // undef (%globe_all_files);
00523       $globe_file_cnt=0;
00524 
00525       $q = 0;
00526       foreach $in_file (@xscope::top_files) {
00527          if (&html_file_update ($in_file)) {
00528             //  assuming successful
00529          } else {
00530             //  happens when you can't write the file out
00531          }
00532          $q++;
00533       } //  foreach $in_file
00534       
00535    } //  turn on/off
00536    // ####
00537    
00538    
00539    if (0) {
00540 //       print "got this far...\n";
00541 //       exit(0);
00542    }
00543 
00544    // #############################################################################
00545    // # End of Program
00546    // #############################################################################
00547 //    exit(0);
00548 } //  the main routine
00549 
00550 //#############################################################################
00551 /** @fn int using_voy_nav  ( )
00552  ** @brief Usage text; the main block calls this whenever @file_errors is not empty.
00553  ** @param None
00554  ** @return None
00555  ** 
00556  ** @lim None
00557  ** @ingroup tp_nav
00558  **/
00559 // #############################################################################
00560 int using_voy_nav  ( ) {
00561 //    print "\nhtml_look_integrate.pl operates on text files that indicate heirarchical \n";
00562 //    print "structure of the system. Needed arguments:\n";
00563 //    print "- appropriate scope pm; required. \n";
00564 //    print "- name of master file. \n";
00565 //    print "- [optional] fully qualified path and name of starting HTML file for spider trace. \n";
00566 //    print "    If not here, the starting HTML needs to be specified in the scope file. \n";
00567 //    //  print "- output toc script file; optional. \n";
00568 
00569    return;  //  bare return: no value is handed back to the caller
00570 }
00571 
00572 
00573 //#############################################################################
00574 /** @fn int get_input_file
00575  ** @brief Reads the input file into $globe::entire_file and stores its title
00576  ** in $globe::org{file}{title}.
00577  **
00578  ** @param file_incoming This is the file to open.
00579  **
00580  ** Reads in the file into memory for later processing to find anchors.
00581  ** While it's here, it tries to find the title for the file for later display
00582  ** and file naming purposes. The title is taken from the h1 tag, or from the title tag when no h1 is found.
00583  ** @return 0 if the file cannot be opened (an error is queued on @file_errors), otherwise 1.
00584  ** @ingroup tp_nav
00585  **/
00586 // #############################################################################
00587 int get_input_file  ( ) {
00588    #define $file_incoming  $_[0]
00589    // undef ($globe::entire_file);
00590    
00591    if (0) {
00592 //       print "get_input_file with $file_incoming.\n";
00593    }
00594    
00595    unless (open ( IN_COMING, "$file_incoming")) {
00596       push (@file_errors, "Cannot open file \"$file_incoming\" <<<");
00597       return (0);
00598    }
00599    while (<IN_COMING>){
00600       $globe::entire_file .= $_;
00601    }
00602 //    close (IN_COMING);
00603    if (0) {
00604 //       print "$globe::entire_file\nglenn\n";
00605    }
00606    $not_critical = 0;  //  not declared locally; get_hyperlinks passes the same two variables to get_tag_chunk
00607    $case_in = 1;
00608 
00609    //  Look for an h1
00610    ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file,
00611                                "<h1", ">", 
00612                                $not_critical, $case_in);
00613    if ($piece) {
00614       //  rebuilt accurate test string
00615       $test = "<h1$piece>";
00616       //  Redo this on entire file now that complete first tag is known.
00617       ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file,
00618                             $test, "</h1", 
00619                             $not_critical, $case_in);
00620       if ($piece) {
00621          //  Strip out html tags within this piece.
00622          while ($piece =~ /\</) {
00623             ($before, $throw_away, $after) = &globe::get_tag_chunk( $piece,
00624                             "<", ">", 
00625                             $not_critical, $case_in);
00626             $piece = join ("", $before, $after);
00627          } //  while temp title.
00628           
00629          $piece =~ s/\n//g;  //  strip out carriage return
00630          $piece =~ s/^[\s+]//g;  //  strip out leading space; NOTE(review): [\s+] matches one character, so only one is removed here -- the title branch below loops instead
00631          $piece =~ s/[\s+]$//g;  //  strip out trailing space (one character, as on the line above)
00632          if (0) {
00633 //             print "h1 title is \"$piece\"\n";
00634          }
00635          $globe::org{$file_incoming}{title} = $piece;
00636       } else {
00637 //          print "Could find no h1.\n";
00638       }
00639    } else {
00640       //  No "<h1" piece was found: fall back to the title tag.
00641       ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file,
00642                                "<title>", "</title>", 
00643                                $not_critical, $case_in);
00644       if (($piece) && ($piece =~ /[\w+]/)) {
00645          $piece =~ s/\n//g;  //  strip out carriage return
00646          while ($piece =~ /^[\s+]/){
00647             $piece =~ s/^[\s+]//;  //  strip out leading/trailing space
00648          }
00649          while ($piece =~ /[\s+]$/){
00650             $piece =~ s/[\s+]$//;  //  strip out leading/trailing space
00651          }
00652          if (0) {
00653 //             print "<title> is \"$piece\"\n";
00654          }
00655          $globe::org{$file_incoming}{title} = $piece;
00656 
00657        } else {
00658 //          print "Could find no h1 or title.\n";
00659        }
00660    }
00661    if (0) {
00662 //       print "title from get_input \"$globe::org{$file_incoming}{title}\" for $file_incoming\n";
00663    }
00664    
00665    if (1) {
00666       //  Remove the voyant tags so that we don't get extra links to mess us up.
00667       &remove_voyant_tags;
00668    }
00669    
00670    return (1);   
00671 } //  get_input_file
00672 
00673 //#############################################################################
00674 /** @fn int get_hyperlinks
00675  ** @brief Parses through the information in the globe::entire_file buffer
00676  ** for anchors.
00677  **
00678  ** @param _file_4_links contains the fully qualified name of the file.
00679  ** @param _level contains the level, or distance from, the root for use in
00680  ** assigning to the hyperlinks discovered.
00681  **
00682  ** All qualified anchors represent children of the _file_4_links. This
00683  ** routine creates the children's url, display text, and level.
00684  **
00685  ** This routine calls the routine verify_link, which has criteria to test 
00686  ** against. The criteria is intended to make the qualification of the link 
00687  ** fail. In other words, when it returns from verify_link with a failed (0) criteria,
00688  ** this get_hyperlinks routine knows that it should not add a child for
00689  ** that hyperlink. 
00690  **
00691  ** @lim Assumes that $globe::entire_file has an HTML file in it.
00692  ** @ingroup tp_nav
00693  **/
00694 // #############################################################################
00695 int get_hyperlinks  ( ) {
00696    #define $_file_4_links  $_[0]
00697    #define $_level  $_[1]
00698    #define $before  ""
00699    #define $piece  0
00700    #define $after  ""
00701    #define $b_href  ""
00702    #define $href  0
00703    #define $keep_href  ""
00704    #define $a_href  ""
00705    #define $d
00706    $_l_cnt = 0;
00707    
00708    if (0){
00709 //       print "get_hyperlinks from $_file_4_links.\n";
00710    }
00711    
00712    @pot_link_chunk = split (/\<a[\s+]/i, $globe::entire_file);
00713    if ($// pot_link_chunk < 1) {
00714       //  file doesn't have any hyperlinks
00715       return (0);
00716    }
00717    POTENTIAL_L: for ($d = 1; $d <= $// pot_link_chunk; $d++){
00718       //  Purposely skipping the zero element; has no link into it.
00719       // ####
00720       //  Work on hyperlink
00721       // ####
00722       $piece = 0;
00723       ($before, $piece, $after) = &globe::get_tag_chunk( $pot_link_chunk[$d],
00724                                "href", ">", 
00725                                $not_critical, $case_in);
00726       if ($piece) {
00727          //  This contains the information inside the anchor
00728          ($b_href, $href, $a_href) = &globe::get_tag_chunk( $piece,
00729                                "\"", "\"", 
00730                                $not_critical, $case_in);
00731          if ($href) {
00732             // ####
00733             //  If this has valid information, then let's first strip out
00734             //  and further test what the link is with verify_link.
00735             // ####
00736             $keep_href = &verify_link ($href, $_file_4_links);
00737             if ($keep_href) {
00738                // ####
00739                //  This is for valid hyperlink references.
00740                // ####
00741                $href = $keep_href;
00742                if (0) {
00743                   //  old way; 
00744                   #define $path  $globe::org{$_file_4_links}{path}
00745                   $globe::org{$_file_4_links}{child}[$_l_cnt]{url}= "$path$href";
00746                }
00747                if (0) {
00748                   // ###
00749                   //  Don't assign this until we're sure that we have valid
00750                   //  text to go with it.
00751                   //  If we wait, we won't get empty index tokens or
00752                   //  TOC entries
00753                   // ###
00754                   $globe::org{$_file_4_links}{child}[$_l_cnt]{url}= "$href";
00755                }
00756                if (0) {
00757 //                   print "... link = $globe::org{$_file_4_links}{child}[$_l_cnt]{url}\n";
00758                }
00759             } else {
00760                //  if verify_link returns 0, then it is not a link we want to hold on to.
00761                next POTENTIAL_L;
00762             }
00763          } else {
00764             //  Did not contain an href; but if it has a name, it
00765             //  still might be useful to have in script.
00766             //  TBD: implement this later if needed.
00767 //             //  print "Did not contain a valid href for me.\n";
00768             
00769             next POTENTIAL_L;
00770          }         
00771       } else {
00772          //  This anchor does not have a hyperlink
00773          next POTENTIAL_L;
00774       }
00775       // ####
00776       //  Work on link text
00777       //  $after is still valid from above; contains what followed starting 
00778       //  anchor designation <a ....>
00779       // ####
00780       ($b_anc_end, $a_anc_end) = split (/\<\/a>/i, $after, 2);
00781       // undef ($remember);
00782       //  Get rid of any html that might be in link text;
00783       while ($b_anc_end =~ /\</) {
00784          //  while html still in b_anc_end
00785          ($before, $piece, $after) = &globe::get_tag_chunk( $b_anc_end,
00786                                "<", ">", 
00787                                $not_critical, $case_in);
00788          if (($piece =~ /^img/i) && ($piece =~ /alt[\s*]\=/i)){
00789             //  This might have some valid information to remember
00790             @chunks = split (/alt[\s*]\=/i, $piece, 2);
00791             ($b2, $p2, $a2) = &globe::get_tag_chunk( $chunks[1],
00792                                "\"", "\"", 
00793                                $not_critical, $case_in);
00794             $remember=$p2;
00795             if (1) {
00796 //                 print "Alt text = \"$remember\" for $globe::org{$_file_4_links}{child}[$_l_cnt]{url}\n";
00797             }
00798          }
00799          
00800          //  rebuild and retry
00801          $b_anc_end = join ("", $before, $after);
00802          $b_anc_end =~ s/\n/ /g;
00803          $remove = "&nbsp;";
00804          $b_anc_end =~ s/$remove/ /g;
00805          $b_anc_end =~ s/&nbsp;/ /g;
00806          $b_anc_end =~ s/  / /g;
00807          $b_anc_end =~ s/  / /g;
00808          $b_anc_end =~ s/  / /g;
00809          $b_anc_end =~ s/[\s+]$//;
00810       } //  while html still in b_anc_end
00811       
00812       // ####
00813       //  Get rid of portion of entries that begin with numbers or punctuation
00814       // ####
00815       if (1) {
00816          //  Get rid of topics that are all numbers.
00817          if ($b_anc_end !~ /[\D+]/) {
00818             if (0) {
00819 //                print "removing \"$b_anc_end\"\n";
00820             }
00821             next POTENTIAL_L;
00822          }
00823          if ($b_anc_end =~ /^[\d+][\.]/) {
00824             while ($b_anc_end =~ /^[\d+][\.]/){
00825                @_remove = split (/^[\d+][\.]/, $b_anc_end, 2);
00826                $remove = shift (@_remove);
00827                $b_anc_end = join ("", @_remove);
00828             }
00829             while ($b_anc_end =~ /^[\s+]/) {
00830                $b_anc_end =~ s/^[\s]//;
00831             }
00832             if (0) {
00833 //                print "has (had) digits \"$b_anc_end\"\n";
00834             }
00835          }
00836          if ($b_anc_end =~ /^[\d+][\s+]/) {
00837             while ($b_anc_end =~ /^[\d+][\s+]/){
00838                @_remove = split (/^[\d+][\s+]/, $b_anc_end, 2);
00839                $remove = shift (@_remove);
00840                $b_anc_end = join ("", @_remove);
00841             }
00842             while ($b_anc_end =~ /^[\s+]/) {
00843                $b_anc_end =~ s/^[\s]//;
00844             }
00845             if (0) {
00846 //                print "has (had) digits \"$b_anc_end\"\n";
00847             }
00848          }
00849          if ($b_anc_end =~ /^[\d+]$/) {
00850             if (0) {
00851 //                print "has (had) digits \"$b_anc_end\"\n";
00852             }
00853             while ($b_anc_end =~ /^[\d+]$/){
00854                @_remove = split (/^[\d+]/, $b_anc_end, 2);
00855                $remove = shift (@_remove);
00856                $b_anc_end = join ("", @_remove);
00857             }
00858             while ($b_anc_end =~ /^[\s+]/) {
00859                $b_anc_end =~ s/^[\s]//;
00860             }
00861             if (0) {
00862 //                print "has (had) digits \"$b_anc_end\"\n";
00863             }
00864          }
00865          if ($b_anc_end =~ /^[A-Z]\.[\d]/) {
00866             if (0) {
00867 //                print "has (had) digits \"$b_anc_end\"\n";
00868             }
00869             while (($b_anc_end =~ /^[A-Z]\.[\d]/) 
00870                || ($b_anc_end =~ /^[\d+]\./) 
00871                || ($b_anc_end =~ /^\.[\d+]/)  ){
00872                if ($b_anc_end =~ /^[A-Z]\.[\d]/) {
00873                   @_remove = split (/^[A-Z]\./, $b_anc_end, 2);
00874                } elsif ($b_anc_end =~ /^[\d+]\./) {
00875                   @_remove = split (/^[\d+]\./, $b_anc_end, 2);
00876                } else {
00877                   @_remove = split (/^\.[\d+]/, $b_anc_end, 2);
00878                }
00879                
00880                $remove = shift (@_remove);
00881                $b_anc_end = join ("", @_remove);
00882             }
00883             while ($b_anc_end =~ /^[\s+]/) {
00884                $b_anc_end =~ s/^[\s]//;
00885             }
00886             if (0) {
00887 //                print "has (had) digits \"$b_anc_end\"\n";
00888             }
00889          }
00890          if ($b_anc_end =~ /^[\d+][\,]/) {
00891             while ($b_anc_end =~ /^[\d+][\,]/){
00892                @_remove = split (/^[\d+][\,]/, $b_anc_end, 2);
00893                $remove = shift (@_remove);
00894                $b_anc_end = join ("", @_remove);
00895             }
00896             while ($b_anc_end =~ /^[\s+]/) {
00897                $b_anc_end =~ s/^[\s]//;
00898             }
00899             if (0) {
00900 //                print "has (had) digits \"$b_anc_end\"\n";
00901             }
00902          }
00903          if ($b_anc_end =~ /^[\d+]&nbsp;/) {
00904             while ($b_anc_end =~ /^[\d+]/){
00905                @_remove = split (/^[\d+]&nbsp;/, $b_anc_end, 2);
00906                $remove = shift (@_remove);
00907                $b_anc_end = join ("", @_remove);
00908             }
00909             while ($b_anc_end =~ /^[\s+]/) {
00910                $b_anc_end =~ s/^[\s]//;
00911             }
00912             if (0) {
00913 //                print "has (had) digits \"$b_anc_end\"\n";
00914             }
00915          }
00916          if ($b_anc_end =~ /^[\s+]/) {
00917             $b_anc_end =~ s/^[\s+]//g;
00918             if (0) {
00919 //                print "has (had) digits \"$b_anc_end\"\n";
00920             }
00921          }
00922       }
00923       
00924       // ####
00925       //  If it doesn't have text, try to give it some alternate 
00926       //  text or return with 0;
00927       // ####
00928       if ($b_anc_end !~ /[\S+]/){
00929          //  See if the $piece contains alt text or an image
00930          if ((0) && ($remember =~ /[\w+]/)){
00931             $b_anc_end = $remember;
00932          } else {
00933             //  There was no valid text, so go and get next hyperlink
00934             next POTENTIAL_L;
00935          }
00936       }
00937 
00938       if (0) {
00939 //          print "Link text = $b_anc_end\n";
00940       }
00941 
00942 
00943       //  clean up the text from things like &nbsp
00944       $b_anc_end =~ s/\&nbsp\;/ /g;
00945       $b_anc_end =~ s/&nbsp;/ /g;
00946       $b_anc_end =~ s/^\s+//;
00947       $b_anc_end =~ s/\s+$//;
00948       
00949       $globe::org{$_file_4_links}{child}[$_l_cnt]{text}=$b_anc_end;
00950       
00951       // ####
00952       //  The children own their level rather than the owning file,
00953       //  so that they'll be displayed properly when
00954       //  referenced from multiple locations.
00955       // ####
00956       $globe::org{$_file_4_links}{child}[$_l_cnt]{level} = $_level;
00957       
00958       // ####
00959       //  URL comes from above.
00960       // ####
00961       $globe::org{$_file_4_links}{child}[$_l_cnt]{url}= "$href";
00962 
00963       if (0) {
00964 //          print "Link text = $globe::org{$_file_4_links}{child}[$_l_cnt]{text}\n";
00965 //          print "   Link url = $globe::org{$_file_4_links}{child}[$_l_cnt]{url}\n";
00966 //          print "   Link level = $globe::org{$_file_4_links}{child}[$_l_cnt]{level}\n";
00967 //          print "   Link parent = $_file_4_links\n";
00968       }
00969 
00970       // ####
00971       //  Should be the last thing; increment counter
00972       // ####
00973       $_l_cnt++;
00974    } //  for
00975    
00976    if (0) {
00977 //       print "org file: $_file_4_links with children\n";
00978       for ($d=0; $d <= $// {$globe::org{$_file_4_links}{child}}; $d++){
00979 //             print "  child $d $globe::org{$_file_4_links}{child}[$d]{url}\n";
00980       }
00981 //       print "  org file above: $_file_4_links\n";
00982 //       //  print "ending get_hyperlinks\n";
00983 //       //  exit(1);
00984    }
00985    return (1);
00986 
00987 } //  get_hyperlinks 
00988 
00989 
00990 //#############################################################################
00991 /** @fn int verify_link
00992  ** @brief Tests the potential link against various criteria to validate
00993  ** whether the link is of value.
00994  ** 
00995  ** @param potential_link The hyperlink to test; it may begin with ./ or ../
00996  ** @param owning_file Key into $globe::org for the file that owns the link.
00997  ** 
00998  ** @return 0 if any of the criteria is matched (meaning an uninteresting
00999  ** link). If none of the criteria is matched, this returns the potential
01000  ** link with the owning file's (adjusted) path placed in front of it.
01001  ** 
01002  ** Criteria that make the link uninteresting are things like:
01003  ** linking to itself, up linking to a top-level entity, linking to
01004  ** an html topic that we want to exclude as being a child, linking to
01005  ** other children at the same level, etc.
01006  **
01007  ** @ingroup tp_nav
01008  **/
01009 // #############################################################################
01010 int verify_link  ( ) {
01011    #define $potential_link  $_[0] //  hyperlink under test
01012    #define $owning_file  $_[1] //  key into $globe::org
01013    #define $_href  "" //  scratch: link without its target; later reused as loop variable
01014 
01015 
01016    if (0) {
01017 //       print "Entering verify_link with\n\t$potential_link\n\t$owning_file\n";
01018    }
01019    { //  saves removing indents
01020       { //  saves removing indents
01021          if ($potential_link) {
01022             //  strip out what we don't want, like leading ./
                  //  NOTE(review): $_rel is interpolated into the regex, so its dot
                  //  appears to act as a wildcard (any one character followed by a
                  //  slash would be stripped) -- confirm.
01023             #define $_rel  "\.\/"
01024             while ($potential_link =~ /^$_rel/){
01025 //             if (0) { print "\tbefore $potential_link "; }
01026                $potential_link =~ s/^$_rel//;
01027 //                if (0) { print "after $potential_link \n"; }
01028             }
01029             // ###
01030             //  VxWorks had some strange file names with ( and ), but
01031             //  escaped out with the characters below in the links.
                  //  Only the first %28 and the first %29 are unescaped (no /g flag).
01032             // ###
01033             if ($potential_link =~ /\%28/){
01034 //                if (0) { print "==== before $potential_link "; }
01035                $potential_link =~ s/\%28/\(/;
01036             }
01037             if ($potential_link =~ /\%29/){
01038                $potential_link =~ s/\%29/\)/;
01039 //                if (0) { print "after $potential_link \n"; }
01040             }
01041             if (0) {
01042 //                print "Hyperlink = $potential_link\n";
01043             }
01044             // ####
01045             //  Test for if link within same page
01046             //  If so, we don't want it
                  //  (The pattern below presumably matched the target marker in the
                  //  Perl source; the listing filter rewrites that character as a
                  //  C++ comment marker -- confirm against the original file.)
01047             // ####
01048             if ($potential_link =~ /\// /){
01049                //  See if there is any meat to this anchor
01050                @temp_sides = split (/\// /, $potential_link, 2);
01051                if ($temp_sides[0] !~ /[\w+]/){
01052                   //  If the left side has no meat, it means it is a link
01053                   //  within the page. We can safely ignore.
01054                   return (0);
01055                }
01056             }
                  //  Reject mail links and anything that looks like an external web
                  //  address (contains mailto:, http:// or www., case-insensitive).
01057             if (($potential_link =~ /mailto\:/i)
01058               || ($potential_link =~ /http\:\/\//i)
01059               || ($potential_link =~ /www\./i))
01060             {
01061                //  We can safely ignore.
01062                return (0);
01063             }
01064             if (0){
01065 //                print "href $potential_link and \n\t path $globe::org{$owning_file}{path}\n";
01066 //                //  exit(1);
01067             }
01068             
01069             
01070             // ####
01071             //  get rid of relative path markings
                  //  Each leading ../ removed from the link also drops the last
                  //  component of $path, and $path is kept ending in a slash.
                  //  NOTE(review): $_rel is interpolated as a regex, so its two dots
                  //  appear to match any two characters, and the removal inside the
                  //  loop is not anchored to the start of the link -- confirm.
01072             // ####
01073             #define $path  $globe::org{$owning_file}{path}
01074             if (0) { 
01075 //                print "\n$potential_link and path before = $path \n";
01076             }
01077             #define $_href  $potential_link
01078             #define $_rel  "../"
01079             if ($_href =~ /^$_rel/){
01080                if (0) { 
01081 //                   print "\n$potential_link and path before = $path \n";
01082                }
01083                while ($_href =~ /^$_rel/){
01084                   $_href =~ s/$_rel//;
01085                   @_ath = split (/\//, $path);
01086                   pop (@_ath);
01087                   $path = join ("\/", @_ath);
01088                   if ($path !~ /\/$/){
01089                      $path .= "\/";
01090                   }
01091                } //  while
01092                if (0) { 
01093 //                   print "$_href at ppath after = $path\n";
01094 //                   //  exit(1);
01095                } 
01096                $potential_link = $_href; 
01097             } //  if
01098             // ####
01099             //  Test for targets and removing links that are to the
01100             //  same file
01101             // ####
01102          if (1) {
                  //  $_href receives the link with any target portion split off.
01103             if ($potential_link =~ /\// /) {
01104                @_ath = split (/\// /, $potential_link, 2);
01105                $_href = $_ath[0];
01106             } else {
01107                $_href = $potential_link;
01108             }
01109             
                  //  Both tests use the link text as a regex pattern matched against
                  //  the owning file name, so this is a pattern match rather than a
                  //  string equality test.
01110             if (($owning_file =~ /$_href/) && ($owning_file =~ /$potential_link/)) {
01111                //  Don't add hyperlinks that take you back to the same file
01112                //  However, if the link has targets, then it is okay
01113                if (0) {
01114 //                   print "test1 this=$_href against that=$owning_file \n";
01115                }
01116                return (0);
01117             }
01118          }  //  if (1)
01119             // ####
01120             //  Rebuild a true link file name
01121             // ####
01122             $potential_link = "$path$potential_link";
01123             
01124             // ####
01125             //  Don't add hyperlinks back to itself.
01126             // ####
01127             if ($globe::org{$owning_file}{url} =~ /$potential_link/){
01128                return (0);
01129             }
01130             
01131             // ####
01132             //  Don't add duplicate hyperlinks from other children at that level.
01133             // ####
01134             for ($r = 0; $r <= $// {$globe::org{$owning_file}{child}}; $r++) {
01135                if ($globe::org{$owning_file}{child}[$r]{url} =~ /$potential_link/){
01136                   if (0){
01137 //                      print "RAN INTO SOME DUPLICATION; elimination\n";
01138 //                      exit(1);
01139                   }
01140                   return (0);
01141                }
01142             }
01143 
01144             // ####
01145             //  Don't add hyperlinks that go to files with these names
                  //  (entries of @xscope::gen_ex_child; a match in either direction
                  //  rejects the link).
01146             // ####
01147             foreach $_href (@xscope::gen_ex_child){
01148                if (0) {
01149 //                   print "verifying against $_href,\n\t $potential_link\n";
01150                }
01151                if (($_href =~ /$potential_link/) || ($potential_link =~ /$_href/)) {
01152                   if (0){
01153 //                      print "Not adding child link to top level $potential_link.\n";
01154 //                      print "Not validating $potential_link\n";
01155                   }
01156                   return (0);
01157                }
01158             } //  foreach $_href
01159 
01160 
01161             
01162             // ####
01163             //  Don't add hyperlinks that go to one of the top-level to-be-excluded files
                  //  (entries of @xscope::ex_as_child; same two-way match as above).
01164             // ####
01165             foreach $_href (@xscope::ex_as_child){
01166                if (0) {
01167 //                   print "verifying against $_href,\n\t $potential_link\n";
01168                }
01169                if (($_href =~ /$potential_link/) || ($potential_link =~ /$_href/)) {
01170                   if (0){
01171 //                      print "Not adding child link to top level $potential_link.\n";
01172 //                      print "Not validating $potential_link\n";
01173                   }
01174                   return (0);
01175                }
01176             } //  foreach $_href
01177             if (0) {
01178 //                print "verified $potential_link\n";
01179 //                //  exit(1);
01180             }
01181             
01182             // ####
01183             //  If we made it this far, then we can return the modified potential_link
01184             // ####
01185             return ($potential_link);
01186             
01187          } //  if for potential_link
01188          //  No explicit return statement is reached when $potential_link is false.
01189       } //  saves removing indents
01190    } //  saves removing indents            
01191 } //  verify_link 
01192 
01193 
01194 
01195 //#############################################################################
01196 /** @fn int spider_trace
01197  ** @brief Traces the child hyperlinks of a starting file and places them
01198  ** into the data structure.
01199  **
01200  ** @param start_file The starting HTML file to trace.
       ** @param base_level Level recorded for start_file; files reached from it
       ** are processed at base_level + 1.
       **
       ** @return Always 1.
01201  **
01202  ** This calls routines to open and read the children HTML files and to locate the
01203  ** appropriate hyperlinks within those files.
01204  **
01205  ** @lim This assumes that the data structure has already been started by
01206  ** having a root file, by having already opened it and acquired its children.
01207  **
01208  ** @ingroup tp_nav
01209  **/
01210 // #############################################################################
01211 int spider_trace  ( ) {
01212    #define $start_file  $_[0]
01213    #define $base_level  $_[1]
01214    #define $new_level  0
01215    #define $_f_cnt  1 //  set so that loop will start
01216    #define $new_file_entry
01217    #define %to_do
01218    
01219    if (1) {
01220 //       print "spider trace $start_file\n";
01221    } else {
01222 //       print "$globe_file_cnt...";
01223    }
01224    $globe_file_cnt++;
01225 
01226    // ####
01227    //  New level should be one more than current level
01228    // ####
01229    $new_level = $base_level + 1;
01230        
01231 
01232 
         //  Record the level at which this file was reached. A file reached more
         //  than once gets each further level appended after a "..." separator.
01233    if (exists ($globe::org{$start_file}{o_level})) {
01234       $globe::org{$start_file}{o_level} .= "...$base_level";
01235    } else {
01236       $globe::org{$start_file}{o_level} = $base_level;
01237    }
01238    { //  FILLER BRACKET
            //  First pass: create a $globe::org entry for every child file that
            //  does not have one yet and read that file's own hyperlinks.
01239       for ($i = 0; $i <= $// {$globe::org{$start_file}{child}}; $i++) {
01240          //  strip out any targets inside of a file...
01241          if ($globe::org{$start_file}{child}[$i]{url} =~ /\// /){
01242             @_name = split (/\// /, $globe::org{$start_file}{child}[$i]{url}, 2);
01243             //  $new_file_entry .= @_name[0];
01244             $new_file_entry = @_name[0];
01245          } else {
01246            //  child url name is good to go
01247            $new_file_entry = $globe::org{$start_file}{child}[$i]{url};
01248          }
01249       
01250          if (0) {
01251 //             print "Child url $globe::org{$start_file}{child}[$i]{url}\n";
01252 //             print "Child1 $new_file_entry\n";
01253          }
01254          // ####
01255          //  Add this child to a list for later creation if it doesn't exist.
01256          // ####
01257          if ((1) && (!(exists ($globe::org{$new_file_entry})))) {
01258             // ####
01259             //  The entry contains the path without any targets.
01260             //  The url contains the path with targets.
01261             // ####
01262             $to_do{$new_file_entry} = $globe::org{$start_file}{child}[$i]{url};
01263 
01264             // ####
01265             //  Set up the path to be used when generating links
                  //  (the trailing "" in the join leaves the path ending in a slash)
01266             // ####
01267             @_name = split (/\//, $new_file_entry);
01268             pop (@_name);  //  This is the filename, which we don't want in path.
01269             $globe::org{$new_file_entry}{path} = join ("\/", @_name, "");
01270             // ####
01271             //  The entry contains the path without any targets.
01272             //  The url contains the path with targets.
01273             // ####
01274             $globe::org{$new_file_entry}{url} = $to_do{$new_file_entry}; //  should have the url
01275             
01276             // ####
01277             //  Read in file and get its child hyperlinks
                  //  The child's links are collected at $new_level.
01278             // ####
01279             if (&get_input_file ($new_file_entry)) {
01280                //  good
01281                if (0) {
01282 //                   print "after input title = $globe::org{$new_file_entry}{title}\n";
01283                }
01284                if (!&get_hyperlinks ($new_file_entry, $new_level)) {
01285                   //  bad; did not succeed getting hyperlinks from file
01286                   push (@file_errors, "No hyperlinks found in $new_file_entry.");
01287 //                   print ("\nNo hyperlinks found in $new_file_entry\n");
01288                }
01289                if (0) {
01290 //                   print "base $base_level new $new_level for all children\n";
01291 //                   print "\tafter hyperlink title = $globe::org{$new_file_entry}{title}\n";
01292 //                   print "\t$new_file_entry\n";
01293                }
01294             } else {
01295                //  bad
01296                //  Messaging turned off because trapped elsewhere.
01297                if (0) {
01298                   push (@file_errors, "Could not find \"$new_file_entry\" !!");
01299 //                   print ("\nCould not find $new_file_entry !!!?\n");
01300                }
01301             } //  get_input_file
01302                
01303           } //  if ((1) && (!(exists ($globe::org{$new_file_entry}))))
01304        } //  for ($i = 0;...
01305 
01306        if (0) {
01307 //           print "======== to be traced\n";
01308           foreach $new_file_entry (keys %to_do) {
01309 //               print "key: $new_file_entry\n";
01310 //               print "value: $to_do{$new_file_entry}\n";
01311           }
01312        }
01313        
01314 
01315        
             //  Second pass: recurse into every file queued above.
01316        foreach $new_file_entry (keys %to_do) {
01317           if ($to_do{$new_file_entry}) {
01318             // ####
01319             //  The entry for this file (path, url and child hyperlinks) was
01320             //  already set up in the loop above.
01321             // ####
01322             
01323             // ####
01324             //  call this routine again with that new file as the starting point
01325             //  RECURSIVE CALL
01326             // ####
01327             if (0) {
01328 //                print "Recursive call.\n  owner = $start_file\n  with $new_file_entry\n";
01329 //                print "   path $globe::org{$new_file_entry}{path}\n  title = $globe::org{$new_file_entry}{title}; \n";
01330             }
01331             &spider_trace($new_file_entry, $new_level);
01332             if (0) {
01333 //                print "  return from spider with $new_file_entry\n";
01334 //                print "  title = $globe::org{$new_file_entry}{title}; \n";
01335             }
01336             
                  //  Mark as done so the entry is not traced again from this list.
01337             $to_do{$new_file_entry} = 0;
01338           }  //  if ($to_do{$new_file_entry})
01339             
01340           
01341        }  //  foreach $new_file_entry
01342 
01343    }   //  FILLER BRACKET   
01344          
01345    // undef (%to_do);   
01346    
01347    if (0) {
01348       foreach $in_file (sort keys %{globe::org}) {
01349 //          print "debug file $in_file\n";
01350 //          print "  g title $globe::org{$in_file}{title}\n";
01351       }
01352    }
01353    
01354    return (1);
01355 
01356 } //  spider_trace
01357 
01358 
01359 //#############################################################################
01360 /** @fn int testing_structure
01361  ** @brief Traces through the created data structure.
01362  ** 
01363  ** @param begin_file is the starting point to begin tracing.
       **
       ** @return Always 1.
01364  ** 
01365  ** This assumes that the begin_file is present in the data structure and
01366  ** has children to trace through.
01367  ** 
01368  ** @note This is a RECURSIVE algorithm and can be used as a template for other
01369  ** recursive things involving the same data structure.
01370  **
01371  ** @ingroup tp_nav
01372  **/
01373 // #############################################################################
01374 int testing_structure  ( ) {
01375    #define $begin_file  $_[0]
01376    #define $child
01377    #define $loc_title //  not referenced anywhere else in this routine
01378    #define $f
01379    if (0) {
01380 //       print "test_file $begin_file\n$globe::org{$begin_file}{title}\n";
01381    }
01382    
01383    for ($f = 0; $f <= $// {$globe::org{$begin_file}{child}}; $f++){
            //  $child becomes the child's url with any target portion split off,
            //  which is the key used for the recursive call below.
01384       $child = $globe::org{$begin_file}{child}[$f]{url};
01385       if ($child =~ /\// /){
01386          @_name = split (/\// /, $child, 2);
01387          $child = $_name[0];
01388       }
01389       if (1) {
01390 //          print "  child level=$globe::org{$begin_file}{child}[$f]{level} $globe::org{$begin_file}{child}[$f]{text}\n";
01391 //          print "      p level=$globe::org{$begin_file}{o_level} $globe::org{$begin_file}{title}\n";
01392 //          //  print "     parent $begin_file\n";
01393 //          //  print "  $globe::org{$begin_file}{child}[$f]{url}\n";
01394       }
            //  A child more than one level deeper than the previously visited child
            //  is treated as suspicious. In this listing the branch only holds
            //  commented-out diagnostics; the level itself is left unchanged.
01395       if ($globe::org{$begin_file}{child}[$f]{level} > $globe::_last_level + 1) {
01396 //          print "DANGER WILL ROBINSON!!! DANGER!!!\n";
01397 //          print "child $globe::org{$begin_file}{child}[$f]{url}\n";
01398 //          print "parent $begin_file\n";
01399 //          print "parent level $globe::org{$begin_file}{o_level} \nchild level $globe::org{$begin_file}{child}[$f]{level}\n";
01400 //          print "parent has children, who are:\n";
01401          for ($b = 0; $b <= $// {$globe::org{$begin_file}{child}}; $b++) {
01402 //             print "\t $b: $globe::org{$begin_file}{child}[$b]{text}";
01403 //             print "; level $globe::org{$begin_file}{child}[$b]{level}\n";
01404          }
01405 //          print "===============\n";
01406          
01407          // ####
01408          //  Fixing the level
01409          // ####
01410          //  $globe::org{$begin_file}{child}[$f]{level}--;
01411 //          print "Fixing the level\n";
01412 //          print "parent level $globe::org{$begin_file}{o_level} \nchild level $globe::org{$begin_file}{child}[$f]{level}\n";
01413 //          print "===============\n";
01414          
01415 //          //  exit(1);
01416       } 
01417       $globe::_last_level = $globe::org{$begin_file}{child}[$f]{level};
            //  Descend into each file only once; %globe_all_files records the files
            //  already visited and $globe_file_cnt counts them.
01418       if (! (exists ($globe_all_files{$child}) )) {
01419          $globe_all_files{$child} = 1;
01420          $globe_file_cnt++;
01421          // ####
01422          //  Recursive Call
01423          // ####
01424          &testing_structure ($child);
01425       }
01426       
01427    } //  for each child
01428 
01429    return (1);
01430 
01431 } //  testing_structure
01432 
01433 //#############################################################################
01434 /** @fn int output_structure_script
01435  ** @brief The starting point for outputting the TOC script files.
01436  **
01437  ** @param _in_file / htree_file the prefix of the name of the script file to output.
01438  ** @param trace_start_file starting point in the data structure that has
01439  ** children to trace through.
01440  ** @param globe_path_purge the key into the $globe::nav_purge_path hash,
01441  ** which contains paths to purge from the output hyperlinks in the TOC, etc.
       **
       ** @return 0 if the output script file cannot be opened (a message is pushed
       ** onto @file_errors); otherwise 1.
01442  ** 
01443  ** @note This calls script_structure which is a recursive routine that traces through
01444  ** the data structure.
01445  **
01446  ** @ingroup tp_nav
01447  **/
01448 // #############################################################################
01449 int output_structure_script  ( ) {
01450    #define $_in_file  $_[0] //  same argument as $htree_file; not referenced again here
01451    #define $htree_file  $_[0]
01452    #define $trace_start_file  $_[1]
01453    #define $globe_path_purge  $_[2] //  used globally by subsequent routines
01454    // undef (%globe_found_files);  //  Used in a global way
01455    #define $s_path  ""
01456    $globe_file_cnt = 0;
01457    
         //  Build the output name: keep what precedes the first ".htm" when the
         //  name ends in .htm or .html, then append ".script".
01458    if (($htree_file =~ /(\.html)$/i) || ($htree_file =~ /(\.htm)$/i)){
01459       @_name = split (/\.htm/i, $htree_file, 2);
01460       $htree_file = $_name[0];
01461    }
01462    $htree_file .= ".script";
01463    
01464    if (1) {
01465 //       print "The output TOC script file is \"$htree_file\".\n";
01466 //       print "The portion of the path to purge from hyperlinks is \"$globe::nav_purge_path{$globe_path_purge}\".\n";
01467    }
01468 
         //  OUT_SCRIPT is a package-level file handle opened for writing.
         //  Presumably script_structure writes to it -- verify against that routine.
01469    unless (open(OUT_SCRIPT, ">$htree_file")) {
01470       push (@file_errors, "Cannot open file \"$htree_file\".");
01471       return(0);
01472    }
01473    // #################################################
01474    //  Root level of tree.
01475    // #################################################
01476    ($okay, $s_path) = &purge_full_path ($trace_start_file,  $globe::nav_purge_path{$globe_path_purge});
01477    if (0) {
01478 //       print "before $trace_start_file\nokay=$okay after $s_path\n";
01479    }
01480 
01481    { //  filler bracket
01482       // ####
01483       //  This top title is part of the master and does not need to be
01484       //  part of this.
01485       // ####
01486 //       print (OUT_SCRIPT "Item level=1 ");
01487 //       print (OUT_SCRIPT "image=nav_doc.gif ");
01488 //       print (OUT_SCRIPT "url=$s_path,basefrm ");
01489 //       print (OUT_SCRIPT "selected=YES ");
01490 //       print (OUT_SCRIPT "text=$globe::org{$trace_start_file}{title}\r\n");
01491    } //  filler bracket
01492    
01493    // #####
01494    //  Starting routine that can get called recursively
01495    // #####
01496 //    print "$globe_file_cnt Starting the building of scripts...\n";
01497    &script_structure($trace_start_file);
01498 //    print "\n  $globe_file_cnt Ending the building of scripts...\n";
01499 
01500 
01501 
01502    // #################################################
01503    //  Clean up navigation.
         //  NOTE(review): the close call below appears commented out in this
         //  listing; confirm against the original source whether OUT_SCRIPT is
         //  actually closed here.
01504    // #################################################
01505 //    close (OUT_SCRIPT);
01506    // undef (%globe_found_files); 
01507    return (1);
01508 } //  output_structure_script
01509 
01510 
01511 //#############################################################################
01512 /** @fn int purge_full_path
01513  ** @brief Changes the file name by purging a portion of its path.
01514  ** 
01515  ** @param _filename original filename.
01516  ** @param part2rm portion of the path in the filename to remove. It is used
       ** as a regular-expression pattern, not as a literal string.
01517  ** 
01518  ** @return 1 and the updated filename if successful; 0 and the unmodified
01519  ** filename if unsuccessful.
01520  **
01521  ** @ingroup tp_nav
01522  **/
01523 // #############################################################################
01524 int purge_full_path  ( ) {
01525    #define $_filename  $_[0]
01526    #define $part2rm  $_[1]
01527    
         //  Only the first match of $part2rm is removed (no /g flag).
         //  NOTE(review): regex metacharacters inside $part2rm (a dot, for example)
         //  are not escaped before matching -- confirm that callers rely on this.
01528    if ($_filename =~ /$part2rm/){
01529       $_filename =~ s/$part2rm//;
01530       return (1, $_filename);  
01531    } else {
01532       return (0, $_filename);  
01533    }
01534 } //  purge_full_path
01535 
01536 
01537 //#############################################################################
01538 /** @fn int starting_point_script
01539  ** @brief Creates the top level TOC script that uses subscript references.
01540  ** 
01541  ** @param htree_file The prefix of the script file to be output.
01542  ** @param which_script The key into the hash for whether we're creating a
01543  ** "script_#define" or "script_master" script. This has the appropriate file names.
01544  ** @param which_purge_path The key into the hash for the appropriate purge
01545  ** path depending upon whether or not this is a "#define" or "master" version.
01546  ** 
01547  ** This knows about the top level entries and what they should be named.
01548  ** The names have been added to the appropriate hash. It can generate the
01549  ** master script that calls subscripts depending upon which version we're
01550  ** doing.
01551  ** 
01552  ** The difference between a #define and master version are the paths for links.
01553  ** Normally, the other tools would handle this for me. However, because this
01554  ** is starting out already as a nested version, it does not integrate well.
01555  ** It is better to have this take care of all entries appropriately so that
01556  ** the other tools only have to integrate the top level script (through
01557  ** a master_nav file).
01558  ** 
       ** @return 0 if the script file cannot be opened (the error is pushed onto
       ** @file_errors); there is no explicit return statement otherwise.
       **
01559  ** @ingroup tp_nav
01560  **/
01561 // #############################################################################
01562 int starting_point_script  ( ) {
01563    #define $htree_file  $_[0]
01564    #define $which_script  $_[1]
01565    #define $which_purge_path  $_[2]
01566    
         //  Keep everything before the first ".htm" (any case), then append ".script".
01567    if (($htree_file =~ /(\.html)$/i) || ($htree_file =~ /(\.htm)$/i)){
01568       @_name = split (/\.htm/i, $htree_file, 2);
01569       $htree_file = $_name[0];
01570    }
01571    $htree_file .= ".script";
01572 
         //  Disabled by the leading (0): would prefix the file's base name with "tree_".
01573    if ((0) && ($htree_file =~ /\//)) {
01574       @_name = split (/\//, $htree_file);
01575       $_name[$// _name] = "tree_$_name[$#_name]";
01576       $htree_file = join ("\/", @_name);
01577    }
01578    
01579 //    print "The new output TOC script file is \"$htree_file\".\n";
01580 
01581    unless (open(OUT_SCRIPT, ">$htree_file")) {
01582       push (@file_errors, "Cannot open file \"$htree_file\".");
01583       return(0);
01584    }
01585 
01586    if ($which_purge_path =~ /master/) {
01587       //  The master level doesn't need a top level
01588    } else {
01589       //  $globe::top_most_level_title
01590       ($okay, $strip_p) = &purge_full_path ($globe::top_most_level{url}, $globe::nav_purge_path{$which_purge_path});
01591 //       print (OUT_SCRIPT "Item level=1 ");
01592 //       print (OUT_SCRIPT "image=nav_folderclosed.gif ");
01593 //       //  print (OUT_SCRIPT "url=$strip_p ");
01594 //       print (OUT_SCRIPT "text=$globe::top_most_level{title}\r\n");
01595    }
01596    
01597 
01598    $i = 0;
01599    foreach $_in_file (@xscope::top_files){
01600       $globe::org{$_in_file}{title} =~ s/^[\s+]//;  //  removes one leading whitespace or "+" character
01601       if ($which_purge_path =~ /master/) {
01602 //          print (OUT_SCRIPT "Item level=1 ");
01603       } else {
01604 //          print (OUT_SCRIPT "Item level=2 ");
01605       }
01606 //       print (OUT_SCRIPT "image=nav_folderclosed.gif ");
01607 //       print (OUT_SCRIPT "subscript=$globe::org{$_in_file}{$which_script}.script ");
01608 //       print (OUT_SCRIPT "selected=YES ");
01609 //       print (OUT_SCRIPT "text=$globe::org{$_in_file}{title}\r\n");
01610       if (0) {
01611 //          print "$i $_in_file\n\t$globe::org{$_in_file}{title}\n";
01612       }
01613       $i++;
01614    }
01615 //    close (OUT_SCRIPT);
01616 
01617 } //  starting_point_script
01618 
01619 
01620 
01621 
01622 
01623 //#############################################################################
01624 /** @fn int script_structure
01625  ** @brief Traces through the data structure following the children and
01626  ** creates entries into the script file for the TOC.
01627  ** 
01628  ** @param begin_file is the point to start in the data structure.
01629  ** 
01630  ** For every begin_file, this outputs entries into the already open
01631  ** OUT_SCRIPT file for each of its children.
01632  ** 
01633  ** Whenever a child is encountered, it is tested (using globe_found_files)
01634  ** to see if it has already been processed. This prevents it looping forever.
01635  ** 
01636  ** If a given child has not been traced, this calls itself with that child.
01637  ** 
01638 //  ** It was deemed desirable to still print out children entries even if
01639  ** we weren't going to trace them (because they already had been traced.)
01640  ** 
01641  ** @note This is a RECURSIVE routine. It stops when all children at
01642  ** that level have had appropriate entries made and none have
01643  ** been flagged to be traced.
01644  **
01645  ** @lim $globe_found_files{$begin_file} is used in a global way. 
01646  ** $globe::nav_purge_path is also a global hash. The key into the hash
01647  ** ($globe_path_purge) was defined a level higher and used globally.
01648  ** 
       ** @return Always 1.
       **
01649  ** @ingroup tp_nav
01650  **/
01651 // #############################################################################
01652 int script_structure  ( ) {
01653    #define $begin_file  $_[0]
01654    //  $globe_path_purge is global from calling routine, so we don't have
01655    //  to remember it.
01656    #define $child
01657    #define $strip_p  ""
01658    #define $trace  0
01659    #define $f
01660    #define $_lev
01661    
01662    if (0) {
01663 //       print "script_structure $begin_file\n$globe::org{$begin_file}{title}\n";
01664    } else {
01665       //  purposely not terminated; shows that it is working
01666 //       print "$globe_file_cnt.";
01667    }
01668    
01669    
         //  "$// {...}" is presumably the filter's rendering of Perl's "$#{...}"
         //  (last index of the child array) -- confirm against the Perl source.
01670    for ($f = 0; $f <= $// {$globe::org{$begin_file}{child}}; $f++){
01671       ($okay, $strip_p) = &purge_full_path ($globe::org{$begin_file}{child}[$f]{url}, $globe::nav_purge_path{$globe_path_purge});
01672       
01673 //       //  okay to still print out the children even if we're not going to 
01674       //  trace them.
01675       // ####
01676       //  We were getting wrong levels; now we assure that children are always only
01677 //       //  incremented one from the parent printing them out.
01678       // ####
01679 
01680 //       print (OUT_SCRIPT "Item level=$globe::org{$begin_file}{child}[$f]{level} ");
01681 //       print (OUT_SCRIPT "image=nav_doc.gif ");
01682 //       print (OUT_SCRIPT "url=$strip_p,basefrm ");
01683 //       print (OUT_SCRIPT "selected=YES ");
01684 //       print (OUT_SCRIPT "text=$globe::org{$begin_file}{child}[$f]{text}\r\n");
01685 
            //  Keep only the part of the URL before the separator. "/\// /" is
            //  presumably the filter's rendering of /\#/, i.e. the anchor is dropped.
01686       $child = $globe::org{$begin_file}{child}[$f]{url};
01687       if ($child =~ /\// /){
01688          @_name = split (/\// /, $child, 2);
01689          $child = $_name[0];
01690       }
            //  Recurse only into files not yet recorded in %globe_found_files.
01691       if (! (exists ($globe_found_files{$child}) )){
01692          $globe_found_files{$child} = 1;
01693          $globe_file_cnt++;
01694          // ####
01695          //  Recursive Call
01696          // ####
01697          &script_structure ($child);
01698       }
01699       
01700    } //  for each child
01701 
01702    return (1);
01703 
01704 } //  script_structure
01705 
01706 
01707 
01708 //#############################################################################
01709 /** @fn int handle_index_tokens
01710  ** @brief Traces through the data structure and outputs appropriate entries
01711  ** for the index.
01712  ** 
01713  ** @param begin_file The starting point in the structure.
01714  ** 
01715  ** @note This is a RECURSIVE routine. It creates appropriate entries for each
01716  ** of the children into the globe::index_info data structure, which later
01717  ** gets output to a file.
01718  **
01719  ** @lim Does a convoluted thing to get the path to be truly integrated
01720  ** into the system. Also, it creates a very large number of entries for BSP (4022)
01721  ** before any word-chunking might be considered.
01722  **
       ** @return Always 1.
       **
01723  ** @ingroup tp_nav
01724  **/
01725 // #############################################################################
01726 int handle_index_tokens  ( ) {  
01727 
01728    #define $begin_file  $_[0]
01729    #define $child
01730    #define $f
01731 
01732    if (0) {
01733 //       print "handle_index_tokens $begin_file\n$globe::org{$begin_file}{title}\n";
01734    }
01735    
01736    HONEY_CHILE: for ($f = 0; $f <= $// {$globe::org{$begin_file}{child}}; $f++){
            //  Keep only the part of the URL before the separator. "/\// /" is
            //  presumably the filter's rendering of /\#/ -- confirm against the source.
01737       $child = $globe::org{$begin_file}{child}[$f]{url};
01738       if ($child =~ /\// /){
01739          @_name = split (/\// /, $child, 2);
01740          $child = $_name[0];
01741       }
01742       if (0) {
01743 //          print "  child $globe::org{$begin_file}{child}[$f]{text}";
01744 //          print "  $globe::org{$begin_file}{child}[$f]{url}\n";
01745       }
01746       // ####
01747       //  Prefix the child URL with a purge path, then strip the "master" purge path.
01748       // ####
            //  The next line lacks "=" and ";" in this listing; presumably the filter
            //  mangled a local() assignment from the "local" purge path -- confirm.
01749       $path_temp  $globe::nav_purge_path{#define}
01750       if ($globe::org{$begin_file}{child}[$f]{url} =~ /^\//) {
01751          //  if the child begins with a slash, then remove the slash
01752          //  from the path temp before combining the two.
               //  NOTE(review): "=" rather than "=~" -- as written, s/// is applied to $_
               //  and its result is assigned to $path_temp, replacing the path.
01753          $path_temp = s/\/$//;
01754       }
01755       $path_temp .= $globe::org{$begin_file}{child}[$f]{url};
01756       ($okay, $strip_p) = &purge_full_path ($path_temp, $globe::nav_purge_path{"master"});
01757       
            //  Index entry: link text, the word/URL boundary marker, then an opening
            //  anchor tag targeting the "basefrm" frame.
01758       $n_temp = $globe::org{$begin_file}{child}[$f]{text};
01759 
01760       $n_temp .= $globe::word_url_boundary;
01761       $n_temp .= "<a href=\"";
01762       $n_temp .= $strip_p;
01763       $n_temp .= "\" target=\"basefrm\">";
01764 
01765       // ####
01766       //  This is an attempt at efficiency.
01767       //  It adds duplicates to the list, but they can be removed later,
01768       //  rather than constantly looping within this already expensive
01769       //  loop and recursive routine.
01770       // ####
01771       push (@globe::index_info, $n_temp);
01772       
            //  Recurse only into files not yet recorded in %globe_all_files.
01773       if (! (exists ($globe_all_files{$child}) )) {
01774          $globe_all_files{$child} = 1;
01775          $globe_file_cnt++;
01776          // ####
01777          //  Recursive Call
01778          // ####
01779          &handle_index_tokens ($child);
01780       }
01781       
01782    } //  for each child
01783    return (1);
01784    
01785 } //  handle_index_tokens   
01786 
01787 
01788 
01789 //#############################################################################
01790 /** @fn int html_file_update
01791  ** @brief Traces through the data structure, removes tagged sections, and
01792  ** inserts new tagged sections with navigation information and copyright
01793  ** stuff.
01794  ** 
01795  ** @param begin_file The starting point in the structure.
01796  ** 
01797  ** @note This is a RECURSIVE routine. It opens each HTML file and writes it
01798  ** back out.
01799  **
       ** @return 0 if the file cannot be opened for writing; 1 otherwise. The
       ** recursive calls' return values are not checked.
       **
01800  ** @ingroup tp_nav
01801  **/
01802 // #############################################################################
01803 int html_file_update  ( ) {  
01804    #define $begin_file  $_[0]
01805    #define $child
01806    #define $f
01807 
01808    if (1) {
01809 //       //  print "Updating HTML file $begin_file\n   $globe::org{$begin_file}{title}\n";
01810 //       print "Updating HTML file $begin_file\n";
01811    }
01812 
         //  A failed open is only recorded; execution continues past this point.
01813    unless (open ( HTML_FILE, "$begin_file")) {
01814        push (@file_errors, "Cannot open file \"$begin_file\"");
01815 //        exit(1);
01816    }
01817    
01818    // undef ($globe::entire_file);
01819    //  read in entire file
01820    while (<HTML_FILE>){
01821       $globe::entire_file .= $_;
01822    }
01823 //    close (HTML_FILE);
01824    
01825    //  Change the tags inside the file
01826    if (&change_nav){
01827       //  it assumes $globe::entire_file
01828       //  assume success for now
01829    }
01830 
01831    //  Write out entire file after changing the tags inside the file.
01832    unless (open ( HTML_FILE, ">$begin_file")) {
01833        push (@file_errors, "Cannot open file \"$begin_file\"");
01834        return (0);
01835    }
01836 //    print (HTML_FILE "$globe::entire_file");
01837 //    close (HTML_FILE);
01838    
01839    for ($f = 0; $f <= $// {$globe::org{$begin_file}{child}}; $f++){
            //  Keep only the part of the URL before the separator. "/\// /" is
            //  presumably the filter's rendering of /\#/ -- confirm against the source.
01840       $child = $globe::org{$begin_file}{child}[$f]{url};
01841       if ($child =~ /\// /){
01842          @_name = split (/\// /, $child, 2);
01843          $child = $_name[0];
01844       }
01845       if (0) {
01846 //          print "  lil chile $globe::org{$begin_file}{child}[$f]{text}";
01847 //          print "  $globe::org{$begin_file}{child}[$f]{url}\n";
01848       }
01849       
            //  Recurse only into files not yet recorded in %globe_all_files.
01850       if (! (exists ($globe_all_files{$child}) )) {
01851          $globe_all_files{$child} = 1;
01852          $globe_file_cnt++;
01853          // ####
01854          //  Recursive Call
01855          // ####
01856          &html_file_update ($child);
01857       }
01858    } //  for each child
01859 
01860    return (1);
01861    
01862 } //  html_file_update   
01863 
01864 //#############################################################################
01865 /** @fn int change_nav
01866  ** @brief Removes existing tags in the file and inserts new tags with the
01867  ** content from the master files.
01868  ** 
01869  ** @return This updates the globe::entire_file with new information.
       ** It returns 0 if the header chunk cannot be inserted or the file has no
       ** </body>; there is no explicit return statement on the normal path.
01870  ** 
01871  ** @note This is different from the change_nav in the voyant_nav.pl
01872  ** program.
01873  **
       ** @lim Takes no arguments. $begin_file, $in_file, $out_file and $rel_to_file
       ** are read here but not set here, so they must come from outside this routine.
       **
01874  ** @ingroup tp_nav
01875  **/
01876 // #############################################################################
01877 int change_nav  ( ) {  
01878    if (0){
01879 //       print "... changing the navigation.\n";
01880    }
01881    
01882    // ####
01883    //  Make sure our input HTML file is well formed
         //  (lower-cases the common spellings of the head, body and link tags)
01884    // ####
01885    $globe::entire_file =~ s/\<\/Head\>/\<\/head\>/g;
01886    $globe::entire_file =~ s/\<\/HEAD\>/\<\/head\>/g;
01887    $globe::entire_file =~ s/\<Head\>/\<head\>/g;
01888    $globe::entire_file =~ s/\<HEAD\>/\<head\>/g;
01889    $globe::entire_file =~ s/\<\/Body/\<\/body/g;
01890    $globe::entire_file =~ s/\<\/BODY/\<\/body/g;
01891    $globe::entire_file =~ s/\<Body/\<body/g;
01892    $globe::entire_file =~ s/\<BODY/\<body/g;
01893    $globe::entire_file =~ s/\<Link rel\=/\<link rel\=/g;
01894    $globe::entire_file =~ s/\<Link Rel\=/\<link rel\=/g;
01895    $globe::entire_file =~ s/\<LINK REL\=/\<link rel\=/g;
01896    $globe::entire_file =~ s/\<Link/\<link/g;
01897    $globe::entire_file =~ s/\<LINK/\<link/g;
01898    
01899    
01900    //  Remove any style sheets that might be hanging out.
01901    //  get rid of any old tags.
01902 
         //  NOTE(review): if get_tag_chunk returns a false $piece while "<link" is
         //  still present, the text is unchanged and this loop would repeat --
         //  confirm get_tag_chunk's contract.
01903    while ($globe::entire_file =~ /\<link/){
01904       ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file,
01905                               "<link", ">", 0, 1);
01906       if ($piece) {
01907          $globe::entire_file = join ("", $before, $after);
01908       }
01909    }
01910    while ($globe::entire_file =~ /\<body/){  
01911       ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file,
01912                               "<body", ">", 0, 1);
01913       if (1) {
01914          $globe::entire_file = join ("", $before, $after);
01915       }
01916    }
01917    if ($globe::entire_file =~ /\<\/head>/){  //  insert a <body> tag
01918       ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file,
01919                               "</head", ">", 0, 1);
01920       if (1) {
01921          $globe::entire_file = join ("", $before, "</head><body>",$after);
01922       }
01923    }
01924    
01925    
01926    //  Remove any style sheets that might be hanging out.
01927    //  get rid of any old tags.
01928    &remove_voyant_tags;
01929 
01930    
01931    if (0) {
01932 //       print "Finishing up here... premature.\n";
01933 //       //  print $globe::entire_file;
01934 //       print (OUT_HTML "$globe::entire_file");
01935 //       close (OUT_HTML);
01936       system ("cp $out_file $in_file");
01937       return (0);
01938    }
01939 
01940    
01941    // #################
01942    //  voyant header needs to go before </head>
01943    // #################
01944    $not_critical = 1;
01945    $reinsert_tags = 0;
01946    $def_type = "header";
01947    if (! &file_chunk_change ($def_type, $not_critical, "</head>", 0, $reinsert_tags, $in_file) ) {
01948       //  Could not put proper chunk in; write out what we have and abort.
01949 //       //  print (OUT_HTML "$globe::entire_file");
01950 //       close (OUT_HTML);
01951       if (0){ //  debug
01952 //           print "glenn3\n$globe::entire_file";
01953 //           exit(1);
01954       } //  debug
01955       return(0);
01956    } //  voyant_header
01957 
01958    if (0){ //  debug
01959 //       print "glenn4\n$globe::entire_file";
01960 //       print "oh $def_type $globe::m_info{$def_type}\noh again\n";
01961 //       exit(1);
01962    } //  debug
01963  
01964 
01965    // #################
01966    //  voyant footer needs to go before </body>
01967    // #################
01968    $not_critical = 1;
01969    $reinsert_tags = 0;
01970    $def_type = "footer";
01971    if ($globe::entire_file =~ /\<\/body\>/) { 
01972       &file_chunk_change ($def_type, $not_critical, "<\/body>", 0, $reinsert_tags, $in_file);
01973       //  The result is not checked: a failed footer insertion does not abort.
01974    } else {
01975       //  Does not have proper body
01976       push (@file_errors, "ERROR: Does not have </body> in $in_file.");
01977       return (0);
01978    } //  voyant footer
01979 
01980 
01981    // #################
01982    //  voyant common top navigation bar should go after <body>
01983    //  Do AFTER FM or Doxygen navigation
01984    // #################
01985    $not_critical = 0;
01986    $reinsert_tags = 0;
01987    $def_type = "nav_common";
01988 
01989    if (! &file_chunk_change ($def_type, $not_critical, "<body>", 1, $reinsert_tags, $in_file) ) {
01990       //  Could not put proper chunk in; 
01991       //  Not necessarily a problem.
01992    } //  Common Navigation
01993 
01994    if (0){ //  debug
01995 //       print "glenn4\n$globe::entire_file";
01996 //       print "oh $def_type $globe::m_info{$def_type}\noh again\n";
01997 //       exit(1);
01998    } //  debug
01999 
02000    
02001    
02002    // #################
02003    //  Handle the PDF file names
02004    // #################
02005    $temp = "$globe::rel_path_to_start_point$globe::pdf_dir$globe::master_order_pdf[$globe::master_order_key]";
02006    $globe::entire_file =~ s/$globe::this_pdf_flag/$temp/g;
02007 
02008    // #################
02009    //  Handle the relative path
02010    //  glenn
02011    // #################
02012    //  $globe::rel_path_to_start_point_def = "../";
02013    //  $globe::rel_path_to_start_point = "./";
02014    if (0) {
02015 //       print "getting rid of path $begin_file\n";
02016    }
         //  purge_full_path returns a two-element list; assigned to a scalar, the
         //  call yields the last element, i.e. the (possibly purged) file name.
02017    $_t_path = &purge_full_path ($begin_file, $globe::nav_purge_path{master});
02018    @_a_path = split (/\//, $_t_path);
02019    pop (@_a_path); //  because last element is file name
         //  One "../" per remaining path component. "$// _a_path" is presumably the
         //  filter's rendering of "$#_a_path" -- confirm against the Perl source.
02020    $globe::rel_path_to_start_point = "";
02021    for ($t=0; $t<=$// _a_path; $t++) {
02022       $globe::rel_path_to_start_point .= "..\/";
02023    }
02024    // undef (@_a_path);
02025    // undef ($_t_path);
02026    if (0) {
02027 //       print "new $_t_path\n$globe::rel_path_to_start_point\n";
02028    }
02029 
02030    $globe::entire_file =~ s/$globe::rel_path_to_start_point_def/$globe::rel_path_to_start_point/g;
02031    
02032    // #################
02033    //  Handle the manual or group names
         //  (whitespace runs in the title become "&nbsp;" before substitution)
02034    // #################
02035    $temp = $globe::master_order_title[$globe::master_order_key];
02036    $temp =~ s/[\s]+/\&nbsp\;/g;
02037    
02038    if ($globe::entire_file =~ /$globe::xmanual/){
02039       $globe::entire_file =~ s/$globe::xmanual/$temp/g;
02040    } 
02041    if ($globe::entire_file =~ /$globe::xgroup/){
02042       $globe::entire_file =~ s/$globe::xgroup/$temp/g;
02043    }
02044    $globe::entire_file =~ s/$globe::fix_path_flag/$globe::fix_path_to/g;
02045    
02046    // ####
02047    //  Take care of current file name which might be in header,
02048    //  as specified in template and needed in tree applet.
02049    // ####
02050    if ($globe::entire_file =~ /$globe::m_var{define}{curr2}/){
02051       $globe::entire_file =~ s/$globe::m_var{define}{curr2}/$rel_to_file/g;
02052    } 
02053    
02054 
02055    // #################
02056    //  Handle the document number
         //  (no /g flag: only the first occurrence is replaced)
02057    // #################
02058    if ($globe::entire_file =~ /$globe::voy_variable_doc_num/){
02059       $globe::entire_file =~ s/$globe::voy_variable_doc_num/$globe::m_info{_array_order}[$globe::master_order_key]{num}/;
02060    } 
02061    
02062    
02063    // ################################
02064    //  Remove certain specified HTML tags
         //  Each @globe::voy_html_zap entry is a comma-separated list of up to three
         //  optionally double-quoted patterns: start tag part A, start tag part B,
         //  and optionally an ending tag.
02065    // ################################
02066    if ((@globe::voy_html_zap < 1) || ($globe::voy_html_zap[0] =~ /none/)){
02067       //  Do nothing
02068 //       //  print "$in_file has no tags to zap.\n";
02069    } else {
02070       //  Remove the tags in question
02071       for ($i=0; $i<@globe::voy_html_zap; $i++){
02072          //  Determine start and Stop tags
02073          @zap_tag = split ( /\,/, $globe::voy_html_zap[$i]);
02074          //  clean up tag
02075          for ($j=0; $j <@zap_tag; $j++){
02076             $zap_tag[$j] =~ s/^[\s]*\"//;
02077             $zap_tag[$j] =~ s/\"[\s]*$//;
02078 //             //  print "Zap this =$zap_tag[$j]\n";
02079          }
02080          $t_cnt=0;
               //  NOTE(review): this test ignores case but the split below does not. If
               //  the tag is present only in a different case, the split finds nothing,
               //  the text is unchanged, and this loop would not terminate.
02081          while ($globe::entire_file =~ /$zap_tag[0]/i){  //  case insensitive test
02082              $t_cnt++;
02083 //              print "$t_cnt: Removing everything between \"$zap_tag[0]\" and \"$zap_tag[1]\"\n";
02084              //  First tag part A
02085              @pre_chunks = split ( /$zap_tag[0]/, $globe::entire_file, 2);
02086              //  First tag part B
02087              @post_chunks = split ( /$zap_tag[1]/, $pre_chunks[1], 2);
02088              if (@zap_tag > 2){
02089                 //  Second tag
02090                 //  Hit the first occurrence of the ending tag
02091                 $post_chunks[1] =~ s/$zap_tag[2]//i; //  do it only once, case insensitive
02092              }
02093              //  rebuild
02094              $globe::entire_file = join ("", $pre_chunks[0], $post_chunks[1]);
02095          } //  while it still is infested with the tag
02096       } //  for all zap tags
02097 //       //  print "$in_file was tested for tags to zap.\n";
02098    } //  if zap tags to remove
02099    // undef(@pre_chunks);
02100    // undef(@post_chunks);
02101    // ################################
02102 
02103 } //  change_nav
02104 
02105 
02106 
02107 //#############################################################################
02108 /** @fn int remove_voyant_tags
02109  ** @brief Strips any existing header, nav_book, nav_common and footer chunks
02110  ** from $globe::entire_file, using the start/end delimiters stored in
02111  ** $globe::m_define{type}[0] and $globe::m_define{type}[1].
       **
       ** Takes no arguments and has no explicit return statement.
       **
       ** @lim NOTE(review): each loop changes the text only when get_tag_chunk
       ** returns a true $piece; if it returns a false one while the start
       ** delimiter still matches, the loop would repeat -- confirm its contract.
02112  **/
02113 // #############################################################################
02114 int remove_voyant_tags  ( ) {
02115 
02116    //  Remove any style sheets that might be hanging out.
02117    //  get rid of any old tags.
         //  The start delimiter is used directly as the match pattern in each loop test.
02118    while ($globe::entire_file =~ $globe::m_define{header}[0]){
02119       ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file,
02120                               $globe::m_define{header}[0], 
02121                               $globe::m_define{header}[1],
02122                               0, 1);
02123       if ($piece) {
02124          $globe::entire_file = join ("", $before, $after);
02125       }
02126    }
02127    while ($globe::entire_file =~ $globe::m_define{nav_book}[0]){
02128       ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file,
02129                               $globe::m_define{nav_book}[0], 
02130                               $globe::m_define{nav_book}[1],
02131                               0, 1);
02132       if ($piece) {
02133          $globe::entire_file = join ("", $before, $after);
02134       }
02135    }
02136    while ($globe::entire_file =~ $globe::m_define{nav_common}[0]){
02137       ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file,
02138                               $globe::m_define{nav_common}[0], 
02139                               $globe::m_define{nav_common}[1],
02140                               0, 1);
02141       if ($piece) {
02142          $globe::entire_file = join ("", $before, $after);
02143       }
02144    }
02145    while ($globe::entire_file =~ $globe::m_define{footer}[0]){
02146       ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file,
02147                               $globe::m_define{footer}[0], 
02148                               $globe::m_define{footer}[1],
02149                               0, 1);
02150       if ($piece) {
02151          $globe::entire_file = join ("", $before, $after);
02152       }
02153    }
02154 
02155 
02156 
02157 } //  remove_voyant_tags
02158 
02159 
02160 
02161 
02162 //#############################################################################
02163 /** @fn int file_chunk_change
02164  ** @brief Replaces the information for the given chunk. (Not complete).
02165  ** 
       ** If both delimiters of the chunk are already in the file, the chunk is
       ** replaced through globe::replace_tag_chunk; otherwise the new content is
       ** meant to be placed next to the first match of _where_flag.
       **
02166  ** @param _type_define The key into the m_define and m_info hashes.
02167  ** @param _criticality specifies whether or not to record error messages.
02168  ** @param _where_flag The tag where this is supposed to go.
02169  ** @param _after if 0, set before, otherwise after
       ** @param _insert_tags Passed through unchanged to globe::replace_tag_chunk.
02170  ** @param _in_file Name of file being worked on.
02171  **
02172  ** @retval success 0 if neither the chunk delimiters nor _where_flag are found
       ** in the file; 1 otherwise, even when replace_tag_chunk reports a failure.
02173  ** 
02174  ** @lim Uses the $globe::entire_file
       ** NOTE(review): the two "$globe::entire_file = sprintf(" lines appear
       ** commented out in this listing, presumably by the input filter that
       ** comments out print statements -- confirm against the Perl source.
02175  ** @ingroup tp_toc
02176  **/
02177 // #############################################################################
02178 int file_chunk_change  ( ) {
02179    $_type_define = $_[0];
02180    $_criticality = $_[1];
02181    $_where_flag = $_[2];
02182    $_after = $_[3];
02183    $_insert_tags = $_[4];  //  pass through
02184    $_in_file = $_[5];
02185 
02186    if (0) {
02187 //       print "file_chunk_change $_type_define $globe::m_define{$_type_define}[0]\n$_in_file\n";
02188    }
02189    if (( $globe::entire_file =~ /$globe::m_define{$_type_define}[1]/ ) && ( $globe::entire_file =~ /$globe::m_define{$_type_define}[0]/ ) ) {
02190       ($okay, $globe::entire_file) = &globe::replace_tag_chunk ( $globe::entire_file,
02191                 $globe::m_define{$_type_define}[0], 
02192                 $globe::m_define{$_type_define}[1], 
02193                 $globe::m_info{$_type_define}, $_insert_tags, $_criticality);
02194       if (!$okay) {
               //  NOTE(review): this message uses $in_file, not the $_in_file argument.
02195          push (@file_errors, "$_type_define definition messed up in $in_file.");
02196       }
02197       if (0){
02198 //          print "file_chunk_change ok=$okay\n";
02199       }
02200 
02201    } else {
02202       //  Figure out where the $_where_flag should go
            //  ($_where_flag is used as a pattern; the split is at its first match)
02203       if ( $globe::entire_file =~ /$_where_flag/ ) {
02204          @pre_chunks = split ( /$_where_flag/, $globe::entire_file, 2);
02205          if ($_after) {
02206 //             $globe::entire_file = sprintf("%s%s%s%s", 
02207                $pre_chunks[0], 
02208                $_where_flag,
02209                $globe::m_info{$_type_define}, 
02210                $pre_chunks[1]);
02211          } else {
02212 //             $globe::entire_file = sprintf("%s%s%s%s", 
02213                $pre_chunks[0], 
02214                $globe::m_info{$_type_define}, 
02215                $_where_flag,
02216                $pre_chunks[1]);
02217          }
02218       } else {
02219          push (@file_errors, "WARNING: No $_where_flag defined in $_in_file.");
02220          //  Do only if no head/body tags
02221 //          print "WARNING: No $_where_flag defined.\n";
02222          return (0);
02223       } //  figuring out where it should go
02224    }
02225    return (1);
02226 
02227 } //  file_chunk_change
02228 
02229 
02230 
02231 
02232 
02233 
02234 //#############################################################################
02235 /** @fn int html_exist
02236  ** @brief Tests that the URL to a file exists.
02237  **
02238  ** @param test_file File to test to see if it exists.
02239  ** 
02240  ** Discovered that doxygen's installdox does not update the tree.js files,
02241  ** which leads to entries in the TOC that 404. Test it and return an 
02242  ** appropriate response.
02243  **
       ** @return 1 if "$globe::path$test_file" can be opened, 0 if it cannot.
       **
02244  ** @ingroup tp_nav
02245  **/
02246 // #############################################################################
02247 int html_exist  ( ) {
02248    //  Test the URL to make sure the file even exists.
02249    $test_file = $_[0];
02250 
         //  The file counts as existing if it can be opened under $globe::path.
02251    unless (open(TEST, "$globe::path$test_file")) {
02252       if (0){
02253 //          print "$globe::path$test_file doesn't exist.\n";
02254       }
02255       return(0);
02256    }
02257 //    close (TEST);
02258    return (1);
02259 
02260 
02261 } //  html_exist
02262 
02263 
02264 //#############################################################################
02265 /** @fn int END
02266  ** @brief Wrap-up code: reports errors and calls the memory clean-up routines.
       ** Presumably this is the script's Perl END block, which runs as the
       ** program exits -- confirm against the unfiltered source.
02267  **
02268  ** @param None. 
02269  **
02270  ** @return None.
02271  **
02272  ** @lim None
02273  ** @ingroup tp_nav
02274  **/
02275 // #############################################################################
02276 int END  ( ) {
02277    &globe::do_errors;
02278    // undef ($_file_list);     //  "_file_list";
02279    // undef ($_index_file); //  = "_index_list";
02280    // undef ($in_file);     //  "";
02281    // undef ($f_type);     //  "htm";
02282 
02283    
02284    // #############################################################################
02285    // # Memory clean-up.
         //  Runs only when $no_scope_file is greater than 0.
02286    // #############################################################################
02287    if ($no_scope_file > 0){
02288       &xscope::memory_clean_up();
02289       &globe::memory_clean_up();
02290    }
02291 
         //  Summarize everything the routines in this file pushed onto @file_errors.
02292    if (@file_errors) {
02293 //       print "\n============  Summary of errors =================================b\n";
02294       for ($i=0; $i<@file_errors; $i++){
02295 //          print "$i = $file_errors[$i]\n";
02296       }
02297    }
02298 
02299 //    print "\n============  Finished html_look_integrate.pl =================================\n";
02300 } //  END
02301 
02302 


 "Perl Program Reference" 
 < Previous 
 Next > 


Open-Source tools compliments of Voyant Technologies, Inc. and Glenn C. Maxey.
01/13/2003

TP Tools v2-00-0a

# tpt-perl-hcr-02