|
|
|
|
|
#!/usr/local/bin/perl
package gen_nav;

##############################################################################
## @file
## @brief Looks into HTML files that have been provided by other sources
## and parses them for useful information.
##
## It extracts the information needed for entries in an index
## and table of contents.
##
## It starts at the files listed in @xscope::top_files array. For every
## HTML file there, it finds their hyperlinks. It successively traces through
## the hyperlinks creating a data structure. This is the data structure used
## in the TOC. The hyperlinks that it traces are also used directly in the
## table of contents.
##
## Certain files can be excluded from creating children or tracing further.
## All top files in particular are added to this list so that it does not
## loop continuously through things it already knows, or more realistically,
## so that it doesn't build a top-levels data structure under some other
## top-level data.
##
## This has several routines (spider_trace, index_token_generation,
## script_generation etc.) that call themselves recursively. The key to stop
## the recursion is when the owning files for the children (hyperlinks) have
## already been visited.
##
## @ingroup tp_tools tp_nav
##
## @author Glenn C. Maxey
##
## NOTE(review): this section was recovered from a Doxygen source listing in
## which an input filter had rewritten the Perl to look like C++.  The
## substitutions named in that listing's banner were reversed here:
##   - "#define ..."            back to  local (...) = ...;
##   - "int name ( ) {"         back to  sub name {
##   - "//" comments, "$//"     back to  "#" comments, "$#"
##   - commented-out print / exit / close / undef statements re-enabled
##     (lines the author had commented out himself stay commented)
##   - globe_all_files / globe_file_cnt  back to  globe::all_files /
##     globe::file_cnt (the banner says $globe'... became $globe_...)
## Confirm against the original html_look_integrate.pl before relying on it.
##
## Variables are deliberately package globals / `local`: sibling routines in
## this package (spider_trace, remove_voyant_tags, ...) are not visible from
## this section and may read them through dynamic scope.  Calls keep the
## file's &name(...) convention for the same reason.
##############################################################################
# $Id: html_look_integrate.pl,v 1.5 2002/11/20 15:23:02 gmaxe Exp $
##
## 2002 Created by Voyant Technologies, Inc., Westminster, Colorado, USA.
##
## Permission to use, copy, modify, and distribute this software and its
## documentation under the terms of the GNU General Public License is hereby
## granted. No representations are made about the suitability of this software
## for any purpose. It is provided "as is" without express or implied warranty.
## See the GNU General Public License (http://www.gnu.org/copyleft/gpl.html)
## for more details.
##
## Documents produced by this script are derivative works derived from the
## input used in their production; they are not affected by this license.
##
## Revision Information:
##
## $Log: html_look_integrate.pl,v $
## Revision 1.5 2002/11/20 15:23:02 gmaxe
## Added exit codes so that wrapper scripts can catch errors properly.
##
## Revision 1.4 2002/07/26 18:56:10 gmaxe
## Got rid of old definitions and migrated new structures into all;
## enhanced xhelp output file names and format; now everything is
## alphebetized.
##
## Revision 1.3 2002/04/12 18:01:13 gmaxe
## Various tweaks; work on tag extraction and change_nav;
## more exclused special perl characters from indexer;
## improved support for sapi group pm files in xhelp.
## spider tool now works regularly and as expected.
##
## Revision 1.2 2002/04/10 00:35:27 gmaxe
## Tracing tool now works;
## indexer ignores more special characters;
## pm file is the first version; contains the proper data structures
## to handle bsp/vxworks
##
## Revision 1.1 2002/04/06 01:55:10 gmaxe
## New files and new general constructs for handling tags. Uses globals better.
## The html_look* files are designed to handle spider tracing of html systems.
##
##############################################################################


##############################################################################
## @fn int BEGIN
## @brief Code to execute when first entered.
##
## Loads the global definitions (globe.pm, from the current directory or
## ../perl), then consumes the command line:
##   - $ARGV[0]  scope pm file (required); its xscope::declare_variables
##               is called after it is loaded.
##   - $ARGV[1]  master navigation file (required); opened as IN_MASTER and
##               read later by the main program.
##   - $ARGV[2]  optional starting HTML file, pushed onto @globe::top_files.
## Problems are collected in @file_errors; the main program reports them
## and exits.
##
## @param None.
##
## @return None.
##
## @lim None
## @ingroup tp_nav
##############################################################################
BEGIN {
    print "\n============ Starting html_look_integrate.pl ==================================\n";
    $_file_list  = "_file_list";
    $_toc_file   = "tree.html";
    $_index_file = "_index_list";
    $_arg_inc    = 0;

    $no_scope_file = 0;
    $scope_pm      = "globe.pm";   # first time through; other scope stuff passed in.

    $in_file = "tree.js";          # default

    # @INC entries must be directories.  The old code pushed the raw output
    # of `pwd` (which still carried its trailing newline, so it never named a
    # real directory) and later pushed the module *file* paths, which cannot
    # help require() either.  Use the real working directory instead.
    require Cwd;
    push (@INC, Cwd::getcwd());
    push (@INC, '../perl');
    if (0) { print (@INC, "\n"); }

    ####
    # All global variables are defined in the following file.
    # Probe that it is readable here or in ../perl before requiring it.
    ####
    my $probe;
    if (open ($probe, '<', $scope_pm) || open ($probe, '<', "../perl/$scope_pm")) {
        close ($probe);
    } else {
        push (@file_errors, "Cannot open file \"$scope_pm\" or \"../perl/$scope_pm\"\n");
        $no_scope_file++;
    }

    if (!@file_errors) {
        require $scope_pm;

        if (&globe::declare_variables()) {
            print "Variables initialized from $scope_pm.\n";
        } else {
            push (@file_errors, "Could not initialize variables from $scope_pm.\n");
        }
    } # if not @file_errors

    # Get scope file if there is one.
    if (@ARGV > $_arg_inc) {
        $scope_pm = $ARGV[$_arg_inc];

        if (open ($probe, '<', $scope_pm)) {
            close ($probe);
        } else {
            push (@file_errors, "Cannot open file \"$scope_pm\"\n");
            $no_scope_file++;
        }

        if (!@file_errors) {
            require $scope_pm;

            if (&xscope::declare_variables()) {
                print "Variables initialized from $scope_pm.\n";
            } else {
                push (@file_errors, "Could not initialize variables from $scope_pm.\n");
            }
        } # if not @file_errors
    } else {
        push (@file_errors, "ERROR: Need to provide a scope file as first argument.");
    } # if 1 or more arguments
    $_arg_inc++;

    # Get path to code files
    if (0) { # opt out
        if (@ARGV > $_arg_inc) {
            $root_path   = $ARGV[$_arg_inc];
            $globe::path = $ARGV[$_arg_inc];
            if ($root_path =~ /\/$/) {
                print "The path specified is $root_path\n";
            } else {
                push (@file_errors, "The input argument \"$root_path\" requires a forward slash (\/) at the end.\n");
            }
        } else {
            push (@file_errors, "ERROR: root path is required.");
        } # if 1 or more arguments
        $_arg_inc++;
    } # opt out

    # Master navigation file.  IN_MASTER stays a package file handle because
    # the main program below reads from it.
    if (@ARGV > $_arg_inc) {
        $in_file                = $ARGV[$_arg_inc];
        $globe::master_nav_file = $ARGV[$_arg_inc];
        print "The master file is $in_file.\n";
        unless (open (IN_MASTER, '<', $in_file)) {
            push (@file_errors, "Cannot open file \"$in_file\"\n");
        }
    } else {
        push (@file_errors, "ERROR: master HTML file is required.");
    } # if 2 or more arguments
    $_arg_inc++;

    # Optional: a fully qualified path of where to start.
    # NOTE(review): this fills @globe::top_files while the main program walks
    # @xscope::top_files; how the two relate is not visible here -- confirm.
    if (@ARGV > $_arg_inc) {
        undef (@globe::top_files);
        push (@globe::top_files, $ARGV[$_arg_inc]);
    } # if 3 or more arguments
    $_arg_inc++;

    if (0) { # opt out
        if (@ARGV > $_arg_inc) {
            $gen_proto = $ARGV[$_arg_inc];
            if ($gen_proto !~ /\//) {
                # if the output file does not have a path, then
                # put it in the root location
                $gen_list  = "_$gen_proto";
                $gen_class = "c_$gen_proto";
                $gen_proto = "$root_path$gen_proto";
                $gen_list  = "$root_path$gen_list";
                $gen_class = "$root_path$gen_class";
            } else {
                @chunk = split (/\//, $gen_proto);
                $chunk[$#chunk] = "_$chunk[$#chunk]";
                $gen_list = join ("\/", @chunk);
                @chunk = split (/\//, $gen_proto);
                $chunk[$#chunk] = "c_$chunk[$#chunk]";
                $gen_class = join ("\/", @chunk);
            }
            print "The output files are $gen_proto and $gen_list\n";
        } else {
            push (@file_errors, "Need to have the output prototype file.\n");
        } # if 3 or more arguments
        $_arg_inc++;
    } # opt out
} # BEGIN

##############################################################################
## @fn int main
## @brief The main program.
##
## Runs as a bare block at load time:
##   1. reports argument errors collected by BEGIN and exits 1;
##   2. reads the master file and asks globe::get_master_nav_info to parse it;
##   3. spider-traces every file in @xscope::top_files;
##   4. writes the per-top-file and "start here" structure scripts;
##   5. collects, sorts, de-duplicates and writes the index tokens;
##   6. updates navigation/copyright in the individual HTML files.
##
## @param None.
##
## @return None (exits 0 on completion, 1 on fatal errors).
##
## @lim None
## @ingroup tp_nav
##############################################################################
# sub main {
{
    if (0) { print "=== Definitions 3 \n"; exit(1); }

    if (@file_errors) {
        # Makes no sense to go on if input parameters are off.
        print "\n============ Summary of errors =================================a\n";
        foreach my $n (0 .. $#file_errors) {
            print "$n = $file_errors[$n]\n";
        }
        &using_voy_nav();
        exit(1);
    }

    ####
    # MASTER FILE DEFINITIONS
    # Get the master definitions
    ####
    while (<IN_MASTER>) {   # entire master file into memory.
        $globe::master_nav .= $_;
    }
    close (IN_MASTER);
    &globe::get_master_nav_info;
    if (!exists ($globe::m_info{order})) {
        if (1) {
            # NOTE(review): BEGIN stores the name in $globe::master_nav_file;
            # $globe::master_tree_file is not set anywhere in this section.
            my $problem = "ERROR: The master file \"$globe::master_tree_file\" does not have the ordering.";
            push (@file_errors, $problem);
            # The message used to be queued and then lost at exit; show it.
            warn "$problem\n";
            print "Ordering begins with \"$globe::m_define{order}[0]\" \nand ends with \"$globe::m_define{order}[1]\"\n";
            print "Inbetween on a line by themselves is the directory doc_publish subdirectory name.\n";
        }
        exit(1);
    } else {
        if (0) {
            print "Got to here...\n$globe::m_info{order}\n======\n";
            for ($i = 0; $i <= $#{$globe::m_info{_array_order}}; $i++) {
                foreach $k (keys %{$globe::m_info{_array_order}[$i]}) {
                    print "$i $k $globe::m_info{_array_order}[$i]{$k}\n";
                }
            }
            # exit(1);
        }
    }

    ####
    # This is the main loop that does a spider trace through the documentation.
    ####
    if (1) { # Turn on/off spider default 1 -- mandatory
        # $in_file is left a package variable on purpose (see file header).
        foreach $in_file (@xscope::top_files) {
            @_name = split (/\//, $in_file);
            pop (@_name);   # This is the filename, which we don't want.
            $globe::org{$in_file}{path} = join ("\/", @_name, "");
            if (0) { print "Starting file = $in_file\n"; }
            if (0) { print "root path is $globe::org{$in_file}{path}\n"; }

            ####
            # Read in file and get its child hyperlinks
            ####
            if (&get_input_file ($in_file)) {
                # Level is 2 for get_hyperlinks, because 1 is already the main guy.
                # when using nested scripts, main guy is already handled;
                # start numbering with 1
                if (!&get_hyperlinks ($in_file, 1)) {
                    # bad; did not succeed getting hyperlinks from file
                    push (@file_errors, "No hyperlinks found in $in_file.");
                }
            } else {
                # Turned off because trapped elsewhere
                if (0) { push (@file_errors, "Could not find \"$in_file\". :)"); }
            } # get_input_file

            if (0) { print "Title is \"$globe::org{$in_file}{title}\"\n"; }
            if (1) {
                print "Starting spider trace...\n";
                &spider_trace ($in_file, 1);   # base level is 1, the starting point.
            }
            if (0) { print "\nEnding spider trace...\n"; }
        } # for each top_file
    } # Turn on/off spider

    undef (%globe::all_files);
    $globe::file_cnt = 0;

    ####
    # Test routines that are recursively called.
    # Turned off, but when on can help debug what is being collected.
    # leave this off
    ####
    if (0) { # debug if default 0
        local ($globe::_last_level) = 1;
        # local ($globe::_last_level) = 0;
        print "Testing the structure...\n";
        print "Start last level = $globe::_last_level\n";
        &testing_structure ($xscope::top_files[0]);
        $globe::_last_level = 1;
        # $globe::_last_level = 0;
        print "======\nSecond time through...\n";
        print "Start last level = $globe::_last_level\n";
        &testing_structure ($xscope::top_files[0]);
        if (0) {
            foreach $in_file (sort keys %globe::all_files) {
                print "input $in_file\n";
            }
        }
        exit(1);
    } # debug if

    ####
    # Used to see what's in the data structures.
    # Leave this off
    ####
    if (0) { # debug if default 0
        foreach $in_file (sort keys %globe::org) {
            print "file $in_file\n";
            print " g title $globe::org{$in_file}{title}\n";
            if (1) {
                print " g title $globe::org{$in_file}{title}\n";
                print " g path $globe::org{$in_file}{path}\n";
            }
            if (0) {
                foreach $k (sort keys %{$globe::org{$in_file}}) {
                    print " $k \$globe::org{\$in_file}{$k} = $globe::org{$in_file}{$k}\n";
                }
            }
        }
    } # debug if

    undef (%globe::all_files);
    $globe::file_cnt = 0;

    ####
    # Creates the output scripts for each top file.
    ####
    if (1) { # turn on/off default 1
        foreach $in_file (@xscope::top_files) {
            ##
            # Create a file name from the title.
            # Replace characters.
            ##
            $tree = $globe::org{$in_file}{title};
            $tree =~ s/[\s+]/_/g;
            $tree =~ s/[\'\"\`\~\!\@\#\$\%\^\&\*\(\)\=\+\{\}\[\]\?\/\\\,\.\?><\|]/_/g;
            $tree = "tree_$tree";
            $globe::org{$in_file}{script_local} = $tree;
            $m_tree = "m_$tree";
            $globe::org{$in_file}{script_master} = $m_tree;
            $tree   = "$globe::path$tree";
            $m_tree = "$globe::path$m_tree";
            if (0) {
                print "Old title \"$globe::org{$in_file}{title}\"\n new \"$tree\"\n\nmaster \"$m_tree\"\n";
            }

            # $tree = "$globe::path";
            # $tree .= "tree_$q";
            if (0 && (&output_structure_script ($tree, $in_file, "local"))) {
                # assuming successful
            }
            if ((1) && (&output_structure_script ($m_tree, $in_file, "master"))) {
                # assuming successful
            }
        } # foreach $in_file
    } # turn on/off

    ##################
    # special case a script file for the Top Most master file.
    ##################
    if (1) { # turn on/off
        $tree  = $globe::path;
        $tree .= "tree_start_here";
        if (0 && (&starting_point_script ($tree, "script_local", "local"))) {
        } # starting_point_script
        $tree  = $globe::path;
        $tree .= "m_tree_start_here";
        if ((1) && (&starting_point_script ($tree, "script_master", "master"))) {
        } # starting_point_script
    } # turn on/off

    undef (%globe::all_files);
    $globe::file_cnt = 0;

    ##################
    # Take care of index tokens
    ##################
    if (1) { # turn on/off default 1
        print "Generating index tokens $_index_file...\n";
        $_in_index_file = "$globe::path$_index_file";

        $q = 0;
        foreach $in_file (@xscope::top_files) {
            if (&handle_index_tokens ($in_file)) {
                # assuming successful
            }
            $q++;
        } # foreach $in_file

        if (@globe::index_info) {
            print "Initial index token count: ", scalar (@globe::index_info), "\n";

            # 4/8/2002 Get rid of duplicate entries.
            # The list is sorted first, so equal tokens are adjacent and it is
            # enough to compare each one with the token kept just before it.
            # (The earlier loop compared everything against a single remembered
            # entry that was never advanced, so most duplicates survived.)
            my @unique_tokens;
            my $previous_token;
            foreach my $token (sort (@globe::index_info)) {
                next if (defined ($previous_token) && $token eq $previous_token);
                push (@unique_tokens, $token);
                $previous_token = $token;
            }
            @globe::index_info = @unique_tokens;
            print "Final index token count: ", scalar (@globe::index_info), "\n";

            my $out_index;
            unless (open ($out_index, '>', $_in_index_file)) {
                push (@file_errors, "Cannot open file \"$_in_index_file\"");
                warn "Cannot open file \"$_in_index_file\": $!\n";
                exit(1);
            }
            # 4/5/2002 one way of doing index tokens: one token per line.
            foreach my $token (@globe::index_info) {
                print {$out_index} "$token\n";
            }
            close ($out_index);
        }
    } # turn on/off

    ##################
    # Take care of navigation and copyright in the HTML file
    ##################
    if (1) { # turn on/off default 1
        print "Updating the individual HTML files...\n";
        undef (%globe::all_files);
        $globe::file_cnt = 0;

        $q = 0;
        foreach $in_file (@xscope::top_files) {
            if (&html_file_update ($in_file)) {
                # assuming successful
            } else {
                # happens when you can't write the file out
            }
            $q++;
        } # foreach $in_file
    } # turn on/off

    if (0) { print "got this far...\n"; exit(0); }

    ##########################################################################
    # End of Program
    ##########################################################################
    exit(0);
} # the main routine

##############################################################################
## @fn int using_voy_nav ( )
## @brief What to do when no arguments are given.
##
## Prints the usage summary to standard output.
##
## @param None
## @return None
##
## @lim None
## @ingroup tp_nav
##############################################################################
sub using_voy_nav {
    print "\nhtml_look_integrate.pl operates on text files that indicate heirarchical \n";
    print "structure of the system. Needed arguments:\n";
    print "- appropriate scope pm; required. \n";
    print "- name of master file. \n";
    print "- [optional] fully qualified path and name of starting HTML file for spider trace. \n";
    print " If not here, the starting HTML needs to be specified in the scope file. \n";
    # print "- output toc script file; optional. \n";

    return;
}


##############################################################################
## @fn int get_input_file
## @brief Grabs the information from the input file and puts into an array
## of hash elements.
##
## @param file_incoming This is the file to open.
##
## @return 0 when the file cannot be opened (an entry is added to
## @file_errors), otherwise 1.
##
## Reads the file into $globe::entire_file for later processing to find
## anchors.  While it's here, it tries to find the title for the file for
## later display and file naming purposes: the text of the first <h1 ...>
## element with markup removed, otherwise the <title> text.  The result is
## stored in $globe::org{file}{title}.  Finally remove_voyant_tags is run on
## the buffer.
##
## Also sets the package globals $not_critical and $case_in, which
## get_hyperlinks passes on to globe::get_tag_chunk.
##
## @ingroup tp_nav
##############################################################################
sub get_input_file {
    local ($file_incoming) = $_[0];
    undef ($globe::entire_file);

    if (0) { print "get_input_file with $file_incoming.\n"; }

    # Three-argument open: the name comes from hyperlinks found in HTML, so it
    # must never be interpreted as an open mode or a pipe.
    my $incoming;
    unless (open ($incoming, '<', $file_incoming)) {
        push (@file_errors, "Cannot open file \"$file_incoming\" <<<");
        return (0);
    }
    while (<$incoming>) {
        $globe::entire_file .= $_;
    }
    close ($incoming);
    if (0) { print "$globe::entire_file\nglenn\n"; }

    $not_critical = 0;
    $case_in      = 1;

    # Look for an h1
    # NOTE(review): $piece is the text between "<h1" and ">"; if
    # get_tag_chunk returns an empty string for a bare "<h1>", such headings
    # fall through to the <title> branch -- confirm against globe.pm.
    ($before, $piece, $after) = &globe::get_tag_chunk ($globe::entire_file,
                                                       "<h1", ">",
                                                       $not_critical, $case_in);
    if ($piece) {
        # rebuilt accurate test string
        $test = "<h1$piece>";
        # Redo this on entire file now that complete first tag is known.
        ($before, $piece, $after) = &globe::get_tag_chunk ($globe::entire_file,
                                                           $test, "</h1",
                                                           $not_critical, $case_in);
        if ($piece) {
            # Strip out html tags within this piece.
            while ($piece =~ /\</) {
                ($before, $throw_away, $after) = &globe::get_tag_chunk ($piece,
                                                                        "<", ">",
                                                                        $not_critical, $case_in);
                $piece = join ("", $before, $after);
            } # while temp title.

            $piece =~ s/\n//g;     # strip out carriage return
            # Trim all surrounding white space.  The old [\s+] class removed a
            # single character and treated '+' as white space, which clipped
            # titles such as "C++".
            $piece =~ s/^\s+//;
            $piece =~ s/\s+$//;
            if (0) { print "h1 title is \"$piece\"\n"; }
            $globe::org{$file_incoming}{title} = $piece;
        } else {
            print "Could find no h1.\n";
        }
    } else {
        ($before, $piece, $after) = &globe::get_tag_chunk ($globe::entire_file,
                                                           "<title>", "</title>",
                                                           $not_critical, $case_in);
        if (($piece) && ($piece =~ /[\w+]/)) {
            $piece =~ s/\n//g;     # strip out carriage return
            $piece =~ s/^\s+//;    # strip out leading/trailing space
            $piece =~ s/\s+$//;
            if (0) { print "<title> is \"$piece\"\n"; }
            $globe::org{$file_incoming}{title} = $piece;
        } else {
            print "Could find no h1 or title.\n";
        }
    }
    if (0) {
        print "title from get_input \"$globe::org{$file_incoming}{title}\" for $file_incoming\n";
    }

    if (1) {
        # Remove the voyant tags so that we don't get extra links to mess us up.
        # Called without parentheses on purpose: it shares this routine's @_.
        &remove_voyant_tags;
    }

    return (1);
} # get_input_file

##############################################################################
## @fn int get_hyperlinks
## @brief Parses through the information in the globe::entire_file buffer
## for anchors.
##
## @param _file_4_links contains the fully qualified name of the file.
## @param _level contains the level, or distance from, the root for use in
## assigning to the hyperlinks discovered.
##
## @return 0 when the buffer contains no "<a " at all, otherwise 1 (even if
## every anchor was rejected).
##
## All qualified anchors represent children of the _file_4_links. This
## routine creates the children's url, display text, and level in
## $globe::org{file}{child}[n].
##
## This routine calls the routine verify_link, which has criteria to test
## against. The criteria is intended to make the qualification of the link
## fail. In other words, when it returns from verify_link with a failed (0)
## criteria, this get_hyperlinks routine knows that it should not add a child
## for that hyperlink.  Anchors whose visible text is empty or purely numeric
## are skipped as well, and leading section numbering is removed from the
## text (see _strip_leading_numbering).
##
## @lim Assumes that $globe::entire_file has an HTML file in it, and that
## get_input_file has set $not_critical and $case_in.
## @ingroup tp_nav
##############################################################################
sub get_hyperlinks {
    local ($_file_4_links) = $_[0];
    local ($_level)        = $_[1];
    local ($before)        = "";
    local ($piece)         = 0;
    local ($after)         = "";
    local ($b_href)        = "";
    local ($href)          = 0;
    local ($keep_href)     = "";
    local ($a_href)        = "";
    local ($d);
    $_l_cnt = 0;

    if (0) { print "get_hyperlinks from $_file_4_links.\n"; }

    @pot_link_chunk = split (/\<a\s/i, $globe::entire_file);
    if ($#pot_link_chunk < 1) {
        # file doesn't have any hyperlinks
        return (0);
    }

    POTENTIAL_L: for ($d = 1; $d <= $#pot_link_chunk; $d++) {
        # Purposely skipping the zero element; has no link into it.
        ####
        # Work on hyperlink
        ####
        $piece = 0;
        ($before, $piece, $after) = &globe::get_tag_chunk ($pot_link_chunk[$d],
                                                           "href", ">",
                                                           $not_critical, $case_in);
        # This anchor does not have a hyperlink
        next POTENTIAL_L unless ($piece);

        # $piece contains the information inside the anchor
        ($b_href, $href, $a_href) = &globe::get_tag_chunk ($piece,
                                                           "\"", "\"",
                                                           $not_critical, $case_in);
        # Did not contain an href; but if it has a name, it
        # still might be useful to have in script.
        # TBD: implement this later if needed.
        next POTENTIAL_L unless ($href);

        ####
        # If this has valid information, then let's first strip out
        # and further test what the link is with verify_link.
        # If verify_link returns 0, it is not a link we want to hold on to.
        ####
        $keep_href = &verify_link ($href, $_file_4_links);
        next POTENTIAL_L unless ($keep_href);
        $href = $keep_href;
        # The child's url is not assigned until we are sure that we have
        # valid text to go with it; that way we don't get empty index tokens
        # or TOC entries.

        ####
        # Work on link text
        # $after is still valid from above; contains what followed starting
        # anchor designation <a ....>
        ####
        ($b_anc_end, $a_anc_end) = split (/\<\/a>/i, $after, 2);
        undef ($remember);
        # Get rid of any html that might be in link text;
        while ($b_anc_end =~ /\</) {
            ($before, $piece, $after) = &globe::get_tag_chunk ($b_anc_end,
                                                               "<", ">",
                                                               $not_critical, $case_in);
            # "alt\s*=" also accepts alt="..." written without a space; the
            # old [\s*] class demanded exactly one blank or '*' before '='.
            if (($piece =~ /^img/i) && ($piece =~ /alt\s*\=/i)) {
                # This might have some valid information to remember
                @chunks = split (/alt\s*\=/i, $piece, 2);
                ($b2, $p2, $a2) = &globe::get_tag_chunk ($chunks[1],
                                                         "\"", "\"",
                                                         $not_critical, $case_in);
                $remember = $p2;
                if (1) {
                    # Report the href itself: the child's url slot is not
                    # filled yet, and interpolating it here would create an
                    # empty child entry as a side effect.
                    print "Alt text = \"$remember\" for $href\n";
                }
            }

            # rebuild and retry
            $b_anc_end = join ("", $before, $after);
            $b_anc_end =~ s/\n/ /g;
            # NOTE(review): the listing this was recovered from shows these
            # patterns as blanks; "&nbsp;" and a run of spaces are the
            # presumed originals -- confirm.
            $b_anc_end =~ s/\&nbsp\;/ /g;
            $b_anc_end =~ s/ {2,}/ /g;
            $b_anc_end =~ s/\s+$//;
        } # while html still in b_anc_end

        ####
        # Get rid of portion of entries that begin with numbers or punctuation
        ####
        # Get rid of topics that are all numbers.
        next POTENTIAL_L if ($b_anc_end !~ /\D/);
        $b_anc_end = &_strip_leading_numbering ($b_anc_end);

        ####
        # If it doesn't have text, go and get the next hyperlink.
        # (Falling back on the remembered alt text is switched off.)
        ####
        next POTENTIAL_L if ($b_anc_end !~ /\S/);

        if (0) { print "Link text = $b_anc_end\n"; }

        # clean up the text from things like &nbsp;
        $b_anc_end =~ s/\&nbsp\;/ /g;
        $b_anc_end =~ s/ {2,}/ /g;
        $b_anc_end =~ s/^\s+//;
        $b_anc_end =~ s/\s+$//;

        $globe::org{$_file_4_links}{child}[$_l_cnt]{text} = $b_anc_end;

        ####
        # The children own their level rather than the owning file,
        # so that they'll be displayed properly when
        # referenced from multiple locations.
        ####
        $globe::org{$_file_4_links}{child}[$_l_cnt]{level} = $_level;

        ####
        # URL comes from above.
        ####
        $globe::org{$_file_4_links}{child}[$_l_cnt]{url} = "$href";

        if (0) {
            print "Link text = $globe::org{$_file_4_links}{child}[$_l_cnt]{text}\n";
            print " Link url = $globe::org{$_file_4_links}{child}[$_l_cnt]{url}\n";
            print " Link level = $globe::org{$_file_4_links}{child}[$_l_cnt]{level}\n";
            print " Link parent = $_file_4_links\n";
        }

        ####
        # Should be the last thing; increment counter
        ####
        $_l_cnt++;
    } # for

    if (0) {
        print "org file: $_file_4_links with children\n";
        for ($d = 0; $d <= $#{$globe::org{$_file_4_links}{child}}; $d++) {
            print " child $d $globe::org{$_file_4_links}{child}[$d]{url}\n";
        }
        print " org file above: $_file_4_links\n";
        # print "ending get_hyperlinks\n";
        # exit(1);
    }
    return (1);

} # get_hyperlinks

##############################################################################
## @fn string _strip_leading_numbering
## @brief Removes leading section numbering from link text (helper for
## get_hyperlinks).
##
## @param text The link text.
##
## @return The text with leading "1.", "1 ", "A.1", "1," style prefixes
## removed.  May be empty when nothing but numbering was present.
##
## The stages run in the same order, and with the same patterns, as the
## inline code they replace.  Two defects of that code are gone:
##   - the white-space loop tested /^[\s+]/ but removed only /^[\s]/, so text
##     starting with '+' after its number made it spin forever;
##   - the "digit blank" loop tested /^[\d+]/ but split on /^[\d+] /, so a
##     digit not followed by a blank erased the whole text.
##
## NOTE(review): the [\d+] and [\s+] classes match ONE character and also a
## literal '+'.  They are kept as they were so that multi-digit prefixes
## ("802.11", "2002 ") are left alone exactly as before; decide separately
## whether \d+ / \s+ was intended.
##
## @ingroup tp_nav
##############################################################################
sub _strip_leading_numbering {
    my ($text) = @_;

    # Remove every leading occurrence of $pattern; if at least one was
    # removed, also drop the white space that followed it.
    my $strip_repeated = sub {
        my ($pattern) = @_;
        return unless ($text =~ s/$pattern//);
        1 while ($text =~ s/$pattern//);
        $text =~ s/^\s+//;
        return;
    };

    $strip_repeated->(qr/^[\d+][\.]/);    # "1." "1.2."
    $strip_repeated->(qr/^[\d+][\s+]/);   # "1 "

    # A lone digit is all that is left.
    if ($text =~ /^[\d+]$/) {
        $text =~ s/^[\d+]//;
        $text =~ s/^\s+//;
    }

    # Appendix style: "A.1.2 Title"
    if ($text =~ /^[A-Z]\.[\d]/) {
        1 while (   $text =~ s/^[A-Z]\.(?=[\d])//
                 || $text =~ s/^[\d+]\.//
                 || $text =~ s/^\.[\d+]//);
        $text =~ s/^\s+//;
    }

    $strip_repeated->(qr/^[\d+][\,]/);    # "1,"
    $strip_repeated->(qr/^[\d+] /);       # "1 " left over from the stages above

    # One remaining leading blank (or '+') goes as well.
    $text =~ s/^[\s+]//;

    return ($text);
} # _strip_leading_numbering


##############################################################################
## @fn int verify_link
## @brief Tests the potential link against various criteria to validate
## whether the link is of value.
##
## @param potential_link Contains a fully qualified hyperlink.
## @param contains the owning file.
#**
#** @return If any criterion is matched, this routine returns 0 (meaning
#** an uninteresting link). If none of the criteria sets off a flag, it
#** returns the potential link as validated, i.e. with relative markings
#** resolved and the owning file's path prepended.
#**
#** Criteria that make the link uninteresting are things like:
#** linking to itself, up linking to a top-level entity, linking to
#** an html topic that we want to exclude as being a child, linking to
#** other children at the same level, etc.
#**
#** @ingroup tp_nav
#**
# #############################################################################

# Literal substring test; undef is treated as the empty string. Used instead
# of interpolating file names into a regex, where ".", "(" and ")" would act
# as metacharacters and an empty string would re-run the last pattern.
sub _link_contains {
    my ($haystack, $needle) = @_;
    $haystack = "" unless (defined($haystack));
    $needle   = "" unless (defined($needle));
    return (index($haystack, $needle) >= 0);
}

sub verify_link {
    my ($potential_link, $owning_file) = @_;

    # An empty (false) link can never be a useful child.
    return (0) unless ($potential_link);

    # Strip every leading "./". The dot is matched literally, so "a/x.html"
    # keeps its first directory.
    $potential_link =~ s{^(?:\./)+}{};

    # ###
    # VxWorks had some strange file names with ( and ), which show up
    # URL-escaped in the links. Decode every occurrence, not just the first.
    # ###
    $potential_link =~ s/%28/(/g;
    $potential_link =~ s/%29/)/g;

    # ####
    # A link whose file side (left of '#') has no meat is a link within
    # the same page. We can safely ignore it.
    # ####
    if ($potential_link =~ /\#/) {
        my ($file_side) = split(/\#/, $potential_link, 2);
        return (0) if (!defined($file_side) || $file_side !~ /\w/);
    }

    # Mail and external web links are never part of the local tree.
    return (0) if ($potential_link =~ m{mailto:|https?://|www\.}i);

    # ####
    # Get rid of relative path markings: every leading "../" removes one
    # directory from the owning file's path.
    # ####
    my $path = $globe::org{$owning_file}{path};
    $path = "" unless (defined($path));
    while ($potential_link =~ s{^\.\./}{}) {
        my @dirs = split(/\//, $path);
        pop(@dirs);
        $path = join("/", @dirs);
        $path .= "/" if ($path !~ /\/$/);
    }

    # ####
    # Don't add hyperlinks that take you back to the same file. A link
    # that carries a target (#anchor) is okay, because the owning file
    # name will not contain the complete link text.
    # ####
    my ($file_part) = split(/\#/, $potential_link, 2);
    if (_link_contains($owning_file, $file_part)
        && _link_contains($owning_file, $potential_link)) {
        return (0);
    }

    # ####
    # Rebuild a true link file name
    # ####
    $potential_link = "$path$potential_link";

    # ####
    # Don't add hyperlinks back to itself.
    # ####
    return (0) if (_link_contains($globe::org{$owning_file}{url}, $potential_link));

    # ####
    # Don't add duplicate hyperlinks from other children at that level.
    # ####
    for my $r (0 .. $#{ $globe::org{$owning_file}{child} }) {
        if (_link_contains($globe::org{$owning_file}{child}[$r]{url}, $potential_link)) {
            return (0);
        }
    }

    # ####
    # Don't add hyperlinks that go to generally excluded files or to one
    # of the top-level to-be-excluded files.
    # NOTE(review): the list entries are still applied as patterns against
    # the link, as before; presumably they are plain names -- confirm
    # before quoting them as well.
    # ####
    foreach my $excluded (@xscope::gen_ex_child, @xscope::ex_as_child) {
        if (_link_contains($excluded, $potential_link)
            || ($potential_link =~ /$excluded/)) {
            return (0);
        }
    }

    # ####
    # If we made it this far, then we can return the modified potential_link
    # ####
    return ($potential_link);

} # verify_link



#############################################################################
#** @fn int spider_trace
#** @brief Traces the child hyperlinks of a starting file and places them
#** into the data structure.
#**
#** @param start_file The starting HTML file to trace.
#**
#** This calls routines to open and read the children HTML files and to locate the
#** appropriate hyperlinks within those files.
#**
#** @lim This assumes that the data structure has already been started by
#** having a root file, by having already opened it and acquired its children.
#**
#** @ingroup tp_nav
#**
# #############################################################################
# Arguments: $_[0] is the file whose children are traced, $_[1] is that
# file's level; children are read with level + 1.
# Side effects visible in this routine: increments $globe_file_cnt, records
# o_level for the start file, creates path/url entries in %globe::org for
# every child file not seen before, and pushes onto @file_errors when a
# child file yields no hyperlinks. Always returns 1.
sub spider_trace {
    # Kept dynamically scoped (local) as in the original.
    # NOTE(review): presumably no callee depends on seeing these values;
    # confirm before converting them to lexicals.
    local($start_file)     = $_[0];
    local($base_level)     = $_[1];
    local($new_level)      = $base_level + 1;   # one more than current level
    local($new_file_entry);
    local(%to_do);

    print "spider trace $start_file\n";
    $globe_file_cnt++;

    # A file reached from several levels accumulates all of them.
    if (exists($globe::org{$start_file}{o_level})) {
        $globe::org{$start_file}{o_level} .= "...$base_level";
    } else {
        $globe::org{$start_file}{o_level} = $base_level;
    }

    # The counter is lexical so that routines called from inside the loop
    # cannot disturb it through a shared global $i.
    for (my $i = 0; $i <= $#{ $globe::org{$start_file}{child} }; $i++) {
        my $child_url = $globe::org{$start_file}{child}[$i]{url};

        # Strip out any target inside of a file: the entry is the path
        # without the target, the url keeps it.
        if ($child_url =~ /\#/) {
            my @sides = split(/\#/, $child_url, 2);
            $new_file_entry = $sides[0];
        } else {
            $new_file_entry = $child_url;
        }

        # Files already present in the structure have been (or are being)
        # handled; this is what stops the recursion.
        next if (exists($globe::org{$new_file_entry}));

        $to_do{$new_file_entry} = $child_url;

        # ####
        # Set up the path to be used when generating links: everything
        # but the file name, with a trailing slash when non-empty.
        # ####
        my @dirs = split(/\//, $new_file_entry);
        pop(@dirs);
        $globe::org{$new_file_entry}{path} = join("/", @dirs, "");
        $globe::org{$new_file_entry}{url}  = $child_url;

        # ####
        # Read in file and get its child hyperlinks. A file that cannot
        # be read is not reported here; the original notes that this is
        # trapped elsewhere.
        # ####
        if (get_input_file($new_file_entry)) {
            if (!get_hyperlinks($new_file_entry, $new_level)) {
                push(@file_errors, "No hyperlinks found in $new_file_entry.");
                print("\nNo hyperlinks found in $new_file_entry\n");
            }
        }
    }

    # ####
    # RECURSIVE CALL: trace every newly created entry one level down.
    # ####
    foreach $new_file_entry (keys %to_do) {
        if ($to_do{$new_file_entry}) {
            spider_trace($new_file_entry, $new_level);
        }
    }

    undef(%to_do);

    return (1);

} # spider_trace


#############################################################################
#** @fn int testing_structure
#** @brief Traces through the created data structure.
#**
#** @param begin_file is the starting point to begin tracing.
#**
#** This assumes that the begin_file is present in the data structure and
#** has children to trace through.
#**
#** @note This is a RECURSIVE algorithm and can be used as a template for other
#** recursive things involving the same data structure.
#**
#** @ingroup tp_nav
#**
# #############################################################################
# Debug walk over %globe::org starting at $_[0]. Prints every child with its
# level, complains when a child's level jumps by more than one relative to
# the previously printed child ($globe::_last_level), and recurses into each
# child file that is not yet recorded in %globe_all_files. Always returns 1.
sub testing_structure {
    my ($begin_file) = @_;

    for (my $f = 0; $f <= $#{ $globe::org{$begin_file}{child} }; $f++) {
        # The file to recurse into is the url without any #target.
        my $child = $globe::org{$begin_file}{child}[$f]{url};
        if ($child =~ /\#/) {
            my @sides = split(/\#/, $child, 2);
            $child = $sides[0];
        }

        print "    child level=$globe::org{$begin_file}{child}[$f]{level} $globe::org{$begin_file}{child}[$f]{text}\n";
        print "   p level=$globe::org{$begin_file}{o_level} $globe::org{$begin_file}{title}\n";

        if ($globe::org{$begin_file}{child}[$f]{level} > $globe::_last_level + 1) {
            print "DANGER WILL ROBINSON!!! DANGER!!!\n";
            print "child $globe::org{$begin_file}{child}[$f]{url}\n";
            print "parent $begin_file\n";
            print "parent level $globe::org{$begin_file}{o_level} \nchild level $globe::org{$begin_file}{child}[$f]{level}\n";
            print "parent has children, who are:\n";
            # A lexical index is used here; the original borrowed the
            # sort special variable $b as its counter.
            for my $sibling (0 .. $#{ $globe::org{$begin_file}{child} }) {
                print "\t $sibling: $globe::org{$begin_file}{child}[$sibling]{text}";
                print "; level $globe::org{$begin_file}{child}[$sibling]{level}\n";
            }
            print "===============\n";

            # ####
            # Fixing the level
            # (the decrement itself is disabled in the original; only the
            # messages are printed)
            # ####
            # $globe::org{$begin_file}{child}[$f]{level}--;
            print "Fixing the level\n";
            print "parent level $globe::org{$begin_file}{o_level} \nchild level $globe::org{$begin_file}{child}[$f]{level}\n";
            print "===============\n";
        }
        $globe::_last_level = $globe::org{$begin_file}{child}[$f]{level};

        if (!exists($globe_all_files{$child})) {
            $globe_all_files{$child} = 1;
            $globe_file_cnt++;
            # ####
            # Recursive Call
            # ####
            testing_structure($child);
        }

    } # for each child

    return (1);

} # testing_structure

#############################################################################
#** @fn int output_structure_script
#** @brief The starting point for outputting the TOC script files.
#**
#** @param _in_file / htree_file the prefix of the name of the script file to output.
#** @param trace_start_file starting point in the data structure that has
#** children to trace through.
#** @param globe_path_purge the key into the $globe::nav_purge_path hash,
#** which contains paths to purge from the output hyperlinks in the TOC, etc.
#**
#** @note This calls script_structure which is a recursive routine that traces through
#** the data structure.
#**
#** @ingroup tp_nav
#**
# #############################################################################
# Returns 1 on success, 0 when the script file cannot be opened or closed
# (the reason is pushed onto @file_errors).
sub output_structure_script {
    local($_in_file)         = $_[0];
    local($htree_file)       = $_[0];
    local($trace_start_file) = $_[1];
    # Must stay dynamically scoped: script_structure reads it as a global.
    local($globe_path_purge) = $_[2];
    local($s_path)           = "";

    undef(%globe_found_files);   # Used in a global way
    $globe_file_cnt = 0;

    # Replace a trailing .htm/.html with .script. Only the suffix is
    # removed, so an earlier ".htm" inside the name stays untouched.
    $htree_file =~ s/\.html?$//i;
    $htree_file .= ".script";

    print "The output TOC script file is \"$htree_file\".\n";
    print "The portion of the path to purge from hyperlinks is \"$globe::nav_purge_path{$globe_path_purge}\".\n";

    # OUT_SCRIPT is a bareword handle on purpose: script_structure prints
    # to it by name.
    unless (open(OUT_SCRIPT, '>', $htree_file)) {
        push(@file_errors, "Cannot open file \"$htree_file\".");
        return (0);
    }

    # #################################################
    # Root level of tree.
    # NOTE(review): the comment in the original says this top title is part
    # of the master and does not need to be part of this, yet the statements
    # appear live in the listing -- confirm against the original .pl.
    # #################################################
    ($okay, $s_path) = purge_full_path($trace_start_file,
                                       $globe::nav_purge_path{$globe_path_purge});
    print(OUT_SCRIPT "Item level=1 ");
    print(OUT_SCRIPT "image=nav_doc.gif ");
    print(OUT_SCRIPT "url=$s_path,basefrm ");
    print(OUT_SCRIPT "selected=YES ");
    print(OUT_SCRIPT "text=$globe::org{$trace_start_file}{title}\r\n");

    # #####
    # Starting routine that can get called recursively
    # #####
    print "$globe_file_cnt Starting the building of scripts...\n";
    script_structure($trace_start_file);
    print "\n $globe_file_cnt Ending the building of scripts...\n";

    # #################################################
    # Clean up navigation. Buffered write errors surface at close.
    # #################################################
    my $closed = close(OUT_SCRIPT);
    undef(%globe_found_files);
    unless ($closed) {
        push(@file_errors, "Cannot close file \"$htree_file\".");
        return (0);
    }
    return (1);
} # output_structure_script


#############################################################################
#** @fn int purge_full_path
#** @brief Changes the file name by purging a portion of its path.
#**
#** @param _filename original filename.
#** @param part2rm portion of the path in the filename to remove.
#**
#** @return 1 and the updated filename if successful; 0 and the unmodified
#** filename if unsuccessful.
#**
#** The first occurrence of part2rm is removed as literal text. An empty or
#** undefined part2rm never matches.
#** NOTE(review): the original interpolated part2rm as a regex; presumably
#** every purge path is a plain path -- confirm against the configuration.
#**
#** In scalar context the call yields the filename alone (last list
#** element), which at least one caller in this file relies on.
#**
#** @ingroup tp_nav
#**
# #############################################################################
sub purge_full_path {
    my ($_filename, $part2rm) = @_;

    unless (defined($_filename) && defined($part2rm) && length($part2rm)) {
        return (0, $_filename);
    }

    my $at = index($_filename, $part2rm);
    if ($at < 0) {
        return (0, $_filename);
    }

    substr($_filename, $at, length($part2rm)) = "";
    return (1, $_filename);
} # purge_full_path


#############################################################################
#** @fn int starting_point_script
#** @brief Creates the top level TOC script that uses subscript references.
#**
#** @param htree_file The prefix of the script file to be output.
#** @param which_script The key into the hash for whether we're creating a
#** "script_local" or "script_master" script. This has the appropriate file names.
#** @param which_purge_path The key into the hash for the appropriate purge
#** path depending upon whether or not this is a "local" or "master" version.
#**
#** This knows about the top level entries and what they should be named.
#** The names have been added to the appropriate hash. It can generate the
#** master script that calls subscripts depending upon which version we're
#** doing.
#**
#** The difference between a local and master version is the paths for links.
#** Normally, the other tools would handle this for me. However, because this
#** is starting out already as a nested version, it does not integrate well.
#** It is better to have this take care of all entries appropriately so that
#** the other tools only have to integrate the top level script (through
#** a master_nav file).
#**
#** @return 1 on success; 0 when the script file cannot be opened or closed
#** (the reason is pushed onto @file_errors).
#**
#** @ingroup tp_nav
#**
# #############################################################################
sub starting_point_script {
    my ($htree_file, $which_script, $which_purge_path) = @_;

    # Replace a trailing .htm/.html with .script (suffix only).
    $htree_file =~ s/\.html?$//i;
    $htree_file .= ".script";

    print "The new output TOC script file is \"$htree_file\".\n";

    unless (open(OUT_SCRIPT, '>', $htree_file)) {
        push(@file_errors, "Cannot open file \"$htree_file\".");
        return (0);
    }

    my $is_master = ($which_purge_path =~ /master/);

    # The master level doesn't need a top level entry of its own.
    unless ($is_master) {
        # The purged url is computed but not written: the url= field is
        # disabled in the original.
        my ($okay, $strip_p) = purge_full_path($globe::top_most_level{url},
                                               $globe::nav_purge_path{$which_purge_path});
        print(OUT_SCRIPT "Item level=1 ");
        print(OUT_SCRIPT "image=nav_folderclosed.gif ");
        # print (OUT_SCRIPT "url=$strip_p ");
        print(OUT_SCRIPT "text=$globe::top_most_level{title}\r\n");
    }

    # One folder entry per top file; below the extra top level entry
    # these sit at level 2.
    my $item_level = $is_master ? 1 : 2;
    foreach my $top_file (@xscope::top_files) {
        # Get rid of ALL leading whitespace in the stored title.
        $globe::org{$top_file}{title} =~ s/^\s+//;

        print(OUT_SCRIPT "Item level=$item_level ");
        print(OUT_SCRIPT "image=nav_folderclosed.gif ");
        print(OUT_SCRIPT "subscript=$globe::org{$top_file}{$which_script}.script ");
        print(OUT_SCRIPT "selected=YES ");
        print(OUT_SCRIPT "text=$globe::org{$top_file}{title}\r\n");
    }

    # Buffered write errors surface at close.
    unless (close(OUT_SCRIPT)) {
        push(@file_errors, "Cannot close file \"$htree_file\".");
        return (0);
    }
    return (1);

} # starting_point_script





#############################################################################
#** @fn int script_structure
#** @brief Traces through the data structure following the children and
#** creates entries into the script file for the TOC.
#**
#** @param begin_file is the point to start in the data structure.
#**
#** For every begin_file, this outputs entries into the already open
#** OUT_SCRIPT file for each of its children.
#**
#** Whenever a child is encountered, it is tested (using globe_found_files)
#** to see if it has already been processed. This prevents it looping forever.
#**
#** If a given child has not been traced, this calls itself with that child.
#**
#** It was deemed desirable to still print out children entries even if
#** we weren't going to trace them (because they already had been traced.)
#**
#** @note This is a RECURSIVE routine. It stops when all children at
#** that level have had appropriate entries made and none have
#** been flagged to be traced.
#**
#** @lim $globe_found_files{$begin_file} is used in a global way.
#** $globe::nav_purge_path is also a global hash. The key into the hash
#** ($globe_path_purge) was defined a level higher and used globally.
#**
#** @ingroup tp_nav
#**
# #############################################################################
sub script_structure {
    my ($begin_file) = @_;
    # $globe_path_purge is global from the calling routine, so we don't have
    # to remember it.

    # Purposely not terminated; shows that it is working.
    print "$globe_file_cnt.";

    for (my $f = 0; $f <= $#{ $globe::org{$begin_file}{child} }; $f++) {
        my $url = $globe::org{$begin_file}{child}[$f]{url};
        my ($okay, $strip_p) = purge_full_path($url,
                                               $globe::nav_purge_path{$globe_path_purge});

        # Okay to still write out the children even if we're not going to
        # trace them.
        print(OUT_SCRIPT "Item level=$globe::org{$begin_file}{child}[$f]{level} ");
        print(OUT_SCRIPT "image=nav_doc.gif ");
        print(OUT_SCRIPT "url=$strip_p,basefrm ");
        print(OUT_SCRIPT "selected=YES ");
        print(OUT_SCRIPT "text=$globe::org{$begin_file}{child}[$f]{text}\r\n");

        # The file to recurse into is the url without any #target.
        my $child = $url;
        $child =~ s/\#.*$//s;

        if (!exists($globe_found_files{$child})) {
            $globe_found_files{$child} = 1;
            $globe_file_cnt++;
            # ####
            # Recursive Call
            # ####
            script_structure($child);
        }

    } # for each child

    return (1);

} # script_structure



#############################################################################
#** @fn int handle_index_tokens
#** @brief Traces through the data structure and outputs appropriate entries
#** for the index.
#**
#** @param begin_file The starting point in the structure.
#**
#** @note This is a RECURSIVE routine.
#** It creates appropriate entries for each
#** of the children into the globe::index_info data structure, which later
#** gets output to a file.
#**
#** @lim Does a convoluted thing to get the path to be truly integrated
#** into the system. Also, it creates a helluva lot of entries for BSP (4022)
#** before any word-chunking might be considered.
#**
#** @ingroup tp_nav
#**
# #############################################################################
sub handle_index_tokens {
    my ($begin_file) = @_;

    for (my $f = 0; $f <= $#{ $globe::org{$begin_file}{child} }; $f++) {
        my $url = $globe::org{$begin_file}{child}[$f]{url};

        # The file to recurse into is the url without any #target.
        my $child = $url;
        $child =~ s/\#.*$//s;

        # ####
        # Build the full link: local purge path + child url, then purge
        # the master path from it. When the child begins with a slash,
        # the trailing slash of the path is dropped first so the two do
        # not double up. The substitution must be bound with =~ ; a plain
        # "=" would run it on $_ and store its count in $path_temp.
        # ####
        my $path_temp = $globe::nav_purge_path{local};
        if ($url =~ /^\//) {
            $path_temp =~ s/\/$//;
        }
        $path_temp .= $url;
        my ($okay, $strip_p) = purge_full_path($path_temp,
                                               $globe::nav_purge_path{"master"});

        my $n_temp = join("",
                          $globe::org{$begin_file}{child}[$f]{text},
                          $globe::word_url_boundary,
                          "<a href=\"", $strip_p, "\" target=\"basefrm\">");

        # ####
        # This is an attempt at efficiency.
        # It adds duplicates to the list, but they can be removed later,
        # rather than constantly looping within this already expensive
        # loop and recursive routine.
        # ####
        push(@globe::index_info, $n_temp);

        if (!exists($globe_all_files{$child})) {
            $globe_all_files{$child} = 1;
            $globe_file_cnt++;
            # ####
            # Recursive Call
            # ####
            handle_index_tokens($child);
        }

    } # for each child
    return (1);

} # handle_index_tokens



#############################################################################
#** @fn int html_file_update
#** @brief Traces through the data structure, removes tagged sections, and
#** inserts new tagged sections with navigation information and copyright
#** stuff.
#**
#** @param begin_file The starting point in the structure.
#**
#** @return 1 on success; 0 when the file cannot be reopened for writing.
#** The program exits with status 1 when the file cannot be read.
#**
#** @note This is a RECURSIVE routine. It opens each HTML file and writes it
#** back out.
#**
#** @ingroup tp_nav
#**
# #############################################################################
sub html_file_update {
    # Must stay dynamically scoped: change_nav reads $begin_file as a global.
    local($begin_file) = $_[0];

    print "Updating HTML file $begin_file\n";

    unless (open(HTML_FILE, '<', $begin_file)) {
        push(@file_errors, "Cannot open file \"$begin_file\"");
        exit(1);
    }

    # Read in entire file. A lexical line variable keeps the caller's $_
    # intact.
    undef($globe::entire_file);
    while (defined(my $line = <HTML_FILE>)) {
        $globe::entire_file .= $line;
    }
    close(HTML_FILE);

    # Change the tags inside the file. change_nav works on
    # $globe::entire_file; its result is deliberately not acted upon
    # (assume success for now), so the file is written back either way.
    &change_nav;

    # Write out entire file after changing the tags inside the file.
    unless (open(HTML_FILE, '>', $begin_file)) {
        push(@file_errors, "Cannot open file \"$begin_file\"");
        return (0);
    }
    print(HTML_FILE "$globe::entire_file");
    close(HTML_FILE);

    for (my $f = 0; $f <= $#{ $globe::org{$begin_file}{child} }; $f++) {
        # The file to recurse into is the url without any #target.
        my $child = $globe::org{$begin_file}{child}[$f]{url};
        $child =~ s/\#.*$//s;

        if (!exists($globe_all_files{$child})) {
            $globe_all_files{$child} = 1;
            $globe_file_cnt++;
            # ####
            # Recursive Call
            # ####
            html_file_update($child);
        }
    } # for each child

    return (1);

} # html_file_update

//############################################################################# 01865 /** @fn int change_nav 01866 ** @brief Removes existing tags in the file and inserts new tags with the 01867 ** content from the master files. 01868 ** 01869 ** @return This updates the globe::entire_file with new information. 01870 ** 01871 ** @note This is different than the change_nav in the voyant_nav.pl 01872 ** program. 01873 ** 01874 ** @ingroup tp_nav 01875 **/ 01876 // ############################################################################# 01877 int change_nav ( ) { 01878 if (0){ 01879 // print "...
changing the navigation.\n"; 01880 } 01881 01882 // #### 01883 // Make sure our input HTML file is well formed 01884 // #### 01885 $globe::entire_file =~ s/\<\/Head\>/\<\/head\>/g; 01886 $globe::entire_file =~ s/\<\/HEAD\>/\<\/head\>/g; 01887 $globe::entire_file =~ s/\<Head\>/\<head\>/g; 01888 $globe::entire_file =~ s/\<HEAD\>/\<head\>/g; 01889 $globe::entire_file =~ s/\<\/Body/\<\/body/g; 01890 $globe::entire_file =~ s/\<\/BODY/\<\/body/g; 01891 $globe::entire_file =~ s/\<Body/\<body/g; 01892 $globe::entire_file =~ s/\<BODY/\<body/g; 01893 $globe::entire_file =~ s/\<Link rel\=/\<link rel\=/g; 01894 $globe::entire_file =~ s/\<Link Rel\=/\<link rel\=/g; 01895 $globe::entire_file =~ s/\<LINK REL\=/\<link rel\=/g; 01896 $globe::entire_file =~ s/\<Link/\<link/g; 01897 $globe::entire_file =~ s/\<LINK/\<link/g; 01898 01899 01900 // Remove any style sheets that might be hanging out. 01901 // get rid of any old tags. 01902 01903 while ($globe::entire_file =~ /\<link/){ 01904 ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file, 01905 "<link", ">", 0, 1); 01906 if ($piece) { 01907 $globe::entire_file = join ("", $before, $after); 01908 } 01909 } 01910 while ($globe::entire_file =~ /\<body/){ 01911 ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file, 01912 "<body", ">", 0, 1); 01913 if (1) { 01914 $globe::entire_file = join ("", $before, $after); 01915 } 01916 } 01917 if ($globe::entire_file =~ /\<\/head>/){ // insert a <body> tag 01918 ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file, 01919 "</head", ">", 0, 1); 01920 if (1) { 01921 $globe::entire_file = join ("", $before, "</head><body>",$after); 01922 } 01923 } 01924 01925 01926 // Remove any style sheets that might be hanging out. 01927 // get rid of any old tags. 01928 &remove_voyant_tags; 01929 01930 01931 if (0) { 01932 // print "Finishing up here... 
premature.\n"; 01933 // // print $globe::entire_file; 01934 // print (OUT_HTML "$globe::entire_file"); 01935 // close (OUT_HTML); 01936 system ("cp $out_file $in_file"); 01937 return (0); 01938 } 01939 01940 01941 // ################# 01942 // voyant header needs to go before </head> 01943 // ################# 01944 $not_critical = 1; 01945 $reinsert_tags = 0; 01946 $def_type = "header"; 01947 if (! &file_chunk_change ($def_type, $not_critical, "</head>", 0, $reinsert_tags, $in_file) ) { 01948 // Could not put proper chunk in; write out what we have an abort. 01949 // // print (OUT_HTML "$globe::entire_file"); 01950 // close (OUT_HTML); 01951 if (0){ // debug 01952 // print "glenn3\n$globe::entire_file"; 01953 // exit(1); 01954 } // debug 01955 return(0); 01956 } // voyant_header 01957 01958 if (0){ // debug 01959 // print "glenn4\n$globe::entire_file"; 01960 // print "oh $def_type $globe::m_info{$def_type}\noh again\n"; 01961 // exit(1); 01962 } // debug 01963 01964 01965 // ################# 01966 // voyant footer needs to go before </body> 01967 // ################# 01968 $not_critical = 1; 01969 $reinsert_tags = 0; 01970 $def_type = "footer"; 01971 if ($globe::entire_file =~ /\<\/body\>/) { 01972 &file_chunk_change ($def_type, $not_critical, "<\/body>", 0, $reinsert_tags, $in_file); 01973 // Could not put proper chunk in; write out what we have an abort. 01974 } else { 01975 // Does not have proper body 01976 push (@file_errors, "ERROR: Does not have </body> in $in_file."); 01977 return (0); 01978 } // voyant footer 01979 01980 01981 // ################# 01982 // voyant common top navigation bar should go after <body> 01983 // Do AFTER FM or Doxygen navigation 01984 // ################# 01985 $not_critical = 0; 01986 $reinsert_tags = 0; 01987 $def_type = "nav_common"; 01988 01989 if (! &file_chunk_change ($def_type, $not_critical, "<body>", 1, $reinsert_tags, $in_file) ) { 01990 // Could not put proper chunk in; 01991 // Not necessarily a problem. 
01992 } // Common Navigation 01993 01994 if (0){ // debug 01995 // print "glenn4\n$globe::entire_file"; 01996 // print "oh $def_type $globe::m_info{$def_type}\noh again\n"; 01997 // exit(1); 01998 } // debug 01999 02000 02001 02002 // ################# 02003 // Handle the PDF file names 02004 // ################# 02005 $temp = "$globe::rel_path_to_start_point$globe::pdf_dir$globe::master_order_pdf[$globe::master_order_key]"; 02006 $globe::entire_file =~ s/$globe::this_pdf_flag/$temp/g; 02007 02008 // ################# 02009 // Handle the relative path 02010 // glenn 02011 // ################# 02012 // $globe::rel_path_to_start_point_def = "../"; 02013 // $globe::rel_path_to_start_point = "./"; 02014 if (0) { 02015 // print "getting rid of path $begin_file\n"; 02016 } 02017 $_t_path = &purge_full_path ($begin_file, $globe::nav_purge_path{master}); 02018 @_a_path = split (/\//, $_t_path); 02019 pop (@_a_path); // because last element is file name 02020 $globe::rel_path_to_start_point = ""; 02021 for ($t=0; $t<=$// _a_path; $t++) { 02022 $globe::rel_path_to_start_point .= "..\/"; 02023 } 02024 // undef (@_a_path); 02025 // undef ($_t_path); 02026 if (0) { 02027 // print "new $_t_path\n$globe::rel_path_to_start_point\n"; 02028 } 02029 02030 $globe::entire_file =~ s/$globe::rel_path_to_start_point_def/$globe::rel_path_to_start_point/g; 02031 02032 // ################# 02033 // Handle the manual or group names 02034 // ################# 02035 $temp = $globe::master_order_title[$globe::master_order_key]; 02036 $temp =~ s/[\s]+/\ \;/g; 02037 02038 if ($globe::entire_file =~ /$globe::xmanual/){ 02039 $globe::entire_file =~ s/$globe::xmanual/$temp/g; 02040 } 02041 if ($globe::entire_file =~ /$globe::xgroup/){ 02042 $globe::entire_file =~ s/$globe::xgroup/$temp/g; 02043 } 02044 $globe::entire_file =~ s/$globe::fix_path_flag/$globe::fix_path_to/g; 02045 02046 // #### 02047 // Take care of current file name which might be in header, 02048 // as specified in template and needed 
in tree applet. 02049 // #### 02050 if ($globe::entire_file =~ /$globe::m_var{define}{curr2}/){ 02051 $globe::entire_file =~ s/$globe::m_var{define}{curr2}/$rel_to_file/g; 02052 } 02053 02054 02055 // ################# 02056 // Handle the document number 02057 // ################# 02058 if ($globe::entire_file =~ /$globe::voy_variable_doc_num/){ 02059 $globe::entire_file =~ s/$globe::voy_variable_doc_num/$globe::m_info{_array_order}[$globe::master_order_key]{num}/; 02060 } 02061 02062 02063 // ################################ 02064 // Remove certain specified HTML tags 02065 // ################################ 02066 if ((@globe::voy_html_zap < 1) || ($globe::voy_html_zap[0] =~ /none/)){ 02067 // Do nothing 02068 // // print "$in_file has no tags to zap.\n"; 02069 } else { 02070 // Remove the tags in question 02071 for ($i=0; $i<@globe::voy_html_zap; $i++){ 02072 // Determine start and Stop tags 02073 @zap_tag = split ( /\,/, $globe::voy_html_zap[$i]); 02074 // clean up tag 02075 for ($j=0; $j <@zap_tag; $j++){ 02076 $zap_tag[$j] =~ s/^[\s]*\"//; 02077 $zap_tag[$j] =~ s/\"[\s]*$//; 02078 // // print "Zap this =$zap_tag[$j]\n"; 02079 } 02080 $t_cnt=0; 02081 while ($globe::entire_file =~ /$zap_tag[0]/i){ // case insensitive test 02082 $t_cnt++; 02083 // print "$t_cnt: Removing everything between \"$zap_tag[0]\" and \"$zap_tag[1]\"\n"; 02084 // First tag part A 02085 @pre_chunks = split ( /$zap_tag[0]/, $globe::entire_file, 2); 02086 // First tag part B 02087 @post_chunks = split ( /$zap_tag[1]/, $pre_chunks[1], 2); 02088 if (@zap_tag > 2){ 02089 // Second tag 02090 // Hit the first occurrence of the ending tag 02091 $post_chunks[1] =~ s/$zap_tag[2]//i; // do it only once, case insensitive 02092 } 02093 // rebuild 02094 $globe::entire_file = join ("", $pre_chunks[0], $post_chunks[1]); 02095 } // while it still is infested with the tag 02096 } // for all zap tags 02097 // // print "$in_file was tested for tags to zap.\n"; 02098 } // if zap tags to remove 02099 // 
undef(@pre_chunks); 02100 // undef(@post_chunks); 02101 // ################################ 02102 02103 } // change_nav 02104 02105 02106 02107 //############################################################################# 02108 /** @fn int remove_voyant_tags 02109 ** 02110 ** 02111 ** 02112 **/ 02113 // ############################################################################# 02114 int remove_voyant_tags ( ) { 02115 02116 // Remove any style sheets that might be hanging out. 02117 // get rid of any old tags. 02118 while ($globe::entire_file =~ $globe::m_define{header}[0]){ 02119 ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file, 02120 $globe::m_define{header}[0], 02121 $globe::m_define{header}[1], 02122 0, 1); 02123 if ($piece) { 02124 $globe::entire_file = join ("", $before, $after); 02125 } 02126 } 02127 while ($globe::entire_file =~ $globe::m_define{nav_book}[0]){ 02128 ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file, 02129 $globe::m_define{nav_book}[0], 02130 $globe::m_define{nav_book}[1], 02131 0, 1); 02132 if ($piece) { 02133 $globe::entire_file = join ("", $before, $after); 02134 } 02135 } 02136 while ($globe::entire_file =~ $globe::m_define{nav_common}[0]){ 02137 ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file, 02138 $globe::m_define{nav_common}[0], 02139 $globe::m_define{nav_common}[1], 02140 0, 1); 02141 if ($piece) { 02142 $globe::entire_file = join ("", $before, $after); 02143 } 02144 } 02145 while ($globe::entire_file =~ $globe::m_define{footer}[0]){ 02146 ($before, $piece, $after) = &globe::get_tag_chunk( $globe::entire_file, 02147 $globe::m_define{footer}[0], 02148 $globe::m_define{footer}[1], 02149 0, 1); 02150 if ($piece) { 02151 $globe::entire_file = join ("", $before, $after); 02152 } 02153 } 02154 02155 02156 02157 } // remove_voyant_tags 02158 02159 02160 02161 02162 //############################################################################# 02163 /** @fn int 
file_chunk_change
 ** @brief Replaces the information for the given chunk. (Not complete).
 **
 ** When both the start and the end marker of the chunk type are already in
 ** $globe::entire_file, the chunk is handed to globe::replace_tag_chunk
 ** together with $globe::m_info{type}. Otherwise $globe::m_info{type} is
 ** inserted next to the first occurrence of the _where_flag tag.
 **
 ** @param _type_define The key into the m_define and m_info hashes.
 ** @param _criticality specifies whether or not to record error messages
 **                     (passed through to globe::replace_tag_chunk).
 ** @param _where_flag The tag where this is supposed to go. It is used as a
 **                    pattern to find the spot and is written back as
 **                    literal text.
 ** @param _after if 0, set before, otherwise after
 ** @param _insert_tags Passed through to globe::replace_tag_chunk.
 ** @param _in_file Name of file being worked on (used in messages only).
 **
 ** @retval success 1 if the markers or the _where_flag tag were found (even
 **                 when globe::replace_tag_chunk then reported a problem,
 **                 which is recorded in the file_errors array), 0 if
 **                 neither is present.
 **
 ** @lim Uses the $globe::entire_file
 ** @ingroup tp_toc
 **/
// #############################################################################
int file_chunk_change ( ) {
    ($_type_define, $_criticality, $_where_flag, $_after, $_insert_tags, $_in_file) = @_;

    // Both markers are already in the file: replace what lies between them.
    if (( $globe::entire_file =~ /$globe::m_define{$_type_define}[1]/ ) &&
        ( $globe::entire_file =~ /$globe::m_define{$_type_define}[0]/ ) ) {
        ($okay, $globe::entire_file) = &globe::replace_tag_chunk ( $globe::entire_file,
                                                                   $globe::m_define{$_type_define}[0],
                                                                   $globe::m_define{$_type_define}[1],
                                                                   $globe::m_info{$_type_define},
                                                                   $_insert_tags, $_criticality);
        if (!$okay) {
            // Report the file that was passed in, like the warning below.
            push (@file_errors, "$_type_define definition messed up in $_in_file.");
        }
        return (1);
    }

    // No markers yet: figure out where the $_where_flag should go.
    unless ( $globe::entire_file =~ /$_where_flag/ ) {
        push (@file_errors, "WARNING: No $_where_flag defined in $_in_file.");
        // Do only if no head/body tags
        // print "WARNING: No $_where_flag defined.\n";
        return (0);
    }

    // Split at the first occurrence only; the tag itself is consumed by the
    // split and written back next to the new chunk.
    @pre_chunks = split ( /$_where_flag/, $globe::entire_file, 2);
    if ($_after) {
        $globe::entire_file = join ("",
                                    $pre_chunks[0],
                                    $_where_flag,
                                    $globe::m_info{$_type_define},
                                    $pre_chunks[1]);
    } else {
        $globe::entire_file = join ("",
                                    $pre_chunks[0],
                                    $globe::m_info{$_type_define},
                                    $_where_flag,
                                    $pre_chunks[1]);
    }
    return (1);

} // file_chunk_change






//#############################################################################
/** @fn int html_exist
 ** @brief Tests that the URL to a file exists.
 **
 ** @param test_file File to test to see if it exists; it is looked up
 **                  relative to $globe::path.
 **
 ** @return 1 if the file could be opened for reading, 0 otherwise.
 **
 ** Discovered that doxygen's installdox does not update the tree.js files,
 ** which leads to entries in the TOC that 404. Test it and return an
 ** appropriate response.
 **
 ** @ingroup tp_nav
 **/
// #############################################################################
int html_exist ( ) {
    $test_file = $_[0];

    // Three-argument open: the name comes from a URL, so it must never be
    // interpreted as an open mode or a pipe.
    unless (open (TEST, "<", "$globe::path$test_file")) {
        return (0);
    }
    // Only the existence matters; do not keep the handle around.
    close (TEST);
    return (1);

} // html_exist


//#############################################################################
/** @fn int END
 ** @brief Wrap-up code: reports the collected errors and cleans up. In Perl
 ** a routine named END runs as the interpreter exits, not on entry.
 **
 ** @param None.
 **
 ** @return None.
02271 ** 02272 ** @lim None 02273 ** @ingroup tp_nav 02274 **/ 02275 // ############################################################################# 02276 int END ( ) { 02277 &globe::do_errors; 02278 // undef ($_file_list); // "_file_list"; 02279 // undef ($_index_file); // = "_index_list"; 02280 // undef ($in_file); // ""; 02281 // undef ($f_type); // "htm"; 02282 02283 02284 // ############################################################################# 02285 // # Memory clean-up. 02286 // ############################################################################# 02287 if ($no_scope_file > 0){ 02288 &xscope::memory_clean_up(); 02289 &globe::memory_clean_up(); 02290 } 02291 02292 if (@file_errors) { 02293 // print "\n============ Summary of errors =================================b\n"; 02294 for ($i=0; $i<@file_errors; $i++){ 02295 // print "$i = $file_errors[$i]\n"; 02296 } 02297 } 02298 02299 // print "\n============ Finished html_look_integrate.pl =================================\n"; 02300 } // END 02301 02302
|
|
|
Open-Source tools compliments of Voyant Technologies, Inc. and Glenn C. Maxey.
01/13/2003
TP Tools v2-00-0a
# tpt-perl-hcr-02