|
|
|
|
|
#!/usr/local/bin/perl
#
# NOTE: this listing was restored to plain Perl from a Doxygen-filtered
# rendering (the filter had turned '#' comments into '//', 'sub' into
# C++-looking functions, and 'local' into '#define').

################################################################################
## @file
## @brief Strip out all HTML except for href's.
##
## @param  Input source file(s) named on the command line, or STDIN.
## @return Output is file with changes, written to STDOUT.
##
## @ingroup tp_tools tp_dox
##
## @author Glenn C. Maxey
##
## $Id: strip_html.pl,v 1.1 2002/02/28 00:43:01 gmaxe Exp $
##
## 2002 Created by Voyant Technologies, Inc., Westminster, Colorado, USA.
##
## Permission to use, copy, modify, and distribute this software and its
## documentation under the terms of the GNU General Public License is hereby
## granted. No representations are made about the suitability of this software
## for any purpose. It is provided "as is" without express or implied warranty.
## See the GNU General Public License (http://www.gnu.org/copyleft/gpl.html)
## for more details.
##
## Documents produced by this script are derivative works derived from the
## input used in their production; they are not affected by this license.
##
## $Log: strip_html.pl,v $
## Revision 1.1 2002/02/28 00:43:01 gmaxe
## New file for stripping down an HTML file for purposes of a list.
##
## Revision 1.1 2002/02/16 00:59:40 gmaxe
## Added support for script files needed for TOC applet.
##
################################################################################

use strict;
use warnings;

#####
# main program
#####

# Run only when executed as a script, so the subs below can also be loaded
# (require/do) without touching STDIN.
main() unless caller;

## @brief Read all input, strip the HTML, and print the result.
##
## Every line from the files named in @ARGV (or STDIN when there are none)
## is concatenated into one buffer, because tags may span several lines.
##
## @return 1 on completion.
sub main {
    my $in_buffer = join '', <>;

    # The true output.
    # NOTE(review): this print was commented out in the filtered listing,
    # presumably by the Doxygen filter; the header documents the output as
    # the changed file, so it is live here.
    print strip_html($in_buffer);
    return 1;
}

## @brief Remove every HTML tag except opening anchor tags carrying an href.
##
## Kept anchors lose their class= and target= attributes and are folded onto
## a single line.  Closing tags (including </a>) are dropped like any other
## tag.  The text is then tidied: carriage returns are removed, trailing
## whitespace at the end of the buffer is reduced to one newline, blank
## lines are removed, and every line ending is written as CR-LF.
##
## @param  $html  Complete HTML text (undef is treated as empty).
## @return The stripped text.
sub strip_html {
    my ($html) = @_;
    return '' unless defined $html && length $html;

    # Splitting on a capturing pattern yields text and tag contents
    # alternately: even indices are text, odd indices are what was between
    # '<' and the first following '>'.  Limit -1 keeps trailing empty fields.
    my @pieces = split /<([^>]*)>/, $html, -1;

    my $text = '';
    for my $i (0 .. $#pieces) {
        $text .= ($i % 2) ? _keep_anchor($pieces[$i]) : $pieces[$i];
    }

    # A '<' with no closing '>' can only survive in the final text piece.
    # It is kept verbatim rather than reusing data from an earlier tag.
    warn "ERROR: Unmatching < and >\n" if @pieces && $pieces[-1] =~ /</;

    $text =~ s/\r//g;

    # Trailing whitespace at the very end of the buffer becomes one newline.
    $text =~ s/\s+\n\z/\n/;

    # Remove blank (whitespace-only) lines.  The leading \s* is greedy, so a
    # whole run of blank lines is consumed by a single match.
    $text =~ s/\s*\n\s*\n/\n/g;

    # Convert line endings exactly once, after all other clean-up, so no
    # "\r\r\n" sequences can be produced.
    $text =~ s/\n/\r\n/g;

    return $text;
}

## @brief Decide what, if anything, of one tag survives in the output.
##
## @param  $tag  Tag contents without the surrounding '<' and '>'.
## @return "<...>" for an anchor that mentions href, otherwise ''.
sub _keep_anchor {
    my ($tag) = @_;

    return '' unless $tag =~ /\Aa\s/i && $tag =~ /href/i;

    $tag = get_rid_of_tag_params($tag, 'class=');
    $tag = get_rid_of_tag_params($tag, 'target=');

    # Fold a multi-line hyperlink onto one line.  A space is substituted so
    # that "a\nhref" does not collapse into "ahref".
    $tag =~ s/\s*[\r\n]+\s*/ /g;

    return "<$tag>";
}

#######
# get_rid_of_tag_params
#######

## @brief Remove the first occurrence of one attribute from a tag.
##
## The attribute must be preceded by whitespace.  Its value may be
## double-quoted, single-quoted, or unquoted (running up to the next
## whitespace or '>').  Matching is case-insensitive.
##
## @param  $tag    Tag text to edit.
## @param  $param  Attribute name including the '=' (e.g. "class=").  It is
##                 interpolated into the pattern as a regex fragment.
## @return The tag text without that attribute; unchanged if it is absent.
sub get_rid_of_tag_params {
    my ($tag, $param) = @_;

    $tag = '' unless defined $tag;
    return $tag unless defined $param && length $param;

    $tag =~ s/\s+$param(?:"[^"]*"|'[^']*'|[^\s>]*)//i;

    return $tag;
} # get_rid_of_tag_params
|
|
|
Open-Source tools compliments of Voyant Technologies, Inc. and Glenn C. Maxey.
01/13/2003
TP Tools v2-00-0a
# tpt-perl-hcr-02