-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbm2html.pl
253 lines (195 loc) · 5.86 KB
/
bm2html.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
#!/usr/bin/perl -s
#
# File:
# bm2html.pl
#
# Version:
# 1.0 - June 15, 2000
# 2.0 - August 05, 2000
# 2.1 - August 24, 2000
# 3.0 - November 03, 2001 (never released)
# 3.1 - March 23, 2002
# 3.2 - September 2003
#
# Author:
# Herb Wolfe, Jr.
# hwolfe@inetnebr.com
# http://incolor.inetnebr.com/hwolfe/
# http://incolor.inetnebr.com/hwolfe/computer/mysoftware/bm2html
#
# Description:
# This program strips Netscape's extra tags from a bookmark file and
# creates a links.html file from the output. It also provides options
# to create a yahoo-like group of pages, or a main links.html file
# and links to separate files for each top level folder.
#
# It is best run in a temp directory, so the files can be validated
# and/or formatted before copying to the actual web directory. It does
# produce valid HTML, according to both weblint and tidy. However, it
# does not correct any errors already present, notably unescaped "&'s".
#
# Also there is currently no ability to overwrite files.
#
# See also the accompanying readme.txt
#
# Options:
# -y: Create a yahoo-like directory
# -o: Create one level of files
# -x: Chop a 2nd trailing character, if using on dos/windows file
# -css: Use an external stylesheet
#
# Initialize default values.
# These are deliberately package globals (no "my"): the subroutines below
# read them directly, and the option flags $y, $o, $x and $css are set as
# package globals by perl's -s switch on the #! line.
$scriptname  = "bm2html.pl";
$username    = "Herb Wolfe";
$headerfile  = "header.html";
$footerfile  = "footer.html";
$defaultfile = "bookmark.htm";
$outfile     = "links";
$ext         = ".shtml";
$bgc         = "white";
$textc       = "black";
$linkc       = "blue";
$vlinkc      = "maroon";
$level       = 0;

# Change to name of your style sheet
#$cssfile ="~hwolfe/mystyle.css";
$cssfile = $css;

# Check command line parameters
$y && $o && die ("-y and -o options can not be used together.\n");

# If file not specified, use $defaultfile
$infile = $ARGV[0] || $defaultfile;
die ("$infile not found.\n") unless ( -e $infile );

# Refuse to clobber an existing top-level output file.
die ("$outfile$ext already exists\n") if ( -e $outfile.$ext );

# Get the footer: either the user's footer file, or a minimal built-in one.
if ( -e $footerfile ) {
    # Fail loudly instead of silently producing pages with no footer.
    open (FTR, '<', $footerfile) ||
        die ("Error opening $footerfile: $!\n");
    @ftr = <FTR>;
    close (FTR);
} else {
    @ftr = ("</body>\n", "</html>\n");
}

# Three-argument open so the file name is never parsed as an open mode.
open (IN, '<', $infile) || die ( "Error opening $infile: $!");
&ProcessBookMarks("", $outfile, $level);
close (IN);
#-------------------------------------------------
# ProcessBookMarks(currdir, oldtitle, level)
#
# Reads bookmark lines from the global IN handle and appends the cleaned-up
# HTML to "$currdir$oldtitle$ext" through the global CURROUT handle.
#
#   currdir  - directory prefix (with trailing "/", or "") of the output file
#   oldtitle - base name of the output file, without extension
#   level    - current <DL> nesting depth
#
# With -y (every folder), or with -o while at level 1, a folder heading
# starts a new output file and this routine calls itself to fill it; the
# nested call returns when that folder's closing </DL> is read.  Because
# CURROUT is one shared global handle, the current file is closed before
# recursing and reopened in append mode afterwards.
#
# Dies if an output file or directory cannot be created.
sub ProcessBookMarks {
    my ( $currdir, $oldtitle, $level ) = @_;
    my $newtitle = "";
    my $newdir;
    my $currfile = "$currdir$oldtitle$ext";

    open (CURROUT, '>>', $currfile) ||
        die ("Error creating output file $currdir$oldtitle$ext\n");

    LINE:
    while ( <IN> ) {
        # Skip the DOCTYPE, and header, since we print them elsewhere
        # Skip blank lines and lines with just <DD>
        next LINE if ( ( /DOCTYPE/ ) || ( /<H1>/ ) || ( /^$/ ) ||
                       ( /^ *<DD> *$/ ) );

        # Chop the trailing newline character(s): two with -x (dos/windows
        # line endings), otherwise one.
        if ( $x ) {
            substr($_, -2) = '';
        } else {
            chop;
        }

        # Remove comments, including ones spanning several lines.
        if ( /<!--/ ) {
            until ( /-->/ ) {
                $_ = <IN>;
                # An unterminated comment at end of file must not spin
                # forever re-reading EOF.
                last unless defined $_;
            }
            next LINE;
        }

        # Print the title out
        if ( /TITLE/ ) {
            my ($title) = />([^<]+)</;
            &PrintHeader(\*CURROUT, $title);
            next LINE;
        }

        # Strip netscape's tags
        s/(A HREF="[^"]+")[^>]*/$1/;

        # Convert the Definition Term tags to List Entry tags for better HTML
        s/<DT>/<LI>/;

        # Strip <DD>, since we're converting to an unordered list
        s/<DD>//;

        if ( /<H3/ ) {
            # Strip the netscape junk, and convert to <BIG> to fit in lists
            # properly
            s/<H3[^>]+>/<H3>/;
            s/H3>/BIG>/g;

            # The folder name doubles as the file/directory name.
            ($newtitle) = />([^<]+)</;

            # Replace all spaces and / with a single _
            $newtitle =~ tr# /#_#s;

            # If yahoo option, or one-level option, and at the first level
            if ( ( $y ) || ( $o && ( $level == 1 ) ) ) {
                # Create a file, and new directory if necessary,
                # for "folders" and a link to the new file
                if ( $y ) {
                    $newdir = "$currdir$newtitle/";
                    printf CURROUT ("\t<LI><A HREF=\"%s/%s%s\">%s</a>\n",
                        $newtitle, $newtitle, $ext, $newtitle);
                } else {
                    $newdir = $currdir;
                    printf CURROUT ("\t<LI><A HREF=\"%s%s%s\">%s</a>\n",
                        $newdir, $newtitle, $ext, $newtitle);
                }

                # The nested call reuses the CURROUT handle, so release it.
                close CURROUT;

                if ( $y ) {
                    # 0755 rather than 0766: a directory needs the execute
                    # (search) bit for group/other to be traversable.
                    mkdir ($newdir, 0755) ||
                        die ("Error creating directory $newdir\n");
                }

                # Start the folder's file with a header, then let the nested
                # call append the folder's contents to it.
                open (NEWFILE, '>', "$newdir$newtitle$ext") ||
                    die ("Error creating file $newdir$newtitle$ext\n");
                &PrintHeader(\*NEWFILE, $newtitle);
                close NEWFILE;

                &ProcessBookMarks($newdir, $newtitle, $level);

                # Resume appending to this level's file; without the check a
                # failure here would silently discard the rest of the output.
                open (CURROUT, '>>', $currfile) ||
                    die ("Error reopening output file $currfile: $!\n");
            } else {
                print CURROUT "$_\n";
            }
        } else {
            # Increment the level count because we have a new one
            # Also convert the Definition List to an Unordered List
            if ( /<DL>/ ) {
                $level++;
                s/( *)<DL><p>/$1<UL>/;
            }

            # Decrement the level count when the end is reached
            # Stop if the top level is reached, or if doing yahoo-style
            if ( m#</DL># ) {
                $level--;
                s#</DL><p>#</UL>#;
                if ( $y || ( $o && ($level <= 1) ) || ( $level == 0 ) ) {
                    print CURROUT "$_\n@ftr";
                    close CURROUT;
                    last LINE;
                }
            }
            print CURROUT "$_\n";
        }
    }
}
#-------------------------------------------------
# PrintHeader(OFILE, title)
#
# Writes the opening part of an HTML page to OFILE.
#
#   OFILE - output filehandle (a handle reference, or the name of a global
#           handle such as "CURROUT")
#   title - page title; used as-is for the <H1> and, with " - $username"
#           appended, for the <TITLE>
#
# If $headerfile exists its contents are emitted first; otherwise a DOCTYPE
# and <HTML> tag are printed (HTML 4.01 Transitional when $css is set,
# HTML 3.2 otherwise).  The <HEAD>/<BODY>/<H1> section follows in either
# case: with $css it links the stylesheet named by $cssfile, without it the
# colours come from $bgc, $textc, $linkc and $vlinkc.
#
# Dies if $headerfile exists but cannot be opened.
sub PrintHeader {
    my ( $OFILE, $title ) = @_;

    if ( -e $headerfile ) {
        open (my $hdr, '<', $headerfile) ||
            die ("Error opening $headerfile: $!\n");
        print {$OFILE} (<$hdr>);
        close ($hdr);
    } elsif ($css) {
        print {$OFILE} ("<!DOCTYPE HTML PUBLIC ".
            "\"-//W3C//DTD HTML 4.01 Transitional//EN\">\n<HTML>\n");
    } else {
        print {$OFILE} ("<!DOCTYPE HTML PUBLIC ".
            "\"-//W3C//DTD HTML 3.2//EN\">\n<HTML>\n");
    }

    # Add username to title
    my $mytitle = "$title - $username";

    # Print the title
    if ($css) {
        # The stylesheet name is passed as a printf argument rather than
        # interpolated into the format, so a "%" in it is printed literally.
        printf {$OFILE} ("<!-- This file generated by %s -->\n".
            "<HEAD>\n<TITLE>%s</TITLE>\n".
            "<meta http-equiv=\"Content-Type\" ".
            "content=\"text/html; charset=us-ascii\">\n".
            "<LINK REL=\"STYLESHEET\" HREF=\"%s\" TYPE=\"TEXT/CSS\">\n".
            "</HEAD>\n<BODY>\n<H1>%s</H1>\n",
            $scriptname, $mytitle, $cssfile, $title);
    } else {
        # The quotes around CENTER must be escaped inside this string.
        printf {$OFILE} ("<!-- This file generated by %s -->\n".
            "<HEAD><TITLE>%s</TITLE></HEAD>\n".
            "<BODY BGCOLOR=%s TEXT=%s LINK=%s VLINK=%s>\n".
            "<H1 ALIGN=\"CENTER\">%s</H1>\n",
            $scriptname, $mytitle, $bgc, $textc, $linkc, $vlinkc, $title);
    }
}