-
Notifications
You must be signed in to change notification settings - Fork 79
/
Copy pathParseHTML.cs
124 lines (119 loc) · 2.95 KB
/
ParseHTML.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
using System;
namespace SensePost.Wikto
{
/// <summary>
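/// HTML tokenizer built on Parse: Parse() hands back plain characters one at
/// a time and, on reaching a tag, parses its name and attributes instead.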
///
/// This spider is copyright 2003 by Jeff Heaton. However, it is
/// released under a Limited GNU Public License (LGPL). You may
/// use it freely in your own programs. For the latest version visit
/// http://www.jeffheaton.com.
/// ****************************************************************
/// Changes by SensePost (Pty) Ltd. - Ian de Villiers
/// http://www.sensepost.com
///
/// Fixed the somewhat broken monitor methods causing thread issues
/// on VS2005 .Net where threads would not terminate.
///
/// Removed the Done (Monitor) Class. Not entirely necessary and
/// actually causes more problems than it assists with in VS 2005.
/// </summary>
public class ParseHTML:Parse
{
    /// <summary>
    /// Copies the most recently parsed tag into a fresh attribute list.
    /// </summary>
    /// <returns>
    /// A new AttributeList whose Name is the current tag name and which holds
    /// a clone of every attribute currently in List.
    /// </returns>
    public AttributeList GetTag()
    {
        AttributeList tag = new AttributeList();
        tag.Name = m_tag;
        foreach (Attribute source in List)
        {
            tag.Add((Attribute)source.Clone());
        }
        return tag;
    }

    /// <summary>
    /// Rebuilds the textual form of the current tag from the tag name and the
    /// attributes exposed through the indexer.
    /// </summary>
    /// <returns>
    /// A string of the form &lt;name attr=value ...&gt;. An attribute with a
    /// null Value is written as its name only; a non-zero Delim is written on
    /// both sides of the value (or of the name when there is no value).
    /// </returns>
    public String BuildTag()
    {
        // A builder avoids re-copying the whole string for every fragment.
        System.Text.StringBuilder buffer = new System.Text.StringBuilder("<");
        buffer.Append(m_tag);

        // The indexer signals the end of the list by returning null.
        for (int i = 0; ; i++)
        {
            Attribute attribute = this[i];
            if (attribute == null)
            {
                break;
            }

            bool hasDelim = attribute.Delim != 0;
            buffer.Append(" ");
            if (attribute.Value == null)
            {
                // Valueless attribute: the delimiter (if any) wraps the name.
                if (hasDelim) buffer.Append(attribute.Delim);
                buffer.Append(attribute.Name);
                if (hasDelim) buffer.Append(attribute.Delim);
            }
            else
            {
                buffer.Append(attribute.Name);
                buffer.Append("=");
                if (hasDelim) buffer.Append(attribute.Delim);
                buffer.Append(attribute.Value);
                if (hasDelim) buffer.Append(attribute.Delim);
            }
        }

        buffer.Append(">");
        return buffer.ToString();
    }

    /// <summary>
    /// Parses one tag, starting at the character that follows the opening
    /// '&lt;', storing its name in m_tag and its attributes via AddAttribute.
    /// </summary>
    /// <remarks>
    /// A tag starting with "!--" is treated as a comment: everything up to
    /// the closing "--&gt;" (minus carriage returns) becomes the tag name,
    /// followed by "--", and no attributes are parsed.
    /// </remarks>
    protected void ParseTag()
    {
        m_tag = "";
        Clear();

        bool isComment = (GetCurrentChar() == '!')
            && (GetCurrentChar(1) == '-')
            && (GetCurrentChar(2) == '-');
        if (isComment)
        {
            while (!Eof())
            {
                bool atCommentEnd = (GetCurrentChar() == '-')
                    && (GetCurrentChar(1) == '-')
                    && (GetCurrentChar(2) == '>');
                if (atCommentEnd)
                {
                    break;
                }
                if (GetCurrentChar() != '\r')
                {
                    m_tag += GetCurrentChar();
                }
                Advance();
            }
            m_tag += "--";
            // Step over the three characters of the "-->" terminator.
            Advance();
            Advance();
            Advance();
            ParseDelim = (char)0;
            return;
        }

        // The tag name runs up to the first whitespace character or '>'.
        while (!Eof())
        {
            if (IsWhiteSpace(GetCurrentChar()) || (GetCurrentChar() == '>'))
            {
                break;
            }
            m_tag += GetCurrentChar();
            Advance();
        }
        EatWhiteSpace();

        // The Eof() guard stops an unterminated tag at the end of the input
        // from looping forever: once the input is exhausted the current
        // character presumably can never become '>' (confirm against Parse).
        while (!Eof() && GetCurrentChar() != '>')
        {
            ParseName = "";
            ParseValue = "";
            ParseDelim = (char)0;
            ParseAttributeName();
            if (GetCurrentChar() == '>')
            {
                AddAttribute();
                break;
            }
            ParseAttributeValue();
            AddAttribute();
        }

        // Consume the closing '>'.
        Advance();
    }

    /// <summary>
    /// Reads the next item from the input: either one plain character or one
    /// complete tag.
    /// </summary>
    /// <returns>
    /// (char)0 when a tag was parsed (see GetTag and BuildTag), otherwise the
    /// character that was read.
    /// </returns>
    public char Parse()
    {
        if (GetCurrentChar() != '<')
        {
            return AdvanceCurrentChar();
        }

        Advance();
        char next = GetCurrentChar();

        // Explicit ASCII ranges instead of char.ToUpper: the culture-sensitive
        // conversion maps 'i' outside 'A'..'Z' under Turkish cultures, which
        // made tags such as <img> go unrecognised.
        bool isLetter = ((next >= 'A') && (next <= 'Z'))
            || ((next >= 'a') && (next <= 'z'));
        if (isLetter || (next == '!') || (next == '/'))
        {
            ParseTag();
            return (char)0;
        }

        // Not a tag: hand back the '<' that was just consumed. Returning the
        // following character here would silently drop the '<' and, for
        // input like "<<a>", skip over a real tag start.
        return '<';
    }
}
}