-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawlingxkcd.py
65 lines (65 loc) · 18.2 KB
/
crawlingxkcd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN" "http://www.w3.org/TR/REC-html40/strict.dtd">
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="qrichtext" content="1" /><style type="text/css">
p, li { white-space: pre-wrap; }
</style></head><body style=" font-family:'Inconsolata'; font-size:14pt; font-weight:400; font-style:normal;">
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">Welcome to Canopy's interactive data-analysis environment!</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"> with pylab-backend set to: qt</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">Type '?' for more information.</p>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#000080;">In [</span><span style=" font-weight:600; color:#000080;">1</span><span style=" color:#000080;">]:</span> %run "/tmp/tmpgQYQKM.py"</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">('http://udacity.com', 37)</p>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#000080;">In [</span><span style=" font-weight:600; color:#000080;">2</span><span style=" color:#000080;">]:</span> %run "/tmp/tmpbNdx4f.py"</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#4682b4;"> File </span><span style=" color:#006400;">"/tmp/tmpbNdx4f.py"</span><span style=" color:#4682b4;">, line </span><span style=" color:#006400;">16</span><br /><span style=" color:#a52a2a;"> else:</span><br /><span style=" color:#000000;"> ^</span><br /><span style=" color:#8b0000;">SyntaxError:</span> invalid syntax<br /> </p>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#000080;">In [</span><span style=" font-weight:600; color:#000080;">3</span><span style=" color:#000080;">]:</span> %run "/tmp/tmp1eUgle.py"</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#4682b4;"> File </span><span style=" color:#006400;">"/tmp/tmp1eUgle.py"</span><span style=" color:#4682b4;">, line </span><span style=" color:#006400;">18</span><br /><span style=" color:#a52a2a;"> print print_all_links('this <a href="test1">link 1</a> is <a href="test2">link</span><br /><span style=" color:#000000;"> ^</span><br /><span style=" color:#8b0000;">SyntaxError:</span> EOL while scanning string literal<br /> </p>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#000080;">In [</span><span style=" font-weight:600; color:#000080;">4</span><span style=" color:#000080;">]:</span> %run "/tmp/tmpJTK8f0.py"</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#8b0000;">---------------------------------------------------------------------------</span><br /><span style=" color:#8b0000;">NameError</span> Traceback (most recent call last)<br /><a href="file:///home/lenovo/Downloads/canopy/appdata/canopy-1.3.0.1715.rh5-x86_64/lib/python2.7/site-packages/IPython/utils/py3compat.py#line=204"><span style=" text-decoration: underline; color:#0000ff;">/home/lenovo/Downloads/canopy/appdata/canopy-1.3.0.1715.rh5-x86_64/lib/python2.7/site-packages/IPython/utils/py3compat.pyc</span></a> in <span style=" color:#4682b4;">execfile</span><span style=" color:#00008b;">(fname, *where)</span><br /><span style=" color:#006400;"> 202</span> <span style=" color:#006400;">else</span><span style=" color:#a52a2a;">:</span><br /><span style=" color:#006400;"> 203</span> filename <span style=" color:#a52a2a;">=</span> fname<br /><span style=" color:#006400;">--> 204</span><span style=" color:#a52a2a;"> </span>__builtin__<span style=" color:#a52a2a;">.</span>execfile<span style=" color:#a52a2a;">(</span>filename<span style=" color:#a52a2a;">,</span> <span style=" color:#a52a2a;">*</span>where<span style=" color:#a52a2a;">)</span><br /><br /><a href="file:///tmp/tmpJTK8f0.py#line=18"><span style=" text-decoration: underline; color:#0000ff;">/tmp/tmpJTK8f0.py</span></a> in <span style=" color:#4682b4;"><module></span><span style=" color:#00008b;">()</span><br /><span style=" color:#006400;"> 16</span> <span style=" color:#006400;">else</span><span style=" color:#a52a2a;">:</span><br /><span style=" color:#006400;"> 17</span> <span style=" color:#006400;">break</span><br /><span style=" color:#006400;">---> 18</span><span style=" color:#a52a2a;"> </span><span style=" color:#006400;">print</span> print_all_links<span style=" color:#a52a2a;">(</span><span style=" color:#00008b;">'this <a href="test1">link 1</a> is <a href="test2">link2</a>a <a href="test3">link 3</a>'</span><span style=" color:#a52a2a;">)</span><br /><span style=" color:#006400;"> 19</span> <br /><span style=" color:#006400;"> 20</span> <br /><br /><a href="file:///tmp/tmpJTK8f0.py#line=12"><span style=" text-decoration: underline; color:#0000ff;">/tmp/tmpJTK8f0.py</span></a> in <span style=" color:#4682b4;">print_all_links</span><span style=" color:#00008b;">(page)</span><br /><span style=" color:#006400;"> 10</span> <span style=" color:#006400;">def</span> print_all_links<span style=" color:#a52a2a;">(</span>page<span style=" color:#a52a2a;">):</span><br /><span style=" color:#006400;"> 11</span> <span style=" color:#006400;">while</span> True<span style=" color:#a52a2a;">:</span><br /><span style=" color:#006400;">---> 12</span><span style=" color:#a52a2a;"> </span>url<span style=" color:#a52a2a;">,</span>endpos <span style=" color:#a52a2a;">=</span> get_nexttarget<span style=" color:#a52a2a;">(</span>page<span style=" color:#a52a2a;">)</span><br /><span style=" color:#006400;"> 13</span> <span style=" color:#006400;">if</span> url<span style=" color:#a52a2a;">:</span><br /><span style=" color:#006400;"> 14</span> <span style=" color:#006400;">print</span> url<br /><br /><span style=" color:#8b0000;">NameError</span>: global name 'get_nexttarget' is not defined </p>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#000080;">In [</span><span style=" font-weight:600; color:#000080;">5</span><span style=" color:#000080;">]:</span> %run "/tmp/tmp1alZK3.py"</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">test1</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">test2</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">test3</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">None</p>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#000080;">In [</span><span style=" font-weight:600; color:#000080;">6</span><span style=" color:#000080;">]:</span> %run "/var/www/python output/python/getting_links.py"</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#8b0000;">---------------------------------------------------------------------------</span><br /><span style=" color:#8b0000;">NameError</span> Traceback (most recent call last)<br /><a href="file:///home/lenovo/Downloads/canopy/appdata/canopy-1.3.0.1715.rh5-x86_64/lib/python2.7/site-packages/IPython/utils/py3compat.py#line=204"><span style=" text-decoration: underline; color:#0000ff;">/home/lenovo/Downloads/canopy/appdata/canopy-1.3.0.1715.rh5-x86_64/lib/python2.7/site-packages/IPython/utils/py3compat.pyc</span></a> in <span style=" color:#4682b4;">execfile</span><span style=" color:#00008b;">(fname, *where)</span><br /><span style=" color:#006400;"> 202</span> <span style=" color:#006400;">else</span><span style=" color:#a52a2a;">:</span><br /><span style=" color:#006400;"> 203</span> filename <span style=" color:#a52a2a;">=</span> fname<br /><span style=" color:#006400;">--> 204</span><span style=" color:#a52a2a;"> </span>__builtin__<span style=" color:#a52a2a;">.</span>execfile<span style=" color:#a52a2a;">(</span>filename<span style=" color:#a52a2a;">,</span> <span style=" color:#a52a2a;">*</span>where<span style=" color:#a52a2a;">)</span><br /><br /><a href="file:///var/www/python output/python/getting_links.py#line=19"><span style=" text-decoration: underline; color:#0000ff;">/var/www/python output/python/getting_links.py</span></a> in <span style=" color:#4682b4;"><module></span><span style=" color:#00008b;">()</span><br /><span style=" color:#006400;"> 17</span> <span style=" color:#006400;">break</span><br /><span style=" color:#006400;"> 18</span> <span style=" color:#8b0000;">#print print_all_links('this <a href="test1">link 1</a> is <a href="test2">link2</a>a <a href="test3">link 3</a>')</span><br /><span style=" color:#006400;">---> 19</span><span style=" color:#a52a2a;"> </span><span style=" color:#006400;">print</span> print_all_links<span style=" color:#a52a2a;">(</span>get_page<span style=" color:#a52a2a;">(</span><span style=" color:#00008b;">'http://xkcd.com/353'</span><span style=" color:#a52a2a;">))</span><br /><span style=" color:#006400;"> 20</span> <br /><span style=" color:#006400;"> 21</span> <span style=" color:#8b0000;">#print get_next_target('this is a <a href="http://udacity.com">link!</a>')</span><br /><br /><span style=" color:#8b0000;">NameError</span>: name 'get_page' is not defined </p>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#000080;">In [</span><span style=" font-weight:600; color:#000080;">7</span><span style=" color:#000080;">]:</span> %run "/var/www/python output/python/getting_links.py"</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#4682b4;"> File </span><span style=" color:#006400;">"/var/www/python output/python/getting_links.py"</span><span style=" color:#4682b4;">, line </span><span style=" color:#006400;">20</span><br /><span style=" color:#a52a2a;"> </span><br /><span style=" color:#000000;"> ^</span><br /><span style=" color:#8b0000;">SyntaxError:</span> invalid syntax<br /> </p>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#000080;">In [</span><span style=" font-weight:600; color:#000080;">8</span><span style=" color:#000080;">]:</span> %run "/var/www/python output/python/getting_links.py"</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#4682b4;"> File </span><span style=" color:#006400;">"/var/www/python output/python/getting_links.py"</span><span style=" color:#4682b4;">, line </span><span style=" color:#006400;">19</span><br /><span style=" color:#a52a2a;"> print print_all_links(get_page('http://xkcd.com/353'))</span><br /><span style=" color:#000000;"> <br />^</span><br /><span style=" color:#8b0000;">SyntaxError:</span> EOL while scanning string literal<br /> </p>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#000080;">In [</span><span style=" font-weight:600; color:#000080;">9</span><span style=" color:#000080;">]:</span> %run "/var/www/python output/python/getting_links.py"</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">/archive</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://what-if.xkcd.com</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://blag.xkcd.com</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://store.xkcd.com/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://blog.xkcd.com/2014/03/12/what-if-i-wrote-a-book/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">/1/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://c.xkcd.com/random/comic/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">/1/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://c.xkcd.com/random/comic/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">/rss.xml</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">/atom.xml</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://threewordphrase.com/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://oglaf.com/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://www.smbc-comics.com/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://www.qwantz.com</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://www.asofterworld.com</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://buttersafe.com/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://pbfcomics.com/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://questionablecontent.net/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://www.buttercupfestival.com/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">http://creativecommons.org/licenses/by-nc/2.5/</p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">None</p>
<p style="-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><br /></p>
<p style=" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#000080;">In [</span><span style=" font-weight:600; color:#000080;">10</span><span style=" color:#000080;">]:</span> </p></body></html>