-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse.rb
36 lines (28 loc) · 843 Bytes
/
parse.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
require 'csv'
require 'date'
require 'open-uri'
require 'nokogiri'
# Walks every calendar day from 2014-10-08 through 2015-06-15, downloads the
# hockey-reference.com box-score index page for that day, and prints one CSV
# row to stdout for each `#stats tbody tr` table row found on the page.
#
# CSV columns, in order:
#   1. running row number across all days (starts at 1)
#   2. absolute URL built from the href of the first link in cell 1
#   3. text of the link(s) in cell 1
#   4. text of the link(s) in cell 2
#   5. text of the link(s) in cell 4
#   6. text of cell 3
#   7. text of cell 5
# NOTE(review): cells presumably hold date, visitor, goals, home, goals —
# confirm against the live page layout.
#
# Network or HTTP errors (e.g. OpenURI::HTTPError) are not rescued and abort
# the script.
base_url = 'http://www.hockey-reference.com'

# %-m and %-d drop zero padding, so paths look like 2014/10/8.
dates = (Date.parse('2014-10-08')..Date.parse('2015-06-15')).map { |date| date.strftime('%Y/%-m/%-d') }
row_number = 0

dates.each do |date|
  # Kernel#open stopped accepting URLs in Ruby 3.0; URI.open is the open-uri
  # entry point. The block form also closes the response stream after reading.
  source = URI.open("#{base_url}/boxscores/#{date}", &:read)
  rows = Nokogiri::HTML(source).css('#stats tbody tr')
  # Days without any table rows produce no output at all.
  next if rows.empty?

  csv_string = CSV.generate do |csv|
    rows.each do |row|
      game_links = row.css('td:first-child a')
      # A row without a link in its first cell gets an empty URL field instead
      # of raising TypeError on String + nil.
      first_link = game_links.first
      href = first_link && first_link['href']
      game_url = href && "#{base_url}#{href}"

      csv << [
        (row_number += 1),
        game_url,
        game_links.text,
        row.css('td:nth-child(2) a').text,
        row.css('td:nth-child(4) a').text,
        row.css('td:nth-child(3)').text,
        row.css('td:nth-child(5)').text
      ]
    end
  end
  puts csv_string
end