diff options
author | Mike Crute <mcrute@gmail.com> | 2015-07-28 21:17:56 -0700 |
---|---|---|
committer | Mike Crute <mcrute@gmail.com> | 2015-07-28 21:17:56 -0700 |
commit | fe1d5707cf5a7db7d41ac6ba129155adcb230d95 (patch) | |
tree | e2a90c2686d56ddf249953ffa7bbcb54e7b26cc1 | |
download | hv_snowreport-master.tar.bz2 hv_snowreport-master.tar.xz hv_snowreport-master.zip |
-rw-r--r-- | .rvmrc | 49 | ||||
-rw-r--r-- | Gemfile | 5 | ||||
-rw-r--r-- | Gemfile.lock | 31 | ||||
-rw-r--r-- | extract.rb | 158 |
4 files changed, 243 insertions, 0 deletions
@@ -0,0 +1,49 @@ | |||
1 | #!/usr/bin/env bash | ||
2 | |||
3 | # This is an RVM Project .rvmrc file, used to automatically load the ruby | ||
4 | # development environment upon cd'ing into the directory | ||
5 | |||
6 | # First we specify our desired <ruby>[@<gemset>], the @gemset name is optional. | ||
7 | environment_id="ruby-1.9.2-p136@hv_snowreport" | ||
8 | |||
9 | # | ||
10 | # First we attempt to load the desired environment directly from the environment | ||
11 | # file. This is very fast and efficient compared to running through the entire | ||
12 | # CLI and selector. If you want feedback on which environment was used then | ||
13 | # insert the word 'use' after --create as this triggers verbose mode. | ||
14 | # | ||
# Fast path: source the environment file directly when it exists and is
# non-empty; otherwise fall back to the RVM CLI.
if [[ -d "${rvm_path:-$HOME/.rvm}/environments" \
  && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]] ; then
  \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"

  # Apply the same default as the checks above, so the hook is still looked up
  # under $HOME/.rvm when rvm_path is unset.
  [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] && . "${rvm_path:-$HOME/.rvm}/hooks/after_use"
else
  # If the environment file has not yet been created, use the RVM CLI to select.
  rvm --create "$environment_id"
fi
24 | |||
25 | # | ||
26 | # If you use an RVM gemset file to install a list of gems (*.gems), you can have | ||
27 | # it be automatically loaded. Uncomment the following and adjust the filename if | ||
28 | # necessary. | ||
29 | # | ||
30 | # filename=".gems" | ||
31 | # if [[ -s "$filename" ]] ; then | ||
32 | # rvm gemset import "$filename" | grep -v already | grep -v listed | grep -v complete | sed '/^$/d' | ||
33 | # fi | ||
34 | |||
35 | # | ||
36 | # If you use bundler and would like to run bundle each time you enter the | ||
37 | # directory, you can uncomment the following code. | ||
38 | # | ||
39 | # # Ensure that Bundler is installed. Install it if it is not. | ||
40 | # if ! command -v bundle >/dev/null; then | ||
41 | # printf "The rubygem 'bundler' is not installed. Installing it now.\n" | ||
42 | # gem install bundler | ||
43 | # fi | ||
44 | # | ||
45 | # # Bundle while reducing excess noise. | ||
46 | # printf "Bundling your gems. This may take a few minutes on a fresh clone.\n" | ||
47 | # bundle | grep -v '^Using ' | grep -v ' is complete' | sed '/^$/d' | ||
48 | # | ||
49 | |||
@@ -0,0 +1,5 @@ | |||
# Name the gem source explicitly and fetch over HTTPS; Bundler deprecated the
# `:rubygems` shorthand.
source "https://rubygems.org"

# Required by extract.rb.
gem "httparty"
gem "nokogiri"

# Debugger; extract.rb does not require it.
gem 'ruby-debug19', :require => 'ruby-debug'
diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..1e04b26 --- /dev/null +++ b/Gemfile.lock | |||
@@ -0,0 +1,31 @@ | |||
1 | GEM | ||
2 | remote: http://rubygems.org/ | ||
3 | specs: | ||
4 | archive-tar-minitar (0.5.2) | ||
5 | columnize (0.3.6) | ||
6 | httparty (0.8.1) | ||
7 | multi_json | ||
8 | multi_xml | ||
9 | linecache19 (0.5.12) | ||
10 | ruby_core_source (>= 0.1.4) | ||
11 | multi_json (1.0.4) | ||
12 | multi_xml (0.4.1) | ||
13 | nokogiri (1.5.0) | ||
14 | ruby-debug-base19 (0.11.25) | ||
15 | columnize (>= 0.3.1) | ||
16 | linecache19 (>= 0.5.11) | ||
17 | ruby_core_source (>= 0.1.4) | ||
18 | ruby-debug19 (0.11.6) | ||
19 | columnize (>= 0.3.1) | ||
20 | linecache19 (>= 0.5.11) | ||
21 | ruby-debug-base19 (>= 0.11.19) | ||
22 | ruby_core_source (0.1.5) | ||
23 | archive-tar-minitar (>= 0.5.2) | ||
24 | |||
25 | PLATFORMS | ||
26 | ruby | ||
27 | |||
28 | DEPENDENCIES | ||
29 | httparty | ||
30 | nokogiri | ||
31 | ruby-debug19 | ||
diff --git a/extract.rb b/extract.rb new file mode 100644 index 0000000..10daf66 --- /dev/null +++ b/extract.rb | |||
@@ -0,0 +1,158 @@ | |||
1 | require 'httparty' | ||
2 | require 'nokogiri' | ||
3 | require 'date' | ||
4 | |||
# HTTParty parser whose +html+ handler parses the response body with
# Nokogiri::HTML.
# NOTE(review): presumably HTTParty calls +html+ for responses it classifies as
# HTML -- confirm against the HTTParty::Parser documentation.
class NokogiriParser < HTTParty::Parser
  # Returns the response body parsed by Nokogiri::HTML.
  def html
    Nokogiri::HTML(body)
  end
end
10 | |||
# HTTParty client for the Holiday Valley website. Responses are handled by
# NokogiriParser, and request paths are resolved against the base URI below.
class HVPage
  include HTTParty
  parser NokogiriParser
  base_uri "http://www.holidayvalley.com"
end
16 | |||
# Fetch the snow report page once when the script loads; every extraction
# further down queries this single response.
page = HVPage.get("/HolidayValley/snowreport.aspx")
18 | |||
19 | |||
# Raised when scraped data does not have the expected shape, e.g. a table row
# with the wrong number of cells.
#
# Inherits from StandardError rather than Exception, so a bare `rescue` can
# catch it. Direct Exception subclasses are only caught when named explicitly.
class InvalidData < StandardError
end
22 | |||
# Stateless converters that turn text scraped from the snow report page into
# Ruby values. All methods are called on the module itself.
module Transforms
  # Splits a "low-high" string on "-" and converts each part with to_i.
  #
  # @param value [String] e.g. "12-24"
  # @return [Array<Integer>] e.g. [12, 24]
  def self.parse_range(value)
    value.split("-").map(&:to_i)
  end

  # @param value [String]
  # @return [Integer] value.to_i (0 when the text has no leading digits)
  def self.to_int(value)
    value.to_i
  end

  # @param value [String]
  # @return [Boolean] true when the text is "yes", ignoring case and
  #   surrounding whitespace
  def self.is_true(value)
    keyword?(value, "yes")
  end

  # Reformats an "mm/dd/yyyy" date string.
  #
  # @param value [String] e.g. "01/15/2012"
  # @return [String] the date rendered as "%a, %d %b %Y %H:%M:%S %Z"
  # @raise [ArgumentError] when the text does not match "%m/%d/%Y"
  def self.parse_date(value)
    Date.strptime(value, "%m/%d/%Y").strftime("%a, %d %b %Y %H:%M:%S %Z")
  end

  # Reformats any timestamp DateTime.parse understands.
  #
  # @param value [String]
  # @return [String] the timestamp rendered as "%a, %d %b %Y %H:%M:%S %Z"
  def self.parse_date_time(value)
    DateTime.parse(value).strftime("%a, %d %b %Y %H:%M:%S %Z")
  end

  # @param value [String]
  # @return [Boolean] true when the text is "open", ignoring case and
  #   surrounding whitespace
  def self.parse_open(value)
    keyword?(value, "open")
  end

  # @param value [String]
  # @return [Boolean] true when the text is "groomed", ignoring case and
  #   surrounding whitespace
  def self.parse_groomed(value)
    keyword?(value, "groomed")
  end

  # @param value [String]
  # @return [Boolean] true when the text is "new", ignoring case and
  #   surrounding whitespace
  def self.parse_snowmaking(value)
    keyword?(value, "new")
  end

  # Maps a difficulty icon URL to its label. Patterns are case-sensitive and
  # tried in the order listed; the first match wins.
  #
  # @param url [String] image URL or path
  # @return [String, nil] the label, or nil when no known icon name occurs
  def self.parse_difficulty(url)
    case url
    when /green\.gif/ then "Easier"
    when /blue\.gif/ then "Intermediate"
    when /black\.gif/ then "Advanced"
    when /doubleBlack\.gif/ then "Expert"
    when /freestyle\.gif/ then "Freestyle"
    end
  end

  # Splits a lift label of the form "Name (123')" into its name and the
  # number inside the parentheses.
  #
  # @param value [String] e.g. "Mardi Gras (700')"
  # @return [Array(String, Integer)] e.g. ["Mardi Gras", 700]
  # @raise [ArgumentError] when the label does not contain "(...')"
  def self.parse_lift_name(value)
    data = /([^(]+)\(([^']*)'\)/.match(value)
    # Fail with a descriptive error instead of NoMethodError on nil.
    raise ArgumentError, "Unrecognised lift name: #{value.inspect}" if data.nil?

    [data[1].strip, data[2].to_i]
  end

  # Shared comparison for the keyword predicates above. Scraped cell text
  # often carries padding or newlines, so strip before comparing.
  def self.keyword?(value, expected)
    value.strip.downcase == expected
  end
  private_class_method :keyword?
end
75 | |||
# Extraction table for the mountain report. Each entry maps an output field to
# [xpath, finder, transform], the three arguments get_transformed_value takes
# after the page:
#   xpath     - query selecting the text nodes of a cell in the first table
#   finder    - Symbol sent to the query result (:first / :last), or an
#               Integer index into it
#   transform - callable that converts the selected node's text
#
# Frozen so the lookup table cannot be modified after load.
MTN_REPORT_XPATH = {
  last_updated:       ["//table[1]/tr[1]/td[2]/text()",  :last,  Transforms.method(:parse_date_time)],
  report_for:         ["//table[1]/tr[2]/td[2]/text()",  1,      Transforms.method(:parse_date)],
  snowfall_24hr:      ["//table[1]/tr[3]/td[2]/text()",  :first, Transforms.method(:to_int)],
  snowfall_48hr:      ["//table[1]/tr[4]/td[2]/text()",  :first, Transforms.method(:to_int)],
  snowfall_7day:      ["//table[1]/tr[5]/td[2]/text()",  :first, Transforms.method(:to_int)],
  snowfall_season:    ["//table[1]/tr[6]/td[2]/text()",  :first, Transforms.method(:to_int)],
  base_depth:         ["//table[1]/tr[7]/td[2]/text()",  :first, Transforms.method(:parse_range)],
  snowmaking_current: ["//table[1]/tr[8]/td[2]/text()",  :first, Transforms.method(:is_true)],
  snowmaking_24hours: ["//table[1]/tr[9]/td[2]/text()",  :first, Transforms.method(:is_true)],
  primary_surface:    ["//table[1]/tr[10]/td[2]/text()", :first, String.method(:new)],
  secondary_surface:  ["//table[1]/tr[11]/td[2]/text()", :first, String.method(:new)],
}.freeze
89 | |||
# Selects one node from the page and returns its converted text.
#
# @param page [#xpath] document to query
# @param xpath [String] query passed to page.xpath
# @param finder [Symbol, Object] a Symbol is called as a method on the query
#   result (e.g. :first, :last); anything else is used as an index via []
# @param transform [#call] receives the selected node's text
# @return [Object] whatever transform returns
# @raise [InvalidData] when the query result yields no node for finder
def get_transformed_value(page, xpath, finder, transform)
  matches = page.xpath(xpath)
  # public_send keeps a Symbol finder from reaching private methods.
  node = finder.is_a?(Symbol) ? matches.public_send(finder) : matches[finder]
  # A missing node would otherwise surface as NoMethodError on nil below.
  raise InvalidData, "No node found for #{xpath} (finder #{finder.inspect})" if node.nil?

  transform.call(node.text)
end
95 | |||
# Reads one row of the second table on the page and describes the trail in it.
#
# @param page [#xpath] document to query
# @param row [Integer] row number interpolated into the XPath queries
# @return [Hash] :trail_name, :difficulty, :open_day, :open_night, :groomed
#   and :snow_making for that row
# @raise [InvalidData] when the row does not have exactly six child elements
def get_trail_info(page, row)
  row_path = "//table[2]/tr[#{row}]"
  cells = page.xpath("#{row_path}/child::*").map(&:text)
  icon_src = page.xpath("#{row_path}/td/img/@src").to_s

  raise InvalidData.new("Invalid data in row") unless cells.size == 6

  # The second cell's text is unused; difficulty comes from the icon URL.
  name, _icon_cell, day, night, grooming, snowmaking = cells

  {
    trail_name:  name,
    difficulty:  Transforms.parse_difficulty(icon_src),
    open_day:    Transforms.parse_open(day),
    open_night:  Transforms.parse_open(night),
    groomed:     Transforms.parse_groomed(grooming),
    snow_making: Transforms.parse_snowmaking(snowmaking),
  }
end
113 | |||
# Reads one row of the third table on the page and describes the lift in it.
#
# @param page [#xpath] document to query
# @param row [Integer] row number interpolated into the XPath query
# @return [Hash] :lift_name, :vertical, :open_day, :open_night and :notes
#   for that row
# @raise [InvalidData] when the row does not have exactly four child elements
def get_lift_info(page, row)
  cells = page.xpath("//table[3]/tr[#{row}]/child::*").map(&:text)

  raise InvalidData.new("Invalid data in row") unless cells.size == 4

  label, day, night, remarks = cells
  # The first cell holds both the lift name and the number in parentheses.
  name, feet = Transforms.parse_lift_name(label)

  {
    lift_name:  name,
    vertical:   feet,
    open_day:   Transforms.parse_open(day),
    open_night: Transforms.parse_open(night),
    notes:      remarks,
  }
end
131 | |||
# Print one hash per trail, walking the table rows upwards from row 3 until
# get_trail_info rejects a row with InvalidData.
# NOTE(review): presumably rows 1-2 are headers -- confirm against the page.
(3..Float::INFINITY).each do |row|
  begin
    puts get_trail_info(page, row)
  rescue InvalidData
    break
  end
end

# Build the mountain report by running every MTN_REPORT_XPATH entry against
# the page, then print it as a single hash.
report = MTN_REPORT_XPATH.each_with_object({}) do |(field, spec), acc|
  acc[field] = get_transformed_value(page, *spec)
end

puts report

# Print one hash per lift, from row 2 until get_lift_info rejects a row with
# InvalidData.
(2..Float::INFINITY).each do |row|
  begin
    puts get_lift_info(page, row)
  rescue InvalidData
    break
  end
end