stritch_wolves_college_stats/pull_box_score.py

48 lines
1.4 KiB
Python
Executable file

#!/usr/bin/env python2
from bs4 import BeautifulSoup
import requests
# NOTE: The first three seasons (2010, 2011, 2012) are retrieved differently from the 2013-2017 seasons.
class Old_Stats:
    """Retrieve the men's volleyball statistics data for 2010, 2011 and 2012.

    Covers the MAMVIC NAIA Cardinal Stritch team pages hosted under
    ``static.stritchwolves.com/custompages/MVB/<year>``.
    """

    # Inclusive range of seasons handled by this class.
    START_YEAR = 2010
    END_YEAR = 2012

    def years(self):
        """Return the list of seasons (as ints) that have stat data."""
        return list(range(self.START_YEAR, self.END_YEAR + 1))

    def base_urls(self):
        """Return one scheme-less team-stat page URL per season.

        Side effects (kept for backward compatibility): sets
        ``self.sport_url`` to the team-stat page suffix and ``self.base_url``
        to the base URL of the last season processed.
        """
        self.sport_url = '/teamstat.htm?path=mvball'
        urls = []
        for year in self.years():
            self.base_url = 'static.stritchwolves.com/custompages/MVB/' + str(year)
            urls.append(self.base_url + self.sport_url)
        return urls

    def get_box_score_links(self):
        """Fetch every season's team-stat page and return its links.

        Each ``href`` found on a season page is joined onto that season's
        base URL, so the returned list holds absolute ``http://`` URLs in
        page order, season by season. Anchors without an ``href`` attribute
        are skipped because they cannot be turned into a URL.
        """
        links = []
        # base_urls() is evaluated once; it also sets self.sport_url used below.
        for stats_url in self.base_urls():
            season_root = 'http://' + stats_url.replace(self.sport_url, '')
            page = requests.get('http://' + stats_url)
            soup = BeautifulSoup(page.text, 'lxml')
            # href=True filters out <a> tags that have no href attribute.
            for anchor in soup.find_all('a', href=True):
                links.append(season_root + '/' + anchor['href'])
        return links

    def download_html_links(self):
        """Download every collected link into ``./data/box_score<N>.htm``.

        ``N`` is the zero-based position of the link in the list returned by
        ``get_box_score_links()``. The ``./data/`` directory is created when
        it does not exist yet.
        """
        import os

        out_dir = './data/'
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        # Collect the links exactly once: each call re-fetches and re-parses
        # every season page over the network.
        links = self.get_box_score_links()
        for index, link in enumerate(links):
            filename = 'box_score' + str(index) + '.htm'
            response = requests.get(link)
            with open(out_dir + filename, "wb") as out_file:
                out_file.write(response.content)