48 lines
1.4 KiB
Python
Executable file
48 lines
1.4 KiB
Python
Executable file
#!/usr/bin/env python2
|
|
|
|
from bs4 import BeautifulSoup
|
|
import requests
|
|
|
|
#### FIRST THREE YEARS 2010/2011/2012 ARE DIFFERENT WAYS TO RETRIEVE THAN 13-17
|
|
|
|
class Old_Stats:
|
|
'Retrieve the mens volleyball statistics data from years 2010, 2011, 2012 for MAMVIC NAIA Cardinal Stritch'
|
|
|
|
# return list of years of stat data
|
|
def years(self):
|
|
|
|
start_year = 2010
|
|
end_year = 2012
|
|
|
|
yearl = []
|
|
for year in range(start_year, end_year + 1):
|
|
yearl.append(year)
|
|
return yearl
|
|
|
|
def base_urls(self):
|
|
self.sport_url = '/teamstat.htm?path=mvball'
|
|
base_urll = []
|
|
for year in range(0, len(self.years())):
|
|
self.base_url = 'static.stritchwolves.com/custompages/MVB/' + str(self.years()[year])
|
|
base_urll.append(self.base_url + self.sport_url)
|
|
return base_urll
|
|
|
|
def get_box_score_links(self):
|
|
|
|
link_list = []
|
|
|
|
for url_step in range(0, len(self.base_urls())):
|
|
r = requests.get('http://' + self.base_urls()[url_step])
|
|
data = r.text
|
|
soup = BeautifulSoup(data, 'lxml')
|
|
for link in soup.find_all('a'):
|
|
link_list.append('http://' + self.base_urls()[url_step].replace(self.sport_url, '') + '/' + link.get('href'))
|
|
return link_list
|
|
|
|
def download_html_links(self):
|
|
for url_step in range(0, len(self.get_box_score_links()) + 1):
|
|
filename = 'box_score' + str(url_step) + '.htm'
|
|
response = requests.get(self.get_box_score_links()[url_step])
|
|
with open('./data/' + filename, "wb") as code:
|
|
code.write(response.content)
|
|
|