#!/usr/bin/env python
"""Archive box-score links for Cardinal Stritch men's volleyball (MAMVIC NAIA).

The 2010/2011/2012 seasons are published under a different site layout than
2013-2017; this module handles the old layout.  It scrapes each season's
team-stats page on static.stritchwolves.com and collects absolute URLs to
every linked page (box scores) so offline archive copies can be made.
"""

from bs4 import BeautifulSoup
import requests


class Old_Stats:
    """Retrieve the mens volleyball statistics data from years 2010, 2011,
    2012 for MAMVIC NAIA Cardinal Stritch."""

    def years(self):
        """Return the list of seasons covered by the old site layout.

        Returns:
            list[int]: [2010, 2011, 2012].
        """
        start_year = 2010
        end_year = 2012
        # range() already yields the sequence; no manual append loop needed.
        return list(range(start_year, end_year + 1))

    def base_urls(self):
        """Build one scheme-less team-stats URL per season.

        Side effects: stores ``self.sport_url`` (the query path suffix) and
        ``self.base_url`` (last season's root) as instance attributes, as the
        original code did — ``get_box_score_links`` reads ``self.sport_url``.

        Returns:
            list[str]: e.g.
            'static.stritchwolves.com/custompages/MVB/2010/teamstat.htm?path=mvball'
        """
        self.sport_url = '/teamstat.htm?path=mvball'
        base_urll = []
        # Iterate the seasons directly instead of indexing with
        # range(0, len(...)), and call years() exactly once.
        for year in self.years():
            self.base_url = 'static.stritchwolves.com/custompages/MVB/' + str(year)
            base_urll.append(self.base_url + self.sport_url)
        return base_urll

    def get_box_score_links(self):
        """Fetch each season page and collect absolute links found on it.

        Performs one HTTP GET per season (network I/O).

        Returns:
            list[str]: absolute 'http://...' URLs for every <a href=...>
            anchor across all season pages.
        """
        link_list = []
        # Compute the URL list once; the original re-called base_urls()
        # on every loop iteration and indexed into it.
        for base in self.base_urls():
            r = requests.get('http://' + base)
            soup = BeautifulSoup(r.text, 'lxml')
            # Season root = base URL with the stats-page suffix stripped.
            season_root = 'http://' + base.replace(self.sport_url, '') + '/'
            for link in soup.find_all('a'):
                href = link.get('href')
                # Anchors without an href attribute return None, which
                # would crash the string concatenation below.
                if href is None:
                    continue
                link_list.append(season_root + href)
        return link_list


if __name__ == '__main__':
    # Guarded so importing this module does not trigger network I/O.
    old_stats = Old_Stats()
    # Python 3 print function (the original used a Python 2 print statement).
    print(old_stats.get_box_score_links())