diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6e4266f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__/
+*.py[cod]
+*$py.class
diff --git a/README.md b/README.md
index c503451..b101818 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,39 @@
 # pfr_metadata_pull
+All of your favorite pfr_metadata_pull code, now in package form! Usage is as simple as:
+
+1. Clone this repo
+2. Make sure your local copy of the repo lives in a directory that's in your PYTHONPATH (see the note below if this step gives you trouble)
+3. Open up Python in your manner of choice and type the following:
+```python
+import pfr_metadata_pull as meta
+
+meta.scrape_links(start_year, end_year, output_path) # creates a file "game_links_startyear_to_endyear.csv" in the 'output_path' directory
+meta.pull_data_from_links("game_links_startyear_to_endyear.csv", output_path) # creates a file "game_meta_data.csv" in the 'output_path' directory
+meta.fix_weeks("game_meta_data.csv", output_path) # creates a file "game_meta_data_weeks_fixed.csv" in the 'output_path' directory
+meta.format_data("game_meta_data_weeks_fixed.csv", output_path) # creates two files in the 'output_path' directory
+```
+The final format_data function makes two files - one, "game_meta_data_formatted.csv", is a nice, pretty version of the metadata.
+The other file, __"game_meta_data_ready_to_merge.csv"__, is what you'll need to add metadata to an existing play-by-play file.
+Say you have a file "pbp.csv" that spans some range of seasons, and you just created "game_meta_data_ready_to_merge.csv" for that same range of seasons. Now you can do:
+```python
+import pandas as pd
+pbp = pd.read_csv('pbp.csv')
+meta = pd.read_csv('game_meta_data_ready_to_merge.csv')
+pbp_meta = pd.merge(pbp, meta, on=['season','week','home_team','away_team'], how='left')
+```
+Or maybe you do this part in R:
+```R
+library(tidyverse)
+pbp <- read_csv('pbp.csv')
+meta <- read_csv('game_meta_data_ready_to_merge.csv')
+pbp_meta <- left_join(pbp, meta, by = c("season", "week", "home_team", "away_team"))
+```
+
+These changes were made by [Dennis Brookner](https://github.com/dennisbrookner); direct concerns to me, or to [Puntalytics](https://twitter.com/ThePuntRunts) on Twitter.
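+
+### A note on PYTHONPATH
+If step 2 above gives you trouble, here's one quick way (of several) to check your path and, if needed, extend it for the current session. The directory below is a placeholder - use the folder that *contains* your clone of this repo:
+```python
+import sys
+print(sys.path)  # the folder containing pfr_metadata_pull must appear in this list
+sys.path.append('/path/to/folder/containing/pfr_metadata_pull')  # placeholder path
+import pfr_metadata_pull as meta  # should now succeed
+```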
+ +### Original README from greerre + This repo contains the set of scripts used to create the dataset referenced here: https://twitter.com/greerreNFL/status/1146519422527389696 diff --git a/__init__.py b/__init__.py new file mode 100755 index 0000000..3ac1df3 --- /dev/null +++ b/__init__.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue May 26 16:25:40 2020 + +@author: dennisbrookner +""" + +from .pfr_game_link_scraper import scrape_links +from .pfr_meta_data_pull import pull_data_from_links +from .week_name_stopgap import fix_weeks +from .pfr_meta_data_format import format_data \ No newline at end of file diff --git a/pfr_game_link_scraper.py b/pfr_game_link_scraper.py index d7bf50e..eac41f4 100644 --- a/pfr_game_link_scraper.py +++ b/pfr_game_link_scraper.py @@ -12,65 +12,90 @@ import pandas as pd import numpy -data_folder = 'file path to folder where all data will be held...no trailing slash' -season_start = 1960 -season_end = 2018 -current_season = season_start - -url_base = 'https://www.pro-football-reference.com' -game_data = [] - -while current_season <= season_end: - time.sleep((1.5 + random.random() * 2)) - url = '{0}/years/{1}/week_1.htm'.format(url_base,current_season) - print('Requesting weeks for the {0} season...'.format(current_season)) - raw = requests.get(url) - parsed = BeautifulSoup(raw.content, 'html.parser') - all_anchors = parsed.find_all('a',href=True) ## anchors used b/c commenting makes pulling specific divs hard ## - week_links = [] - for a in all_anchors: - if '/years/{0}/week_'.format(current_season) in a.get('href'): - week_info = { - 'Week Name' : None, - 'Week Link' : None, - } - week_info['Week Name'] = str(a.text) - week_info['Week Link'] = '{0}{1}'.format(url_base,str(a.get('href'))) - week_links.append(week_info) - ## remove duplicates from week_links ##: - ## from https://stackoverflow.com/questions/9427163/remove-duplicate-dict-in-list-in-python ## - seen_links = [] - new_link_list = [] - for d in week_links: - t = d['Week Link'] - if t not in seen_links: - seen_links.append(t) - new_link_list.append(d) - week_links = new_link_list - print(' * Found {0} weeks...'.format(len(week_links))) - for week in week_links: - print(' * Pulling {0} game links'.format(week['Week Name'])) - time.sleep((.75 + random.random() * 1.5)) - url = week['Week Link'] - raw_week = requests.get(url) - parsed_week = BeautifulSoup(raw_week.content, 'html.parser') - week_anchors = parsed_week.find_all('a',href=True) - for a in week_anchors: - if '/boxscores/{0}'.format(current_season) in a.get('href') or '/boxscores/{0}'.format(current_season + 1) in a.get('href'): - box_info = { - 'Season' : None, - 'Week' : None, - 'Week Number' : None, - 'Box Score Link' : None, +def scrape_links(start_year, end_year, output_path): + ''' + Start year can be as early as 1960; end year can be as late as the current year. + + Output path should point to the desired location of the metadata, and should NOT have a trailing slash. + + Note that for many seasons, this can be slow! 
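+
+    For example (with an illustrative path):
+        scrape_links(2015, 2018, '/path/to/data')
+    will write 'game_links_2015_to_2018.csv' into that directory.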
+ ''' + data_folder = output_path + + if output_path[-1] == '/': + raise ValueError("Indicated file path includes a trailing slash, please remove it") + + + season_start = start_year + season_end = end_year + current_season = season_start + + url_base = 'https://www.pro-football-reference.com' + game_data = [] + + while current_season <= season_end: + time.sleep((1.5 + random.random() * 2)) + url = '{0}/years/{1}/week_1.htm'.format(url_base,current_season) + print('Requesting weeks for the {0} season...'.format(current_season)) + raw = requests.get(url) + parsed = BeautifulSoup(raw.content, 'html.parser') + all_anchors = parsed.find_all('a',href=True) ## anchors used b/c commenting makes pulling specific divs hard ## + week_links = [] + for a in all_anchors: + if '/years/{0}/week_'.format(current_season) in a.get('href'): + week_info = { + 'Week Name' : None, + 'Week Link' : None, } - box_info['Season'] = int(current_season) - box_info['Week'] = week['Week Name'] - box_info['Week Number'] = int(week.split('/week_')[1].split('.htm')[0]) - box_info['Box Score Link'] = '{0}{1}'.format(url_base,str(a.get('href'))) - game_data.append(box_info) - current_season += 1 - -df = pd.DataFrame(game_data) -df = df[['Season', 'Week', 'Week Number', 'Box Score Link']] -df.to_csv('{0}/game_links_{1}_to_{2}.csv'.format(data_folder,season_start,season_end)) + week_info['Week Name'] = str(a.text) + week_info['Week Link'] = '{0}{1}'.format(url_base,str(a.get('href'))) + week_links.append(week_info) + ## remove duplicates from week_links ##: + ## from https://stackoverflow.com/questions/9427163/remove-duplicate-dict-in-list-in-python ## + seen_links = [] + new_link_list = [] + for d in week_links: + t = d['Week Link'] + if t not in seen_links: + seen_links.append(t) + new_link_list.append(d) + week_links = new_link_list + print(' * Found {0} weeks...'.format(len(week_links))) + for week in week_links: + print(' * Pulling {0} game links'.format(week['Week Name'])) + time.sleep((.75 + random.random() * 1.5)) + url = week['Week Link'] + raw_week = requests.get(url) + parsed_week = BeautifulSoup(raw_week.content, 'html.parser') + week_anchors = parsed_week.find_all('a',href=True) + for a in week_anchors: + if '/boxscores/{0}'.format(current_season) in a.get('href') or '/boxscores/{0}'.format(current_season + 1) in a.get('href'): + box_info = { + 'Season' : None, + 'Week' : None, + 'Week Number' : None, + 'Box Score Link' : None, + } + box_info['Season'] = int(current_season) + box_info['Week'] = week['Week Name'] + #box_info['Week Number'] = int(week.split('/week_')[1].split('.htm')[0]) + ''' + I (Dennis) was unable to get this line to successfully convert Weeks to Week numbers. 
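+            The likely culprit: at this point in the loop, 'week' is a dict, so the split
+            would need to run on week['Week Link'] rather than on week itself.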
+ If you can manage to do so, feel free to un-comment this line, and then ditch the + "fix weeks" function later in the pipeline + ''' + box_info['Box Score Link'] = '{0}{1}'.format(url_base,str(a.get('href'))) + game_data.append(box_info) + current_season += 1 + + df = pd.DataFrame(game_data) + df = df[['Season', 'Week', 'Week Number', 'Box Score Link']] + df.to_csv('{0}/game_links_{1}_to_{2}.csv'.format(data_folder,season_start,season_end)) + + return + +def main(): + print('Script was run directly, but this doesn\'t do anything!') + +if __name__ == '__main__': main() diff --git a/pfr_meta_data_format.py b/pfr_meta_data_format.py index 8952c1c..3d440d5 100644 --- a/pfr_meta_data_format.py +++ b/pfr_meta_data_format.py @@ -12,11 +12,10 @@ import numpy import math -data_folder = 'file path to folder where all data will be held...no trailing slash' -df_raw = pd.read_csv('{0}/game_meta_data.csv') -df_divisions = pd.read_csv(' file path to divisions.csv') ## this csv is uploaded to the github -df_scraper_game = pd.read_csv(' file path reg_game_all.csv') ## this csv is uploaded to the github +# Define all global variables first +df_divisions = pd.read_csv('https://raw.githubusercontent.com/greerre/pfr_metadata_pull/master/divisions.csv') ## this csv is uploaded to the github; possible this link breaks at some point +df_scraper_game = pd.read_csv('https://raw.githubusercontent.com/greerre/pfr_metadata_pull/master/reg_game_all.csv') ## this csv is uploaded to the github; possible this link breaks at some point pfr_to_pbp_dict = { @@ -61,15 +60,6 @@ } -df_format = df_raw -df_format['Home Team (pfr)'] = df_format['Home Team'] -df_format['Away Team (pfr)'] = df_format['Away Team'] -df_format['Home Team'] = df_format['Home Team'].replace(pfr_to_pbp_dict) -df_format['Away Team'] = df_format['Away Team'].replace(pfr_to_pbp_dict) - -df_divisions_home = df_divisions -df_divisions_away = df_divisions - home_rename_dict = { 'Teams' : 'Home Team', 'Conference' : 'Home Conference', @@ -81,112 +71,6 @@ 'Division' : 'Away Division', } -df_divisions_home = df_divisions_home.rename(columns=home_rename_dict) -df_divisions_away = df_divisions_away.rename(columns=away_rename_dict) - -df_format = pd.merge(df_format,df_divisions_home,on=['Home Team'], how='left') -df_format = pd.merge(df_format,df_divisions_away,on=['Away Team'], how='left') -df_format = df_format.drop(columns=['Unnamed: 0', 'Unnamed: 0_y', 'Unnamed: 0_x']) - -df_format['Divisional Game'] = numpy.where((df_format['Season'] >= 2002) & (df_format['Home Conference'] == df_format['Away Conference']) & (df_format['Home Division'] == df_format['Away Division']),1,0) - - -def url_to_id(url_id): - id = numpy.nan - try: - id = url_id.split('/')[-1].split('.htm')[0] - except: - pass - return id - - -def row_format(row): - ## pull out degrees and wind ## - row['Temperature'] = None - row['Wind'] = None - if row['Roof'] != 'outdoors': - row['Temperature'] = 70 - row['Wind'] = 0 - else: - try: - row['Temperature'] = int(row['Weather'].split(' degrees')[0]) - except: - pass - try: - if 'no wind' in row['Weather']: - row['Wind'] = 0 - else: - row['Wind'] = int(row['Weather'].split(',')[1].split('wind ')[1].split(' mph')[0]) - except: - pass - ## translate vegas line to home line ## - row['Home Spread'] = numpy.nan - row['Total'] = numpy.nan - if row['Vegas Line'] == 'Pick': - row['Home Spread'] = 0 - else: - line_list = row['Vegas Line'].split(' -') - fav = line_list[0] - favored_by = float(line_list[1]) - if fav == row['Home Team (pfr)']: - row['Home Spread'] = 
favored_by - elif fav == row['Away Team (pfr)']: - row['Home Spread'] = favored_by * -1.0 - else: - row['Home Spread'] = numpy.nan - try: - row['Total'] = float(row['Over/Under'].split(' (')[0]) - except: - pass - ## translate attendance ## - try: - row['Attendance'] = int(row['Attendance'].replace(',','')) - except: - try: - row['Attendance'] = int(row['Attendance']) - except: - row['Attendance'] = numpy.nan - ## translate tosses ## - row['Home Won Toss'] = numpy.nan - row['Deferred'] = numpy.nan - ## for some reason the toss text is read as a float if it's blank and will throw - ## an error on the split. This is handled w/ the try / except - try: - if row['Won Toss'] == numpy.nan: - pass - else: - home_mascot = row['Home Team (pfr)'].split(' ')[-1] - away_mascot = row['Away Team (pfr)'].split(' ')[-1] - winner = row['Won Toss'].split(' (')[0] - if home_mascot == winner: - row['Home Won Toss'] = 1 - if len(row['Won Toss'].split(' (')) > 1: - row['Deferred'] = 1 - else: - row['Deferred'] = 0 - elif away_mascot == winner: - row['Home Won Toss'] = 0 - if len(row['Won Toss'].split(' (')) > 1: - row['Deferred'] = 1 - else: - row['Deferred'] = 0 - else: - pass - except: - pass - ## conevrt urls to ids ## - row['Stadium ID'] = url_to_id(row['Stadium Link']) - row['Home Coach ID'] = url_to_id(row['Home Coach Link']) - row['Away Coach ID'] = url_to_id(row['Away Coach Link']) - row['Home Starting QB ID'] = url_to_id(row['Home Starting QB Link']) - row['Away Starting QB ID'] = url_to_id(row['Away Starting QB Link']) - return row - - - -df_new = df_format.apply(row_format,axis=1) -df_new.to_csv('/Users/robertgreer/Documents/Coding/NFL/pro-football-reference/Data Files/game_meta_data_formatted.csv') - meta_merge_headers = [ 'Season', @@ -229,13 +113,6 @@ def row_format(row): ] -merge_df = df_new[meta_merge_headers] - -## convert header formating to match nflscrapR for the join -## note of caution...the original scraper swapped home and away team name and coaches -## those were swapped back with the header rename dict below -## the scraper has been fixed and the dict below has been swapped back, but neither tested - rename_merge_headers = { 'Season' : 'season', @@ -278,9 +155,6 @@ def row_format(row): } -merge_df = merge_df.rename(columns=rename_merge_headers) - -## prep scrapeR df ## pbp_team_standard_dict = { 'ARI' : 'ARI', @@ -321,14 +195,6 @@ def row_format(row): } - -## standardize team names across data sets ## -df_scraper_game['home_team'] = df_scraper_game['home_team'].replace(pbp_team_standard_dict) -df_scraper_game['away_team'] = df_scraper_game['away_team'].replace(pbp_team_standard_dict) - -## create new_df ## -merged_df = pd.merge(merge_df,df_scraper_game,on=['season','week','home_team','away_team'],how='left') - final_headers = [ 'type', @@ -365,7 +231,7 @@ def row_format(row): 'home_starting_qb', 'away_starting_qb_id', 'home_starting_qb_id', - 'away_won_toss', + #'away_won_toss', # This threw an error for me (Dennis) so I removed it; sorry if you wanted this! 
'winner_deferred', 'referee', 'umpire', @@ -377,5 +243,162 @@ def row_format(row): ] -merged_df = merged_df[final_headers] -merged_df.to_csv('{0}/reg_game_w_meta.csv'.format(data_folder)) +# then helper functions: +def url_to_id(url_id): + id = numpy.nan + try: + id = url_id.split('/')[-1].split('.htm')[0] + except: + pass + return id + + +def row_format(row): + ## pull out degrees and wind ## + row['Temperature'] = None + row['Wind'] = None + if row['Roof'] != 'outdoors': + row['Temperature'] = 70 + row['Wind'] = 0 + else: + try: + row['Temperature'] = int(row['Weather'].split(' degrees')[0]) + except: + pass + try: + if 'no wind' in row['Weather']: + row['Wind'] = 0 + else: + row['Wind'] = int(row['Weather'].split(',')[1].split('wind ')[1].split(' mph')[0]) + except: + pass + ## translate vegas line to home line ## + row['Home Spread'] = numpy.nan + row['Total'] = numpy.nan + if row['Vegas Line'] == 'Pick': + row['Home Spread'] = 0 + else: + line_list = row['Vegas Line'].split(' -') + fav = line_list[0] + favored_by = float(line_list[1]) + if fav == row['Home Team (pfr)']: + row['Home Spread'] = favored_by + elif fav == row['Away Team (pfr)']: + row['Home Spread'] = favored_by * -1.0 + else: + row['Home Spread'] = numpy.nan + try: + row['Total'] = float(row['Over/Under'].split(' (')[0]) + except: + pass + ## translate attendance ## + try: + row['Attendance'] = int(row['Attendance'].replace(',','')) + except: + try: + row['Attendance'] = int(row['Attendance']) + except: + row['Attendance'] = numpy.nan + ## translate tosses ## + row['Home Won Toss'] = numpy.nan + row['Deferred'] = numpy.nan + ## for some reason the toss text is read as a float if it's blank and will throw + ## an error on the split. This is handled w/ the try / except + try: + if row['Won Toss'] == numpy.nan: + pass + else: + home_mascot = row['Home Team (pfr)'].split(' ')[-1] + away_mascot = row['Away Team (pfr)'].split(' ')[-1] + winner = row['Won Toss'].split(' (')[0] + if home_mascot == winner: + row['Home Won Toss'] = 1 + if len(row['Won Toss'].split(' (')) > 1: + row['Deferred'] = 1 + else: + row['Deferred'] = 0 + elif away_mascot == winner: + row['Home Won Toss'] = 0 + if len(row['Won Toss'].split(' (')) > 1: + row['Deferred'] = 1 + else: + row['Deferred'] = 0 + else: + pass + except: + pass + ## conevrt urls to ids ## + row['Stadium ID'] = url_to_id(row['Stadium Link']) + row['Home Coach ID'] = url_to_id(row['Home Coach Link']) + row['Away Coach ID'] = url_to_id(row['Away Coach Link']) + row['Home Starting QB ID'] = url_to_id(row['Home Starting QB Link']) + row['Away Starting QB ID'] = url_to_id(row['Away Starting QB Link']) + return row + +# Now the real, for-use function: +def format_data(input_file, output_path): + ''' + Input file should point to the csv created by week_name_stopgap + + Output path should point to the desired location of the metadata, and should NOT have a trailing slash. 
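+
+    The two files written to the output directory are 'game_meta_data_formatted.csv'
+    and 'game_meta_data_ready_to_merge.csv'.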
+ ''' + + data_folder = output_path + + df_raw = pd.read_csv(input_file) + + df_format = df_raw + df_format['Home Team (pfr)'] = df_format['Home Team'] + df_format['Away Team (pfr)'] = df_format['Away Team'] + df_format['Home Team'] = df_format['Home Team'].replace(pfr_to_pbp_dict) + df_format['Away Team'] = df_format['Away Team'].replace(pfr_to_pbp_dict) + + df_divisions_home = df_divisions + df_divisions_away = df_divisions + + + df_divisions_home = df_divisions_home.rename(columns=home_rename_dict) + df_divisions_away = df_divisions_away.rename(columns=away_rename_dict) + + df_format = pd.merge(df_format,df_divisions_home,on=['Home Team'], how='left') + df_format = pd.merge(df_format,df_divisions_away,on=['Away Team'], how='left') + df_format = df_format.drop(columns=['Unnamed: 0', 'Unnamed: 0_y', 'Unnamed: 0_x']) + + df_format['Divisional Game'] = numpy.where((df_format['Season'] >= 2002) & (df_format['Home Conference'] == df_format['Away Conference']) & (df_format['Home Division'] == df_format['Away Division']),1,0) + + + df_new = df_format.apply(row_format,axis=1) + df_new.to_csv(f'{data_folder}/game_meta_data_formatted.csv') + + + merge_df = df_new[meta_merge_headers] + + ## convert header formating to match nflscrapR for the join + ## note of caution...the original scraper swapped home and away team name and coaches + ## those were swapped back with the header rename dict below + ## the scraper has been fixed and the dict below has been swapped back, but neither tested + + + merge_df = merge_df.rename(columns=rename_merge_headers) + merge_df.to_csv(f'{data_folder}/game_meta_data_ready_to_merge.csv') + ## prep scrapeR df ## + + + # ## standardize team names across data sets ## + # df_scraper_game['home_team'] = df_scraper_game['home_team'].replace(pbp_team_standard_dict) + # df_scraper_game['away_team'] = df_scraper_game['away_team'].replace(pbp_team_standard_dict) + + # ## create new_df ## + # merged_df = pd.merge(merge_df,df_scraper_game,on=['season','week','home_team','away_team'],how='left') + + + + # merged_df = merged_df[final_headers] + # merged_df.to_csv('{0}/reg_game_w_meta.csv'.format(data_folder)) + + return + +def main(): + print('Script was run directly, but this doesn\'t do anything!') + +if __name__ == '__main__': main() diff --git a/pfr_meta_data_pull.py b/pfr_meta_data_pull.py index 8733cae..a4328ab 100644 --- a/pfr_meta_data_pull.py +++ b/pfr_meta_data_pull.py @@ -8,13 +8,6 @@ import pandas as pd import numpy -data_folder = 'file path to folder where all data will be held...no trailing slash' - -## Pull in URLs by turning data fram into list ## -url_file = '{0}/game_links_1960_to_2018.csv'.format(data_folder) -url_df = pd.read_csv(url_file) -filtered_df = url_df[url_df['Season'] >= 1990] ## hasn't been tested before 1990, but would work in theory ## -urls = filtered_df['Box Score Link'].tolist() ## helper data structures ## @@ -26,7 +19,6 @@ 'May' : 5, 'Jun' : 6, 'Jul' : 7, - 'Jul' : 8, 'Aug' : 8, 'Sep' : 9, 'Oct' : 10, @@ -157,198 +149,228 @@ def get_officials_info(officials_div): field_judge = official_name return referee, umpire, down_judge, line_judge, back_judge, side_judge, field_judge +def pull_data_from_links(input_file, output_path, cutoff_year = 1990): + ''' + url file should be that produced by the scrape_links function. + + Output path should point to the desired location of the metadata, and should NOT have a trailing slash. 
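+
+    The scraped results are written to 'game_meta_data.csv' in the output directory.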
-game_data_rows = []
-broken_box_list = []
+    The default cutoff year ensures that if the input url file contains data
+    from seasons prior to 1990, those will be omitted here due to fears of
+    incompatibility; feel free to change this parameter at your own risk.
+
+    Note: This function might take a little while to run. It takes ~2 seconds
+    per game, with ~250 games/season
+    '''
+
+    game_data_rows = []
+    broken_box_list = []
+
+    data_folder = output_path

-for url in urls:
-    time.sleep((.75 + random.random() * .5))
-    try:
-        game_data_points = {
-            'Game Link' : None,
-            'Game Date' : None,
-            'Game Day' : None,
-            'Local Start Time' : None,
-            'Game Length' : None,
-            'Stadium' : None,
-            'Stadium Link' : None,
-            'Attendance' : None,
-            'Season': None,
-            'Week' : None,
-            'Home Team' : None,
-            'Away Team' : None,
-            'Home Record' : None,
-            'Away Record' : None,
-            'Home Score' : None,
-            'Away Score' : None,
-            'Home Coach' : None,
-            'Away Coach' : None,
-            'Home Coach Link' : None,
-            'Away Coach Link' : None,
-            'Home Starting QB' : None,
-            'Away Starting QB' : None,
-            'Home Starting QB Link' : None,
-            'Away Starting QB Link' : None,
-            'Won Toss' : None,
-            'Won Toss (OT)' : None,
-            'Roof' : None,
-            'Surface' : None,
-            'Weather' : None,
-            'Vegas Line' : None,
-            'Over/Under' : None,
-            'Referee' : None,
-            'Umpire' : None,
-            'Head Linesman / Down Judge' : None,
-            'Line Judge' : None,
-            'Back Judge' : None,
-            'Side Judge' : None,
-            'Field Judge' : None,
-        }
-        raw = requests.get(url)
-        parsed = BeautifulSoup(raw.content, 'html.parser')
-        score_board_divs = parsed.find('div', {'class' : 'scorebox'}).find_all('div', recursive=False)
-        home_div = score_board_divs[0]
-        away_div = score_board_divs[1]
-        meta_div = score_board_divs[2]
-        away_div_divs = away_div.find_all('div', recursive=False)
-        away_team = away_div_divs[0].find('a', {'itemprop' : 'name'}).text
-        try:
-            away_score = int(away_div_divs[1].find('div').text)
-        except:
-            away_score = int(away_div_divs[1].text)
-        away_record = away_div_divs[2].text
-        away_coach = away_div_divs[4].find('a').text
-        away_coach_link = away_div_divs[4].find('a').get('href')
-        home_div_divs = home_div.find_all('div', recursive=False)
-        home_team = home_div_divs[0].find('a', {'itemprop' : 'name'}).text
-        try:
-            home_score = int(home_div_divs[1].find('div').text)
-        except:
-            home_score = int(home_div_divs[1].text)
-        home_record = home_div_divs[2].text
-        home_coach = home_div_divs[4].find('a').text
-        home_coach_link = home_div_divs[4].find('a').get('href')
-        try: ## pfr's commenting messes up bs4s parsing, so the specific part has to get pulled as text and re-parsed ##
-            game_info_div_effed = str(parsed.find('div', {'id': 'all_game_info'}))
-            game_info_div = BeautifulSoup(game_info_div_effed.split('<!--')[1].split('-->')[0], 'html.parser')
-        except:
-            game_info_div = None
-        try:
-            home_starter_div_effed = str(parsed.find('div', {'id' : 'all_home_starters'}))
-            home_starter_div = BeautifulSoup(home_starter_div_effed.split('<!--')[1].split('-->')[0], 'html.parser')
-        except:
-            home_starter_div = None
-        try:
-            away_starter_div_effed = str(parsed.find('div', {'id' : 'all_vis_starters'}))
-            away_starter_div = BeautifulSoup(away_starter_div_effed.split('<!--')[1].split('-->')[0], 'html.parser')
-        except:
-            away_starter_div = None
+    ## Pull in URLs by turning data frame into list ##
+    url_df = pd.read_csv(input_file)
+    filtered_df = url_df[url_df['Season'] >= cutoff_year] ## hasn't been tested before 1990, but would work in theory ##
+    urls = filtered_df['Box Score Link'].tolist()
+
+
+    for url in urls:
+        print(f'working on {url}')
+        time.sleep((.75 + random.random() * .5))
        try:
-            officials_div_effed = str(parsed.find('div', {'id' : 'all_officials'}))
-            officials_div = BeautifulSoup(officials_div_effed.split('<!--')[1].split('-->')[0], 'html.parser')
+            game_data_points = {
+                'Game Link' : None,
+                'Game Date' : None,
+                'Game Day' : None,
+                'Local Start Time' : None,
+                'Game Length' : None,
+                'Stadium' : None,
+                'Stadium Link' : None,
+                'Attendance' : None,
+                'Season': None,
+                'Week' : None,
+                'Home Team' : None,
+                'Away Team' : None,
+                'Home Record' : None,
+                'Away Record' : None,
+                'Home Score' : None,
+                'Away Score' : None,
+                'Home Coach' : None,
+                'Away Coach' : None,
+                'Home Coach Link' : None,
+                'Away Coach Link' : None,
+                'Home Starting QB' : None,
+                'Away Starting QB' : None,
+                'Home Starting QB Link' : None,
+                'Away Starting QB Link' : None,
+                'Won Toss' : None,
+                'Won Toss (OT)' : None,
+                'Roof' : None,
+                'Surface' : None,
+                'Weather' : None,
+                'Vegas Line' : None,
+                'Over/Under' : None,
+                'Referee' : None,
+                'Umpire' : None,
+                'Head Linesman / Down Judge' : None,
+                'Line Judge' : None,
+                'Back Judge' : None,
+                'Side Judge' : None,
+                'Field Judge' : None,
+            }
+            raw = requests.get(url)
+            parsed = BeautifulSoup(raw.content, 'html.parser')
+            score_board_divs = parsed.find('div', {'class' : 'scorebox'}).find_all('div', recursive=False)
+            home_div = score_board_divs[0]
+            away_div = score_board_divs[1]
+            meta_div = score_board_divs[2]
+            away_div_divs = away_div.find_all('div', recursive=False)
+            away_team = away_div_divs[0].find('a', {'itemprop' : 'name'}).text
+            try:
+                away_score = int(away_div_divs[1].find('div').text)
+            except:
+                away_score = int(away_div_divs[1].text)
+            away_record = away_div_divs[2].text
+            away_coach = away_div_divs[4].find('a').text
+            away_coach_link = away_div_divs[4].find('a').get('href')
+            home_div_divs = home_div.find_all('div', recursive=False)
+            home_team = home_div_divs[0].find('a', {'itemprop' : 'name'}).text
+            try:
+                home_score = int(home_div_divs[1].find('div').text)
+            except:
+                home_score = int(home_div_divs[1].text)
+            home_record = home_div_divs[2].text
+            home_coach = home_div_divs[4].find('a').text
+            home_coach_link = home_div_divs[4].find('a').get('href')
+            try: ## pfr's commenting messes up bs4s parsing, so the specific part has to get pulled as text and re-parsed ##
+                game_info_div_effed = str(parsed.find('div', {'id': 'all_game_info'}))
+                game_info_div = BeautifulSoup(game_info_div_effed.split('<!--')[1].split('-->')[0], 'html.parser')
+            except:
+                game_info_div = None
+            try:
+                home_starter_div_effed = str(parsed.find('div', {'id' : 'all_home_starters'}))
+                home_starter_div = BeautifulSoup(home_starter_div_effed.split('<!--')[1].split('-->')[0], 'html.parser')
+            except:
+                home_starter_div = None
+            try:
+                away_starter_div_effed = str(parsed.find('div', {'id' : 'all_vis_starters'}))
+                away_starter_div = BeautifulSoup(away_starter_div_effed.split('<!--')[1].split('-->')[0], 'html.parser')
+            except:
+                away_starter_div = None
+            try:
+                officials_div_effed = str(parsed.find('div', {'id' : 'all_officials'}))
+                officials_div = BeautifulSoup(officials_div_effed.split('<!--')[1].split('-->')[0], 'html.parser')
+            except:
+                officials_div = None
+            game_day, game_date, local_start_time, game_length, stadium, stadium_link, attendance = get_meta_data_points(meta_div)
+            won_toss, won_toss_ot, roof, surface, weather, vegas_line, over_under = get_game_info(game_info_div)
+            home_qb, home_qb_link = get_qb_info(home_starter_div)
+            away_qb, away_qb_link = get_qb_info(away_starter_div)
+            referee, umpire, down_judge, line_judge, back_judge, side_judge, field_judge = get_officials_info(officials_div)
+
game_data_points['Game Link'] = url + game_data_points['Game Date'] = game_date + game_data_points['Game Day'] = game_day + game_data_points['Local Start Time'] = local_start_time + game_data_points['Game Length'] = game_length + game_data_points['Stadium'] = stadium + game_data_points['Stadium Link'] = stadium_link + game_data_points['Attendance'] = attendance + game_data_points['Season'] = filtered_df[filtered_df['Box Score Link'] == url].iloc[0]['Season'] + game_data_points['Week'] = filtered_df[filtered_df['Box Score Link'] == url].iloc[0]['Week'] #' Number'] + game_data_points['Home Team'] = home_team + game_data_points['Away Team'] = away_team + game_data_points['Home Record'] = home_record + game_data_points['Away Record'] = away_record + game_data_points['Home Score'] = home_score + game_data_points['Away Score'] = away_score + game_data_points['Home Coach'] = home_coach + game_data_points['Away Coach'] = away_coach + game_data_points['Home Coach Link'] = home_coach_link + game_data_points['Away Coach Link'] = away_coach_link + game_data_points['Home Starting QB'] = home_qb + game_data_points['Away Starting QB'] = away_qb + game_data_points['Home Starting QB Link'] = home_qb_link + game_data_points['Away Starting QB Link'] = away_qb_link + game_data_points['Won Toss'] = won_toss + game_data_points['Won Toss (OT)'] = won_toss_ot + game_data_points['Roof'] = roof + game_data_points['Surface'] = surface + game_data_points['Weather'] = weather + game_data_points['Vegas Line'] = vegas_line + game_data_points['Over/Under'] = over_under + game_data_points['Referee'] = referee + game_data_points['Umpire'] = umpire + game_data_points['Head Linesman / Down Judge'] = down_judge + game_data_points['Line Judge'] = line_judge + game_data_points['Back Judge'] = back_judge + game_data_points['Side Judge'] = side_judge + game_data_points['Field Judge'] = field_judge + game_data_rows.append(game_data_points) except: - officials_div = None - game_day, game_date, local_start_time, game_length, stadium, stadium_link, attendance = get_meta_data_points(meta_div) - won_toss, won_toss_ot, roof, surface, weather, vegas_line, over_under = get_game_info(game_info_div) - home_qb, home_qb_link = get_qb_info(home_starter_div) - away_qb, away_qb_link = get_qb_info(away_starter_div) - referee, umpire, down_judge, line_judge, back_judge, side_judge, field_judge = get_officials_info(officials_div) - game_data_points['Game Link'] = url - game_data_points['Game Date'] = game_date - game_data_points['Game Day'] = game_day - game_data_points['Local Start Time'] = local_start_time - game_data_points['Game Length'] = game_length - game_data_points['Stadium'] = stadium - game_data_points['Stadium Link'] = stadium_link - game_data_points['Attendance'] = attendance - game_data_points['Season'] = filtered_df[filtered_df['Box Score Link'] == url].iloc[0]['Season'] - game_data_points['Week'] = filtered_df[filtered_df['Box Score Link'] == url].iloc[0]['Week Number'] - game_data_points['Home Team'] = home_team - game_data_points['Away Team'] = away_team - game_data_points['Home Record'] = home_record - game_data_points['Away Record'] = away_record - game_data_points['Home Score'] = home_score - game_data_points['Away Score'] = away_score - game_data_points['Home Coach'] = home_coach - game_data_points['Away Coach'] = away_coach - game_data_points['Home Coach Link'] = home_coach_link - game_data_points['Away Coach Link'] = away_coach_link - game_data_points['Home Starting QB'] = home_qb - game_data_points['Away Starting QB'] = 
away_qb - game_data_points['Home Starting QB Link'] = home_qb_link - game_data_points['Away Starting QB Link'] = away_qb_link - game_data_points['Won Toss'] = won_toss - game_data_points['Won Toss (OT)'] = won_toss_ot - game_data_points['Roof'] = roof - game_data_points['Surface'] = surface - game_data_points['Weather'] = weather - game_data_points['Vegas Line'] = vegas_line - game_data_points['Over/Under'] = over_under - game_data_points['Referee'] = referee - game_data_points['Umpire'] = umpire - game_data_points['Head Linesman / Down Judge'] = down_judge - game_data_points['Line Judge'] = line_judge - game_data_points['Back Judge'] = back_judge - game_data_points['Side Judge'] = side_judge - game_data_points['Field Judge'] = field_judge - game_data_rows.append(game_data_points) - except: - broken_row = { - 'Season' : None, - 'Week' : None, - 'URL' : None, - } - broken_row['Season'] = filtered_df[filtered_df['Box Score Link'] == url].iloc[0]['Season'] - broken_row['Week'] = filtered_df[filtered_df['Box Score Link'] == url].iloc[0]['Week'] - broken_row['URL'] = url - broken_box_list.append(broken_row) - print('ROW BROKEN {0}'.format(broken_row)) - - -df = pd.DataFrame(game_data_rows) -df_two = pd.DataFrame(broken_box_list) + broken_row = { + 'Season' : None, + 'Week' : None, + 'URL' : None, + } + broken_row['Season'] = filtered_df[filtered_df['Box Score Link'] == url].iloc[0]['Season'] + broken_row['Week'] = filtered_df[filtered_df['Box Score Link'] == url].iloc[0]['Week'] + broken_row['URL'] = url + broken_box_list.append(broken_row) + print('ROW BROKEN {0}'.format(broken_row)) + + + df = pd.DataFrame(game_data_rows) + df_two = pd.DataFrame(broken_box_list) + + + headers = [ + 'Game Link', + 'Game Date', + 'Game Day', + 'Local Start Time', + 'Game Length', + 'Stadium', + 'Stadium Link', + 'Attendance', + 'Season', + 'Week', + 'Home Team', + 'Away Team', + 'Home Record', + 'Away Record', + 'Home Score', + 'Away Score', + 'Home Coach', + 'Away Coach', + 'Home Coach Link', + 'Away Coach Link', + 'Home Starting QB', + 'Away Starting QB', + 'Home Starting QB Link', + 'Away Starting QB Link', + 'Won Toss', + 'Won Toss (OT)', + 'Roof', + 'Surface', + 'Weather', + 'Vegas Line', + 'Over/Under', + 'Referee', + 'Umpire', + 'Head Linesman / Down Judge', + 'Line Judge', + 'Back Judge', + 'Side Judge', + 'Field Judge' + ] + + df = df[headers] + df.to_csv('{0}/game_meta_data.csv'.format(data_folder)) + return -headers = [ - 'Game Link', - 'Game Date', - 'Game Day', - 'Local Start Time', - 'Game Length', - 'Stadium', - 'Stadium Link', - 'Attendance', - 'Season', - 'Week', - 'Home Team', - 'Away Team', - 'Home Record', - 'Away Record', - 'Home Score', - 'Away Score', - 'Home Coach', - 'Away Coach', - 'Home Coach Link', - 'Away Coach Link', - 'Home Starting QB', - 'Away Starting QB', - 'Home Starting QB Link', - 'Away Starting QB Link', - 'Won Toss', - 'Won Toss (OT)', - 'Roof', - 'Surface', - 'Weather', - 'Vegas Line', - 'Over/Under', - 'Referee', - 'Umpire', - 'Head Linesman / Down Judge', - 'Line Judge', - 'Back Judge', - 'Side Judge', - 'Field Judge' -] +def main(): + print('Script was run directly, but this doesn\'t do anything!') + +if __name__ == '__main__': main() -df = df[headers] -df.to_csv('{0}/game_meta_data.csv'.format(data_folder)) diff --git a/week_name_stopgap.py b/week_name_stopgap.py new file mode 100755 index 0000000..7425f22 --- /dev/null +++ b/week_name_stopgap.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue May 26 15:11:02 2020 + +@author: 
dennisbrookner
+"""
+
+import pandas as pd
+
+playoff_dict = {"Wild Card": 18,
+                "Divisional": 19,
+                "Conf Champ": 20,
+                "Super Bowl": 21}
+
+def fix_weeks(input_file, output_path):
+    '''
+    Input file should point to the csv created by pfr_meta_data_pull
+
+    Output path should point to the desired location of the metadata, and should NOT have a trailing slash.
+
+    This function fixes the fact that, up to this point in the pipeline, "Week" is still
+    a string and needs to be parsed into an integer
+    '''
+
+    data = pd.read_csv(input_file)
+
+    week_number = list(data['Week'])
+
+    for i in range(len(week_number)):
+        if "Week" in week_number[i]:
+            week_number[i] = int(week_number[i].split('Week ')[1])
+        elif week_number[i] in playoff_dict:
+            week_number[i] = playoff_dict[week_number[i]]
+        else:
+            raise ValueError(f'Unexpected week name {week_number[i]}')
+
+    data['Week_name'] = data['Week']
+
+    data['Week'] = week_number
+
+    data.to_csv(f'{output_path}/game_meta_data_weeks_fixed.csv')
+
+    return
+
+def main():
+    print('Script was run directly, but this doesn\'t do anything!')
+
+if __name__ == '__main__': main()
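+
+# A quick illustration of what fix_weeks does to the 'Week' column:
+#   'Week 5'    -> 5
+#   'Wild Card' -> 18  (playoff rounds map to 18 through 21 via playoff_dict)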