/* ////////////////////////////////////////////////////////////////////////////////////////////////
Name: 			final_refs_data.do
Description: 	Convert season penalty-kick (PK) totals to per-game averages, then merge with the cards, fouls, and offsides data

Notes: 			- Created by Jeff 
				- Last updated 10/26/2022
//////////////////////////////////////////////////////////////////////////////////////////////// */

* Session setup -----------------------------------------------------------------------------------
clear
set more off
version 16.0

* Optional settings, left disabled:
*set scheme plotplainblind
*set processors 24
*set max_memory 115g

* Move into the project root. Each collaborator's Dropbox location is attempted in turn under
* -capture-, so a path that does not exist on this machine is skipped without stopping the script.
* To run on a new machine, append its path to the list below.
foreach root in "C:/Users/jcross/Dropbox/HFA and VAR"   ///
                "D:/Dropbox/HFA and VAR"                ///
                "C:\Users\uhrig_R\Dropbox\HFA and VAR"  ///
                "C:\Users\richa\Dropbox\HFA and VAR"    ///
                "C:\Users\camel\Dropbox\HFA and VAR" {
	capture cd "`root'"
}

* Data directories (relative to the project root)
global rawdata   "1_data/0_raw"
global cleandata "1_data/1_clean"
global finaldata "1_data/2_final"

* Output directories
global graphs      "4_results/Figures"
global regressions "4_results/Tables"

**************** Load the cards / fouls / offsides data *****************
	* A cleaned copy is parked in a tempfile so it can be merged onto the
	* per-game penalty data further down.
	use "${cleandata}/cards_fouls_offsides", clear

	* Normalise the merge key: strip leading/trailing blanks, then collapse
	* runs of internal blanks to a single blank.
	replace team_name = stritrim(strtrim(team_name))

	* The tempfile path is quoted because Stata's temporary directory may
	* contain spaces (e.g. a Windows user name), which would otherwise make
	* -save- fail to parse the file name.
	tempfile cards
	save "`cards'", replace

	* Build a country-season lookup of the number of match weeks ("games"),
	* saved in the tempfile `matches'.
	use "${finaldata}/final_treatment", clear

	* Normalise country names BEFORE filtering and collapsing. Otherwise a
	* padded value such as "korea " would escape the filters below, and
	* padded/unpadded variants of one country would collapse into separate
	* groups that become duplicate country-season keys once trimmed.
	replace country = strtrim(country)
	replace country = stritrim(country)

	** Drop Korea, which adopted VAR midseason in 2017
	drop if country == "korea"
	* missing() also catches extended missing values (.a-.z), unlike "== ."
	drop if missing(goal_diff)

	* Diagnostics: non-missing wk observations per team-season (home and away),
	* compared against the highest week number per country-season.
	bys home_team season: egen max_weeks_home = count(wk) // counts obs
	bys away_team season: egen max_weeks_away = count(wk) // counts obs
	bys country season: egen max_weeks = max(wk) // just grab max weeks (is this good enough?)

	tab max_weeks country
	tab max_weeks_home country
	tab max_weeks_away country

	* Yes, it is good enough except for Japan when it had two "stages" with 17 max (but really 34 per season)
	replace max_weeks = 34 if max_weeks == 17 & country == "japan"

	* The per-team counts were only needed for the tabulations above.
	drop max_weeks_home max_weeks_away

	rename max_weeks games

	* Tried doing a team-name merge instead, but it is just sloppy

	* One row per country-season; games is constant within each group, so
	* (max) simply carries the value through.
	collapse (max) games, by(country season)

	* Quote the tempfile path in case the temp directory contains spaces.
	tempfile matches
	save "`matches'", replace
	
	
	* Convert season penalty totals into per-game rates, attach the cards /
	* fouls / offsides data, and write the final merged dataset.
	use "${cleandata}/pks_per_season", clear

	* Normalise the country key the same way as in `matches'.
	replace country = strtrim(country)
	replace country = stritrim(country)

	* Attach games per country-season. keep(master match) discards
	* country-seasons that appear only in `matches' (i.e. _merge == 2).
	* The tempfile path is quoted in case the temp directory contains spaces.
	merge m:1 country season using "`matches'", keep(master match)

	* For rows with no match in `matches' (_merge == 1), fill games with the
	* country's mean over the seasons that did match.
	bys country: egen ref = mean(games) // did mean just to sanity check that it is the same for all seasons
	tab ref // yup looks good
	tab _merge if ref != games // good good
	replace games = ref if _merge == 1
	drop _merge ref

	* Normalise team names so they line up with the cards data.
	replace team_name = strtrim(team_name)
	replace team_name = stritrim(team_name)

	replace team_name = "Icel Idman Yurdu" if team_name == "Yeni Mersin Idmanyurdu" // same team based on googling

	* Per-game penalty rates. Home and away each use half of the season's
	* games as the denominator.
	gen pk_pg_overall = pk_overall/games
	gen pk_pg_home = pk_home/(games/2) // note that for all of these leagues it is the normal even cases (checked)
	gen pk_pg_away = pk_away/(games/2)
	gen pk_pg_diff = pk_pg_home - pk_pg_away

	* Only the per-game versions are kept.
	drop pk_overall pk_home pk_away pk_diff

	* 1:1 merge on team-season; this requires team_name season to identify
	* rows uniquely in both datasets. Unmatched rows from either side are
	* retained, and nogenerate skips creating _merge (previously created and
	* dropped straight away).
	merge 1:1 team_name season using "`cards'", nogenerate

	save "${finaldata}/final_refs_merged", replace
	
	
	
	