/* ////////////////////////////////////////////////////////////////////////////////////////////////
Name: 			preliminary.do
Description: 	Preliminary analysis

Notes: 			- Created by Richard 
				- Last updated 8/24/2022
//////////////////////////////////////////////////////////////////////////////////////////////// */

* Set up environment ------------------------------------------------------------------------------
* Start from an empty dataset, turn off output paging, and set the command-interpreter version.
clear
set more off
version 16.0

* The plotplainblind scheme ships with the user-written blindschemes package.
* One-time install, left commented out:
*   ssc install blindschemes
set scheme plotplainblind

* Optional resource settings, left disabled:
*   set processors 24
*   set max_memory 115g

* Project root. Each collaborator adds their own path to the list below. Every cd is wrapped
* in capture so that paths that do not exist on the current machine are skipped silently.
* The candidates are tried in the order listed.
foreach project_root in "C:/Users/jcross/Dropbox/HFA and VAR" ///
	"D:/Dropbox/HFA and VAR" ///
	"C:\Users\uhrig_R\Dropbox\HFA and VAR" ///
	"C:\Users\richa\Dropbox\HFA and VAR" ///
	"C:\Users\camel\Dropbox\HFA and VAR" {
	capture cd "`project_root'"
}

* Data folders, relative to the project root: raw, clean, and final.
global rawdata "1_data/0_raw"
global cleandata "1_data/1_clean"
global finaldata "1_data/2_final"

* Output folders, relative to the project root: figures and tables.
global graphs "4_results/Figures"
global regressions "4_results/Tables"

**************** Grab new data  *****************
	* Load the final analysis dataset from the final-data folder.
	use "${finaldata}/final_treatment", clear

	* Panel label for the by-group histogram: "VAR" where VAR equals 1, "No VAR" everywhere else.
	gen str6 figure_label = cond(VAR == 1, "VAR", "No VAR")

	* Figure 1: side-by-side histograms of goal_diff by label.
	* Bins have width 1 and start at -10.5; bar heights are fractions.
	histogram goal_diff, by(figure_label, note("")) bcolor(blue) width(1) start(-10.5) fraction
	graph export "${graphs}/goal_diff_hist.png", replace

	* Group means of goal_diff, kept in locals so they can be drawn as vertical reference lines.
	summarize goal_diff if VAR == 0
	local mean_novar = `r(mean)'
	summarize goal_diff if VAR == 1
	local mean_var = `r(mean)'

	* Figure 2: the two groups overlaid with 50% opacity, with a vertical line at each group mean.
	twoway ///
		(histogram goal_diff if VAR == 0, bcolor(blue%50) width(1) start(-10.5) fraction) ///
		(histogram goal_diff if VAR == 1, bcolor(red%50) width(1) start(-10.5) fraction), ///
		xline(`mean_novar' `mean_var') ///
		legend(label(1 "No VAR") label(2 "VAR"))
	graph export "${graphs}/goal_diff_hist_2.png", replace
	
	
	*************************** Kolmogorov - Smirnov test, equality of distributions ************
	
	
	* ksmirnov goal_diff, by(VAR) exact
	
	* Running the line above is slow (~8 minutes). Output from a previous run:
*Two-sample Kolmogorov–Smirnov test for equality of distribution functions
*Smaller group             D     p-value      Exact
* -------------------------------------------------
* 0                    0.0063       0.756
* 1                   -0.0056       0.797
* Combined K-S         0.0063       0.999          .

* We fail to reject the null hypothesis of equality of distributions

* Maybe valuable: 
* kdensity goal_diff if VAR == 1, plot(kdensity goal_diff if VAR == 0) legend(label(1 "VAR") label(2 "No VAR") rows(1)) 


	
**************** Comparing to our old estimates *************************
	* The first three observations of the loaded data are reused as storage for the three
	* plotted estimates. comparison holds their x-axis positions (0, 0.2, 0.4); all other
	* observations stay missing and are dropped from the plot.
	gen comparison = 0 if _n == 1
	replace comparison = 0.2 if _n == 2
	replace comparison = 0.4 if _n == 3

	* Numeric version of country, required for the i. factor notation in absorb().
	encode country, gen(country_num)

	* Country and season fixed effects to make the figures *

	* Estimate 1: full sample.
	* The coefficient and clustered standard error on VAR are read from the stored results
	* instead of being typed in by hand, so the figure always matches the regression above it.
	* The interval is beta +/- 1.96 * SE.
	reghdfe goal_diff VAR, absorb(i.country_num i.season) cluster(country)
	gen beta = _b[VAR] if _n == 1
	gen lci = beta - (1.96*_se[VAR]) if _n == 1
	gen uci = beta + (1.96*_se[VAR]) if _n == 1

	* Estimate 2: the four leagues serie_a, bundesliga, premier_league and la_liga
	* (the same data as our paper).
	reghdfe goal_diff VAR if inlist(country, "serie_a", "bundesliga", "premier_league", "la_liga"), ///
		absorb(i.country_num i.season) cluster(country)
	replace beta = _b[VAR] if _n == 2
	replace lci = beta - (1.96*_se[VAR]) if _n == 2
	replace uci = beta + (1.96*_se[VAR]) if _n == 2

	* Estimate 3: taken from our Table 4, column 2 (week by league FEs). This number is
	* not estimated in this file, so it stays hard-coded.
	replace beta = -0.233 if _n == 3
	replace lci = beta - (1.96*0.103) if _n == 3
	replace uci = beta + (1.96*0.103) if _n == 3

	* Point estimates with capped interval bars. The blank labels at -0.05 and 0.5 pad the x-axis.
	twoway (scatter beta comparison) (rcap uci lci comparison, vertical xtitle("") ytitle("Estimated Impact on HFA") ///
	xlabel(-0.05 " " 0 "VAR (Full)" 0.2 "VAR (Big 4)" 0.4 `" "No Fans (Big 4)"  "(Cross and Uhrig 2023)" "' 0.5 " ") legend(label(1 "Point Estimate") label(2 "Confidence Interval")))
	graph export "${graphs}/comparison_var_hfa.png", replace	
	
	
************** Event study Figure *****************************

	* Map the season codes 910, 1011, ..., 1819 to 10, 11, ..., 19.
	* For each value yy in 10..19 the matching season code is (yy - 1)*100 + yy.
	* NOTE(review): any season code outside that list keeps its raw value in time.
	* Confirm the data contain no such seasons.
	gen time = season
	forvalues end_yr = 10/19 {
		local season_code = (`end_yr' - 1)*100 + `end_yr'
		replace time = `end_yr' if season == `season_code'
	}

	* Flag rows where VAR differs from the previous row within the same country
	* (after sorting by country and season) and record the time value there.
	sort country season
	gen first_year = time if country == country[_n-1] & VAR != VAR[_n-1]

	* Per-country reference period: the largest flagged time value.
	* It is missing for countries with no flagged row.
	bysort country: egen reference = max(first_year)

	* Event time relative to the reference period.
	* Empty for control. Negative in the pre-period and positive in the post-period.
	gen t = time - reference

	
* Outcomes for which event-study figures are produced.
local outcomes "goal_diff goal_diff_xg win draw home_points total_goals total_xgoals"

foreach outcome of local outcomes {

	* Graph title for the current outcome.
	if "`outcome'" == "goal_diff" {
		local panel_title "Home Goals minus Away Goals"
	}
	else if "`outcome'" == "goal_diff_xg" {
		local panel_title "Home xGoals minus Away xGoals"
	}
	else if "`outcome'" == "win" {
		local panel_title "Pr(Win)"
	}
	else if "`outcome'" == "draw" {
		local panel_title "Pr(Draw)"
	}
	else if "`outcome'" == "home_points" {
		local panel_title "Home Team Points per Game"
	}
	else if "`outcome'" == "total_goals" {
		local panel_title "Goals per Game"
	}
	else if "`outcome'" == "total_xgoals" {
		local panel_title "xGoals per Game"
	}

	* Two figures per outcome, differing only in the time fixed effect absorbed with country_num:
	* first i.season (file event_study_<outcome>.png), then i.season_last2 (file ..._last2.png).
	* NOTE(review): season_last2 is not created in this file. Presumably it comes with the
	* loaded dataset; confirm.
	foreach fe_time in season season_last2 {
		local suffix ""
		if "`fe_time'" == "season_last2" {
			local suffix "_last2"
		}

		eventdd `outcome', timevar(t) ci("twoway_rcap") level(95) ///
			cluster(country) method(hdfe, absorb(i.country_num i.`fe_time')) ///
			graph_op(scheme(sj) xtitle("Seasons from VAR") title("`panel_title'") graphregion(color(white)))
		graph export "${graphs}/event_study_`outcome'`suffix'.png", replace
	}

}
		
	

	
	
	