/* ============================================================================
Name: 			summary_stats.do
Description: 	Summary statistics with the merged dataset

Inputs: 		Full data with all outcomes
Outputs:		Summary statistics tables
============================================================================ */


* Set up environment ------------------------------------------------------------------------------
* Start from an empty dataset, switch off the -more- pause in the output, and
* run everything below under Stata 16.0 syntax and behaviour.
clear
set more off
version 16.0

* Optional settings, currently disabled:
*set scheme plotplainblind

*set processors 24
*set max_memory 115g

* Project root. Each collaborator adds their own path with -cap- in front, so
* that a path which does not exist on the current machine fails silently.
* Every cd that succeeds changes the working directory, so the last valid
* path in this list is the one that ends up in effect.
cap cd "C:/Users/jcross/Dropbox/HFA and VAR"
cap cd "D:/Dropbox/HFA and VAR"
cap cd "C:\Users\uhrig_R\Dropbox\HFA and VAR"
cap cd "C:\Users\richa\Dropbox\HFA and VAR"
cap cd "C:\Users\camel\Dropbox\HFA and VAR"

* Data folders (relative to the project root): raw, clean and final data
global rawdata "1_data/0_raw"
global cleandata "1_data/1_clean"
global finaldata "1_data/2_final"

* Results folders (relative to the project root): figures and tables
global graphs "4_results/Figures"
global regressions "4_results/Tables"

**************** Grab new data  *****************
	* Load the final treatment file; it supplies the match outcomes summarized below.
	use "${finaldata}/final_treatment", clear

	* Korea is excluded from the summary tables.
	drop if country == "korea"	
	* Drop matches without a goal difference. missing() also removes extended
	* missing values (.a to .z), which a comparison with "." alone would keep.
	drop if missing(goal_diff)
	
	* For balance table *
	*global tabOptions "cells("mean(fmt(2)) b(star fmt(2) )" "sd(par fmt(2)) se(par fmt(2))") label collabels(none)"
	
	* Region codes. A is the default for every country not listed below and is
	* the column titled "Europe (Top 5)" in the table; B is "Europe (Other)";
	* C is "Asia/South America".
	gen region = "A"
	replace region = "B" if inlist(country, "denmark", "dutch", "greece", "portugal", ///
		"russia", "scotland", "switzerland", "turkey")
	replace region = "C" if inlist(country, "australia", "brazil", "japan")
	
	* Number of leagues per region.
	* NOTE(review): numb_leagues, numb_treat_18 and numb_treat_19 are not
	* referenced again in the visible part of this do-file; the same counts
	* are typed in by hand in the estadd calls of the table loop.
	gen numb_leagues = 5 if region == "A"
	replace numb_leagues = 8 if region == "B"
	replace numb_leagues = 3 if region == "C"
	
	
	* Treatment waves (18/19 or 19): Brazil, Dutch, France, La Liga, Turkey
	* Treatment waves (17/18 or 18): Australia, Bundesliga, Portugal, Serie A
	
	* Leagues per region treated in the early (2018) wave
	gen numb_treat_18 = 2 if region == "A"
	replace numb_treat_18 = 1 if region == "B"
	replace numb_treat_18 = 1 if region == "C"

	* Leagues per region treated in the late (2019) wave
	gen numb_treat_19 = 2 if region == "A"
	replace numb_treat_19 = 2 if region == "B"
	replace numb_treat_19 = 1 if region == "C"	
	
	
	* Outcomes summarized in the match-outcome table
	global varList "goal_diff total_goals win draw"
	
	
	*************************** Balance Tables *********************************	
	
	***************************************************************************

	* Variable labels, used as row names because esttab is called with the
	* label option (they don't import from the reshape):
	 la var goal_diff "Home-Away Goals"
	 la var win "Pr(Home Win)"
	 la var draw "Pr(Draw)"
	 la var home_points "Home Points"
	 la var total_goals "Total Goals"

	 
	* BALANCE TABLE: match outcomes broken down by region
	
	* Remove estimates already stored in memory (for example from an earlier
	* run in the same session; the clear at the top of the file does not drop
	* them), so that only results created by this loop can reach the table.
	eststo clear
	
	* levelsof returns the region codes in sorted order (A, B, C). results_1,
	* results_2 and results_3 therefore line up with the column titles
	* "Europe (Top 5)", "Europe (Other)" and "Asia/South America".
	levelsof region, local(loop)
	local z = 1
	
	foreach l of local loop {
		* Mean and sd of every outcome for this region
		eststo results_`z': estpost summarize $varList if region == "`l'"
			* Hand-entered league counts shown in the table footer
			if "`l'" == "A" {
				estadd local never_treat = 1
				estadd local early_treat = 2 
				estadd local late_treat = 2
			}
			
			if "`l'" == "B" {
				estadd local never_treat = 5
				estadd local early_treat = 1
				estadd local late_treat = 2
			}
			
			if "`l'" == "C" {
				estadd local never_treat = 1
				estadd local early_treat = 1
				estadd local late_treat = 1
			}			
		di "`l'"
		local z = `z'+1
		
	}
	
	
	* Column order follows the loop: A = Europe (Top 5), B = Europe (Other), C = Asia/South America
			
	esttab results_1 results_2 results_3 using "$regressions/summary/summary_stats_region_match_outcomes.tex", replace ///
	mtitles("Europe (Top 5)" "Europe (Other)" "Asia/South America") collabels(none) noobs nonumbers ///
	cell(mean(fmt(%9.2f)) sd(par)) label ///
	stats(never_treat early_treat late_treat N, labels("Control Leagues" "Early Treated (2018) Leagues" "Late Treated (2019) Leagues" "Total Matches"))	

************************************* Second Tables ***************************	
	
**************** Grab new data  *****************
	* Load the referee-merged file and leave Korea out, as in the first table.
	use "${finaldata}/final_refs_merged", clear
	drop if country == "korea"	
	
	* For balance table *
	*global tabOptions "cells("mean(fmt(2)) b(star fmt(2) )" "sd(par fmt(2)) se(par fmt(2))") label collabels(none)"
	
	* Region codes: B and C are listed explicitly, every other country is A.
	gen region = "A"
	replace region = "B" if inlist(country, "denmark", "dutch", "greece", "portugal") ///
		| inlist(country, "russia", "scotland", "switzerland", "turkey")
	replace region = "C" if inlist(country, "australia", "brazil", "japan")
	
	* Number of leagues per region in this file (A: 5, B: 4, C: 1)
	gen numb_leagues = cond(region == "A", 5, cond(region == "B", 4, 1))
	
	
	* Treatment waves (18/19 or 19): Brazil, Dutch, France, La Liga, Turkey
	* Treatment waves (17/18 or 18): Australia, Bundesliga, Portugal, Serie A
	
	* Early-wave (2018) leagues per region; C is 0 here, only change is loss of Australia
	gen numb_treat_18 = cond(region == "A", 2, cond(region == "B", 1, 0))

	* Late-wave (2019) leagues per region
	gen numb_treat_19 = cond(region == "C", 1, 2)
	
	* Home-minus-away differences for cards and fouls
	foreach stat in yellow red fouls {
		gen `stat'_diff = `stat'_home - `stat'_away
	}
	* Offsides use a longer stem in the raw variable names
	gen offside_diff = caughtoffside_home - caughtoffside_away
	
	rename caughtoffside_overall offside_overall
	
	
	* Outcomes summarized in the referee-outcome table
	global varList "yellow_diff red_diff fouls_diff offside_diff pk_pg_diff"
	
	
	*************************** Balance Tables *********************************	
	
	***************************************************************************

	* Variable labels, used as row names because esttab is called with the
	* label option (they don't import from the reshape):
	 la var yellow_diff "Home-Away Yellows per Game"
	 la var red_diff "Home-Away Reds per Game"
	 la var fouls_diff "Home-Away Fouls per Game"
	 la var offside_diff "Home-Away Offsides per Game"
	 la var pk_pg_diff "Home-Away PKs per Game"

	 
	* BALANCE TABLE: referee outcomes broken down by region
	
	* results_1 to results_3 from the match-outcome table are still stored in
	* memory at this point. Clear them first: if a region had no rows in this
	* file, the loop would create fewer than three results and esttab would
	* silently reuse a match-outcome column. After the clear it stops with an
	* error instead.
	eststo clear
	
	* levelsof returns the region codes in sorted order (A, B, C). results_1,
	* results_2 and results_3 therefore line up with the column titles
	* "Europe (Top 5)", "Europe (Other)" and "Asia/South America".
	levelsof region, local(loop)
	local z = 1
	
	foreach l of local loop {
		* Mean and sd of every outcome for this region
		eststo results_`z': estpost summarize $varList if region == "`l'"
			* Hand-entered league counts shown in the table footer
			if "`l'" == "A" {
				estadd local never_treat = 1
				estadd local early_treat = 2 
				estadd local late_treat = 2
			}
			
			if "`l'" == "B" {
				estadd local never_treat = 1
				estadd local early_treat = 1
				estadd local late_treat = 2
			}
			
			if "`l'" == "C" {
				estadd local never_treat = 0
				estadd local early_treat = 0
				estadd local late_treat = 1
			}			
		di "`l'"
		local z = `z'+1
		
	}
	
	* Column order follows the loop: A = Europe (Top 5), B = Europe (Other), C = Asia/South America
			
	esttab results_1 results_2 results_3 using "$regressions/summary/summary_stats_region_ref_outcomes.tex", replace ///
	mtitles("Europe (Top 5)" "Europe (Other)" "Asia/South America") collabels(none) noobs nonumbers ///
	cell(mean(fmt(%9.2f)) sd(par)) label ///
	stats(never_treat early_treat late_treat N, labels("Control Leagues" "Early Treated (2018) Leagues" "Late Treated (2019) Leagues" "Total Matches"))		
	 	