##################################################
### Tail risk of infectious diseases: COVID-19 ###
##################################################

### Code prepared by Kirsi Manz and Ulrich Mansmann
### July 3 2020, IBE Munich 

# All analyses are performed for John Hopkins University data #

input_file <- "jhudata_2020_06-30.xls" 

# Needed packages
library(fExtremes)
library(readxl)
library(evir)
library(DescTools)
library(rmutil)

# John-Hopkins-University data is used and all deaths from 200 on are included
# for the analysis of heavy tails 

# Data cut-off
day <- "2020_06-30"

# Here you can decide on the minimum number of deaths to be inluded: 200, 500, 1000
no = 200
#no = 500
#no = 1000

# Read in the data set
jhudata <- read_excel(input_file)

# Cuts the data at the desired number of deaths to be analysed 
jhudata <- jhudata[which(jhudata$cumdeaths>no),]

# Ordered estimates from large to small
est <- jhudata$cumdeaths

# Prepare maximum to sum ratio plot
ms<- msratioPlot(est)

# Moments:
# p=1: mean
# p=2: variance
# p=3: skewness
# p=4: kurtosis

# Print the moments
mean(est)
(sd(est))^2
median(est)
skewness(est)
kurtosis(est)

# quantiles
quantile(est,  probs = c(0.5, 0.9, 0.99))

# order the death counts from small to large 
data <- est[order(est)]
table(data)

# Prepare the Zipf plot 
emd<-emdPlot(data)
# Fit linear model to the data
lm = lm(log(emd$y) ~ log(emd$x))
lm
lines(emd$x, exp(predict(lm, newdata=list(x=emd$x))) ,col="red")


# Prepare the mean excess plot
me <- meplot(data = data, omit=3)


### Lorenz curve and Gini index for John Hopkins data ###
#########################################################

# JHU data with all countries with >0 deaths on June 30 2020
alle <- read_excel(input_file)

# Calculates Gini index for deaths with bootstrapped confidence intervals
giniest<- Gini(alle$cumdeaths, conf.level = 0.95)
giniest

# Calculates Gini index for casess with bootstrapped confidence intervals
giniest_c<- Gini(alle$cumcases, conf.level = 0.95)
giniest_c

gint <- round(giniest[1], 2)
ginc<- round(giniest_c[1], 2)

# Lorenz curves
Lor <- Lc(alle$cumdeaths)
Lor_c <- Lc(alle$cumcases)


### Plot all graphs (downloadable from the IBE webpage): ###
pdf(paste0("All_Figures_",day,"_",no,".pdf"), width=7,height=5)
par(las=1, mar=c(5.1, 6.1, 4.1, 2.1))
plot(ms$X1, type="n", ylim=c(0,1), ann=F, main=NA)
mtext(expression(paste(frac("M"[n]^p,"S"[n]^p))), side=2, line=3)
mtext("Observations, n", side=1, line=3)
title("Maximum to sum plot")
lines(ms$X1, col="dodgerblue", lty=1)
lines(ms$X2, col="tan1", lty=1)
lines(ms$X3, col="limegreen", lty=1)
lines(ms$X4, col="red", lty=1)

hist(est/10^5, breaks = 140, col = "#69b3a2", main="Histogram of COVID-19 deaths", 
     xlab = "Casualties (x10^5)", ylab = "No. events")

par(las=1, mar=c(5.1, 6.1, 4.1, 2.1))
plot(emd$x, emd$y, bg="#69b3a2", pch=21, xlab="Casualties (log scale)",
     ylab="Empirical survival (log scale)", log="xy", main="Zipf plot")
lines(emd$x, exp(predict(lm, newdata=list(x=emd$x))) ,col="red")

par(las=1, mar=c(5.1, 6.1, 4.1, 2.1))
plot(me$x/10^4, me$y/10^4, bg="#69b3a2", pch=21, xlab="Threshold (x10^4)",
     ylab="Mean excess function (x10^4)", main="Mean excess plot")

par(las=0, mar=c(5.1, 6.1, 4.1, 2.1))
hill(est, option = "xi", col="dodgerblue", labels = FALSE)#,
mtext(expression(paste(xi, " with 95% confidence interval")), side=2, line=3)
mtext("Order statistics", side=1, line=3)
title("Hill plot", line=2.5)

plot(Lor_c, xlab="Proportion of countries", ylab="Proportion of cases/deaths", col="dodgerblue",
     main="Lorenz curve")
legend("topleft" ,col=c("dodgerblue","red"), lty=c(1,1), legend=c("Cases: Gini index = 0.86",
"Deaths: Gini index = 0.91"), border=NA, bty="n")
lines(Lor, col="red")
dev.off()

# This is how a Pareto distribtion could look like
#location parameter m, dispersion s: dpareto(y, m, s)
plot(dpareto(1:30, 10, 3))
s=3
m=10
y=1:30
# density function manually
pareto = s*(1 + y/(m*(s-1)))^(-s-1)/(m*(s-1))

plot(y, pareto)
