Download the script here


1 Set up

rm(list=ls())

library(readr)
library(tidyverse)

2 Loading the dataset

2.1 Explore the data (with dplyr)

# https://r4ds.had.co.nz/transform.html
#https://garthtarr.github.io/meatR/dplyr.html

# the dplyr counterpart to the str() function is the 'glimpse' function
glimpse(coronaNet)
## Rows: 2,249
## Columns: 38
## $ record_id          <chr> "1000775Ds", "1000775Dm", "1003875NA", "1004103NA"…
## $ policy_id          <dbl> 1342691, 1342691, 1003875, 1004103, 2575122, 10052…
## $ recorded_date      <dttm> 2020-04-04 15:43:15, 2020-04-04 15:43:15, 2020-04…
## $ date_updated       <date> 2020-04-04, 2020-04-04, 2020-04-15, 2020-05-15, 2…
## $ date_announced     <date> 2020-03-14, 2020-03-14, 2020-04-07, 2020-05-15, 2…
## $ date_start         <date> 2020-03-14, 2020-03-14, 2020-04-07, 2020-05-15, 2…
## $ date_end           <date> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2020-…
## $ entry_type         <chr> "update", "update", "new_entry", "new_entry", "upd…
## $ update_type        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ event_description  <chr> "Chile extends mandatory quarantine for people fro…
## $ domestic_policy    <dbl> 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ type               <chr> "Quarantine", "Quarantine", "Other Policy Not List…
## $ type_sub_cat       <chr> "Self-Quarantine (i.e. quarantine at home)", "Quar…
## $ type_text          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ index_high_est     <dbl> 52.03633, 52.03633, 56.17884, NA, 54.14313, 55.981…
## $ index_med_est      <dbl> 51.22552, 51.22552, 54.83055, NA, 53.08173, 54.901…
## $ index_low_est      <dbl> 50.20661, 50.20661, 53.59422, NA, 52.28079, 53.777…
## $ index_country_rank <dbl> 113, 113, 181, NA, 144, 182, 168, NA, NA, NA, 130,…
## $ correct_type       <chr> "original", "original", "original", "original", "o…
## $ country            <chr> "Chile", "Chile", "Guatemala", "Turkey", "Kenya", …
## $ init_country_level <chr> "National", "National", "National", "National", "N…
## $ province           <chr> NA, NA, NA, NA, NA, "Bern", NA, NA, NA, NA, "Sardi…
## $ city               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ source_corr_type   <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ target_country     <chr> "China,France,Germany,Iran,Italy,Japan,North Korea…
## $ target_geog_level  <chr> "One or more countries, but not all countries", "O…
## $ target_region      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ target_province    <chr> NA, NA, NA, NA, NA, "Bern", NA, NA, NA, NA, "Sardi…
## $ target_city        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ target_other       <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ target_who_what    <chr> "All Travelers (Citizen Travelers + Foreign Travel…
## $ target_direction   <chr> "Inbound", "Inbound", NA, NA, "Inbound", NA, NA, N…
## $ travel_mechanism   <chr> "All kinds of transport", "All kinds of transport"…
## $ compliance         <chr> "Mandatory with Fines", "Mandatory with Fines", "M…
## $ enforcer           <chr> "National Government,Ministry/Department of Health…
## $ link               <chr> "https://www.gob.cl/noticias/covid-19-ministerio-d…
## $ ISO_A3             <chr> "CHL", "CHL", "GTM", "TUR", "KEN", "CHE", "ITA", "…
## $ ISO_A2             <chr> "CL", "CL", "GT", "TR", "KE", "CH", "IT", "VN", "V…
# dplyr counterpart to the 'which' function is the 'filter' function
filter(coronaNet, type == "Closure and Regulation of Schools")
## # A tibble: 205 x 38
##    record_id policy_id recorded_date       date_updated date_announced
##    <chr>         <dbl> <dttm>              <date>       <date>        
##  1 1005610Dc   1005610 2020-05-29 07:51:28 2020-05-29   2020-05-07    
##  2 1005610Dd   1005610 2020-05-29 07:51:28 2020-05-29   2020-05-07    
##  3 1005610Dr   1005610 2020-05-29 07:51:28 2020-05-29   2020-05-07    
##  4 1033361Dd   1033361 2020-04-29 03:46:29 2020-04-29   2020-04-17    
##  5 1033361Dr   1033361 2020-04-29 03:46:29 2020-04-29   2020-04-17    
##  6 1043355Dc   1043355 2020-04-03 09:44:07 2020-04-03   2020-03-20    
##  7 1043355Dd   1043355 2020-04-03 09:44:07 2020-04-03   2020-03-20    
##  8 1043355Dr   1043355 2020-04-03 09:44:07 2020-04-03   2020-03-20    
##  9 1047400Dc   1047400 2020-04-03 00:29:03 2020-04-03   2020-03-18    
## 10 1047400Dd   1047400 2020-04-03 00:29:03 2020-04-03   2020-03-18    
## # … with 195 more rows, and 33 more variables: date_start <date>,
## #   date_end <date>, entry_type <chr>, update_type <chr>,
## #   event_description <chr>, domestic_policy <dbl>, type <chr>,
## #   type_sub_cat <chr>, type_text <dbl>, index_high_est <dbl>,
## #   index_med_est <dbl>, index_low_est <dbl>, index_country_rank <dbl>,
## #   correct_type <chr>, country <chr>, init_country_level <chr>,
## #   province <chr>, city <chr>, source_corr_type <lgl>, target_country <chr>,
## #   target_geog_level <chr>, target_region <chr>, target_province <chr>,
## #   target_city <chr>, target_other <lgl>, target_who_what <chr>,
## #   target_direction <chr>, travel_mechanism <chr>, compliance <chr>,
## #   enforcer <chr>, link <chr>, ISO_A3 <chr>, ISO_A2 <chr>
# Class question: what if you want to find all observations that fits the condition above but only if the initiating country (country) is Germany?

# Class question: what if you want to find all observations that fits the condition above AND the type of policy (type) is 'External Border Restrictions'?


# Class question: what if you want to find all observations that fits the condition above that are not NA for the end of policy field (date_end)



# the dplyr counterpart to the '[ ]' command to subset by rows is the slice function
slice(coronaNet, 1:10)
## # A tibble: 10 x 38
##    record_id policy_id recorded_date       date_updated date_announced
##    <chr>         <dbl> <dttm>              <date>       <date>        
##  1 1000775Ds   1342691 2020-04-04 15:43:15 2020-04-04   2020-03-14    
##  2 1000775Dm   1342691 2020-04-04 15:43:15 2020-04-04   2020-03-14    
##  3 1003875NA   1003875 2020-04-15 11:04:50 2020-04-15   2020-04-07    
##  4 1004103NA   1004103 2020-05-15 11:04:46 2020-05-15   2020-05-15    
##  5 1004681NA   2575122 2020-04-03 08:33:25 2020-04-03   2020-03-22    
##  6 100526NA     100526 2020-04-11 03:45:41 2020-04-11   2020-04-02    
##  7 4866080NA   1005278 2020-03-30 05:21:44 2020-03-30   2020-03-20    
##  8 1005610Dc   1005610 2020-05-29 07:51:28 2020-05-29   2020-05-07    
##  9 1005610Dd   1005610 2020-05-29 07:51:28 2020-05-29   2020-05-07    
## 10 1005610Dr   1005610 2020-05-29 07:51:28 2020-05-29   2020-05-07    
## # … with 33 more variables: date_start <date>, date_end <date>,
## #   entry_type <chr>, update_type <chr>, event_description <chr>,
## #   domestic_policy <dbl>, type <chr>, type_sub_cat <chr>, type_text <dbl>,
## #   index_high_est <dbl>, index_med_est <dbl>, index_low_est <dbl>,
## #   index_country_rank <dbl>, correct_type <chr>, country <chr>,
## #   init_country_level <chr>, province <chr>, city <chr>,
## #   source_corr_type <lgl>, target_country <chr>, target_geog_level <chr>,
## #   target_region <chr>, target_province <chr>, target_city <chr>,
## #   target_other <lgl>, target_who_what <chr>, target_direction <chr>,
## #   travel_mechanism <chr>, compliance <chr>, enforcer <chr>, link <chr>,
## #   ISO_A3 <chr>, ISO_A2 <chr>
# the dplyr counterpart to the 'sort' function is the arrange function

arrange(coronaNet, date_start)
## # A tibble: 2,249 x 38
##    record_id policy_id recorded_date       date_updated date_announced
##    <chr>         <dbl> <dttm>              <date>       <date>        
##  1 1074096NA   1074096 2020-04-14 06:04:49 2020-04-14   2019-12-31    
##  2 1747784NA   1747784 2020-04-05 10:20:16 2020-04-05   2020-03-11    
##  3 8098722NA   1428806 2020-04-02 11:26:23 2020-04-02   2020-01-02    
##  4 1518388Bn   1518388 2020-04-14 06:46:27 2020-04-14   2020-01-09    
##  5 1921058NA   1921058 2020-04-02 18:30:56 2020-04-02   2020-01-09    
##  6 1989552NA   1989552 2020-03-31 04:56:40 2020-03-31   2020-01-10    
##  7 1654091NA   1654091 2020-05-05 09:56:24 2020-05-05   2020-01-13    
##  8 1523205NA   1523205 2020-04-02 20:19:50 2020-04-02   2020-01-18    
##  9 1990707NA   1990707 2020-04-02 20:45:50 2020-04-02   2020-01-20    
## 10 1024919NA   1024919 2020-04-03 11:19:15 2020-04-03   2020-01-21    
## # … with 2,239 more rows, and 33 more variables: date_start <date>,
## #   date_end <date>, entry_type <chr>, update_type <chr>,
## #   event_description <chr>, domestic_policy <dbl>, type <chr>,
## #   type_sub_cat <chr>, type_text <dbl>, index_high_est <dbl>,
## #   index_med_est <dbl>, index_low_est <dbl>, index_country_rank <dbl>,
## #   correct_type <chr>, country <chr>, init_country_level <chr>,
## #   province <chr>, city <chr>, source_corr_type <lgl>, target_country <chr>,
## #   target_geog_level <chr>, target_region <chr>, target_province <chr>,
## #   target_city <chr>, target_other <lgl>, target_who_what <chr>,
## #   target_direction <chr>, travel_mechanism <chr>, compliance <chr>,
## #   enforcer <chr>, link <chr>, ISO_A3 <chr>, ISO_A2 <chr>
arrange(coronaNet, desc(date_start))
## # A tibble: 2,249 x 38
##    record_id policy_id recorded_date       date_updated date_announced
##    <chr>         <dbl> <dttm>              <date>       <date>        
##  1 1886438NA   6801359 2020-05-17 14:54:56 2020-05-17   2020-05-13    
##  2 1010884NA   1010884 2020-04-22 16:50:38 2020-04-22   2020-04-10    
##  3 1061496NA   1061496 2020-05-29 04:58:03 2020-05-29   2020-05-11    
##  4 1291080Eb   1291080 2020-05-28 08:49:15 2020-05-28   2020-05-22    
##  5 1401604Db   1401604 2020-05-09 09:12:24 2020-05-09   2020-04-24    
##  6 14907NA       14907 2020-05-22 07:00:55 2020-05-22   2020-05-22    
##  7 1556359Dc   1556359 2020-05-30 16:43:31 2020-05-30   2020-05-27    
##  8 1556359Dd   1556359 2020-05-30 16:43:31 2020-05-30   2020-05-27    
##  9 1556359Dr   1556359 2020-05-30 16:43:31 2020-05-30   2020-05-27    
## 10 1691517NA   1691517 2020-05-27 06:12:51 2020-05-27   2020-05-13    
## # … with 2,239 more rows, and 33 more variables: date_start <date>,
## #   date_end <date>, entry_type <chr>, update_type <chr>,
## #   event_description <chr>, domestic_policy <dbl>, type <chr>,
## #   type_sub_cat <chr>, type_text <dbl>, index_high_est <dbl>,
## #   index_med_est <dbl>, index_low_est <dbl>, index_country_rank <dbl>,
## #   correct_type <chr>, country <chr>, init_country_level <chr>,
## #   province <chr>, city <chr>, source_corr_type <lgl>, target_country <chr>,
## #   target_geog_level <chr>, target_region <chr>, target_province <chr>,
## #   target_city <chr>, target_other <lgl>, target_who_what <chr>,
## #   target_direction <chr>, travel_mechanism <chr>, compliance <chr>,
## #   enforcer <chr>, link <chr>, ISO_A3 <chr>, ISO_A2 <chr>
# Class question: How could you use arrange() to sort all missing values to the date_end variable? (Hint: use is.na()).



# the dplyr counterpart to the $ operator or the [,variableName] operator is 'select'

select(coronaNet, entry_type)
## # A tibble: 2,249 x 1
##    entry_type
##    <chr>     
##  1 update    
##  2 update    
##  3 new_entry 
##  4 new_entry 
##  5 update    
##  6 new_entry 
##  7 new_entry 
##  8 new_entry 
##  9 new_entry 
## 10 new_entry 
## # … with 2,239 more rows
select(coronaNet, entry_type, country)
## # A tibble: 2,249 x 2
##    entry_type country    
##    <chr>      <chr>      
##  1 update     Chile      
##  2 update     Chile      
##  3 new_entry  Guatemala  
##  4 new_entry  Turkey     
##  5 update     Kenya      
##  6 new_entry  Switzerland
##  7 new_entry  Italy      
##  8 new_entry  Vietnam    
##  9 new_entry  Vietnam    
## 10 new_entry  Vietnam    
## # … with 2,239 more rows
select(coronaNet, entry_type:country)
## # A tibble: 2,249 x 13
##    entry_type update_type event_descripti… domestic_policy type  type_sub_cat
##    <chr>      <chr>       <chr>                      <dbl> <chr> <chr>       
##  1 update     <NA>        "Chile extends …               0 Quar… Self-Quaran…
##  2 update     <NA>        "Chile extends …               0 Quar… Quarantine …
##  3 new_entry  <NA>        "On April 7th, …               1 Othe… <NA>        
##  4 new_entry  <NA>        "The total numb…               1 Othe… <NA>        
##  5 update     <NA>        "On 22nd March,…               0 Exte… <NA>        
##  6 new_entry  <NA>        "On April 2nd, …               1 Heal… <NA>        
##  7 new_entry  <NA>        "Italy is enfor…               1 Soci… <NA>        
##  8 new_entry  <NA>        "Schools have r…               1 Clos… Preschool o…
##  9 new_entry  <NA>        "Schools have r…               1 Clos… Primary Sch…
## 10 new_entry  <NA>        "Schools have r…               1 Clos… Secondary S…
## # … with 2,239 more rows, and 7 more variables: type_text <dbl>,
## #   index_high_est <dbl>, index_med_est <dbl>, index_low_est <dbl>,
## #   index_country_rank <dbl>, correct_type <chr>, country <chr>
select(coronaNet, -c(entry_type:country))
## # A tibble: 2,249 x 25
##    record_id policy_id recorded_date       date_updated date_announced
##    <chr>         <dbl> <dttm>              <date>       <date>        
##  1 1000775Ds   1342691 2020-04-04 15:43:15 2020-04-04   2020-03-14    
##  2 1000775Dm   1342691 2020-04-04 15:43:15 2020-04-04   2020-03-14    
##  3 1003875NA   1003875 2020-04-15 11:04:50 2020-04-15   2020-04-07    
##  4 1004103NA   1004103 2020-05-15 11:04:46 2020-05-15   2020-05-15    
##  5 1004681NA   2575122 2020-04-03 08:33:25 2020-04-03   2020-03-22    
##  6 100526NA     100526 2020-04-11 03:45:41 2020-04-11   2020-04-02    
##  7 4866080NA   1005278 2020-03-30 05:21:44 2020-03-30   2020-03-20    
##  8 1005610Dc   1005610 2020-05-29 07:51:28 2020-05-29   2020-05-07    
##  9 1005610Dd   1005610 2020-05-29 07:51:28 2020-05-29   2020-05-07    
## 10 1005610Dr   1005610 2020-05-29 07:51:28 2020-05-29   2020-05-07    
## # … with 2,239 more rows, and 20 more variables: date_start <date>,
## #   date_end <date>, init_country_level <chr>, province <chr>, city <chr>,
## #   source_corr_type <lgl>, target_country <chr>, target_geog_level <chr>,
## #   target_region <chr>, target_province <chr>, target_city <chr>,
## #   target_other <lgl>, target_who_what <chr>, target_direction <chr>,
## #   travel_mechanism <chr>, compliance <chr>, enforcer <chr>, link <chr>,
## #   ISO_A3 <chr>, ISO_A2 <chr>
select(coronaNet, contains('type'))
## # A tibble: 2,249 x 7
##    entry_type update_type type  type_sub_cat type_text correct_type
##    <chr>      <chr>       <chr> <chr>            <dbl> <chr>       
##  1 update     <NA>        Quar… Self-Quaran…        NA original    
##  2 update     <NA>        Quar… Quarantine …        NA original    
##  3 new_entry  <NA>        Othe… <NA>                NA original    
##  4 new_entry  <NA>        Othe… <NA>                NA original    
##  5 update     <NA>        Exte… <NA>                NA original    
##  6 new_entry  <NA>        Heal… <NA>                NA original    
##  7 new_entry  <NA>        Soci… <NA>                NA correction  
##  8 new_entry  <NA>        Clos… Preschool o…        NA original    
##  9 new_entry  <NA>        Clos… Primary Sch…        NA original    
## 10 new_entry  <NA>        Clos… Secondary S…        NA original    
## # … with 2,239 more rows, and 1 more variable: source_corr_type <lgl>
# Class question: what are some ways you can select record_id, policy_id, target_direction, ISO_A3, and ISO_A2?


# the dplyr counterpart to making a new variable is the 'mutate' function

mutate(coronaNet, newVar1 = 1, 
       newVar2 = ifelse(entry_type == 'new_entry', 1, 0)) %>%
  select(newVar1, newVar2)
## # A tibble: 2,249 x 2
##    newVar1 newVar2
##      <dbl>   <dbl>
##  1       1       0
##  2       1       0
##  3       1       1
##  4       1       1
##  5       1       0
##  6       1       1
##  7       1       1
##  8       1       1
##  9       1       1
## 10       1       1
## # … with 2,239 more rows
# if you only want to keep the new variables, use transmute()
transmute(coronaNet, newVar1 = 1, 
          newVar2 = ifelse(entry_type == 'new_entry', 1, 0))  
## # A tibble: 2,249 x 2
##    newVar1 newVar2
##      <dbl>   <dbl>
##  1       1       0
##  2       1       0
##  3       1       1
##  4       1       1
##  5       1       0
##  6       1       1
##  7       1       1
##  8       1       1
##  9       1       1
## 10       1       1
## # … with 2,239 more rows

3 Summarizing the Data

coronaNet %>% group_by(country) %>%
  summarise(count = n(),
            numberOfTypesOfPolicies = n_distinct(type),
            mostActive = median(date_start))
## # A tibble: 190 x 4
##    country             count numberOfTypesOfPolicies mostActive
##    <chr>               <int>                   <int> <date>    
##  1 Afghanistan            10                       4 2020-03-28
##  2 Albania                 1                       1 2020-03-14
##  3 Algeria                 2                       1 2020-03-15
##  4 Andorra                 4                       2 2020-03-16
##  5 Angola                 19                       7 2020-04-02
##  6 Antigua and Barbuda     6                       5 2020-03-28
##  7 Argentina               9                       7 2020-03-25
##  8 Armenia                14                       5 2020-04-08
##  9 Australia              22                      11 2020-03-24
## 10 Austria                15                       7 2020-03-19
## # … with 180 more rows
coronaNet %>% group_by(country) %>%
  mutate(correct_type = ifelse(correct_type == 'original', 0, 1)) %>% 
  summarise(count = n(),
            numberOfTypesOfPolicies = n_distinct(type),
            mostActive = median(date_start),
            percCorrected = mean(correct_type))
## # A tibble: 190 x 5
##    country             count numberOfTypesOfPolicies mostActive percCorrected
##    <chr>               <int>                   <int> <date>             <dbl>
##  1 Afghanistan            10                       4 2020-03-28             0
##  2 Albania                 1                       1 2020-03-14             0
##  3 Algeria                 2                       1 2020-03-15             0
##  4 Andorra                 4                       2 2020-03-16             0
##  5 Angola                 19                       7 2020-04-02             0
##  6 Antigua and Barbuda     6                       5 2020-03-28             0
##  7 Argentina               9                       7 2020-03-25             0
##  8 Armenia                14                       5 2020-04-08             0
##  9 Australia              22                      11 2020-03-24             0
## 10 Austria                15                       7 2020-03-19             0
## # … with 180 more rows
coronaNet %>% group_by(country, date_start) %>%
  summarise(count = n(),
            numberOfTypesOfPolicies = n_distinct(type))
## # A tibble: 1,281 x 4
## # Groups:   country [190]
##    country     date_start count numberOfTypesOfPolicies
##    <chr>       <date>     <int>                   <int>
##  1 Afghanistan 2020-02-12     1                       1
##  2 Afghanistan 2020-02-27     1                       1
##  3 Afghanistan 2020-03-22     2                       2
##  4 Afghanistan 2020-03-24     1                       1
##  5 Afghanistan 2020-04-01     1                       1
##  6 Afghanistan 2020-04-15     3                       1
##  7 Afghanistan 2020-04-29     1                       1
##  8 Albania     2020-03-14     1                       1
##  9 Algeria     2020-03-13     1                       1
## 10 Algeria     2020-03-18     1                       1
## # … with 1,271 more rows

4 Frequency Table

#install.packages("kableExtra")

library(kableExtra)

coronaNet %>% 
  filter(init_country_level == "Yes, it is at the province/state level",
         #entry_type=="new_entry",
         country=="Germany") %>%
  group_by(type) %>% 
  summarize(`Total Number of Policies`=n(),
            `Number of Countries`=length(unique(country)),
            `Number of Targeted Countries`=length(unique(target_country)),
            `% With Mandatory Enforcement`=round(mean(grepl(x=compliance,pattern="Mandatory")*100,na.rm=T),0)) %>% 
  dplyr::select(Type="type",everything()) %>% 
  filter(!is.na(Type)) %>% 
  arrange(desc(`Total Number of Policies`))  %>% 
  ungroup() %>%
  knitr::kable("html",booktabs=T,
               caption="Policies by Germany on state level (Bundesländer)") 
Policies by Germany on state level (Bundesländer)
Type Total Number of Policies Number of Countries Number of Targeted Countries % With Mandatory Enforcement
Restriction and Regulation of Businesses 22 1 1 100
Closure and Regulation of Schools 19 1 1 100
Health Resources 14 1 1 0
Restrictions of Mass Gatherings 6 1 1 100
Social Distancing 5 1 1 100
Health Testing 2 1 1 50
New Task Force, Bureau or Administrative Configuration 2 1 1 100
Quarantine 2 1 1 100
External Border Restrictions 1 1 1 100
Internal Border Restrictions 1 1 1 100
Other Policy Not Listed Above 1 1 1 0
Public Awareness Measures 1 1 1 100
Restriction and Regulation of Government Services 1 1 1 100

5 Density Plot for Policies

In this case, we want to have the density plot for the frequencies of policies.

#install.packages("lubridate")

library(lubridate)

d<- coronaNet%>% 
  filter(!is.na(type)) %>% 
  group_by(type,date_announced) %>% 
  summarize(Policies=length(unique(record_id))) %>% 
  arrange(type,date_announced) %>% 
  mutate(Policies=cumsum(Policies)) %>% 
  ungroup %>% 
  mutate( type=recode(type,
      
                    `External Border Restrictions`="External\nBorder\nRestrictions",
                      `Restriction of Non-Essential Businesses`="Restriction of\nNon-Essential\nBusinesses",
                      `Restrictions of Mass Gatherings`="Restrictions of\nMass Gatherings",
                      `Restriction of Non-Essential Government Services`="Restriction of\nNon-Essential\nGovernment Services",
                      `Internal Border Restrictions`="Internal\nBorder Restrictions",
                      `External Border Restrictions`="External\nBorder Restrictions",
                      `Public Awareness Campaigns`="Public\nAwareness Campaigns",
                      `New Task Force or Bureau`="New Task Force")) %>% 
  ggplot(aes(y=Policies,x=date_announced)) +
  geom_area() +
  theme_minimal() +
  theme(panel.grid = element_blank(),
        strip.background = element_blank()) +
  xlab("") +
  facet_wrap(~type)

d_all<-d + scale_x_date(date_labels = "%d/%m")
d_all

6 T-Test

# https://www.dummies.com/education/science/biology/how-to-use-student-t-tests-to-compare-averages/

# Doing a t-test in R
# https://uc-r.github.io/t_test

# Doing a t-test by hand
# http://onlinestatbook.com/2/tests_of_means/difference_means.html
# https://stattrek.com/hypothesis-test/difference-in-means.aspx

# Interpreting p-values
# https://blog.minitab.com/blog/adventures-in-statistics-2/how-to-correctly-interpret-p-values


# class question: what is the average gdpPPP


# lets see if there is a statistical difference in the number of business restrictions for countries over the average GDP PPP level 
# compared to countries under the average GDP PPP level
t.test(dataAgg %>% filter(gdpPPP > mean(dataAgg$gdpPPP, na.rm = TRUE)) %>% select(numBusinessRestrictions),
       dataAgg %>% filter(gdpPPP <= mean(dataAgg$gdpPPP, na.rm = TRUE))%>% select(numBusinessRestrictions))
## 
##  Welch Two Sample t-test
## 
## data:  dataAgg %>% filter(gdpPPP > mean(dataAgg$gdpPPP, na.rm = TRUE)) %>%  and dataAgg %>% filter(gdpPPP <= mean(dataAgg$gdpPPP, na.rm = TRUE)) %>%     select(numBusinessRestrictions) and     select(numBusinessRestrictions)
## t = 1.5584, df = 18.136, p-value = 0.1364
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.54962 17.22874
## sample estimates:
## mean of x mean of y 
## 10.210526  2.870968
dataAgg %>% filter(gdpPPP > mean(dataAgg$gdpPPP, na.rm = TRUE)) %>% summarise(avgNumBusinessRestrictions = mean(numBusinessRestrictions, na.rm = TRUE))  
## # A tibble: 1 x 1
##   avgNumBusinessRestrictions
##                        <dbl>
## 1                       10.2
dataAgg %>% filter(gdpPPP <= mean(dataAgg$gdpPPP, na.rm = TRUE)) %>% summarise(avgNumBusinessRestrictions = mean(numBusinessRestrictions, na.rm = TRUE))  
## # A tibble: 1 x 1
##   avgNumBusinessRestrictions
##                        <dbl>
## 1                       2.87