Task A
From the data folder on GitHub, get the data sets in the list below. Load
them in R, giving them the respective names: qualtrics_data, data_f, data_g,
and data_h. Inspect them using head() or glimpse(). Finally, save them to
your local data directory (which you should have as a sub-directory of your
R course directory) as csv files.
Task B
Go to this website: https://www.britishelectionstudy.com/data-objects/cross-sectional-data/
(you can register for free). Download the 2017 Face-to-face Post-election
Survey Version 1.5 SPSS file into your local data directory (see above).
Then, load it in R, assigning it to the name british_cross_sectional_data,
using the appropriate function for SPSS files, and inspect it using head()
or glimpse().
# Load required packages: tidyverse (includes readr for csv import/export)
# and haven (for reading Stata .dta and SPSS .sav files).
library(tidyverse)
library(haven)

# Task A: read the Qualtrics managers survey (Stata format) straight from
# GitHub; the '?raw=true' suffix makes GitHub serve the raw binary file
# instead of the HTML preview page.
qualtrics_data <- read_dta("https://github.com/laurafontanesi/r-seminar22/blob/main/data/20180321_qualtrics_managers_historical_social_comparisons.dta?raw=true")
head(qualtrics_data, 2)
## # A tibble: 2 x 58
##   cur_emp       yrs_workforce yrs_curjob num_emp_work num_emp_man for_profit yrs_for_profit non_profit yrs_non_profit
##   <chr>                 <dbl>      <dbl> <chr>        <chr>       <chr>               <dbl> <chr>               <dbl>
## 1 I work in th…          17         15   500-999      50-99       Yes                     6 No                     NA
## 2 I work in th…          21.2       21.2 50-99        9-May       Yes                     4 No                     NA
## # … with 49 more variables: public <chr>, yrs_public <dbl>, eng <dbl>, eng_i_u <dbl>, eng_i_l <dbl>, eng_d_u <dbl>,
## #   eng_d_l <dbl>, eng_pct_pass <dbl>, eng_more_useful_comp <chr>, eng_why_select_comp <chr>, eng_hist <chr>,
## #   eng_soc <chr>, age <dbl>, educ <chr>, prof_degree <chr>, prof_degree_other <chr>, gender <chr>,
## #   gender_other <dbl>, race <chr>, race_other <chr>, hispanic <chr>, math <dbl>, math_i <dbl>, math_d <dbl>,
## #   math_u <dbl>, math_l <dbl>, math_pct_pass <dbl>, math_more_useful_comp <chr>, math_why_select_comp <chr>,
## #   math_hist <chr>, math_soc <chr>, gc <dbl>, sector <chr>, employment <dbl+lbl>, pub_sec <dbl+lbl>,
## #   female <dbl+lbl>, education <dbl+lbl>, eng_outcome_group <dbl+lbl>, eng_outcome <dbl>, …

# Task A also requires saving a local csv copy (assumes a 'data'
# sub-directory exists under the current working directory).
write_csv(qualtrics_data, "data/qualtrics_data.csv")
# This file uses ';' as the field separator and ',' as the decimal mark
# (European convention), so read_csv2() is the right reader. The first
# column has no header, hence readr's "Missing column names" warning and
# the auto-generated name 'X1'.
data_f <- read_csv2("https://raw.githubusercontent.com/laurafontanesi/r-seminar22/main/data/data_to_import_f.csv")
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   X1 = col_logical(),
##   Locations = col_character(),
##   Cases = col_double(),
##   Deaths = col_double(),
##   Recovered = col_character(),
##   References = col_character()
## )
head(data_f)
## # A tibble: 6 x 6
##   X1    Locations           Cases Deaths Recovered References
##   <lgl> <chr>               <dbl>  <dbl> <chr>     <chr>
## 1 NA    China (mainland)[d] 81093   3270 72703     [31]
## 2 NA    Italy[e]            59138   5476 7024      [34]
## 3 NA    United States[f]    40850    481 187       [36][35]
## 4 NA    Spain               33089   2206 3355      [37]
## 5 NA    Germany             27546    115 422       [38]
## 6 NA    Iran[g]             23049   1812 8376      [39][40]

# Save a local csv copy, as required by Task A.
write_csv(data_f, "data/data_f.csv")
# Tab-separated file: use read_delim() with an explicit delim = "\t".
# Note the '%' column parses as character because the values carry a
# literal '%' suffix (and one entry is a range, "30-66%").
data_g <- read_delim("https://raw.githubusercontent.com/laurafontanesi/r-seminar22/main/data/data_to_import_g.txt", delim = "\t")
##
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   Symptom = col_character(),
##   `%` = col_character()
## )
head(data_g)
## # A tibble: 6 x 2
##   Symptom                      `%`
##   <chr>                        <chr>
## 1 Fever                        87.9%
## 2 Dry cough                    67.7%
## 3 Fatigue                      38.1%
## 4 Sputum production            33.4%
## 5 Anosmia (loss of smell)[251] 30-66%
## 6 Shortness of breath          18.6%

# Save a local csv copy, as required by Task A.
write_csv(data_g, "data/data_g.csv")
# Standard comma-separated file: read_csv() works directly. Column names
# still carry Wikipedia-style footnote markers (e.g. '[3]'), so several
# must be referenced with backticks.
data_h <- read_csv("https://raw.githubusercontent.com/laurafontanesi/r-seminar22/main/data/data_to_import_h.csv")
##
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   Name = col_character(),
##   Canton = col_character(),
##   Range = col_character(),
##   `Area1973 (km²)[3]` = col_double(),
##   `Start ofmeasurement(year)[3]` = col_double(),
##   `Max length(km)[3]` = col_double(),
##   `1973length(km)[3]` = col_double(),
##   `2016length(km)[3]` = col_double(),
##   `length change(m)[3]` = col_double(),
##   `Highest point in drainage basin[4]` = col_character(),
##   `Altitude(m)` = col_number(),
##   `Front alt.(m)` = col_number(),
##   Outflow = col_character(),
##   Basin = col_character()
## )
head(data_h)
## # A tibble: 6 x 14
##   Name          Canton Range        `Area1973 (km²)… `Start ofmeasur… `Max length(km)… `1973length(km)… `2016length(km)…
##   <chr>         <chr>  <chr>                   <dbl>            <dbl>            <dbl>            <dbl>            <dbl>
## 1 Grand Aletsch VS     Bernese Alps             86.6             1870            25.6             24.0             22.5
## 2 Gorner        VS     Pennine Alps             59.4             1882            15.0             13.5             12.3
## 3 Fiesch        VS     Bernese Alps             34.2             1891            15.9             15.4             14.5
## 4 Unteraar      BE     Bernese Alps             29.5             1876            14.4             13.0             11.8
## 5 Lower Grindelwald BE Bernese Alps             20.8             1879             9.46             8.3              5.95
## 6 Upper Aletsch VS     Bernese Alps             22.8             1870            10.3              9.05             8.82
## # … with 6 more variables: length change(m)[3] <dbl>, Highest point in drainage basin[4] <chr>, Altitude(m) <dbl>,
## #   Front alt.(m) <dbl>, Outflow <chr>, Basin <chr>

# Save a local csv copy, as required by Task A.
write_csv(data_h, "data/data_h.csv")
# Task B: load the 2017 BES face-to-face post-election survey. SPSS .sav
# files are read with haven::read_sav(), which preserves the value labels
# (shown as <dbl+lbl> columns below).
# NOTE(review): this absolute path is machine-specific — adjust it to
# point at the data sub-directory of your own R course directory, e.g.
# "data/bes_f2f_2017_v1.5.sav".
british_cross_sectional_data <- read_sav("~/Dropbox/teaching/r-course22/data/bes_f2f_2017_v1.5.sav")
head(british_cross_sectional_data)
## # A tibble: 6 x 363
##   finalserialno serial    a01    a02       a03      m02_1    m02_2   m02_3   m02_4   m02_5   m02_6   b01     b02
##           <dbl> <chr>     <chr>  <dbl+lbl> <dbl+lb> <dbl+lb> <dbl+l> <dbl+l> <dbl+l> <dbl+l> <dbl+l> <dbl+l> <dbl+lb>
## 1         10115 000000399 nhs     1 [Labo… 2 [Fair… 4 [Agre… 4 [Agr… 3 [Nei… 2 [Dis… 4 [Agr… 4 [Agr… 1 [Yes… 1 [Lab…
## 2         10119 000000398 brexit  0 [None… 2 [Fair… 3 [Neit… 1 [Str… 4 [Agr… 4 [Agr… 3 [Nei… 4 [Agr… 1 [Yes… 1 [Lab…
## 3         10125 000000400 socie… -1 [Don`… 2 [Fair… 4 [Agre… 4 [Agr… 4 [Agr… 4 [Agr… 4 [Agr… 2 [Dis… 2 [No,… NA
## 4         10215 000000347 immig… -1 [Don`… 2 [Fair… 4 [Agre… 5 [Str… 5 [Str… 5 [Str… 5 [Str… 4 [Agr… 1 [Yes… 1 [Lab…
## 5         10216 000000349 brexit  1 [Labo… 2 [Fair… 3 [Neit… 2 [Dis… 2 [Dis… 2 [Dis… 2 [Dis… 3 [Nei… 1 [Yes… 1 [Lab…
## 6         10217 000000374 getti… -1 [Don`… 2 [Fair… 2 [Disa… 4 [Agr… 2 [Dis… 2 [Dis… 3 [Nei… 2 [Dis… 1 [Yes… 1 [Lab…
## # … with 350 more variables: b04 <dbl+lbl>, b05 <dbl+lbl>, b0601 <dbl+lbl>, b0602 <dbl+lbl>, b0603 <dbl+lbl>,
## #   b0604 <dbl+lbl>, b0605 <dbl+lbl>, b0606 <dbl+lbl>, b0607 <dbl+lbl>, b0608 <dbl+lbl>, b0609 <dbl+lbl>,
## #   b0610 <dbl+lbl>, b0611 <dbl+lbl>, b0612 <dbl+lbl>, b0613 <dbl+lbl>, b0614 <dbl+lbl>, b0615 <dbl+lbl>,
## #   b0616 <dbl+lbl>, b0617 <dbl+lbl>, b0618 <dbl+lbl>, b0619 <dbl+lbl>, b06a <dbl+lbl>, b07 <dbl+lbl>,
## #   b09 <dbl+lbl>, b12_1 <dbl+lbl>, b12_2 <dbl+lbl>, b12_3 <dbl+lbl>, b12_4 <dbl+lbl>, b12_5 <dbl+lbl>,
## #   b12_6 <dbl+lbl>, b13_1 <dbl+lbl>, b13_2 <dbl+lbl>, b13_3 <dbl+lbl>, b13_4 <dbl+lbl>, b13_5 <dbl+lbl>,
## #   b13_6 <dbl+lbl>, c01 <dbl+lbl>, c02_1 <dbl+lbl>, c02_2 <dbl+lbl>, c02_3 <dbl+lbl>, c02_4 <dbl+lbl>, …