Task A

From the data folder on Github, get the data sets in the list below. Load them in R, giving them the respective names: qualtrics_data, data_f, data_g, data_h. Inspect them using head() or glimpse(). Finally, save them as csv files to your local data directory (which should be a sub-directory of your R course directory).

  1. 20180321_qualtrics_managers_historical_social_comparisons.dta
  2. data_to_import_f.csv
  3. data_to_import_g.csv
  4. data_to_import_h.csv

Task B

Go to this website: https://www.britishelectionstudy.com/data-objects/cross-sectional-data/ (you can register for free).

Download the 2017 Face-to-face Post-election Survey Version 1.5 SPSS file into your local data directory (see above). Then, load it in R, assigning it to the name british_cross_sectional_data using the appropriate function for SPSS files, and inspect it using head() or glimpse().

# Load required packages: tidyverse for data wrangling and readr/write_csv,
# haven for reading Stata (.dta) and SPSS (.sav) files.
library(tidyverse)
library(haven)

# Task A, file 1: Stata (.dta) file read directly from GitHub.
# The `?raw=true` suffix makes GitHub serve the raw binary file
# instead of the HTML page wrapping it.
qualtrics_data <- read_dta("https://github.com/laurafontanesi/r-seminar22/blob/main/data/20180321_qualtrics_managers_historical_social_comparisons.dta?raw=true")

head(qualtrics_data, 2)
## # A tibble: 2 x 58
##   cur_emp       yrs_workforce yrs_curjob num_emp_work num_emp_man for_profit yrs_for_profit non_profit yrs_non_profit
##   <chr>                 <dbl>      <dbl> <chr>        <chr>       <chr>               <dbl> <chr>               <dbl>
## 1 I work in th…          17         15   500-999      50-99       Yes                     6 No                     NA
## 2 I work in th…          21.2       21.2 50-99        9-May       Yes                     4 No                     NA
## # … with 49 more variables: public <chr>, yrs_public <dbl>, eng <dbl>, eng_i_u <dbl>, eng_i_l <dbl>, eng_d_u <dbl>,
## #   eng_d_l <dbl>, eng_pct_pass <dbl>, eng_more_useful_comp <chr>, eng_why_select_comp <chr>, eng_hist <chr>,
## #   eng_soc <chr>, age <dbl>, educ <chr>, prof_degree <chr>, prof_degree_other <chr>, gender <chr>,
## #   gender_other <dbl>, race <chr>, race_other <chr>, hispanic <chr>, math <dbl>, math_i <dbl>, math_d <dbl>,
## #   math_u <dbl>, math_l <dbl>, math_pct_pass <dbl>, math_more_useful_comp <chr>, math_why_select_comp <chr>,
## #   math_hist <chr>, math_soc <chr>, gc <dbl>, sector <chr>, employment <dbl+lbl>, pub_sec <dbl+lbl>,
## #   female <dbl+lbl>, education <dbl+lbl>, eng_outcome_group <dbl+lbl>, eng_outcome <dbl>, …

# Task A also asks to save the data locally as csv. Adjust the path if your
# local data directory is located elsewhere; the directory must exist.
write_csv(qualtrics_data, "data/qualtrics_data.csv")
# Task A, file 2: semicolon-delimited csv, so read_csv2() is the right reader
# (it also assumes "," as the decimal mark — see the message below).
data_f <- read_csv2('https://raw.githubusercontent.com/laurafontanesi/r-seminar22/main/data/data_to_import_f.csv')
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   X1 = col_logical(),
##   Locations = col_character(),
##   Cases = col_double(),
##   Deaths = col_double(),
##   Recovered = col_character(),
##   References = col_character()
## )
head(data_f)
## # A tibble: 6 x 6
##   X1    Locations           Cases Deaths Recovered References
##   <lgl> <chr>               <dbl>  <dbl> <chr>     <chr>     
## 1 NA    China (mainland)[d] 81093   3270 72703     [31]      
## 2 NA    Italy[e]            59138   5476 7024      [34]      
## 3 NA    United States[f]    40850    481 187       [36][35]  
## 4 NA    Spain               33089   2206 3355      [37]      
## 5 NA    Germany             27546    115 422       [38]      
## 6 NA    Iran[g]             23049   1812 8376      [39][40]

# Save a csv copy to the local data directory (directory must exist),
# as required by Task A.
write_csv(data_f, "data/data_f.csv")
# Task A, file 3: despite the task listing a .csv, the file on GitHub is a
# tab-separated .txt, so read_delim() with delim='\t' is used here.
data_g <- read_delim('https://raw.githubusercontent.com/laurafontanesi/r-seminar22/main/data/data_to_import_g.txt', delim='\t')
## 
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   Symptom = col_character(),
##   `%` = col_character()
## )
head(data_g)
## # A tibble: 6 x 2
##   Symptom                      `%`   
##   <chr>                        <chr> 
## 1 Fever                        87.9% 
## 2 Dry cough                    67.7% 
## 3 Fatigue                      38.1% 
## 4 Sputum production            33.4% 
## 5 Anosmia (loss of smell)[251] 30-66%
## 6 Shortness of breath          18.6%

# Save a csv copy to the local data directory (directory must exist),
# as required by Task A.
write_csv(data_g, "data/data_g.csv")
# Task A, file 4: a regular comma-separated csv, read with read_csv().
data_h <- read_csv('https://raw.githubusercontent.com/laurafontanesi/r-seminar22/main/data/data_to_import_h.csv')
## 
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   Name = col_character(),
##   Canton = col_character(),
##   Range = col_character(),
##   `Area1973 (km²)[3]` = col_double(),
##   `Start ofmeasurement(year)[3]` = col_double(),
##   `Max length(km)[3]` = col_double(),
##   `1973length(km)[3]` = col_double(),
##   `2016length(km)[3]` = col_double(),
##   `length change(m)[3]` = col_double(),
##   `Highest point in drainage basin[4]` = col_character(),
##   `Altitude(m)` = col_number(),
##   `Front alt.(m)` = col_number(),
##   Outflow = col_character(),
##   Basin = col_character()
## )
head(data_h)
## # A tibble: 6 x 14
##   Name              Canton Range        `Area1973 (km²)… `Start ofmeasur… `Max length(km)… `1973length(km)… `2016length(km)…
##   <chr>             <chr>  <chr>                   <dbl>            <dbl>            <dbl>            <dbl>            <dbl>
## 1 Grand Aletsch     VS     Bernese Alps             86.6             1870            25.6             24.0             22.5 
## 2 Gorner            VS     Pennine Alps             59.4             1882            15.0             13.5             12.3 
## 3 Fiesch            VS     Bernese Alps             34.2             1891            15.9             15.4             14.5 
## 4 Unteraar          BE     Bernese Alps             29.5             1876            14.4             13.0             11.8 
## 5 Lower Grindelwald BE     Bernese Alps             20.8             1879             9.46             8.3              5.95
## 6 Upper Aletsch     VS     Bernese Alps             22.8             1870            10.3              9.05             8.82
## # … with 6 more variables: length change(m)[3] <dbl>, Highest point in drainage basin[4] <chr>, Altitude(m) <dbl>,
## #   Front alt.(m) <dbl>, Outflow <chr>, Basin <chr>

# Save a csv copy to the local data directory (directory must exist),
# as required by Task A.
write_csv(data_h, "data/data_h.csv")
# Task B: SPSS (.sav) file loaded with haven::read_sav().
# NOTE(review): this path is user-specific (a personal Dropbox folder) —
# replace it with the path to YOUR local data directory, e.g. "data/bes_f2f_2017_v1.5.sav".
british_cross_sectional_data <- read_sav('~/Dropbox/teaching/r-course22/data/bes_f2f_2017_v1.5.sav')

head(british_cross_sectional_data)
## # A tibble: 6 x 363
##   finalserialno serial    a01          a02      a03    m02_1   m02_2   m02_3   m02_4   m02_5   m02_6     b01      b02
##           <dbl> <chr>     <chr>  <dbl+lbl> <dbl+lb> <dbl+lb> <dbl+l> <dbl+l> <dbl+l> <dbl+l> <dbl+l> <dbl+l> <dbl+lb>
## 1         10115 000000399 nhs     1 [Labo… 2 [Fair… 4 [Agre… 4 [Agr… 3 [Nei… 2 [Dis… 4 [Agr… 4 [Agr… 1 [Yes…  1 [Lab…
## 2         10119 000000398 brexit  0 [None… 2 [Fair… 3 [Neit… 1 [Str… 4 [Agr… 4 [Agr… 3 [Nei… 4 [Agr… 1 [Yes…  1 [Lab…
## 3         10125 000000400 socie… -1 [Don`… 2 [Fair… 4 [Agre… 4 [Agr… 4 [Agr… 4 [Agr… 4 [Agr… 2 [Dis… 2 [No,… NA      
## 4         10215 000000347 immig… -1 [Don`… 2 [Fair… 4 [Agre… 5 [Str… 5 [Str… 5 [Str… 5 [Str… 4 [Agr… 1 [Yes…  1 [Lab…
## 5         10216 000000349 brexit  1 [Labo… 2 [Fair… 3 [Neit… 2 [Dis… 2 [Dis… 2 [Dis… 2 [Dis… 3 [Nei… 1 [Yes…  1 [Lab…
## 6         10217 000000374 getti… -1 [Don`… 2 [Fair… 2 [Disa… 4 [Agr… 2 [Dis… 2 [Dis… 3 [Nei… 2 [Dis… 1 [Yes…  1 [Lab…
## # … with 350 more variables: b04 <dbl+lbl>, b05 <dbl+lbl>, b0601 <dbl+lbl>, b0602 <dbl+lbl>, b0603 <dbl+lbl>,
## #   b0604 <dbl+lbl>, b0605 <dbl+lbl>, b0606 <dbl+lbl>, b0607 <dbl+lbl>, b0608 <dbl+lbl>, b0609 <dbl+lbl>,
## #   b0610 <dbl+lbl>, b0611 <dbl+lbl>, b0612 <dbl+lbl>, b0613 <dbl+lbl>, b0614 <dbl+lbl>, b0615 <dbl+lbl>,
## #   b0616 <dbl+lbl>, b0617 <dbl+lbl>, b0618 <dbl+lbl>, b0619 <dbl+lbl>, b06a <dbl+lbl>, b07 <dbl+lbl>,
## #   b09 <dbl+lbl>, b12_1 <dbl+lbl>, b12_2 <dbl+lbl>, b12_3 <dbl+lbl>, b12_4 <dbl+lbl>, b12_5 <dbl+lbl>,
## #   b12_6 <dbl+lbl>, b13_1 <dbl+lbl>, b13_2 <dbl+lbl>, b13_3 <dbl+lbl>, b13_4 <dbl+lbl>, b13_5 <dbl+lbl>,
## #   b13_6 <dbl+lbl>, c01 <dbl+lbl>, c02_1 <dbl+lbl>, c02_2 <dbl+lbl>, c02_3 <dbl+lbl>, c02_4 <dbl+lbl>, …