# install.packages("tidyverse")
# install.packages("here")
library (tidyverse)
library (here)
## Read the raw input files from the "raw_data" folder
# Character metadata (IDs, names, universe, notability, links), saved as RDS
characters <- readRDS(here::here("raw_data", "characters.rds"))
# Personality ratings; the CSV file uses ";" as its field separator
psych_stats <- read.csv(here::here("raw_data", "psych_stats.csv"), sep = ";")
## Go from wide to long: every rating column between messy_neat and
## innocent_jaded becomes a (question, rating) pair in its own row
psych_stats <- pivot_longer(
  psych_stats,
  cols = messy_neat:innocent_jaded,
  names_to = "question",
  values_to = "rating"
)
## Take a look at the data sets
# Structure of the character metadata
str(characters)
# Structure of the reshaped (long-format) personality ratings
str(psych_stats)
'data.frame': 889 obs. of 7 variables:
$ id : chr "F2" "F1" "F5" "F4" ...
$ name : chr "Monica Geller" "Rachel Green" "Chandler Bing" "Joey Tribbiani" ...
$ uni_id : chr "F" "F" "F" "F" ...
$ uni_name : chr "Friends" "Friends" "Friends" "Friends" ...
$ notability: num 79.7 76.7 74.4 74.3 72.6 51.6 86.5 84.2 82.6 65.6 ...
$ link : chr "https://openpsychometrics.org/tests/characters/stats/F/2" "https://openpsychometrics.org/tests/characters/stats/F/1" "https://openpsychometrics.org/tests/characters/stats/F/5" "https://openpsychometrics.org/tests/characters/stats/F/4" ...
$ image_link: chr "https://openpsychometrics.org/tests/characters/test-resources/pics/F/2.jpg" "https://openpsychometrics.org/tests/characters/test-resources/pics/F/1.jpg" "https://openpsychometrics.org/tests/characters/test-resources/pics/F/5.jpg" "https://openpsychometrics.org/tests/characters/test-resources/pics/F/4.jpg" ...
tibble [323,596 × 3] (S3: tbl_df/tbl/data.frame)
$ char_id : chr [1:323596] "F2" "F2" "F2" "F2" ...
$ question: chr [1:323596] "messy_neat" "disorganized_self.disciplined" "diligent_lazy" "on.time_tardy" ...
$ rating : num [1:323596] 95.7 95.2 6.1 6.2 6.4 ...
Now we have gotten to know our `characters` data set a bit better. However, the personality ratings are not included yet. To add them, we need to combine it with the `psych_stats` data set.
Exercise 1
Merge the `characters` data frame and the `psych_stats` data frame on a common column.
Identify the common columns. Are they named the same in both data frames? Look at the documentation of `?merge` to see how you can merge data frames that don’t have identically named columns.
First, let’s take a look at both data sets again:
'data.frame': 889 obs. of 7 variables:
$ id : chr "F2" "F1" "F5" "F4" ...
$ name : chr "Monica Geller" "Rachel Green" "Chandler Bing" "Joey Tribbiani" ...
$ uni_id : chr "F" "F" "F" "F" ...
$ uni_name : chr "Friends" "Friends" "Friends" "Friends" ...
$ notability: num 79.7 76.7 74.4 74.3 72.6 51.6 86.5 84.2 82.6 65.6 ...
$ link : chr "https://openpsychometrics.org/tests/characters/stats/F/2" "https://openpsychometrics.org/tests/characters/stats/F/1" "https://openpsychometrics.org/tests/characters/stats/F/5" "https://openpsychometrics.org/tests/characters/stats/F/4" ...
$ image_link: chr "https://openpsychometrics.org/tests/characters/test-resources/pics/F/2.jpg" "https://openpsychometrics.org/tests/characters/test-resources/pics/F/1.jpg" "https://openpsychometrics.org/tests/characters/test-resources/pics/F/5.jpg" "https://openpsychometrics.org/tests/characters/test-resources/pics/F/4.jpg" ...
tibble [323,596 × 3] (S3: tbl_df/tbl/data.frame)
$ char_id : chr [1:323596] "F2" "F2" "F2" "F2" ...
$ question: chr [1:323596] "messy_neat" "disorganized_self.disciplined" "diligent_lazy" "on.time_tardy" ...
$ rating : num [1:323596] 95.7 95.2 6.1 6.2 6.4 ...
It seems like both data frames have a column containing an ID for the character. We can use that column for merging:
# The character ID is called "id" in `characters` but "char_id" in
# `psych_stats`, so the key column has to be named separately for each side
characters_stats <- merge(
  characters, psych_stats,
  by.x = "id", by.y = "char_id"
)
# Check the structure of the merged result
str(characters_stats)
'data.frame': 323596 obs. of 9 variables:
$ id : chr "AD1" "AD1" "AD1" "AD1" ...
$ name : chr "Michael Bluth" "Michael Bluth" "Michael Bluth" "Michael Bluth" ...
$ uni_id : chr "AD" "AD" "AD" "AD" ...
$ uni_name : chr "Arrested Development" "Arrested Development" "Arrested Development" "Arrested Development" ...
$ notability: num 76.9 76.9 76.9 76.9 76.9 76.9 76.9 76.9 76.9 76.9 ...
$ link : chr "https://openpsychometrics.org/tests/characters/stats/AD/1" "https://openpsychometrics.org/tests/characters/stats/AD/1" "https://openpsychometrics.org/tests/characters/stats/AD/1" "https://openpsychometrics.org/tests/characters/stats/AD/1" ...
$ image_link: chr "https://openpsychometrics.org/tests/characters/test-resources/pics/AD/1.jpg" "https://openpsychometrics.org/tests/characters/test-resources/pics/AD/1.jpg" "https://openpsychometrics.org/tests/characters/test-resources/pics/AD/1.jpg" "https://openpsychometrics.org/tests/characters/test-resources/pics/AD/1.jpg" ...
$ question : chr "messy_neat" "disorganized_self.disciplined" "diligent_lazy" "on.time_tardy" ...
$ rating : num 68.6 73.3 10.8 22.2 45.1 16.2 86.3 74.7 15.4 36.2 ...
Worked like a charm!