library(hyenaR) ## For our hyena specific functionslibrary(dplyr) ## For most data wranglinglibrary(ggplot2) ## For plottinglibrary(lubridate) ## Working with dateslibrary(tidyr) ## Extra data wrangling functionslibrary(stringr) ## Working with textlibrary(waldo) ## To compare objectslibrary(skimr) ## Inspect data
STEP 2: Load the database
load_package_database.full(# Location of our database filedb.path ="example_git/source_data/Fisidata_2022_08_10.sqlite")
Today’s goals
GOAL 1: 📝HOMEWORK: Lesson 10.
TASK 1:
Create a data frame of all females.
create_id_starting.table(sex ="female")
# A tibble: 1,211 × 1
ID
<chr>
1 A-001
2 A-002
3 A-003
4 A-004
5 A-006
6 A-007
7 A-008
8 A-009
9 A-010
10 A-013
# … with 1,201 more rows
TASK 1:
Use group_by()/summarise() to show how many individuals are left or right censored.
create_id_starting.table(sex ="female") %>%# Find which are left and right censoredmutate(left_censor =fetch_id_is.censored.left(ID),right_censor =fetch_id_is.censored.right(ID)) %>%#Group by and countgroup_by(left_censor, right_censor) %>%count()
# A tibble: 901 × 1
ID
<chr>
1 A-060
2 A-080
3 A-081
4 A-088
5 A-096
6 A-098
7 A-101
8 A-102
9 A-104
10 A-105
# … with 891 more rows
TASK 2:
Extract lifetime reproductive success of females (consider only offspring that reach atleast 18 months old).
create_id_starting.table(sex ="female") %>%filter(!fetch_id_is.censored(ID)) %>%# Use age.mature and unit to find individualsmutate(LRS =fetch_id_number.offspring(ID, age.mature =18, unit ="month"))
Extract standardised rank within females at the date of adulthood (i.e. when they were 2 years old).
create_id_starting.table(sex ="female") %>%filter(!fetch_id_is.censored(ID)) %>%mutate(LRS =fetch_id_number.offspring(ID, age.mature =18, unit ="month"),#Extract standardised rank within sexes#(i.e. just comparing females to other females)rank =fetch_id_rank.sex.std(ID,at =fetch_id_date.at.age(ID, age =2)))
# A tibble: 901 × 3
ID LRS rank
<chr> <int> <dbl>
1 A-060 0 NA
2 A-080 4 0.538
3 A-081 7 -1
4 A-088 0 NA
5 A-096 0 NA
6 A-098 0 NA
7 A-101 0 0
8 A-102 6 0.231
9 A-104 0 0.429
10 A-105 1 -0.2
# … with 891 more rows
TASK 2:
Inspect the data. Are there any individuals you might exclude?
output <-create_id_starting.table(sex ="female") %>%filter(!fetch_id_is.censored(ID)) %>%mutate(LRS =fetch_id_number.offspring(ID, age.mature =18, unit ="month"),rank =fetch_id_rank.sex.std(ID, at =fetch_id_date.at.age(ID, age =2)))#Check for NAsany(is.na(output$LRS))
[1] FALSE
any(is.na(output$rank))
[1] TRUE
TASK 2:
Use {skimr} package to inspect data!
NOTE We need to look in RStudio because it doesn’t work well on the slides!
skim(output)
TASK 3:
Create a new column with rank category (top rank >0 or low ranking <=0). HINT: You might need to use ifelse.
output %>%filter(!is.na(rank)) %>%#Create high and low categorymutate(rank_cat =ifelse(rank >0, "high", "low"))
# A tibble: 456 × 4
ID LRS rank rank_cat
<chr> <int> <dbl> <chr>
1 A-080 4 0.538 high
2 A-081 7 -1 low
3 A-101 0 0 low
4 A-102 6 0.231 high
5 A-104 0 0.429 high
6 A-105 1 -0.2 low
7 A-106 10 -0.333 low
8 A-107 4 0.0588 high
9 A-108 0 -0.0588 low
10 A-110 0 -0.556 low
# … with 446 more rows
TASK 3:
What is the mean and SE of lifetime reproductive success in each group? HINT: Remember SE = \(\frac{sd}{\sqrt N}\)
# A tibble: 3 × 3
rank_cat mean se
<chr> <dbl> <dbl>
1 rankgrp1 2.30 0.213
2 rankgrp2 1.81 0.255
3 rankgrp3 1.13 0.174
TASK 3:
BONUS: Look at the help for the case_when() function. Can you use this to make 3 groups instead of 2?
output %>%filter(!is.na(rank)) %>%#Do the same thing with case_when()mutate(rank_cat =case_when(rank >0.33~"rankgrp1", rank <=0.33& rank >-0.33~"rankgrp2", rank <=-0.33~"rankgrp3")) %>%#group-by/summarisegroup_by(rank_cat) %>%summarise(mean =mean(LRS),se =sd(LRS)/sqrt(n()))
# A tibble: 3 × 3
rank_cat mean se
<chr> <dbl> <dbl>
1 rankgrp1 2.30 0.213
2 rankgrp2 1.81 0.255
3 rankgrp3 1.13 0.174
TASK 3:
BONUS: Look at the help for the case_when() function. Can you use this to make 3 groups instead of 2?
output %>%filter(!is.na(rank)) %>%mutate(rank_cat =case_when(rank >0.33~"rankgrp1", rank <=0.33& rank >-0.33~"rankgrp2",#Everything else that doesn't match othersTRUE~"rankgrp3")) %>%group_by(rank_cat) %>%summarise(mean =mean(LRS),se =sd(LRS)/sqrt(n()))
# A tibble: 3 × 3
rank_cat mean se
<chr> <dbl> <dbl>
1 rankgrp1 2.30 0.213
2 rankgrp2 1.81 0.255
3 rankgrp3 1.13 0.174
TASK 3:
BONUS: Look at the help for the case_when() function. Can you use this to make 3 groups instead of 2?
# Be careful using the TRUE group. It will include *anything* that doesn't match other criteriaoutput %>%mutate(birthclan =fetch_id_clan.birth(ID),clan_grp =case_when(birthclan =="X"~"Outside",TRUE~"Main")) %>%filter(clan_grp =="Main") %>%group_by(birthclan) %>%count()
# A tibble: 11 × 2
# Groups: birthclan [11]
birthclan n
<chr> <int>
1 A 170
2 B 1
3 E 74
4 F 84
5 L 167
6 M 147
7 N 84
8 R 2
9 S 100
10 T 50
11 U 6
TASK 3:
BONUS: Look at the help for the case_when() function. Can you use this to make 3 groups instead of 2?
# Be careful *without* TRUE. Everything else is NA!output %>%mutate(birthclan =fetch_id_clan.birth(ID),clan_grp =case_when(birthclan %in%find_clan_name.all(main.clans =TRUE) ~"Main")) %>%group_by(clan_grp) %>%count()
# A tibble: 2 × 2
# Groups: clan_grp [2]
clan_grp n
<chr> <int>
1 Main 876
2 <NA> 25
BONUS: Plotting
Code
library(ggdist)plot_data <- output %>%filter(!is.na(rank)) %>%mutate(rank_cat =case_when(rank >0.33~"rankgrp1", rank <=0.33& rank >-0.33~"rankgrp2",#Everything else that doesn't match othersTRUE~"rankgrp3"))plot_data_summary <- plot_data %>%group_by(rank_cat) %>%summarise(mean =mean(LRS),se =sd(LRS)/sqrt(n()))ggplot(data = plot_data_summary) +geom_col(aes(x = rank_cat, y = mean)) +geom_errorbar(aes(x = rank_cat, ymin = mean - se, ymax = mean + se), width =0.2) +scale_x_discrete(labels =c("High", "Medium", "Low"), name ="") +scale_y_continuous(name ="Lifetime reproductive success (mean +- SE)") +theme_classic()