Data science with `{hyenaR}`:
LESSON 14

Prepare our workspace

STEP 1: Load required packages

library(hyenaR) ## For our hyena specific functions
library(dplyr) ## For most data wrangling
library(ggplot2) ## For plotting
library(lubridate) ## Working with dates
library(tidyr) ## Extra data wrangling functions
library(stringr) ## Working with text
library(waldo) ## To compare objects
library(skimr) ## Inspect data

STEP 2: Load the database

load_package_database.full(
  
  # Location of our database file
  db.path = "example_git/source_data/Fisidata_2022_08_10.sqlite"
  
)

Today’s goals

GOAL 1: 🧑‍🏫 Practical application of `{hyenaR}`

List of all adults of main clans with their highest lifetime rank.

GOAL 1: 🧑‍🏫 Practical application of `{hyenaR}`

List of all adults of main clans with their highest lifetime rank.

GOAL 1: 🧑‍🏫 Practical application of `{hyenaR}`

List of all adults of main clans with their highest lifetime rank.

List of all adults of main clans

## Work within the observation period...
start_date = find_pop_date.observation.first()
end_date = find_pop_date.observation.last()
create_id_starting.table(
  #Only known sex
  sex = c("female", "male"),
  #Individuals started and stopped being adults
  #in the study period (i.e. uncensored)
  lifestage = "adult",
  lifestage.overlap = "within",
  #Only use individuals born in main clans
  #Males that disperse from outside could have adult period unobserved
  clan.birth = find_clan_name.all(main.clans = TRUE),
  from = start_date,
  to = end_date
)

# A tibble: 904 × 1
   ID   
   <chr>
 1 A-008
 2 A-010
 3 A-018
 4 A-019
 5 A-040
 6 A-041
 7 A-046
 8 A-049
 9 A-054
10 A-056
# … with 894 more rows

…with their highest lifetime rank.

create_id_starting.table(
  sex = c("female", "male"),
  lifestage = "adult",
  lifestage.overlap = "within",
  clan.birth = find_clan_name.all(main.clans = TRUE),
  from = start_date,
  to = end_date
) %>% 
  # Find the period during which ranks need to be calculated
  mutate(start_adult = fetch_id_date.at.age(ID, age = 2),
         end_adult = fetch_id_date.death(ID))

# A tibble: 904 × 3
   ID    start_adult end_adult 
   <chr> <date>      <date>    
 1 A-008 1996-05-20  2008-11-30
 2 A-010 1996-09-20  2000-02-06
 3 A-018 1996-09-20  2004-06-28
 4 A-019 1996-09-16  2010-05-17
 5 A-040 1996-04-12  2006-10-26
 6 A-041 1996-07-13  2003-05-03
 7 A-046 1996-09-20  2003-11-03
 8 A-049 1996-04-13  2003-09-29
 9 A-054 1997-12-22  2002-12-03
10 A-056 1996-06-05  2008-05-16
# … with 894 more rows

…with their highest lifetime rank.

(expanded_df <- create_id_starting.table(
  sex = c("female", "male"),
  lifestage = "adult", lifestage.overlap = "within",
  clan.birth = find_clan_name.all(main.clans = TRUE),
  from = start_date, to = end_date) %>% 
  mutate(start_adult = fetch_id_date.at.age(ID, age = 2),
         end_adult = fetch_id_date.death(ID)) %>% 
  #Expand data to one row per 6 month period of an adults life
  reshape_row_date.seq(ID, from = start_adult, to = end_adult, by = "6 month"))

# A tibble: 9,798 × 2
   ID    date      
   <chr> <date>    
 1 A-008 1996-05-20
 2 A-008 1996-11-20
 3 A-008 1997-05-20
 4 A-008 1997-11-20
 5 A-008 1998-05-20
 6 A-008 1998-11-20
 7 A-008 1999-05-20
 8 A-008 1999-11-20
 9 A-008 2000-05-20
10 A-008 2000-11-20
# … with 9,788 more rows

…with their highest lifetime rank.

mainclans <- find_clan_name.all()
system.time({expanded_df_rank <- expanded_df %>% 
    # Extract current clan and per 6 months
    mutate(current_clan = fetch_id_clan.current(ID, at = date)) %>% 
    # Remove individuals that were outside the clan at any point
    group_by(ID) %>% 
    filter(all(current_clan %in% mainclans)) %>% 
    ungroup() %>% 
    #Extract ranks of remaining individuals
    mutate(rank = fetch_id_rank.std(ID, at = date))})

   user  system elapsed 
  5.655   0.595   6.053

…with their highest lifetime rank.

expanded_df_rank

# A tibble: 9,348 × 4
   ID    date       current_clan    rank
   <chr> <date>     <chr>          <dbl>
 1 A-008 1996-05-20 A             0.0244
 2 A-008 1996-11-20 A            -0.116 
 3 A-008 1997-05-20 A            -0.277 
 4 A-008 1997-11-20 A            -0.257 
 5 A-008 1998-05-20 A            -0.189 
 6 A-008 1998-11-20 S            -0.111 
 7 A-008 1999-05-20 S             0     
 8 A-008 1999-11-20 S             0.167 
 9 A-008 2000-05-20 S             0     
10 A-008 2000-11-20 S             0.333 
# … with 9,338 more rows

…with their highest lifetime rank.

Code

plot_data <- expanded_df_rank %>% 
  mutate(sex = fetch_id_sex(ID),
         philo = case_when(fetch_id_is.native(ID, at = date) ~ "philopatric",
                           TRUE ~ "disperser")) %>% 
  group_by(ID, philo) %>% 
  mutate(years_since_start = (date - min(date))/365,
         relative_rank_change = rank - min(rank))

ggplot(data = plot_data) +
  geom_line(aes(x = date, y = rank,
                group = ID, colour = ID)) +
  scale_x_date(breaks = seq(as.Date("1995-01-01"), as.Date("2020-01-01"), "5 year"),
               date_labels = "%Y",
               limits = c(as.Date("1995-01-01"), as.Date("2021-01-01"))) +
  labs(x = "Date", y = "Standardised rank") +
  facet_wrap(facets = ~sex + philo) +
  theme_classic() +
  theme(legend.position = "none",
        axis.title = element_text(size = 15),
        axis.text = element_text(size = 10, colour = "black"))

…with their highest lifetime rank.

Code

plot_data2 <- plot_data %>% 
  filter(current_clan == "A")

ggplot(data = plot_data2) +
  geom_line(aes(x = date, y = rank,
                group = ID, colour = ID)) +
  scale_x_date(breaks = seq(as.Date("1995-01-01"), as.Date("2020-01-01"), "5 year"),
               date_labels = "%Y",
               limits = c(as.Date("1995-01-01"), as.Date("2021-01-01"))) +
  facet_wrap(facets = ~sex + philo) +
  labs(x = "Date", y = "Standardised rank",
       title = "Airstrip") +
  theme_classic() +
  theme(legend.position = "none",
        axis.title = element_text(size = 15),
        axis.text = element_text(size = 10, colour = "black"),
        plot.title = element_text(size = 20, colour = "black", face = "bold"))

…with their highest lifetime rank.

Code

ggplot(data = plot_data2) +
  geom_line(aes(x = years_since_start, y = rank,
                group = ID, colour = ID)) +
  labs(y = "Standardised rank", x = "Years in lifestage",
       title = "Airstrip") +
  facet_wrap(facets = ~sex + philo) +
  theme_classic() +
  theme(legend.position = "none",
        axis.title = element_text(size = 15),
        axis.text = element_text(size = 10, colour = "black"),
        plot.title = element_text(size = 20, colour = "black", face = "bold"))

…with their highest lifetime rank.

Code

ggplot(data = plot_data2) +
  geom_line(aes(x = years_since_start, y = rank,
                group = ID), colour = "grey60",
            alpha = 0.5) +
  geom_smooth(aes(x = years_since_start, y = rank),
              colour = "black", se = FALSE) +
  labs(y = "Standardised rank", x = "Years in lifestage",
       title = "Airstrip") +
  facet_wrap(facets = ~sex + philo) +
  theme_classic() +
  theme(legend.position = "none",
        axis.title = element_text(size = 15),
        axis.text = element_text(size = 10, colour = "black"),
        plot.title = element_text(size = 20, colour = "black", face = "bold"))

…with their highest lifetime rank.

Code

plot_data3 <- plot_data %>% 
  filter(ID == "A-019")

ggplot(data = plot_data3) +
  geom_line(aes(x = date, y = rank,
                group = ID), size = 1,
            colour = "black") +
  geom_point(aes(x = date, y = rank),
             size = 2, shape = 21,
            colour = "black", fill = "white", stroke = 1) +
  labs(y = "Standardised rank", x = "Years in lifestage",
       title = "A-019") +
  scale_y_continuous(limits = c(NA, 1),
                     breaks = seq(-0.25, 1, 0.25)) +
  scale_x_date(breaks = seq(as.Date("1995-01-01"), as.Date("2020-01-01"), "5 year"),
               date_labels = "%Y",
               limits = c(as.Date("1995-01-01"), as.Date("2021-01-01"))) +
  theme_classic() +
  theme(legend.position = "none",
        axis.title = element_text(size = 15),
        axis.text = element_text(size = 10, colour = "black"),
        plot.title = element_text(size = 20, colour = "black", face = "bold"))

…with their highest lifetime rank.

expanded_df_rank %>% 
  #Determine the max rank per ID
  group_by(ID) %>% 
  summarise(max_rank = max(rank)) %>% 
  arrange(desc(max_rank))

# A tibble: 877 × 2
   ID    max_rank
   <chr>    <dbl>
 1 A-019        1
 2 A-114        1
 3 A-194        1
 4 A-353        1
 5 E-006        1
 6 E-089        1
 7 E-166        1
 8 E-191        1
 9 F-006        1
10 F-107        1
# … with 867 more rows

…with their highest lifetime rank.

Code

plot_data4 <- expanded_df_rank %>% 
  #Determine the max rank per ID
  group_by(ID) %>% 
  summarise(max_rank = max(rank)) %>% 
  arrange(desc(max_rank)) %>% 
  mutate(birthclan = fetch_id_clan.birth(ID),
         sex = fetch_id_sex(ID))

ggplot(data = plot_data4) +
  geom_boxplot(aes(x = birthclan, y = max_rank, fill = sex)) +
  labs(x = "Clan", y = "Maximum rank") +
  theme_classic() +
  theme(axis.title = element_text(size = 15),
        axis.text = element_text(size = 10, colour = "black"),
        plot.title = element_text(size = 20, colour = "black", face = "bold"))

Data science with {hyenaR}:LESSON 14

Prepare our workspace

STEP 1: Load required packages

STEP 2: Load the database

Today’s goals

GOAL 1: 🧑‍🏫 Practical application of {hyenaR}

GOAL 1: 🧑‍🏫 Practical application of {hyenaR}

List of all adults of main clans with their highest lifetime rank.

GOAL 1: 🧑‍🏫 Practical application of {hyenaR}

List of all adults of main clans with their highest lifetime rank.

GOAL 1: 🧑‍🏫 Practical application of {hyenaR}

List of all adults of main clans with their highest lifetime rank.

List of all adults of main clans

…with their highest lifetime rank.

…with their highest lifetime rank.

…with their highest lifetime rank.

…with their highest lifetime rank.

…with their highest lifetime rank.

…with their highest lifetime rank.

…with their highest lifetime rank.

…with their highest lifetime rank.

…with their highest lifetime rank.

…with their highest lifetime rank.

…with their highest lifetime rank.

Data science with `{hyenaR}`:
LESSON 14

GOAL 1: 🧑‍🏫 Practical application of `{hyenaR}`

GOAL 1: 🧑‍🏫 Practical application of `{hyenaR}`

GOAL 1: 🧑‍🏫 Practical application of `{hyenaR}`

GOAL 1: 🧑‍🏫 Practical application of `{hyenaR}`