Data science with {hyenaR}:
LESSON 2

Recap: Any questions after last week?

Prepare our workspace


STEP 1: Load required packages

library(hyenaR) ## For our hyena specific functions
library(dplyr) ## For most data wrangling
library(ggplot2) ## Some (bonus) plotting


STEP 2: Load the database

# Load the full database from the SQLite file at this path
load_package_database.full(
  db.path = "example_git/source_data/Fisidata_2022_08_10.sqlite"
)

TASK 1: Reproductive success of males

EXAMPLE 1: Find 10 most successful males

# Start from a one-column table (ID) holding every male
create_id_starting.table(sex = "male")
# A tibble: 1,313 × 1
   ID   
   <chr>
 1 A-011
 2 A-040
 3 A-041
 4 A-042
 5 A-043
 6 A-044
 7 A-045
 8 A-046
 9 A-047
10 A-048
# … with 1,303 more rows

EXAMPLE 1: Find 10 most successful males

create_id_starting.table(sex = "male") %>%
  # Add one column (RS) with the total number of offspring of each male
  mutate(RS = fetch_id_number.offspring(ID = ID))
# A tibble: 1,313 × 2
   ID       RS
   <chr> <int>
 1 A-011     8
 2 A-040     2
 3 A-041    10
 4 A-042     2
 5 A-043     0
 6 A-044     0
 7 A-045     6
 8 A-046     7
 9 A-047     0
10 A-048     0
# … with 1,303 more rows

EXAMPLE 1: Find 10 most successful males

create_id_starting.table(sex = "male") %>%
  mutate(RS = fetch_id_number.offspring(ID = ID)) %>%
  # Sort by decreasing RS so the most successful males come first
  arrange(desc(RS))
# A tibble: 1,313 × 2
   ID       RS
   <chr> <int>
 1 F-146    29
 2 A-150    26
 3 M-046    22
 4 X-024    20
 5 L-047    19
 6 M-240    19
 7 X-017    19
 8 A-122    18
 9 A-128    18
10 E-094    17
# … with 1,303 more rows

EXAMPLE 1: Find 10 most successful males

create_id_starting.table(sex = "male") %>%
  mutate(RS = fetch_id_number.offspring(ID = ID)) %>%
  arrange(desc(RS)) %>%
  # Keep only the first 10 rows of the sorted table
  slice_head(n = 10)
# A tibble: 10 × 2
   ID       RS
   <chr> <int>
 1 F-146    29
 2 A-150    26
 3 M-046    22
 4 X-024    20
 5 L-047    19
 6 M-240    19
 7 X-017    19
 8 A-122    18
 9 A-128    18
10 E-094    17

EXAMPLE 1: Find 10 most successful males

create_id_starting.table(sex = "male") %>%
  mutate(
    RS = fetch_id_number.offspring(ID = ID),
    # Rank males by reproductive success: the largest RS gets rank 1 and
    # males with equal RS share the same rank
    rank = dense_rank(desc(RS))
  ) %>%
  # Keep every male within the top 10 ranks (ties can give more than 10 rows)
  filter(rank <= 10) %>%
  arrange(rank)
# A tibble: 27 × 3
   ID       RS  rank
   <chr> <int> <int>
 1 F-146    29     1
 2 A-150    26     2
 3 M-046    22     3
 4 X-024    20     4
 5 L-047    19     5
 6 M-240    19     5
 7 X-017    19     5
 8 A-122    18     6
 9 A-128    18     6
10 E-094    17     7
# … with 17 more rows

TASK 1: Reproductive success of males

EXAMPLE 2: Find clan that produces most successful males on average

# Start from a one-column table (ID) holding every male
create_id_starting.table(sex = "male")
# A tibble: 1,313 × 1
   ID   
   <chr>
 1 A-011
 2 A-040
 3 A-041
 4 A-042
 5 A-043
 6 A-044
 7 A-045
 8 A-046
 9 A-047
10 A-048
# … with 1,303 more rows

EXAMPLE 2: Find clan that produces most successful males on average

create_id_starting.table(sex = "male") %>%
  mutate(
    # Total number of offspring of each male
    RS = fetch_id_number.offspring(ID = ID),
    # Clan in which each male was born
    birth.clan = fetch_id_clan.birth(ID = ID)
  )
# A tibble: 1,313 × 3
   ID       RS birth.clan
   <chr> <int> <chr>     
 1 A-011     8 X         
 2 A-040     2 A         
 3 A-041    10 A         
 4 A-042     2 X         
 5 A-043     0 A         
 6 A-044     0 X         
 7 A-045     6 X         
 8 A-046     7 A         
 9 A-047     0 X         
10 A-048     0 X         
# … with 1,303 more rows

EXAMPLE 2: Find clan that produces most successful males on average

create_id_starting.table(sex = "male") %>%
  mutate(
    RS = fetch_id_number.offspring(ID = ID),
    birth.clan = fetch_id_clan.birth(ID = ID)
  ) %>%
  # One group per birth clan
  group_by(birth.clan) %>%
  # Mean and maximum male RS within each clan
  summarise(
    meanRS = mean(RS),
    maxRS = max(RS)
  ) %>%
  # Clans with the highest mean RS come first
  arrange(desc(meanRS))
# A tibble: 12 × 3
   birth.clan meanRS maxRS
   <chr>       <dbl> <int>
 1 U           6        12
 2 X           2.61     22
 3 N           1.46     14
 4 M           1.42     19
 5 A           1.27     26
 6 L           1.13     14
 7 F           1.13     29
 8 E           1.01     17
 9 T           0.462     8
10 S           0.357     7
11 C           0         0
12 R           0         0

EXAMPLE 2: Find clan that produces most successful males on average

create_id_starting.table(sex = "male") %>%
  # Birth clan is the only column the filter needs, so fetch it first
  mutate(birth.clan = fetch_id_clan.birth(ID = ID)) %>%
  # Keep only individuals born in main clans
  # TIP: FILTER AS EARLY AS POSSIBLE!!
  filter(birth.clan %in% find_clan_name.all(main.clans = TRUE)) %>%
  # Count offspring only for the males that passed the filter
  mutate(RS = fetch_id_number.offspring(ID = ID)) %>%
  group_by(birth.clan) %>%
  summarise(
    meanRS = mean(RS),
    maxRS = max(RS)
  ) %>%
  arrange(desc(meanRS))

EXAMPLE 2: Find clan that produces most successful males on average

create_id_starting.table(sex = "male") %>%
  # Fetch birth clan first: it is all the main-clan filter needs
  mutate(birth.clan = fetch_id_clan.birth(ID = ID)) %>%
  filter(birth.clan %in% find_clan_name.all(main.clans = TRUE)) %>%
  # RS is fetched after filtering, so only retained males are queried
  mutate(RS = fetch_id_number.offspring(ID = ID)) %>%
  group_by(birth.clan) %>%
  summarise(
    meanRS = mean(RS),
    maxRS = max(RS)
  ) %>%
  arrange(desc(meanRS))
# A tibble: 8 × 3
  birth.clan meanRS maxRS
  <chr>       <dbl> <int>
1 N           1.46     14
2 M           1.42     19
3 A           1.27     26
4 L           1.13     14
5 F           1.13     29
6 E           1.01     17
7 T           0.462     8
8 S           0.357     7

TASK 1: Reproductive success of males

EXAMPLE 3: Reproductive success of uncensored males

## Do the same thing with `create_id_starting.table()`
create_id_starting.table(
  # All males...
  sex = "male",
  # ...born into one of the main clans...
  clan = find_clan_name.all(main.clans = TRUE),
  # ...that 'started being cubs' during the focal period,
  # i.e. they were born during this time
  lifestage = "cub",
  lifestage.overlap = "start",
  # Focal period = the observation period
  # (rather than from first estimated birth or conception)
  from = find_pop_date.observation.first(),
  to = find_pop_date.observation.last()
)

EXAMPLE 3: Reproductive success of uncensored males

# Extract data 'manually' without `create_id_starting.table()` arguments
create_id_starting.table(sex = "male") %>%
  mutate(
    # Birth date and birth clan of each male
    birth.date = fetch_id_date.birth(ID = ID),
    birth.clan = fetch_id_clan.birth(ID = ID)
  ) %>%
  filter(
    # Born in one of the main clans
    birth.clan %in% find_clan_name.all(main.clans = TRUE),
    # Born after the study started (i.e. not left censored)
    birth.date > find_pop_date.observation.first()
  )

EXAMPLE 3: Reproductive success of uncensored males

create_id_starting.table(
  sex = "male",
  lifestage = "cub",
  lifestage.overlap = "start",
  clan = find_clan_name.all(main.clans = TRUE),
  from = find_pop_date.observation.first(),
  to = find_pop_date.observation.last()
) %>%
  # Drop right-censored males by flagging who was alive on the last
  # observation date and keeping the others
  # NOT THE BEST WAY
  mutate(
    is.alive = fetch_id_is.alive(
      ID = ID,
      at = find_pop_date.observation.last()
    )
  ) %>%
  filter(is.alive == FALSE)
# A tibble: 934 × 2
   ID    is.alive
   <chr> <lgl>   
 1 A-059 FALSE   
 2 A-086 FALSE   
 3 A-087 FALSE   
 4 A-089 FALSE   
 5 A-090 FALSE   
 6 A-091 FALSE   
 7 A-092 FALSE   
 8 A-093 FALSE   
 9 A-095 FALSE   
10 A-097 FALSE   
# … with 924 more rows

EXAMPLE 3: Reproductive success of uncensored males

create_id_starting.table(
  sex = "male",
  lifestage = "cub",
  lifestage.overlap = "start",
  clan = find_clan_name.all(main.clans = TRUE),
  from = find_pop_date.observation.first(),
  to = find_pop_date.observation.last()
) %>%
  # Store the right-censoring flag in a helper column, then keep the
  # males for which it is FALSE
  # NOT THE BEST WAY
  mutate(iscensored = fetch_id_is.censored.right(ID = ID)) %>%
  filter(iscensored == FALSE)
# A tibble: 934 × 2
   ID    iscensored
   <chr> <lgl>     
 1 A-059 FALSE     
 2 A-086 FALSE     
 3 A-087 FALSE     
 4 A-089 FALSE     
 5 A-090 FALSE     
 6 A-091 FALSE     
 7 A-092 FALSE     
 8 A-093 FALSE     
 9 A-095 FALSE     
10 A-097 FALSE     
# … with 924 more rows

EXAMPLE 3: Reproductive success of uncensored males

create_id_starting.table(
  sex = "male",
  lifestage = "cub",
  lifestage.overlap = "start",
  clan = find_clan_name.all(main.clans = TRUE),
  from = find_pop_date.observation.first(),
  to = find_pop_date.observation.last()
) %>%
  # Keep only males that are not right censored, filtering on the
  # function result directly instead of storing it in a column
  # BETTER
  filter(!fetch_id_is.censored.right(ID = ID))
# A tibble: 934 × 1
   ID   
   <chr>
 1 A-059
 2 A-086
 3 A-087
 4 A-089
 5 A-090
 6 A-091
 7 A-092
 8 A-093
 9 A-095
10 A-097
# … with 924 more rows

EXAMPLE 3: Reproductive success of uncensored males

create_id_starting.table(
  sex = "male",
  lifestage = "cub",
  lifestage.overlap = "start",
  clan = find_clan_name.all(main.clans = TRUE),
  from = find_pop_date.observation.first(),
  to = find_pop_date.observation.last()
) %>%
  filter(!fetch_id_is.censored.right(ID = ID)) %>%
  mutate(
    # RS is now defined as the number of offspring that reach 6 months
    RS = fetch_id_number.offspring(
      ID = ID,
      age.mature = 6,
      unit = "months"
    ),
    # Birth clan
    birth.clan = fetch_id_clan.birth(ID = ID),
    # Death date of each individual
    death.date = fetch_id_date.death(ID = ID),
    # Age at death (reuses the death.date column created just above)
    max.age = fetch_id_age(ID = ID, at = death.date)
  )

EXAMPLE 3: Reproductive success of uncensored males

create_id_starting.table(
  sex = "male",
  lifestage = "cub",
  lifestage.overlap = "start",
  clan = find_clan_name.all(main.clans = TRUE),
  from = find_pop_date.observation.first(),
  to = find_pop_date.observation.last()
) %>%
  filter(!fetch_id_is.censored.right(ID = ID)) %>%
  mutate(
    # RS is now defined as the number of offspring that reach 6 months
    RS = fetch_id_number.offspring(
      ID = ID,
      age.mature = 6,
      unit = "months"
    ),
    # Birth clan
    birth.clan = fetch_id_clan.birth(ID = ID),
    # Age at death; the death date is fetched inline, so it is not kept
    # as a column
    max.age = fetch_id_age(
      ID = ID,
      at = fetch_id_date.death(ID = ID)
    )
  )

EXAMPLE 3: Reproductive success of uncensored males

create_id_starting.table(
  sex = "male",
  lifestage = "cub",
  lifestage.overlap = "start",
  clan = find_clan_name.all(main.clans = TRUE),
  from = find_pop_date.observation.first(),
  to = find_pop_date.observation.last()
) %>%
  filter(!fetch_id_is.censored.right(ID = ID)) %>%
  mutate(
    RS = fetch_id_number.offspring(
      ID = ID,
      age.mature = 6,
      unit = "months"
    ),
    birth.clan = fetch_id_clan.birth(ID = ID),
    max.age = fetch_id_age(
      ID = ID,
      at = fetch_id_date.death(ID = ID)
    )
  ) %>%
  # Mean RS per birth clan, highest first
  group_by(birth.clan) %>%
  summarise(meanRS = mean(RS)) %>%
  arrange(desc(meanRS))

EXAMPLE 3: Reproductive success of uncensored males

# Uncensored males born into main clans during the observation period
create_id_starting.table(
  sex = "male",
  lifestage = "cub",
  clan = find_clan_name.all(main.clans = TRUE),
  lifestage.overlap = "start",
  from = find_pop_date.observation.first(),
  to = find_pop_date.observation.last()
) %>%
  filter(!fetch_id_is.censored.right(ID = ID)) %>%
  mutate(
    RS = fetch_id_number.offspring(
      ID = ID,
      age.mature = 6,
      unit = "months"
    ),
    birth.clan = fetch_id_clan.birth(ID = ID),
    max.age = fetch_id_age(
      ID = ID,
      at = fetch_id_date.death(ID = ID)
    )
  ) %>%
  # Mean RS per birth clan, highest first
  group_by(birth.clan) %>%
  summarise(meanRS = mean(RS)) %>%
  arrange(desc(meanRS))
# A tibble: 8 × 2
  birth.clan meanRS
  <chr>       <dbl>
1 M           1.34 
2 A           1.24 
3 N           1.13 
4 L           1.03 
5 F           0.927
6 E           0.908
7 T           0.388
8 S           0.370

EXAMPLE 3: Reproductive success of uncensored males

# Uncensored males born into main clans during the observation period
create_id_starting.table(
  sex = "male",
  lifestage = "cub",
  clan = find_clan_name.all(main.clans = TRUE),
  lifestage.overlap = "start",
  from = find_pop_date.observation.first(),
  to = find_pop_date.observation.last()
) %>%
  filter(!fetch_id_is.censored.right(ID = ID)) %>%
  mutate(
    RS = fetch_id_number.offspring(
      ID = ID,
      age.mature = 6,
      unit = "months"
    ),
    birth.clan = fetch_id_clan.birth(ID = ID),
    max.age = fetch_id_age(
      ID = ID,
      at = fetch_id_date.death(ID = ID)
    )
  ) %>%
  group_by(birth.clan) %>%
  summarise(
    meanRS = mean(RS),
    # RS divided by age at death, averaged within each clan
    # NOTE(review): the column name implies max.age is in years - confirm
    # the default unit of fetch_id_age()
    meanRS_peryr = mean(RS / max.age)
  ) %>%
  # Clans with the highest age-corrected RS come first
  arrange(desc(meanRS_peryr))
# A tibble: 8 × 3
  birth.clan meanRS meanRS_peryr
  <chr>       <dbl>        <dbl>
1 M           1.34        0.123 
2 N           1.13        0.119 
3 F           0.927       0.113 
4 A           1.24        0.111 
5 E           0.908       0.0905
6 L           1.03        0.0834
7 T           0.388       0.0515
8 S           0.370       0.0383