Data Science with {hyenaR}:
LESSON 1

Liam D. Bailey

Structure of {hyenaR}

The basics

Collect data

Update .csv files

Upload to GitHub

Download the data

Build the database

Get to work

Why are we using GitHub?


  • Accessible online from anywhere
  • Includes version control of data
  • Can be stored in the same place as our code

What is GitHub?

What is GitHub?


  • Git: A powerful version control system (i.e. we can keep track of how things change)

  • GitHub: An online platform for hosting projects that are managed with Git.

A (simple) example of GitHub

Download {hyenaR}


Use {drat} to access the current (stable) version of {hyenaR}.


## To download package from other sources
library(drat)

## Include 'hyenaproject' as a package source
addRepo("hyenaproject") 

## Download hyenaR
install.packages("hyenaR")

Accessing the data


library(hyenaR)
download_package_csv(
  
  #Where to download
  csv.output.folder = "example_git/source_data"
  
)

Accessing the data

Build the database


build_package_database.full(
  
  #Name of database file
  db.name = "Fisidata_2022_08_10",
  
  #Where the .csv files are stored
  input.folder = "example_git/source_data",
  
  #Where the database will be stored
  db.output.folder = "example_git/source_data"
)

Build the database

How to work with {hyenaR}


STEP 1: Load the database we built

load_package_database.full(
  
  # Location of our database file
  db.path = "example_git/source_data/Fisidata_2022_08_10.sqlite"
  
)


STEP 2: Use a {hyenaR} function

fetch_id_sex(ID = "A-001")
[1] "female"

{hyenaR} function ‘types’

create

create: Create a data frame using hyena data


Example 1: Life-history of individuals

create_id_life.history.table(
  
  ## ID of individual(s) we want to build life-history table for
  ID = "A-001"
  
  )
# A tibble: 4 × 7
  ID    clan  life_stage  starting_date ending_date isrightcensored isleftcens…¹
  <chr> <chr> <chr>       <date>        <date>      <lgl>           <lgl>       
1 A-001 A     cub         1988-05-21    1989-05-20  FALSE           TRUE        
2 A-001 A     subadult    1989-05-21    1990-05-20  FALSE           TRUE        
3 A-001 A     philopatric 1990-05-21    2007-06-07  FALSE           TRUE        
4 A-001 A     dead        2007-06-08    Inf         TRUE            FALSE       
# … with abbreviated variable name ¹​isleftcensored

create: Create a data frame using hyena data


Example 2: All litters of an individual

create_litter_starting.table(
  
  ## ID of individual(s) we want to find litters from
  parentID = "A-001"
  
  )
# A tibble: 9 × 2
  parentID litterID 
  <chr>    <chr>    
1 A-001    A-001_001
2 A-001    A-001_002
3 A-001    A-001_003
4 A-001    A-001_004
5 A-001    A-001_005
6 A-001    A-001_006
7 A-001    A-001_007
8 A-001    A-001_008
9 A-001    A-001_009

fetch

fetch: Return information about target (e.g. ID or clan)


Example 1: Sex of individuals

fetch_id_sex(
  
  ## ID of individual(s) we want to know sex
  ID = c("A-001", "L-015")
  
  )
[1] "female" "female"

fetch: Return information about target (e.g. ID or clan)


Example 2: Age of individuals at a certain time

fetch_id_age(
  
  ## ID of individual(s) we want to know age
  ID = c("A-001"),
  
  ## Date at which we want to know the age
  at = "1997-05-01",
  
  ## Units age is measured in
  unit = "years"
  
  )
[1] 8.944559

fetch: Return information about target (e.g. ID or clan)


Example 3: Number of individuals in a clan at a certain time

fetch_clan_number(
  
  #Clan we want to find size of
  clan = "S",
  
  #Date at which we count number of individuals
  at = "1996-10-01"
  
  )
[1] 9

find

find: (Generally) used to find the names of individuals (IDs) or clans


Example 1: Names of individuals in a clan at a certain time

find_clan_id(
  
  #Clan we want to find inhabitants
  clan = "S",
  
  #Date at which we find inhabitants
  at = "1996-10-01"
  
  )
[1] "S-001" "S-002" "S-003" "S-005" "S-040" "S-041" "S-042" "S-043" "S-080"

find: (Generally) used to find the names of individuals (IDs) or clans


Example 2: Names of the known offspring of an individual

find_id_id.offspring(
  
  #Individual(s) we want to find offspring for
  ID = "A-001",
  
  #Offspring born before this date are counted
  to = "1996-12-31"
  
  )
[1] "A-010" "A-018" "A-046" "A-084" "A-088" "A-089"

find: (Generally) used to find the names of individuals (IDs) or clans


Example 3: Names of all ancestors of an individual

find_id_id.ancestor.all(
  
  #Individual(s) to find ancestors
  ID = "A-089"
  
  )
[1] "A-089" "A-001"

The ‘grammar’ of {hyenaR}

verb_target_qualifier

fetch_id_sex

Using {hyenaR} with {tidyverse}

Using {hyenaR} with {tidyverse}


EXAMPLE 1: create, fetch and summarise

library(tidyverse)

#Create data frame with all offspring of an individual
create_id_offspring.table(ID = "A-001")
# A tibble: 15 × 4
   parentID offspringID birthdate  filiation            
   <chr>    <chr>       <date>     <chr>                
 1 A-001    A-010       1994-09-21 mother_social_genetic
 2 A-001    A-018       1994-09-21 mother_social_genetic
 3 A-001    A-046       1994-09-21 mother_social_genetic
 4 A-001    A-084       1995-10-08 mother_social_genetic
 5 A-001    A-088       1996-12-12 mother_social_genetic
 6 A-001    A-089       1996-12-12 mother_social_genetic
 7 A-001    A-101       1998-01-18 mother_social_genetic
 8 A-001    A-107       1999-03-17 mother_social_genetic
 9 A-001    A-108       1999-03-17 mother_social_genetic
10 A-001    A-127       2000-07-25 mother_social_genetic
11 A-001    A-128       2000-07-25 mother_social_genetic
12 A-001    A-146       2002-05-01 mother_social_genetic
13 A-001    A-188       2004-11-14 mother_social_genetic
14 A-001    A-189       2004-11-14 mother_social_genetic
15 A-001    A-206       2005-10-22 mother_social_genetic

Using {hyenaR} with {tidyverse}


EXAMPLE 1: create, fetch and summarise

library(tidyverse)

#Create data frame with all offspring of an individual
create_id_offspring.table(ID = "A-001") %>% 
  
  #Fetch sex of each offspring
  mutate(sex = fetch_id_sex(ID = offspringID))
# A tibble: 15 × 5
   parentID offspringID birthdate  filiation             sex   
   <chr>    <chr>       <date>     <chr>                 <chr> 
 1 A-001    A-010       1994-09-21 mother_social_genetic female
 2 A-001    A-018       1994-09-21 mother_social_genetic female
 3 A-001    A-046       1994-09-21 mother_social_genetic male  
 4 A-001    A-084       1995-10-08 mother_social_genetic male  
 5 A-001    A-088       1996-12-12 mother_social_genetic female
 6 A-001    A-089       1996-12-12 mother_social_genetic male  
 7 A-001    A-101       1998-01-18 mother_social_genetic female
 8 A-001    A-107       1999-03-17 mother_social_genetic female
 9 A-001    A-108       1999-03-17 mother_social_genetic female
10 A-001    A-127       2000-07-25 mother_social_genetic male  
11 A-001    A-128       2000-07-25 mother_social_genetic male  
12 A-001    A-146       2002-05-01 mother_social_genetic male  
13 A-001    A-188       2004-11-14 mother_social_genetic female
14 A-001    A-189       2004-11-14 mother_social_genetic male  
15 A-001    A-206       2005-10-22 mother_social_genetic female

Using {hyenaR} with {tidyverse}


EXAMPLE 1: create, fetch and summarise

library(tidyverse)

#Create data frame with all offspring of an individual
create_id_offspring.table(ID = "A-001") %>% 
  
  #Fetch sex of each offspring
  mutate(sex = fetch_id_sex(ID = offspringID)) %>% 
  
  #Find number of offspring of each sex
  group_by(sex) %>% 
  summarise(n = n())
# A tibble: 2 × 2
  sex        n
  <chr>  <int>
1 female     8
2 male       7

Using {hyenaR} with {tidyverse}


EXAMPLE 2: create, fetch and plot

## Create data frame of all females in Airstrip up to the end of 1996
create_id_starting.table(
  
  ## Sex we want to find
  sex = "female",
  
  ## Clan we want to target
  clan = "A",
  
  ## Date up until which we search
  to = "1996-12-31")
# A tibble: 20 × 1
   ID   
   <chr>
 1 A-001
 2 A-002
 3 A-003
 4 A-004
 5 A-006
 6 A-007
 7 A-008
 8 A-009
 9 A-010
10 A-013
11 A-014
12 A-015
13 A-016
14 A-017
15 A-018
16 A-019
17 A-020
18 A-080
19 A-081
20 A-088

Using {hyenaR} with {tidyverse}


EXAMPLE 2: create, fetch and plot

create_id_starting.table(sex = "female",
                         clan = "A", to = "1996-12-31") %>% 
  
  ## Find number of offspring for each individual over the same period
  mutate(RS = fetch_id_number.offspring(ID = ID, to = "1996-12-31"))
# A tibble: 20 × 2
   ID       RS
   <chr> <int>
 1 A-001     6
 2 A-002     1
 3 A-003     2
 4 A-004     0
 5 A-006     0
 6 A-007     1
 7 A-008     0
 8 A-009     0
 9 A-010     0
10 A-013     1
11 A-014     2
12 A-015     1
13 A-016     2
14 A-017     0
15 A-018     0
16 A-019     0
17 A-020     2
18 A-080     0
19 A-081     0
20 A-088     0

Using {hyenaR} with {tidyverse}


EXAMPLE 2: create, fetch and plot

create_id_starting.table(sex = "female",
                         clan = "A", to = "1996-12-31") %>% 
  mutate(RS = fetch_id_number.offspring(ID = ID, to = "1996-12-31")) %>% 
  
  ## PLOT OUR RESULTS
  mutate(ID = fct_reorder(ID, RS, .fun = identity)) %>% 
  {ggplot(data = .) +
      geom_col(aes(x = ID, y = RS)) +
      coord_flip() +
      scale_y_continuous(position = "right") +
      theme_classic()}

Using {hyenaR} with {tidyverse}


EXAMPLE 2: create, fetch and plot