---
title: "Prototype for analysis of body weight"
author: "Will Cornwell & Dan Noble"
date: "03/07/2018"
output: 
  html_document:
    code_folding: show
    theme: yeti
    toc: yes
    toc_depth: 3
    toc_float:
      collapsed: false
      smooth_scroll: true
editor_options: 
  chunk_output_type: console
---

```{r setup, include=FALSE, echo=TRUE}
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE, cache=FALSE)
knitr::opts_knit$set(root.dir =  rprojroot::find_root("README.md"))
```


Packages

```{r}
#devtools::install_github("thomasp85/patchwork") # Make sure to download first in a new R session
#install.packages("gridExtra")
library(gridExtra)
library(ggplot2)
library(tidyverse)
```	

Bring in data, Subset data based on body weight

```{r}
data <- readRDS("export/data_clean.rds") 
bw <- data%>%
  filter(parameter_name=="body weight")
```

Check first whether weight and data_point column match

```{r}
any(bw$weight == bw$data_point)
```

Subset the data to get between 100 & 300 days

```{r}
bw_100_300 <- subset(bw, age_in_days > 98 & age_in_days <300)
```

PLot and see what the distribution looks like

```{r}
ggplot(bw_100_300, aes(x=data_point, fill = sex)) + geom_density(alpha = 0.5)
```

Function for cutting data into age bins and then calculating SD and CV for each sex and strain

```{r}
sd_bin <- function(data, bins, strains){
	data$cut_age <- cut(data$age_in_days, bins)
	sd <- data %>% 
	  filter(strain_name %in% strains) %>% 
	  group_by(sex, cut_age, strain_name) %>% 
	  summarise(sd = sd(data_point), cv = sd(data_point) / mean(data_point), n = n())

	return(sd)
}
```

Generate a new dataset containing SD and CV for the following three strains using the full data set so we don't restrict age.

```{r}
age_sd <- sd_bin(data, 10, strains = c("C57BL/6N", "C57BL/6NTac", "C57BL/6NJ"))
```
Check data

```{r}
print(arrange(age_sd, strain_name), n = 30)
```

Simple plot

```{r}
ggplot(age_sd, aes(x=cut_age, y=cv, col = sex)) + geom_point() + facet_grid(~strain_name) + theme_classic()
```

Main plot: PLOT THIS

(**Daniel ahs no idea why these aren't showing up**)

```{r}
p1 <- ggplot(bw, aes(x=age_in_days, y=data_point, col = sex)) + geom_point(alpha = 0.5) + labs(x = "Age (days)", y = "Body weight")

p2 <- ggplot(age_sd, aes(x=cut_age, y=sd, col = sex)) + geom_point(size=2) + facet_grid(~strain_name) + theme_bw() + theme(axis.text.x=element_text(size=rel(0.4))) + labs(x = "Age bin", y = "SD") 
```

```{r}
print(p1)

print(p2)
```

```{r, eval=FALSE}
grid.arrange(p1, p2)
```

## Simulating a power analysis - didn't get far with this....

Get standard deviation in two groups

```{r}
sd <- bw_100_300 %>% group_by(sex) %>% summarise(sd = sd(data_point))
mean <- bw_100_300 %>% group_by(sex) %>% summarise(mean = mean(data_point))
```

Sample sizes

```{r}
n <- c(5, 10, 20, 30)
```

Means of males and females for a trait

```{r}
m_male   <- as.numeric(mean[mean$sex == "male", "mean"])
m_female <- as.numeric(mean[mean$sex == "female", "mean"])
```
SD of males and females for a trait

```{r}
sd_male   <- as.numeric(sd[sd$sex == "male", "sd"])
sd_female <- as.numeric(sd[sd$sex == "female", "sd"])
#pwr.t2n.test
```