library(tidyverse)
library(expm)
library(kableExtra)
library(lattice)
library(gridExtra)
Problem Background
state_names <- c("sandwich", "burrito", "pizza")
# one-step transition matrix; rows and columns follow the order of state_names
P <- rbind(
  c(0, 0.5, 0.5),
  c(0.1, 0.4, 0.5),
  c(0.2, 0.3, 0.5)
)
pi_0 <- c(0, 0, 1)  # initial distribution: pizza on day 0 with probability 1
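For reference, the exact marginal distributions implied by P can also be computed directly, which gives a benchmark for the simulations below. A minimal sketch using the %^% operator from expm (exact_steps is a name introduced here, not part of the original code):
# Exact day-k distributions over (sandwich, burrito, pizza), starting from pizza on day 0.
exact_steps <- t(sapply(1:4, function(k) pi_0 %*% (P %^% k)))
rownames(exact_steps) <- paste("day", 1:4)
colnames(exact_steps) <- state_names
round(exact_steps, 4)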
Write code to set up and run a simulation to investigate the following.
Question 1
X_4
N <- 1000
n <- 4
costs <- c(9, 7, 5)
sim <- function(P, n, N) {
  combos <- matrix(ncol = 1)
  for(i in 1:N) {
    prev <- 3                      # day 0 is always pizza
    for(j in 1:n){
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      prev <- choice
    }
    combos <- rbind(combos, prev)  # keep only the day-n state
  }
  combos <- combos[-1,]            # drop the NA row used to initialize the matrix
  # map states to lunch costs: sandwich $9, burrito $7, pizza $5
  result <- replace(combos, combos == 1, 9)
  result <- replace(result, result == 2, 7)
  result <- replace(result, result == 3, 5)
  cost_tbl <- table(result)
  return(list(prop.table(cost_tbl), result))
}
m <- sim(P, n, N)
data.frame(c = m[[2]]) %>%
ggplot() +
geom_bar(aes(c)) +
theme_bw() +
scale_x_continuous(breaks=as.numeric(names(m[[1]]))) +
labs(x = "Cost of Lunch ($)",
y = "",
title = paste("Lunch Cost of Friday (N=", N, ")", sep=""))
- This bar plot shows that pizza is the most likely lunch on Friday while a sandwich is the least likely. This makes sense because the probability of choosing pizza after any lunch item is 0.5, the largest transition probability in the chain, while the probability of choosing a sandwich is at most 0.2 (0.1 after a burrito, 0.2 after pizza, and 0 after a sandwich).
Marginal Distribution:
kable(m[[1]], col.names = c('Cost', 'Probability'))
Expected Value:
EV <- round(mean(m[[2]]), 2)
print(paste("$", EV, sep=""))
## [1] "$6.28"
Standard Deviation:
SD <- round(sd(m[[2]]), 2)
print(paste("$", SD, sep=""))
## [1] "$1.44"
T
N <- 1000
sim <- function(P, N) {
  result <- matrix(ncol = 1)
  for(i in 1:N) {
    prev <- 3                      # day 0 is always pizza
    count <- 0
    while(prev != 1) {             # step the chain until the first sandwich
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      prev <- choice
      count <- count + 1
    }
    result <- rbind(result, count) # T = number of days until the first sandwich
  }
  result <- result[-1,]            # drop the NA row used to initialize the matrix
  day_tbl <- table(result)
  return(list(prop.table(day_tbl), result))
}
m <- sim(P, N)
data.frame(c = m[[2]]) %>%
ggplot() +
geom_histogram(aes(c)) +
theme_bw() +
labs(x = "Days",
y = "",
title = paste("Days Until First Sandwich (N=", N, ")", sep=""))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
- This histogram is right skewed with a center of about 5 days, so the first sandwich tends to come early rather than late. In other words, you will most likely have eaten a sandwich within the first two five-day weeks.
Marginal Distribution:
kable(m[[1]], col.names = c('Days', 'Probability'))
Days | Probability |
---|---|
1 | 0.202 |
2 | 0.118 |
3 | 0.124 |
4 | 0.085 |
5 | 0.078 |
6 | 0.060 |
7 | 0.050 |
8 | 0.047 |
9 | 0.037 |
10 | 0.025 |
11 | 0.037 |
12 | 0.024 |
13 | 0.020 |
14 | 0.011 |
15 | 0.018 |
16 | 0.013 |
17 | 0.008 |
18 | 0.007 |
19 | 0.007 |
20 | 0.006 |
21 | 0.002 |
22 | 0.003 |
23 | 0.001 |
24 | 0.005 |
25 | 0.004 |
26 | 0.002 |
27 | 0.001 |
29 | 0.001 |
30 | 0.001 |
31 | 0.001 |
36 | 0.001 |
37 | 0.001 |
Expected Value:
EV <- round(mean(m[[2]]), 2)
print(paste(EV, " days", sep=""))
## [1] "5.9 days"
Standard Deviation:
SD <- round(sd(m[[2]]), 4)
print(paste(SD, " days", sep=""))
## [1] "5.4251 days"
V
N <- 1000
n <- 4
burrito_count <- function(x){
sum(x == 2)
}
sim <- function(P, n, N) {
  combos <- matrix(ncol = n)
  for(i in 1:N) {
    lunch <- c()
    prev <- 3                      # day 0 is always pizza
    for(j in 1:n){
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      lunch <- append(lunch, choice)
      prev <- choice
    }
    combos <- rbind(combos, lunch) # one simulated week (days 1 through n) per row
  }
  combos <- combos[-1,]            # drop the NA row used to initialize the matrix
  result <- apply(combos, 1, burrito_count)  # V = number of burrito days in the week
  count_tbl <- table(result)
  return(list(prop.table(count_tbl), result))
}
m <- sim(P, n, N)
data.frame(c = m[[2]]) %>%
ggplot() +
geom_bar(aes(c)) +
theme_bw() +
labs(x = "Days",
y = "",
title = paste("Days Burrito was Chosen within a Week (N=", N, ")", sep=""))
- This bar plot is slightly right skewed, with a center of around 1.5 days. In most weeks we eat a burrito once or twice, and occasionally not at all.
Marginal Distribution:
kable(m[[1]], col.names = c('Days', 'Probability'))
Days | Probability |
---|---|
0 | 0.202 |
1 | 0.350 |
2 | 0.308 |
3 | 0.116 |
4 | 0.024 |
Expected Value:
EV <- round(mean(m[[2]]), 2)
print(paste(EV, " days", sep=""))
## [1] "1.41 days"
Standard Deviation:
SD <- round(sd(m[[2]]), 4)
print(paste(SD, " days", sep=""))
## [1] "1.0114 days"
W
N <- 1000
n <- 4
costs <- c(9, 7, 5)
n_day_cost <- function(x){
sum <- 5 # day 0 is always pizza ($5)
for(i in x){
sum <- sum + costs[i]
}
return(sum)
}
sim <- function(P, n, N) {
combos <- matrix(ncol = n)
for(i in 1:N) {
lunch <- c()
prev <- 3
for(j in 1:n){
choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
lunch <- append(lunch, choice)
prev <- choice
}
combos <- rbind(combos, lunch)
}
combos <- combos[-1,]
result <- apply(combos, 1, n_day_cost)
cost_tbl <- table(result)
return(list(prop.table(cost_tbl), result))
}
m <- sim(P, n, N)
data.frame(c = m[[2]]) %>%
ggplot() +
geom_bar(aes(c)) +
theme_bw() +
scale_x_continuous(breaks=as.numeric(names(m[[1]]))) +
labs(x = "Cost of Lunch ($)",
y = "",
title = paste("Total Lunch Cost of Week (N=", N, ")", sep=""))
- This bar plot is roughly normal with a center at about $30. This tells us that we spend roughly $30 on lunch each week, typically varying by about $3 from week to week.
Marginal Distribution:
kable(m[[1]], col.names = c('Cost', 'Probability'))
Cost | Probability |
---|---|
25 | 0.058 |
27 | 0.147 |
29 | 0.264 |
31 | 0.287 |
33 | 0.175 |
35 | 0.058 |
37 | 0.011 |
Expected Value:
EV <- round(mean(m[[2]]), 2)
print(paste("$", EV, sep=""))
## [1] "$30.18"
Standard Deviation:
SD <- round(sd(m[[2]]), 2)
print(paste("$", SD, sep=""))
## [1] "$2.62"
Question 2
X_4 and X_5
N <- 1000
n <- 5
costs <- c(9, 7, 5)
sim <- function(P, n, N) {
combos <- matrix(ncol = n)
for(i in 1:N) {
lunch <- c()
prev <- 3
for(j in 1:n){
choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
lunch <- append(lunch, choice)
prev <- choice
}
combos <- rbind(combos, lunch)
}
combos <- combos[-1,]
result <- replace(combos, combos == 1, 9)
result <- replace(result, result == 2, 7)
result <- replace(result, result == 3, 5)
result <- data.frame(X_4 = result[, 4], X_5 = result[, 5])
joint_dist <- table(result)
return(list(cor(result)[1, 2], joint_dist))
}
m <- sim(P, n, N)
levelplot(m[[2]], col.regions=heat.colors(100, rev=TRUE),
xlab = "Cost of Lunch Friday ($)",
ylab = "Cost of Lunch Saturday ($)",
main = paste("Lunch Cost on Friday and Saturday (N = ", N, ")",
sep=""))
print(paste("Correlation between X_4 and X_5:", round(m[[1]], 4)))
## [1] "Correlation between X_4 and X_5: -0.064"
- In this heat map, the most common pair is (5, 5), pizza on both days, and the probability drops as the pair of prices gets higher. With a correlation of about -0.06, there is essentially no linear relationship between the cost on day 4 and the cost on day 5. Even though consecutive days are dependent, the correlation is weak because the rows of P are fairly similar, so the day-5 distribution changes little with the day-4 state.
kable(prop.table(m[[2]]))
X_4 \ X_5 | 5 | 7 | 9 |
---|---|---|---|
5 | 0.254 | 0.157 | 0.101 |
7 | 0.179 | 0.160 | 0.036 |
9 | 0.047 | 0.066 | 0.000 |
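For comparison (a sketch, not part of the original code; exact_joint is a name introduced here), the exact joint distribution has a closed form, since P(X_4 = i, X_5 = j) = (pi_0 P^4)_i * P[i, j]:
pi_4 <- as.vector(pi_0 %*% (P %^% 4))
exact_joint <- sweep(P, 1, pi_4, `*`)               # row i scaled by P(X_4 = state i)
dimnames(exact_joint) <- list(X_4 = costs, X_5 = costs)
round(exact_joint, 3)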
T and V
N <- 1000
n <- 4
burrito_count <- function(x){
sum(x == 2)
}
sim <- function(P, n, N) {
  counts <- matrix(ncol = 1)
  combos <- matrix(ncol = n+1)
  for(i in 1:N) {
    lunch <- c(3)            # day 0 is always pizza
    prev <- 3
    count <- 0
    fin_count <- NA          # T: day of the first sandwich
    # keep stepping until the week is filled AND a sandwich has occurred
    while(is.na(fin_count) | count < n) {
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      count <- count + 1
      if(count <= n) {
        lunch <- append(lunch, choice)     # record only the days within the week
      }
      if(choice == 1 & is.na(fin_count)) {
        fin_count <- count                 # first sandwich day
      }
      prev <- choice                       # always advance the chain from the actual state
    }
    combos <- rbind(combos, lunch)
    counts <- rbind(counts, fin_count)
  }
  combos <- combos[-1,]
  counts <- counts[-1,]
  result <- apply(combos, 1, burrito_count)  # V = number of burrito days in the week
  result <- data.frame(T = counts, V = result)
  joint_dist <- table(result)
  return(list(result, joint_dist))
}
m <- sim(P, n, N)
levelplot(m[[2]], col.regions=heat.colors(100, rev=TRUE),
xlab = "Days Until First Sandwich",
ylab = "Days Burrito was Chosen within a Week",
main = paste("Join Distribution for T and V (N = ", N, ")",
sep=""))
print(paste("Correlation between T and V:", round(cor(m[[1]])[1,2], 4)))
## [1] "Correlation between T and V: 0.125"
- This heat map shows us that if the first sandwich comes early (within 1 to 4 days), we tend to have eaten one or two burritos that week. With a correlation of only about 0.13, there is at most a weak relationship between the day of the first sandwich and the number of burritos eaten in the week. The slight positive association makes sense because a sandwich is more likely to follow pizza (0.2) than a burrito (0.1), so weeks with more burrito days tend to push the first sandwich a bit later.
kable(prop.table(m[[2]]))
T \ V | 0 | 1 | 2 | 3 | 4 |
---|---|---|---|---|---|
1 | 0.024 | 0.082 | 0.077 | 0.030 | 0.000 |
2 | 0.019 | 0.062 | 0.038 | 0.005 | 0.000 |
3 | 0.018 | 0.052 | 0.024 | 0.006 | 0.000 |
4 | 0.026 | 0.039 | 0.030 | 0.001 | 0.000 |
5 | 0.008 | 0.026 | 0.028 | 0.016 | 0.003 |
6 | 0.007 | 0.027 | 0.017 | 0.010 | 0.006 |
7 | 0.008 | 0.011 | 0.012 | 0.008 | 0.004 |
8 | 0.003 | 0.017 | 0.013 | 0.011 | 0.000 |
9 | 0.004 | 0.011 | 0.008 | 0.006 | 0.001 |
10 | 0.004 | 0.005 | 0.011 | 0.004 | 0.003 |
11 | 0.005 | 0.009 | 0.006 | 0.006 | 0.002 |
12 | 0.001 | 0.003 | 0.006 | 0.006 | 0.002 |
13 | 0.001 | 0.008 | 0.007 | 0.004 | 0.000 |
14 | 0.003 | 0.009 | 0.005 | 0.007 | 0.002 |
15 | 0.002 | 0.008 | 0.007 | 0.003 | 0.001 |
16 | 0.001 | 0.004 | 0.001 | 0.000 | 0.000 |
17 | 0.000 | 0.002 | 0.003 | 0.002 | 0.001 |
18 | 0.001 | 0.001 | 0.003 | 0.002 | 0.000 |
19 | 0.002 | 0.000 | 0.001 | 0.001 | 0.000 |
20 | 0.000 | 0.002 | 0.003 | 0.001 | 0.000 |
21 | 0.000 | 0.003 | 0.000 | 0.000 | 0.000 |
22 | 0.002 | 0.001 | 0.001 | 0.000 | 0.000 |
23 | 0.000 | 0.000 | 0.002 | 0.001 | 0.001 |
24 | 0.000 | 0.001 | 0.001 | 0.001 | 0.000 |
25 | 0.000 | 0.002 | 0.000 | 0.001 | 0.001 |
26 | 0.000 | 0.000 | 0.000 | 0.001 | 0.000 |
28 | 0.001 | 0.000 | 0.000 | 0.000 | 0.001 |
29 | 0.001 | 0.000 | 0.000 | 0.000 | 0.000 |
32 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
35 | 0.001 | 0.000 | 0.001 | 0.000 | 0.000 |
36 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 |
37 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 |
39 | 0.000 | 0.000 | 0.000 | 0.001 | 0.000 |
41 | 0.000 | 0.001 | 0.001 | 0.000 | 0.000 |
53 | 0.001 | 0.000 | 0.000 | 0.000 | 0.000 |
T and W
N <- 1000
n <- 4
costs <- c(9, 7, 5)
n_day_cost <- function(x){
sum <- 0
for(i in x){
sum <- sum + costs[i]
}
return(sum)
}
sim <- function(P, n, N) {
  counts <- matrix(ncol = 1)
  combos <- matrix(ncol = n+1)
  for(i in 1:N) {
    lunch <- c(3)            # day 0 is always pizza
    prev <- 3
    count <- 0
    fin_count <- NA          # T: day of the first sandwich
    # keep stepping until the week is filled AND a sandwich has occurred
    while(is.na(fin_count) | count < n) {
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      count <- count + 1
      if(count <= n) {
        lunch <- append(lunch, choice)     # record only the days within the week
      }
      if(choice == 1 & is.na(fin_count)) {
        fin_count <- count                 # first sandwich day
      }
      prev <- choice                       # always advance the chain from the actual state
    }
    combos <- rbind(combos, lunch)
    counts <- rbind(counts, fin_count)
  }
  combos <- combos[-1,]
  counts <- counts[-1,]
  result <- apply(combos, 1, n_day_cost)   # W = total cost of the five-day week
  result <- data.frame(T = counts, W = result)
  joint_dist <- table(result)
  return(list(result, joint_dist))
}
m <- sim(P, n, N)
levelplot(m[[2]], col.regions=heat.colors(100, rev=TRUE),
xlab = "Days Until First Sandwich",
ylab = "Total Lunch Cost of Week ($)",
main = paste("Join Distribution for T and W (N = ", N, ")",
sep=""))
print(paste("Correlation between T and W:", round(cor(m[[1]])[1,2], 4)))
## [1] "Correlation between T and W: -0.4421"
- This heat map shows that the most expensive weeks require at least one sandwich (the most expensive item), which only happens when the first sandwich falls within the week. With a correlation of about -0.44, as the number of days until the first sandwich increases, the total cost of the five-day week tends to decrease.
kable(prop.table(m[[2]]))
T \ W | 25 | 27 | 29 | 31 | 33 | 35 |
---|---|---|---|---|---|---|
1 | 0.000 | 0.000 | 0.033 | 0.080 | 0.073 | 0.024 |
2 | 0.000 | 0.000 | 0.029 | 0.065 | 0.028 | 0.005 |
3 | 0.000 | 0.000 | 0.026 | 0.045 | 0.030 | 0.004 |
4 | 0.000 | 0.000 | 0.020 | 0.048 | 0.017 | 0.008 |
5 | 0.010 | 0.019 | 0.016 | 0.007 | 0.001 | 0.000 |
6 | 0.008 | 0.020 | 0.020 | 0.015 | 0.003 | 0.000 |
7 | 0.008 | 0.016 | 0.017 | 0.018 | 0.004 | 0.000 |
8 | 0.006 | 0.013 | 0.017 | 0.005 | 0.001 | 0.000 |
9 | 0.004 | 0.012 | 0.022 | 0.006 | 0.002 | 0.000 |
10 | 0.002 | 0.010 | 0.010 | 0.005 | 0.004 | 0.000 |
11 | 0.007 | 0.016 | 0.010 | 0.008 | 0.000 | 0.000 |
12 | 0.003 | 0.004 | 0.011 | 0.005 | 0.001 | 0.000 |
13 | 0.002 | 0.009 | 0.005 | 0.002 | 0.000 | 0.000 |
14 | 0.000 | 0.005 | 0.004 | 0.005 | 0.001 | 0.000 |
15 | 0.001 | 0.001 | 0.006 | 0.001 | 0.001 | 0.000 |
16 | 0.001 | 0.001 | 0.005 | 0.000 | 0.001 | 0.000 |
17 | 0.001 | 0.002 | 0.003 | 0.000 | 0.000 | 0.000 |
18 | 0.000 | 0.003 | 0.001 | 0.001 | 0.000 | 0.000 |
19 | 0.000 | 0.002 | 0.001 | 0.000 | 0.001 | 0.000 |
20 | 0.000 | 0.002 | 0.003 | 0.000 | 0.000 | 0.000 |
21 | 0.001 | 0.000 | 0.005 | 0.001 | 0.001 | 0.000 |
22 | 0.000 | 0.002 | 0.000 | 0.001 | 0.000 | 0.000 |
23 | 0.000 | 0.002 | 0.002 | 0.001 | 0.000 | 0.000 |
24 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
25 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
26 | 0.000 | 0.002 | 0.001 | 0.000 | 0.000 | 0.000 |
27 | 0.001 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
28 | 0.000 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 |
30 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
36 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
38 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
40 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
51 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
W and V
N <- 1000
n <- 4
costs <- c(9, 7, 5)
burrito_count <- function(x){
sum(x == 2)
}
n_day_cost <- function(x){
sum <- 0
for(i in x){
sum <- sum + costs[i]
}
return(sum)
}
sim <- function(P, n, N) {
combos <- matrix(ncol = n+1)
for(i in 1:N) {
lunch <- c(3)
prev = 3
for(j in 1:n){
choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
lunch <- append(lunch, choice)
prev <- choice
}
combos <- rbind(combos, lunch)
}
combos <- combos[-1,]
W <- apply(combos, 1, n_day_cost)
V <- apply(combos, 1, burrito_count)
result <- data.frame(W = W, V = V)
joint_dist <- table(result)
return(list(result, joint_dist))
}
m <- sim(P, n, N)
levelplot(m[[2]], col.regions=heat.colors(100, rev=TRUE),
xlab = "Total Lunch Cost of Week ($)",
ylab = "Days Burrito was Chosen within a Week",
main = paste("Join Distribution for W and V (N = ", N, ")",
sep=""))
print(paste("Correlation between W and V:", round(cor(m[[1]])[1,2], 4)))
## [1] "Correlation between W and V: 0.4479"
- This heat map shows that only certain weekly totals are possible for a given number of burritos: with S sandwich days and V burrito days among the four simulated days, W = 5 + 9S + 7V + 5(4 - S - V) = 25 + 4S + 2V, which produces the checkerboard pattern. The most common outcome is spending between $27 and $31 and choosing a burrito once or twice. With a correlation of about 0.45, weeks that cost more tend to include more burrito days, which makes sense because burritos are the second most expensive item, so larger totals usually require more burrito (or sandwich) days.
kable(prop.table(m[[2]]))
W \ V | 0 | 1 | 2 | 3 | 4 |
---|---|---|---|---|---|
25 | 0.059 | 0.000 | 0.000 | 0.000 | 0.000 |
27 | 0.000 | 0.141 | 0.000 | 0.000 | 0.000 |
29 | 0.122 | 0.000 | 0.161 | 0.000 | 0.000 |
31 | 0.000 | 0.194 | 0.000 | 0.075 | 0.000 |
33 | 0.028 | 0.000 | 0.116 | 0.000 | 0.017 |
35 | 0.000 | 0.040 | 0.000 | 0.034 | 0.000 |
37 | 0.000 | 0.000 | 0.013 | 0.000 | 0.000 |
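Because a week has only 3^4 = 81 possible paths, the exact joint distribution of (W, V) can also be enumerated directly (a sketch, not part of the original code; paths, probs, W_ex, and V_ex are names introduced here):
paths <- expand.grid(d1 = 1:3, d2 = 1:3, d3 = 1:3, d4 = 1:3)
probs <- apply(paths, 1, function(x) P[3, x[1]] * P[x[1], x[2]] * P[x[2], x[3]] * P[x[3], x[4]])
W_ex <- 5 + apply(paths, 1, function(x) sum(costs[x]))   # weekly cost for each path
V_ex <- apply(paths, 1, function(x) sum(x == 2))         # burrito count for each path
round(tapply(probs, list(W = W_ex, V = V_ex), sum), 3)   # NA marks impossible (W, V) pairs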
Question 3
N <- 1000
burrito_count <- function(x){
sum(x == 2)
}
sim <- function(P, N) {
  combos <- matrix(ncol = 3)
  for(i in 1:N) {
    lunch <- c()
    prev <- 3                      # day 0 is always pizza
    count <- 1
    while(count <= 3){
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      if(choice != 1){             # resample any sandwich pick: days 1-3 must avoid sandwich
        lunch <- append(lunch, choice)
        prev <- choice
        count <- count + 1
      }
    }
    combos <- rbind(combos, lunch)
  }
  combos <- combos[-1,]
  result <- apply(combos, 1, burrito_count)  # V counted over days 1-3 (day 4 is the sandwich)
  count_tbl <- table(result)
  return(list(prop.table(count_tbl), result))
}
m <- sim(P, N)
data.frame(c = m[[2]]) %>%
ggplot() +
geom_bar(aes(c)) +
theme_bw() +
labs(x = "Days",
y = "",
title = paste("Days Burrito was Chosen within a Week Given T = 4 (N=", N, ")", sep=""))
- This bar plot shows us that given that we eat our first sandwich on Friday (and pizza on Monday), we most often eat exactly one burrito that week.
Conditional Distribution:
kable(m[[1]], col.names = c('Days', 'P(V|T=4)'))
Days | P(V \| T = 4) |
---|---|
0 | 0.246 |
1 | 0.408 |
2 | 0.269 |
3 | 0.077 |
Expected Value:
EV <- round(mean(m[[2]]), 2)
print(paste(EV, " days", sep=""))
## [1] "1.18 days"
Standard Deviation:
SD <- round(sd(m[[2]]), 4)
print(paste(SD, " days", sep=""))
## [1] "0.8902 days"
Question 4
- I will compare the following two distributions:
- Y = X_0 + … + X_365, the total lunch cost for the year.
- The conditional distribution of Y given that you never eat the same food item more than three times in a row.
N <- 10000
n <- 364
costs <- c(9, 7, 5)
n_day_cost <- function(x){
sum <- 0
for(i in x){
sum <- sum + costs[i]
}
return(sum)
}
sim <- function(P, n, N) {
combos <- matrix(ncol = n+1)
for(i in 1:N) {
lunch <- c(3)
prev = 3
count <- 0
while(count < n) {
choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
lunch <- append(lunch, choice)
prev <- choice
count <- count + 1
}
combos <- rbind(combos, lunch)
}
combos <- combos[-1,]
result <- apply(combos, 1, n_day_cost)
cost_tbl <- table(result)
return(list(prop.table(cost_tbl), result))
}
m1 <- sim(P, n, N)
p1 <- data.frame(c = m1[[2]]) %>%
ggplot() +
geom_histogram(aes(c)) +
theme_bw() +
labs(x = "Cost of Lunch ($)",
y = "",
title = paste("Total Lunch Cost of the Year (N=", N, ")", sep=""))
N <- 10000
n <- 364
costs <- c(9, 7, 5)
n_day_cost <- function(x){
sum <- 0
for(i in x){
sum <- sum + costs[i]
}
return(sum)
}
sim <- function(P, n, N) {
  combos <- matrix(ncol = n+1)
  for(i in 1:N) {
    lunch <- c(3)
    prev <- 3      # most recent day (day 0 is pizza)
    prev_2 <- 0    # two days ago (0 = no such day yet)
    prev_3 <- 0    # three days ago
    count <- 0
    while(count < n) {
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      # resample whenever the candidate would make four of the same item in a row
      while(choice == prev_3 & prev_3 == prev_2 & prev_2 == prev){
        choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      }
      lunch <- append(lunch, choice)
      prev_3 <- prev_2
      prev_2 <- prev
      prev <- choice
      count <- count + 1
    }
    combos <- rbind(combos, lunch)
  }
  combos <- combos[-1,]
  result <- apply(combos, 1, n_day_cost)
  cost_tbl <- table(result)
  return(list(prop.table(cost_tbl), result))
}
m2 <- sim(P, n, N)
p2 <- data.frame(c = m2[[2]]) %>%
ggplot() +
geom_histogram(aes(c)) +
theme_bw() +
labs(x = "Cost of Lunch ($)",
y = "",
title = paste("Total Lunch Cost of the Year Given No Same of Three in a Row (N=", N, ")", sep=""))
ggplot() +
geom_histogram(data=data.frame(c = m1[[2]]), aes(c), fill = 'blue', alpha=.7) +  # blue: unconstrained Y
geom_histogram(data=data.frame(c = m2[[2]]), aes(c), fill = 'red', alpha=.7) +   # red: Y given no item more than three days in a row
theme_bw() +
labs(x = "Cost of Lunch ($)",
y = "",
title = paste("Total Lunch Cost of the Year (N=", N, ")", sep=""))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
EV1 <- round(mean(m1[[2]]), 2)
EV2 <- round(mean(m2[[2]]), 2)
print(paste("$", EV1, sep=""))
## [1] "$2288.38"
print(paste("$", EV2, sep=""))
## [1] "$2313.78"
- From these histograms we can see that when we cannot select the same food item more than three times in a row, we spend more on lunch over the year (a mean of about $2314 versus $2288). This makes sense because pizza is both the cheapest item and the most likely choice after any item (probability 0.5 in every row), so it is the item most often repeated; the restriction therefore pushes more of the expensive items into the year.
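As a rough check on the unconstrained yearly mean (a sketch, not part of the original code, assuming the chain is essentially at its stationary distribution after the first few days):
pi_stat <- Re(eigen(t(P))$vectors[, 1])   # left eigenvector of P for eigenvalue 1
pi_stat <- pi_stat / sum(pi_stat)         # normalize to a probability vector: (3/22, 8/22, 11/22)
5 + 364 * sum(pi_stat * costs)            # about $2288, close to the simulated mean for Y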