library(tidyverse)
library(expm)
library(kableExtra)
library(lattice)
library(gridExtra)
Problem Background
state_names <- c("sandwich", "burrito", "pizza")
# one-step transition matrix; rows and columns follow the order of state_names
P <- rbind(
  c(0, 0.5, 0.5),
  c(0.1, 0.4, 0.5),
  c(0.2, 0.3, 0.5)
)
pi_0 <- c(0, 0, 1)  # initial distribution: pizza on day 0 with probability 1
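For reference, the exact marginal distributions implied by P can also be computed directly, which gives a benchmark for the simulations below. A minimal sketch using the %^% operator from expm (exact_steps is a name introduced here, not part of the original code):
# Exact day-k distributions over (sandwich, burrito, pizza), starting from pizza on day 0.
exact_steps <- t(sapply(1:4, function(k) pi_0 %*% (P %^% k)))
rownames(exact_steps) <- paste("day", 1:4)
colnames(exact_steps) <- state_names
round(exact_steps, 4)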
Write code to set up and run a simulation to investigate the following.
Question 1
X_4
N <- 1000
n <- 4
costs <- c(9, 7, 5)
sim <- function(P, n, N) {
  combos <- matrix(ncol = 1)
  for(i in 1:N) {
    prev <- 3                      # day 0 is always pizza
    for(j in 1:n){
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      prev <- choice
    }
    combos <- rbind(combos, prev)  # keep only the day-n state
  }
  combos <- combos[-1,]            # drop the NA row used to initialize the matrix
  # map states to lunch costs: sandwich $9, burrito $7, pizza $5
  result <- replace(combos, combos == 1, 9)
  result <- replace(result, result == 2, 7)
  result <- replace(result, result == 3, 5)
  cost_tbl <- table(result)
  return(list(prop.table(cost_tbl), result))
}
m <- sim(P, n, N)
data.frame(c = m[[2]]) %>%
ggplot() +
geom_bar(aes(c)) +
theme_bw() +
scale_x_continuous(breaks=as.numeric(names(m[[1]]))) +
labs(x = "Cost of Lunch ($)",
y = "",
title = paste("Lunch Cost of Friday (N=", N, ")", sep=""))
- This bar plot shows that pizza is the most likely lunch on Friday while a sandwich is the least likely. This makes sense because the probability of choosing pizza after any lunch item is 0.5, the largest transition probability in the chain, while the probability of choosing a sandwich is at most 0.2 (0.1 after a burrito, 0.2 after pizza, and 0 after a sandwich).
Marginal Distribution:
kable(m[[1]], col.names = c('Cost', 'Probability'))
Expected Value:
EV <- round(mean(m[[2]]), 2)
print(paste("$", EV, sep=""))
## [1] "$6.28"
Standard Deviation:
SD <- round(sd(m[[2]]), 2)
print(paste("$", SD, sep=""))
## [1] "$1.44"
T
N <- 1000
sim <- function(P, N) {
  result <- matrix(ncol = 1)
  for(i in 1:N) {
    prev <- 3                      # day 0 is always pizza
    count <- 0
    while(prev != 1) {             # step the chain until the first sandwich
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      prev <- choice
      count <- count + 1
    }
    result <- rbind(result, count) # T = number of days until the first sandwich
  }
  result <- result[-1,]            # drop the NA row used to initialize the matrix
  day_tbl <- table(result)
  return(list(prop.table(day_tbl), result))
}
m <- sim(P, N)
data.frame(c = m[[2]]) %>%
ggplot() +
geom_histogram(aes(c)) +
theme_bw() +
labs(x = "Days",
y = "",
title = paste("Days Until First Sandwich (N=", N, ")", sep=""))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
- This histogram is right skewed with a center of about 5 days, so the first sandwich tends to come early rather than late. In other words, you will most likely have eaten a sandwich within the first two five-day weeks.
Marginal Distribution:
kable(m[[1]], col.names = c('Days', 'Probability'))
Days | Probability |
---|---|
1 | 0.202 |
2 | 0.118 |
3 | 0.124 |
4 | 0.085 |
5 | 0.078 |
6 | 0.060 |
7 | 0.050 |
8 | 0.047 |
9 | 0.037 |
10 | 0.025 |
11 | 0.037 |
12 | 0.024 |
13 | 0.020 |
14 | 0.011 |
15 | 0.018 |
16 | 0.013 |
17 | 0.008 |
18 | 0.007 |
19 | 0.007 |
20 | 0.006 |
21 | 0.002 |
22 | 0.003 |
23 | 0.001 |
24 | 0.005 |
25 | 0.004 |
26 | 0.002 |
27 | 0.001 |
29 | 0.001 |
30 | 0.001 |
31 | 0.001 |
36 | 0.001 |
37 | 0.001 |
Expected Value:
EV <- round(mean(m[[2]]), 2)
print(paste(EV, " days", sep=""))
## [1] "5.9 days"
Standard Deviation:
SD <- round(sd(m[[2]]), 4)
print(paste(SD, " days", sep=""))
## [1] "5.4251 days"
V
N <- 1000
n <- 4
burrito_count <- function(x){
sum(x == 2)
}
sim <- function(P, n, N) {
  combos <- matrix(ncol = n)
  for(i in 1:N) {
    lunch <- c()
    prev <- 3                      # day 0 is always pizza
    for(j in 1:n){
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      lunch <- append(lunch, choice)
      prev <- choice
    }
    combos <- rbind(combos, lunch) # one simulated week (days 1 through n) per row
  }
  combos <- combos[-1,]            # drop the NA row used to initialize the matrix
  result <- apply(combos, 1, burrito_count)  # V = number of burrito days in the week
  count_tbl <- table(result)
  return(list(prop.table(count_tbl), result))
}
m <- sim(P, n, N)
data.frame(c = m[[2]]) %>%
ggplot() +
geom_bar(aes(c)) +
theme_bw() +
labs(x = "Days",
y = "",
title = paste("Days Burrito was Chosen within a Week (N=", N, ")", sep=""))
- This bar plot is slightly right skewed, with a center of around 1.5 days. In most weeks we eat a burrito once or twice, and occasionally not at all.
Marginal Distribution:
kable(m[[1]], col.names = c('Days', 'Probability'))
Days | Probability |
---|---|
0 | 0.202 |
1 | 0.350 |
2 | 0.308 |
3 | 0.116 |
4 | 0.024 |
Expected Value:
EV <- round(mean(m[[2]]), 2)
print(paste(EV, " days", sep=""))
## [1] "1.41 days"
Standard Deviation:
SD <- round(sd(m[[2]]), 4)
print(paste(SD, " days", sep=""))
## [1] "1.0114 days"
W
N <- 1000
n <- 4
costs <- c(9, 7, 5)
n_day_cost <- function(x){
sum <- 5 # day 0 is always pizza ($5)
for(i in x){
sum <- sum + costs[i]
}
return(sum)
}
sim <- function(P, n, N) {
combos <- matrix(ncol = n)
for(i in 1:N) {
lunch <- c()
prev <- 3
for(j in 1:n){
choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
lunch <- append(lunch, choice)
prev <- choice
}
combos <- rbind(combos, lunch)
}
combos <- combos[-1,]
result <- apply(combos, 1, n_day_cost)
cost_tbl <- table(result)
return(list(prop.table(cost_tbl), result))
}
m <- sim(P, n, N)
data.frame(c = m[[2]]) %>%
ggplot() +
geom_bar(aes(c)) +
theme_bw() +
scale_x_continuous(breaks=as.numeric(names(m[[1]]))) +
labs(x = "Cost of Lunch ($)",
y = "",
title = paste("Total Lunch Cost of Week (N=", N, ")", sep=""))
- This bar plot is roughly normal with a center at about $30. This tells us that we spend roughly $30 on lunch each week, typically varying by about $3 from week to week.
Marginal Distribution:
kable(m[[1]], col.names = c('Cost', 'Probability'))
Cost | Probability |
---|---|
25 | 0.058 |
27 | 0.147 |
29 | 0.264 |
31 | 0.287 |
33 | 0.175 |
35 | 0.058 |
37 | 0.011 |
Expected Value:
EV <- round(mean(m[[2]]), 2)
print(paste("$", EV, sep=""))
## [1] "$30.18"
Standard Deviation:
SD <- round(sd(m[[2]]), 2)
print(paste("$", SD, sep=""))
## [1] "$2.62"
Question 2
X_4 and X_5
N <- 1000
n <- 5
costs <- c(9, 7, 5)
sim <- function(P, n, N) {
combos <- matrix(ncol = n)
for(i in 1:N) {
lunch <- c()
prev <- 3
for(j in 1:n){
choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
lunch <- append(lunch, choice)
prev <- choice
}
combos <- rbind(combos, lunch)
}
combos <- combos[-1,]
result <- replace(combos, combos == 1, 9)
result <- replace(result, result == 2, 7)
result <- replace(result, result == 3, 5)
result <- data.frame(X_4 = result[, 4], X_5 = result[, 5])
joint_dist <- table(result)
return(list(cor(result)[1, 2], joint_dist))
}
m <- sim(P, n, N)
levelplot(m[[2]], col.regions=heat.colors(100, rev=TRUE),
xlab = "Cost of Lunch Friday ($)",
ylab = "Cost of Lunch Saturday ($)",
main = paste("Lunch Cost on Friday and Saturday (N = ", N, ")",
sep=""))
print(paste("Correlation between X_4 and X_5:", round(m[[1]], 4)))
## [1] "Correlation between X_4 and X_5: -0.064"
- In this heat map, the most common pair is (5, 5), pizza on both days, and the probability drops as the pair of prices gets higher. With a correlation of about -0.06, there is essentially no linear relationship between the cost on day 4 and the cost on day 5. Even though consecutive days are dependent, the correlation is weak because the rows of P are fairly similar, so the day-5 distribution changes little with the day-4 state.
kable(prop.table(m[[2]]))
X_4 \ X_5 | 5 | 7 | 9 |
---|---|---|---|
5 | 0.254 | 0.157 | 0.101 |
7 | 0.179 | 0.160 | 0.036 |
9 | 0.047 | 0.066 | 0.000 |
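For comparison (a sketch, not part of the original code; exact_joint is a name introduced here), the exact joint distribution has a closed form, since P(X_4 = i, X_5 = j) = (pi_0 P^4)_i * P[i, j]:
pi_4 <- as.vector(pi_0 %*% (P %^% 4))
exact_joint <- sweep(P, 1, pi_4, `*`)               # row i scaled by P(X_4 = state i)
dimnames(exact_joint) <- list(X_4 = costs, X_5 = costs)
round(exact_joint, 3)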
T and V
N <- 1000
n <- 4
burrito_count <- function(x){
sum(x == 2)
}
sim <- function(P, n, N) {
  counts <- matrix(ncol = 1)
  combos <- matrix(ncol = n+1)
  for(i in 1:N) {
    lunch <- c(3)            # day 0 is always pizza
    prev <- 3
    count <- 0
    fin_count <- NA          # T: day of the first sandwich
    # keep stepping until the week is filled AND a sandwich has occurred
    while(is.na(fin_count) | count < n) {
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      count <- count + 1
      if(count <= n) {
        lunch <- append(lunch, choice)     # record only the days within the week
      }
      if(choice == 1 & is.na(fin_count)) {
        fin_count <- count                 # first sandwich day
      }
      prev <- choice                       # always advance the chain from the actual state
    }
    combos <- rbind(combos, lunch)
    counts <- rbind(counts, fin_count)
  }
  combos <- combos[-1,]
  counts <- counts[-1,]
  result <- apply(combos, 1, burrito_count)  # V = number of burrito days in the week
  result <- data.frame(T = counts, V = result)
  joint_dist <- table(result)
  return(list(result, joint_dist))
}
m <- sim(P, n, N)
levelplot(m[[2]], col.regions=heat.colors(100, rev=TRUE),
xlab = "Days Until First Sandwich",
ylab = "Days Burrito was Chosen within a Week",
main = paste("Join Distribution for T and V (N = ", N, ")",
sep=""))
print(paste("Correlation between T and V:", round(cor(m[[1]])[1,2], 4)))
## [1] "Correlation between T and V: 0.125"
- This heat map shows us that if the first sandwich comes early (within 1 to 4 days), we tend to have eaten one or two burritos that week. With a correlation of only about 0.13, there is at most a weak relationship between the day of the first sandwich and the number of burritos eaten in the week. The slight positive association makes sense because a sandwich is more likely to follow pizza (0.2) than a burrito (0.1), so weeks with more burrito days tend to push the first sandwich a bit later.
kable(prop.table(m[[2]]))
T \ V | 0 | 1 | 2 | 3 | 4 |
---|---|---|---|---|---|
1 | 0.024 | 0.082 | 0.077 | 0.030 | 0.000 |
2 | 0.019 | 0.062 | 0.038 | 0.005 | 0.000 |
3 | 0.018 | 0.052 | 0.024 | 0.006 | 0.000 |
4 | 0.026 | 0.039 | 0.030 | 0.001 | 0.000 |
5 | 0.008 | 0.026 | 0.028 | 0.016 | 0.003 |
6 | 0.007 | 0.027 | 0.017 | 0.010 | 0.006 |
7 | 0.008 | 0.011 | 0.012 | 0.008 | 0.004 |
8 | 0.003 | 0.017 | 0.013 | 0.011 | 0.000 |
9 | 0.004 | 0.011 | 0.008 | 0.006 | 0.001 |
10 | 0.004 | 0.005 | 0.011 | 0.004 | 0.003 |
11 | 0.005 | 0.009 | 0.006 | 0.006 | 0.002 |
12 | 0.001 | 0.003 | 0.006 | 0.006 | 0.002 |
13 | 0.001 | 0.008 | 0.007 | 0.004 | 0.000 |
14 | 0.003 | 0.009 | 0.005 | 0.007 | 0.002 |
15 | 0.002 | 0.008 | 0.007 | 0.003 | 0.001 |
16 | 0.001 | 0.004 | 0.001 | 0.000 | 0.000 |
17 | 0.000 | 0.002 | 0.003 | 0.002 | 0.001 |
18 | 0.001 | 0.001 | 0.003 | 0.002 | 0.000 |
19 | 0.002 | 0.000 | 0.001 | 0.001 | 0.000 |
20 | 0.000 | 0.002 | 0.003 | 0.001 | 0.000 |
21 | 0.000 | 0.003 | 0.000 | 0.000 | 0.000 |
22 | 0.002 | 0.001 | 0.001 | 0.000 | 0.000 |
23 | 0.000 | 0.000 | 0.002 | 0.001 | 0.001 |
24 | 0.000 | 0.001 | 0.001 | 0.001 | 0.000 |
25 | 0.000 | 0.002 | 0.000 | 0.001 | 0.001 |
26 | 0.000 | 0.000 | 0.000 | 0.001 | 0.000 |
28 | 0.001 | 0.000 | 0.000 | 0.000 | 0.001 |
29 | 0.001 | 0.000 | 0.000 | 0.000 | 0.000 |
32 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
35 | 0.001 | 0.000 | 0.001 | 0.000 | 0.000 |
36 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 |
37 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 |
39 | 0.000 | 0.000 | 0.000 | 0.001 | 0.000 |
41 | 0.000 | 0.001 | 0.001 | 0.000 | 0.000 |
53 | 0.001 | 0.000 | 0.000 | 0.000 | 0.000 |
T and W
N <- 1000
n <- 4
costs <- c(9, 7, 5)
n_day_cost <- function(x){
sum <- 0
for(i in x){
sum <- sum + costs[i]
}
return(sum)
}
sim <- function(P, n, N) {
  counts <- matrix(ncol = 1)
  combos <- matrix(ncol = n+1)
  for(i in 1:N) {
    lunch <- c(3)            # day 0 is always pizza
    prev <- 3
    count <- 0
    fin_count <- NA          # T: day of the first sandwich
    # keep stepping until the week is filled AND a sandwich has occurred
    while(is.na(fin_count) | count < n) {
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      count <- count + 1
      if(count <= n) {
        lunch <- append(lunch, choice)     # record only the days within the week
      }
      if(choice == 1 & is.na(fin_count)) {
        fin_count <- count                 # first sandwich day
      }
      prev <- choice                       # always advance the chain from the actual state
    }
    combos <- rbind(combos, lunch)
    counts <- rbind(counts, fin_count)
  }
  combos <- combos[-1,]
  counts <- counts[-1,]
  result <- apply(combos, 1, n_day_cost)   # W = total cost of the five-day week
  result <- data.frame(T = counts, W = result)
  joint_dist <- table(result)
  return(list(result, joint_dist))
}
m <- sim(P, n, N)
levelplot(m[[2]], col.regions=heat.colors(100, rev=TRUE),
xlab = "Days Until First Sandwich",
ylab = "Total Lunch Cost of Week ($)",
main = paste("Join Distribution for T and W (N = ", N, ")",
sep=""))
print(paste("Correlation between T and W:", round(cor(m[[1]])[1,2], 4)))
## [1] "Correlation between T and W: -0.4421"
- This heat map shows that the most expensive weeks require at least one sandwich (the most expensive item), which only happens when the first sandwich falls within the week. With a correlation of about -0.44, as the number of days until the first sandwich increases, the total cost of the five-day week tends to decrease.
kable(prop.table(m[[2]]))
T \ W | 25 | 27 | 29 | 31 | 33 | 35 |
---|---|---|---|---|---|---|
1 | 0.000 | 0.000 | 0.033 | 0.080 | 0.073 | 0.024 |
2 | 0.000 | 0.000 | 0.029 | 0.065 | 0.028 | 0.005 |
3 | 0.000 | 0.000 | 0.026 | 0.045 | 0.030 | 0.004 |
4 | 0.000 | 0.000 | 0.020 | 0.048 | 0.017 | 0.008 |
5 | 0.010 | 0.019 | 0.016 | 0.007 | 0.001 | 0.000 |
6 | 0.008 | 0.020 | 0.020 | 0.015 | 0.003 | 0.000 |
7 | 0.008 | 0.016 | 0.017 | 0.018 | 0.004 | 0.000 |
8 | 0.006 | 0.013 | 0.017 | 0.005 | 0.001 | 0.000 |
9 | 0.004 | 0.012 | 0.022 | 0.006 | 0.002 | 0.000 |
10 | 0.002 | 0.010 | 0.010 | 0.005 | 0.004 | 0.000 |
11 | 0.007 | 0.016 | 0.010 | 0.008 | 0.000 | 0.000 |
12 | 0.003 | 0.004 | 0.011 | 0.005 | 0.001 | 0.000 |
13 | 0.002 | 0.009 | 0.005 | 0.002 | 0.000 | 0.000 |
14 | 0.000 | 0.005 | 0.004 | 0.005 | 0.001 | 0.000 |
15 | 0.001 | 0.001 | 0.006 | 0.001 | 0.001 | 0.000 |
16 | 0.001 | 0.001 | 0.005 | 0.000 | 0.001 | 0.000 |
17 | 0.001 | 0.002 | 0.003 | 0.000 | 0.000 | 0.000 |
18 | 0.000 | 0.003 | 0.001 | 0.001 | 0.000 | 0.000 |
19 | 0.000 | 0.002 | 0.001 | 0.000 | 0.001 | 0.000 |
20 | 0.000 | 0.002 | 0.003 | 0.000 | 0.000 | 0.000 |
21 | 0.001 | 0.000 | 0.005 | 0.001 | 0.001 | 0.000 |
22 | 0.000 | 0.002 | 0.000 | 0.001 | 0.000 | 0.000 |
23 | 0.000 | 0.002 | 0.002 | 0.001 | 0.000 | 0.000 |
24 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
25 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
26 | 0.000 | 0.002 | 0.001 | 0.000 | 0.000 | 0.000 |
27 | 0.001 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
28 | 0.000 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 |
30 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
36 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
38 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
40 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
51 | 0.000 | 0.000 | 0.001 | 0.000 | 0.000 | 0.000 |
W and V
N <- 1000
n <- 4
costs <- c(9, 7, 5)
burrito_count <- function(x){
sum(x == 2)
}
n_day_cost <- function(x){
sum <- 0
for(i in x){
sum <- sum + costs[i]
}
return(sum)
}
sim <- function(P, n, N) {
combos <- matrix(ncol = n+1)
for(i in 1:N) {
lunch <- c(3)
prev = 3
for(j in 1:n){
choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
lunch <- append(lunch, choice)
prev <- choice
}
combos <- rbind(combos, lunch)
}
combos <- combos[-1,]
W <- apply(combos, 1, n_day_cost)
V <- apply(combos, 1, burrito_count)
result <- data.frame(W = W, V = V)
joint_dist <- table(result)
return(list(result, joint_dist))
}
m <- sim(P, n, N)
levelplot(m[[2]], col.regions=heat.colors(100, rev=TRUE),
xlab = "Total Lunch Cost of Week ($)",
ylab = "Days Burrito was Chosen within a Week",
main = paste("Join Distribution for W and V (N = ", N, ")",
sep=""))
print(paste("Correlation between W and V:", round(cor(m[[1]])[1,2], 4)))
## [1] "Correlation between W and V: 0.4479"
- This heat map shows that only certain weekly totals are possible for a given number of burritos: with S sandwich days and V burrito days among the four simulated days, W = 5 + 9S + 7V + 5(4 - S - V) = 25 + 4S + 2V, which produces the checkerboard pattern. The most common outcome is spending between $27 and $31 and choosing a burrito once or twice. With a correlation of about 0.45, weeks that cost more tend to include more burrito days, which makes sense because burritos are the second most expensive item, so larger totals usually require more burrito (or sandwich) days.
kable(prop.table(m[[2]]))
W \ V | 0 | 1 | 2 | 3 | 4 |
---|---|---|---|---|---|
25 | 0.059 | 0.000 | 0.000 | 0.000 | 0.000 |
27 | 0.000 | 0.141 | 0.000 | 0.000 | 0.000 |
29 | 0.122 | 0.000 | 0.161 | 0.000 | 0.000 |
31 | 0.000 | 0.194 | 0.000 | 0.075 | 0.000 |
33 | 0.028 | 0.000 | 0.116 | 0.000 | 0.017 |
35 | 0.000 | 0.040 | 0.000 | 0.034 | 0.000 |
37 | 0.000 | 0.000 | 0.013 | 0.000 | 0.000 |
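Because a week has only 3^4 = 81 possible paths, the exact joint distribution of (W, V) can also be enumerated directly (a sketch, not part of the original code; paths, probs, W_ex, and V_ex are names introduced here):
paths <- expand.grid(d1 = 1:3, d2 = 1:3, d3 = 1:3, d4 = 1:3)
probs <- apply(paths, 1, function(x) P[3, x[1]] * P[x[1], x[2]] * P[x[2], x[3]] * P[x[3], x[4]])
W_ex <- 5 + apply(paths, 1, function(x) sum(costs[x]))   # weekly cost for each path
V_ex <- apply(paths, 1, function(x) sum(x == 2))         # burrito count for each path
round(tapply(probs, list(W = W_ex, V = V_ex), sum), 3)   # NA marks impossible (W, V) pairs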
Question 3
N <- 1000
burrito_count <- function(x){
sum(x == 2)
}
sim <- function(P, N) {
  combos <- matrix(ncol = 3)
  for(i in 1:N) {
    lunch <- c()
    prev <- 3                      # day 0 is always pizza
    count <- 1
    while(count <= 3){
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      if(choice != 1){             # resample any sandwich pick: days 1-3 must avoid sandwich
        lunch <- append(lunch, choice)
        prev <- choice
        count <- count + 1
      }
    }
    combos <- rbind(combos, lunch)
  }
  combos <- combos[-1,]
  result <- apply(combos, 1, burrito_count)  # V counted over days 1-3 (day 4 is the sandwich)
  count_tbl <- table(result)
  return(list(prop.table(count_tbl), result))
}
m <- sim(P, N)
data.frame(c = m[[2]]) %>%
ggplot() +
geom_bar(aes(c)) +
theme_bw() +
labs(x = "Days",
y = "",
title = paste("Days Burrito was Chosen within a Week Given T = 4 (N=", N, ")", sep=""))
- This bar plot shows us that given that we eat our first sandwich on Friday (and pizza on Monday), we most often eat exactly one burrito that week.
Conditional Distribution:
kable(m[[1]], col.names = c('Days', 'P(V|T=4)'))
Days | P(V \| T = 4) |
---|---|
0 | 0.246 |
1 | 0.408 |
2 | 0.269 |
3 | 0.077 |
Expected Value:
EV <- round(mean(m[[2]]), 2)
print(paste(EV, " days", sep=""))
## [1] "1.18 days"
Standard Deviation:
SD <- round(sd(m[[2]]), 4)
print(paste(SD, " days", sep=""))
## [1] "0.8902 days"
Question 4
- I will compare the following two distributions:
- Y = X_0 + … + X_365, the total lunch cost for the year.
- The conditional distribution of Y given that you never eat the same food item more than three times in a row.
N <- 10000
n <- 364
costs <- c(9, 7, 5)
n_day_cost <- function(x){
sum <- 0
for(i in x){
sum <- sum + costs[i]
}
return(sum)
}
sim <- function(P, n, N) {
combos <- matrix(ncol = n+1)
for(i in 1:N) {
lunch <- c(3)
prev = 3
count <- 0
while(count < n) {
choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
lunch <- append(lunch, choice)
prev <- choice
count <- count + 1
}
combos <- rbind(combos, lunch)
}
combos <- combos[-1,]
result <- apply(combos, 1, n_day_cost)
cost_tbl <- table(result)
return(list(prop.table(cost_tbl), result))
}
m1 <- sim(P, n, N)
p1 <- data.frame(c = m1[[2]]) %>%
ggplot() +
geom_histogram(aes(c)) +
theme_bw() +
labs(x = "Cost of Lunch ($)",
y = "",
title = paste("Total Lunch Cost of the Year (N=", N, ")", sep=""))
N <- 10000
n <- 364
costs <- c(9, 7, 5)
n_day_cost <- function(x){
sum <- 0
for(i in x){
sum <- sum + costs[i]
}
return(sum)
}
sim <- function(P, n, N) {
  combos <- matrix(ncol = n+1)
  for(i in 1:N) {
    lunch <- c(3)
    prev <- 3      # most recent day (day 0 is pizza)
    prev_2 <- 0    # two days ago (0 = no such day yet)
    prev_3 <- 0    # three days ago
    count <- 0
    while(count < n) {
      choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      # resample whenever the candidate would make four of the same item in a row
      while(choice == prev_3 & prev_3 == prev_2 & prev_2 == prev){
        choice <- sample(c(1, 2, 3), size=1, prob=P[prev,])
      }
      lunch <- append(lunch, choice)
      prev_3 <- prev_2
      prev_2 <- prev
      prev <- choice
      count <- count + 1
    }
    combos <- rbind(combos, lunch)
  }
  combos <- combos[-1,]
  result <- apply(combos, 1, n_day_cost)
  cost_tbl <- table(result)
  return(list(prop.table(cost_tbl), result))
}
m2 <- sim(P, n, N)
p2 <- data.frame(c = m2[[2]]) %>%
ggplot() +
geom_histogram(aes(c)) +
theme_bw() +
labs(x = "Cost of Lunch ($)",
y = "",
title = paste("Total Lunch Cost of the Year Given No Same of Three in a Row (N=", N, ")", sep=""))
ggplot() +
geom_histogram(data=data.frame(c = m1[[2]]), aes(c), fill = 'blue', alpha=.7) +  # blue: unconstrained Y
geom_histogram(data=data.frame(c = m2[[2]]), aes(c), fill = 'red', alpha=.7) +   # red: Y given no item more than three days in a row
theme_bw() +
labs(x = "Cost of Lunch ($)",
y = "",
title = paste("Total Lunch Cost of the Year (N=", N, ")", sep=""))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
EV1 <- round(mean(m1[[2]]), 2)
EV2 <- round(mean(m2[[2]]), 2)
print(paste("$", EV1, sep=""))
## [1] "$2288.38"
print(paste("$", EV2, sep=""))
## [1] "$2313.78"
- From these histograms we can see that when we cannot select the same food item more than three times in a row, we spend more on lunch over the year (a mean of about $2314 versus $2288). This makes sense because pizza is both the cheapest item and the most likely choice after any item (probability 0.5 in every row), so it is the item most often repeated; the restriction therefore pushes more of the expensive items into the year.
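As a rough check on the unconstrained yearly mean (a sketch, not part of the original code, assuming the chain is essentially at its stationary distribution after the first few days):
pi_stat <- Re(eigen(t(P))$vectors[, 1])   # left eigenvector of P for eigenvalue 1
pi_stat <- pi_stat / sum(pi_stat)         # normalize to a probability vector: (3/22, 8/22, 11/22)
5 + 364 * sum(pi_stat * costs)            # about $2288, close to the simulated mean for Y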