# R simulation of waiting time statistics
# based on a script by Jon Baron, in review of Sun and Wang, Gambler's fallacy, hot hand belief, and the time of patterns
# extended to simulate mean and variances of interarrival times, vary probability of heads, Frequency-Delay (Sharpe) ratio, plots

# NOTE(review): removes every (non-hidden) object from the environment the script is run in,
# so sourcing this file wipes any existing workspace variables before the simulation starts.
rm(list = ls())

###### parameters

# N = length of the global sequence to be searched.
# When simulating waiting time, if a sequence of length N does not contain the desired pattern, search is stopped at the end, 
# and the first arrival time is recorded as N.
# Setting the limit for N may not be necessary, but it may save simulation time in case of "rare" patterns.
# To approach analytical precision, N needs to be reasonably large, say, 5 times the waiting time to be simulated.
# However, N does not need to be very large, since analytically, p_first (probability of first arrival) diminishes quickly as N grows.
# e.g., when p_head = .5, summing (p_first * n) from n=1 to N=300 gives WT(hhhh) = 29.9944, WT(hhht) = 15.9999.
# Analytically, these values are WT(hhhh) = 30, WT(hhht) = 16.

N <- 1000

# number of simulation runs = number of Global Sequences sampled
# 10k runs may take a few minutes, but >= 100000 is preferred, for precision ~ the 2nd decimal place.
# e.g., at p_head=.5, with N = 1000, NumSim = 100000: one batch simulation gives WT(hhhh) = 30.0023, WT(hhht)=15.96924

NumSim <- 1000

# choose two patterns to simulate (Pattern1 will be plotted in red)
# h / t are logical shorthands so that a pattern reads like a string of coin tosses
h <- TRUE
t <- FALSE

Pattern1 <- c(h, h, h, h)
Pattern2 <- c(h, h, h, t)


###### variable names

# pattern length; at this moment, only patterns of the same length are compared
r <- length(Pattern1)

# Fail fast on settings the rest of the script cannot handle.
# The sliding window always has length r (taken from Pattern1), so a Pattern2 of a
# different length could never be matched and its search loops would not end.
stopifnot(
  is.logical(Pattern1), is.logical(Pattern2),
  !anyNA(Pattern1), !anyNA(Pattern2),
  r >= 1,
  length(Pattern2) == r,
  N >= r,
  NumSim >= 1
)

# default probability of heads, will vary in a For loop
p_head <- .5

# arrays to store results for each simulation run
# (NA_real_ keeps the vectors numeric from the start instead of logical NA)
T_First1 <- T_First2 <- rep(NA_real_, NumSim)  # Time for the First arrival (T*)
T_Inter1 <- T_Inter2 <- rep(NA_real_, NumSim)  # Time for Inter-arrivals (T)

# arrays for statistics that vary with probability of heads (p_head)
# These are filled sparsely: the value for p_head = n/100 is stored at index n,
# all other slots stay NA.
P_Head <- rep(NA_real_, 100)       # probability of heads
WT1 <- WT2 <- rep(NA_real_, 100)   # Waiting Time = E[T*]
MT1 <- MT2 <- rep(NA_real_, 100)   # Mean Time = E[T]
RR1 <- RR2 <- rep(NA_real_, 100)   # Frequency-Delay ratio = 100 /(E[T]* SD(T))

# arrays for statistics that vary with the length of global sequence (N)
# Also sparse: the value for a global sequence of length n is stored at index n.
GL <- rep(NA_real_, 100)                  # Global sequence Length
P_Once1 <- P_Once2 <- rep(NA_real_, 100)  # Probability of occurrence at least Once


###### simulation

#### Mean Time (MT), Waiting Time (WT), and Frequency-Delay ratio (RR), vary with probability of heads (p_head)

# Slide the observation window forward by one toss: drop the oldest outcome and
# append one new independent toss that is heads (TRUE) with probability p.
# win[-1] is used rather than win[2:r] because 2:r counts downwards when r is 1.
advance_window <- function(win, p) {
  c(win[-1], runif(1) < p)
}

# Simulate one global sequence and return the time of first arrival (T*) of
# `pattern`, i.e. the number of tosses made when the most recent length(pattern)
# tosses first equal `pattern`. The search is not continued past `max_len`
# tosses; a sequence that has not produced the pattern by then is recorded as max_len.
first_arrival_time <- function(pattern, p, max_len) {
  win <- runif(length(pattern)) < p  # randomly fill the initial observational window
  elapsed <- length(pattern)         # earliest possible arrival time
  # && (scalar, short-circuit) is the right operator for a loop condition
  while (!identical(win, pattern) && elapsed < max_len) {
    elapsed <- elapsed + 1
    win <- advance_window(win, p)
  }
  elapsed
}

# Simulate `count` successive interarrival times (T) of `pattern` along one
# ongoing sequence that starts right after an occurrence of the pattern.
# Overlapping occurrences count, so an interarrival time of 1 is possible.
interarrival_times <- function(pattern, p, count) {
  gaps <- rep(NA_real_, count)
  win <- pattern  # assume the pattern has just occurred
  for (j in seq_len(count)) {
    win <- advance_window(win, p)  # global sequence grows by one element
    gap <- 1                       # time for next arrival
    while (!identical(win, pattern)) {
      gap <- gap + 1
      win <- advance_window(win, p)
    }
    gaps[j] <- gap
  }
  gaps
}

#**** for loop (vary p_head) Begins
# p_head runs from .25 to .75 in steps of .05 (n is p_head in percent).
# Results for p_head = n/100 are stored at index n of the result vectors.
for (n in seq(25, 75, by = 5)) {
  P_Head[n] <- p_head <- n / 100
  print(p_head)

  ### first arrival - pattern1, one freshly sampled global sequence per run
  for (i in seq_len(NumSim)) {
    T_First1[i] <- first_arrival_time(Pattern1, p_head, N)
  }
  # waiting time of pattern1 E[T*]
  WT1[n] <- mean(T_First1)

  # repeat for pattern2
  for (i in seq_len(NumSim)) {
    T_First2[i] <- first_arrival_time(Pattern2, p_head, N)
  }
  # waiting time of pattern2 E[T*]
  WT2[n] <- mean(T_First2)

  #### interarrival times and mean time
  # mean time of pattern1 E[T], and Frequency-Delay ratio 100 / (E[T] * SD(T))
  T_Inter1 <- interarrival_times(Pattern1, p_head, NumSim)
  MT1[n] <- mean(T_Inter1)
  RR1[n] <- 100 / (mean(T_Inter1) * sd(T_Inter1))

  # repeat for pattern2
  T_Inter2 <- interarrival_times(Pattern2, p_head, NumSim)
  MT2[n] <- mean(T_Inter2)
  RR2[n] <- 100 / (mean(T_Inter2) * sd(T_Inter2))
}
#**** for loop (vary p_head) ENDs



#### probability of occurrence at least once (P_Once) when p_head=.5

# Simulate one global sequence of `len` tosses and report (TRUE/FALSE) whether
# `pattern` occurs in it at least once. Heads (TRUE) come up with probability p.
pattern_occurs_within <- function(pattern, p, len) {
  win <- runif(length(pattern)) < p  # randomly fill the initial observational window
  pos <- length(pattern)             # current position within the global sequence
  # The loop may draw one toss beyond `len`; a match on that extra toss leaves
  # pos > len and is therefore not counted as an occurrence.
  while (!identical(win, pattern) && pos <= len) {
    win <- c(win[-1], runif(1) < p)  # trim off the first element, fill one additional element
    pos <- pos + 1
  }
  pos <= len
}

p_head <- .5
# vary GL from r to 50, step by r (when r=4, the length varies by step = 4).
# The loop uses its own index n as the sequence length and leaves the global
# parameter N untouched, so N keeps the value set in the parameters section.
for (n in seq(r, 50, by = r)) {
  GL[n] <- n
  counter1 <- counter2 <- 0  # number of global sequences that contain the pattern at least once

  for (i in seq_len(NumSim)) {  # sample each global sequence
    if (pattern_occurs_within(Pattern1, p_head, n)) {
      counter1 <- counter1 + 1
    }
    # repeat for pattern2, on an independently sampled sequence
    if (pattern_occurs_within(Pattern2, p_head, n)) {
      counter2 <- counter2 + 1
    }
  }
  # results for sequence length n are stored at index n
  P_Once1[n] <- counter1 / NumSim
  P_Once2[n] <- counter2 / NumSim
}



###### plotting results

# get pattern name as string (shown in legends)
# "h" = TRUE, "t" = FALSE
pattern_label <- function(pattern) {
  paste(ifelse(pattern, "h", "t"), collapse = " ")
}
PName1 <- pattern_label(Pattern1)
PName2 <- pattern_label(Pattern2)

# Write one comparison figure to the PostScript file `file`.
#   x          x values shared by both curves (NA entries are not drawn)
#   y1, y2     values for Pattern1 (red, solid dots) and Pattern2 (blue, open dots)
#   x_range,
#   y_range    axis ranges of the empty plotting frame
#   xlab, ylab axis labels
#   legend_pos legend position keyword, e.g. "topright"
# Pattern2 is drawn first and Pattern1 on top of it.
# The device is closed through on.exit(), so it is also released when one of
# the plotting calls stops with an error.
save_comparison_plot <- function(file, x, y1, y2, x_range, y_range,
                                 xlab, ylab, legend_pos) {
  postscript(file, horizontal = FALSE, onefile = FALSE, width = 6, height = 6, pointsize = 10)
  on.exit(dev.off(), add = TRUE)

  plot(x_range, y_range, type = "n", xlab = xlab, ylab = ylab, cex.lab = 1.2)
  lines(spline(x, y2), col = "blue", lwd = 2, lty = 1)
  lines(spline(x, y1), col = "red", lwd = 2, lty = 1)
  points(x, y2, pch = 21, cex = 1.5, col = "blue", bg = "white")
  points(x, y1, pch = 19, cex = 1.5, col = "red")
  legend(legend_pos, legend = c(PName2, PName1), pch = c(21, 19), col = c("blue", "red"),
         lwd = 2, pt.bg = c("white", "white"), pt.lwd = c(1.5, 1.5), lty = c(1, 1), bty = "n", cex = 1.5)
  invisible(file)
}

### probability at least once ~ N
save_comparison_plot("P_Once_S.eps", GL, P_Once1, P_Once2,
                     x_range = c(r, 45), y_range = c(0, 1),
                     xlab = "N (number of tosses)",
                     ylab = "Probability of Occurrence At Least Once",
                     legend_pos = "bottomright")

### waiting time ~ p_head,
# two patterns are distinguishable around p_head=.5, and WT(HHHH) drops more rapidly
save_comparison_plot("Waiting Time_S.eps", P_Head, WT1, WT2,
                     x_range = c(0.3, 0.7), y_range = c(0, 180),
                     xlab = "Probability of Heads",
                     ylab = "Waiting Time",
                     legend_pos = "topright")

### mean time ~ p_head (in the scale of waiting time plot)
# two patterns are "indifferent" around p_head=.5
save_comparison_plot("Mean Time_S.eps", P_Head, MT1, MT2,
                     x_range = c(0.3, 0.7), y_range = c(0, 180),
                     xlab = "Probability of Heads",
                     ylab = "Mean Time",
                     legend_pos = "topright")

### Frequency-Delay Ratio ~ p_head
# two patterns are "indifferent" around p_head= .61~.62 (values confirmed by analytical results)
save_comparison_plot("Frequency-Delay Ratio_S.eps", P_Head, RR1, RR2,
                     x_range = c(0.3, 0.7), y_range = c(0, 4),
                     xlab = "Probability of Heads",
                     ylab = "Frequency-Delay Ratio 100/(mu*sigma)",
                     legend_pos = "topleft")