# R simulation of waiting time statistics # based on a script by Jon Baron, in review of Sun and Wang, Gambler's fallacy, hot hand belief, and the time of patterns # extended to simulate mean and variances of interarrival times, vary probability of heads, Frequency-Delay (Sharpe) ratio, plots rm(list = ls()) ###### parameters # N = length of the global sequence to be searched. # When simulating waiting time, if a sequence of length N does not contain the desired pattern, search is stopped at the end, # and the first arrival time is recorded as N. # Setting the limit for N may not be necessary, but it may save simulation time in case of "rare" patterns. # To approach analytical precision, N needs to be reasonably large, say, 5 times of the waiting time to be simulated. # However, N does not need to be vary large, since analytically, p_first (probability of first arrival) diminishes quickly as N grows. # e.g., when p_head = .5, integration of (p_first * n), from n=1 to N=300, obtains WT(hhhh) = 29.9944, WT(hhht)=15.9999. # analytically, these values are WT(hhhh) = 30, WT(hhht)=16. N <- 1000 # number of simulation runs = number of Global Sequences sampled # 10k runs may take a few minutes, but >= 100000 is preferred, for precision ~ the 2nd decimal place. # e.g., at p_head=.5, with N = 1000, NumSim = 100000: one batch simulation gives WT(hhhh) = 30.0023, WT(hhht)=15.96924 NumSim <- 1000 # choose two patterns to simulate (Pattern1 will be plotted in red) h <- TRUE t <- FALSE Pattern1 <- c(h, h, h, h) Pattern2 <- c(h, h, h, t) ###### variable names # pattern length, at this moment, only patterns of the same length are compared r <- length(Pattern1) # default probability of heads, will vary in a For loop p_head <- .5 # arrays to store results for each simulation run T_First1 <- T_First2 <- rep(NA, NumSim) # Time for the First arrival (T*) T_Inter1 <- T_Inter2 <- rep(NA, NumSim) # Time for Inter-arrivals (T) # arrays for statistics that vary with probability of heads (p_head) P_Head <- rep(NA, 100) # probability of heads WT1 <- WT2 <- rep(NA, 100) # Waiting Time = E[T*] MT1 <- MT2 <- rep(NA, 100) # Mean Time = E[T] RR1 <- RR2 <- rep(NA, 100) # Frequency-Delay ratio = 100 /(E[T]* SD(T)) # arrays for statistics that vary with the length of global sequence (N) GL <- rep(NA, 100) # Global sequence Length P_Once1 <- P_Once2 <- rep(NA, 100) # Probability of occurrence at least Once ###### simulation #### Mean Time (MT), Waiting Time (WT), and Frequency-Delay ratio (RR), vary with probability of heads (p_head) #**** for loop (vary p_head) Begins for (n in seq(25, 75, by=5)) { # vary p_head from .3 to .7, by 0.05 P_Head[n] <- p_head <- n/100 print (p_head) ### first arrival - pattern1 for (i in 1:NumSim) { # sample each global sequence Window <- runif(r)