#----------------------------------------------------------
#  Simple application of the Metropolis algoritm
#  -> infer m,c,sigma in a linear fit
#
#  GdA 9/4/2016
#-----------------------------------------------------------

# to pause between plots
pause <- function() { cat ("\n >> Press Enter to continue\n"); scan() }

# 'likelihood' 
lik <- function(m, c, sigma) {
   # print(m); print(c); print(sigma)
  #print(sxy)  
   sigma^(-sxy$n) * exp(- sxy$n / (2*sigma^2) *
                         ( sxy$y2.m + m^2 * sxy$x2.m - 2 * m * sxy$xy.m
                           + c^2 - 2 * c * sxy$y.m + 2 * m * c * sxy$x.m) )
}

# very rough prior
prior <- function(m, c, sigma) ifelse(sigma > 0, 1, 0)

# not normalized posterior
nn.post <- function(m, c, sigma) lik(m, c, sigma) * prior(m, c, sigma) 

# proposal function
prop <- function(m, c, sigma) {
   list( m=rnorm(1,m,0.5), c=rnorm(1,c,0.5), sigma=rnorm(1,sigma,0.5) )
}

# Metropolis algorithm for linear fit
fit.metropolis <- function(x, y, n, theta0, nn.post, prop) {
    
  # calculate all averages once  [Note "<<-"  Very dangerous!!]
  sxy <<- list( n = length(x), x.m = mean(x), x2.m = mean(x^2),
                 xy.m = mean(x*y), y.m = mean(y), y2.m=mean(y^2) )
    
  m = c = sigma = rep(0,n)
    
  m[1] <- theta0$m
  c[1] <- theta0$c
  sigma[1] <- theta0$sigma

  for (t in 2:n) {
    theta.p <- prop (m[t-1], c[t-1], sigma[t-1])
    A <- nn.post(theta.p$m, theta.p$c, theta.p$sigma) /
         nn.post(m[t-1], c[t-1], sigma[t-1])
    if(is.nan(A)) A <- 0
    accept <- runif(1) <= A
    m[t] <- ifelse (accept, theta.p$m, m[t-1])
    c[t] <- ifelse (accept, theta.p$c, c[t-1])
    sigma[t] <- ifelse (accept, theta.p$sigma, sigma[t-1])
  }
  return( list(m=m, c=c, sigma=sigma) )
}

# simulate a straight line
m.true = 2
c.true = 3
sigma.true = 1
n.p <- 10
x <- 3 + c(1:n.p) 
y <- c.true + m.true * x + rnorm(n.p, 0, sigma.true)

plot(x, y, xlim=c(0, max(x)*1.4), ylim=c(0, max(y)*1.4), pch=19, col='blue' )

# initial state
theta0 <- list(m=2, c=7, sigma=2)

n <- 30000
theta <-  fit.metropolis (x, y, n, theta0, nn.post, prop) 

n0 <- 1000   # first events to discard
n1 <- n0 + 1
E.c   <- mean(theta$c[n1:n])
std.c <- sd(theta$c[n1:n])
cat(sprintf("E[c] = %f\n", E.c ))
cat(sprintf("std[c] = %f\n\n", std.c ))

E.m   <- mean(theta$m[n1:n])
std.m <- sd(theta$m[n1:n])
cat(sprintf("E[m] = %f\n", E.m ))
cat(sprintf("std[m] = %f\n\n", std.m ))

rho.mc   <- cor(theta$m[n1:n], theta$c[n1:n])
cat(sprintf("rho(m,c) = %f\n", rho.mc ))

E.sigma   <- mean(theta$sigma[n1:n])
std.sigma <- sd(theta$sigma[n1:n])
cat(sprintf("E[sigma] = %f\n", E.sigma ))
cat(sprintf("std[sigma] = %f\n\n", std.sigma ))

abline(E.c, E.m, col='blue')


# interpolations and extrapolations (using MCMC chain)
xf     <- seq(0, max(x)*1.4, len=10)   # equally spaced values 
E.yf   <- rep(0, length(xf) )
std.yf <- rep(0, length(xf) )
for (i in 1:length(xf)) {
   j = 0
   y.chain <- rep(0, n-n0) 
   for (t in n1:n) {
      j <- j + 1 
      y.chain[j] <- theta$m[t]*xf[i] + theta$c[t] + rnorm(1,0, theta$sigma[t])
   }
   E.yf[i] <- mean(y.chain)
   std.yf[i] <- sd(y.chain)
}
points(xf, E.yf, pch=4, col='blue')
points(xf, E.yf - std.yf, ty='l', lty=2, col='blue')
points(xf, E.yf + std.yf, ty='l', lty=2, col='blue')
points(xf, E.yf - 2*std.yf, ty='l', lty=2, col='blue')
points(xf, E.yf + 2*std.yf, ty='l', lty=2, col='blue')


# fit with lm() [least squares] ------------------------------------
lm.res <- lm(y~x)
abline(lm.res, col='red', lty=2)
cat(sprintf("Result from lm(): \n"))
print(lm.res)
sigma.residuals <- sqrt( sum(as.vector(lm.res$residuals)^2) / (n.p - 2) )
cat(sprintf("sigma extimated from residuals: %f\n\n", sigma.residuals))

# interpolations and extrapolations (from lm() and residuals)
yf.lm <- lm.res$coefficients[2] * xf + lm.res$coefficients[1]
points(xf, yf.lm - sigma.residuals, ty='l', lty=2, col='red')
points(xf, yf.lm + sigma.residuals, ty='l', lty=2, col='red')
points(xf, yf.lm - 2*sigma.residuals, ty='l', lty=2, col='red')
points(xf, yf.lm + 2*sigma.residuals, ty='l', lty=2, col='red')


#pause()
#par(mfrow=c(3,1))
#plot(theta$m, ylim=c(0,max(theta$m)), pch=19, cex=0.5)
#plot(theta$c, ylim=c(0,max(theta$c)),  pch=19, cex=0.5)
#plot(theta$sigma, ylim=c(0,max(theta$sigma)),  pch=19, cex=0.5)
#par(mfrow=c(1,1))