# Plots that show how the estimation of regression parameters gets better
# with larger sample size.
#   red line:    true regression line (fitted on the whole population)
#   green line:  one estimated regression line from one sample
#   green dots:  the sample that produced the green line
#   black lines: all estimated regression lines (how many lines are drawn per
#                plot is defined by `trials` below)
# for more R-scripts go to www.random-stuff.de

# parameter ----
Pop.N <- 10000                        # population size
n <- c(3, 5, 10, 20, 30, 40, 50, 60)  # sample sizes; one plot per entry
trials <- 1000                        # number of trials per plot

# Two plots per row; ceiling() keeps the grid large enough when length(n) is
# odd. The previous graphics settings are saved so they can be restored at the
# end of the script.
old_par <- par(
  mfrow = c(ceiling(length(n) / 2), 2),
  bty = "n",
  oma = c(0, 0, 0, 0),
  mgp = c(0, 0, 0),
  mar = c(0, 0, 0, 0)
)

# population ----
U <- rnorm(Pop.N, 0, 1)
V <- rnorm(Pop.N, 0, 1)
r <- 0.4  # correlation in the population
# With X = 2 * (U + a * V) and Y = 2 * (U - a * V), U and V independent
# standard normal, cor(X, Y) = (1 - a^2) / (1 + a^2); solving for a gives:
a <- sqrt((1 - r) / (1 + r))
r <- (1 - a^2) / (1 + a^2)  # round trip: recovers r from a
X <- 0 + 2 * (U + V * a)
Y <- 0 + 2 * (U - V * a)

# The population regression does not depend on the sample size, so it is
# fitted once instead of once per plot.
pop.fit <- lm(X ~ Y)

for (j in seq_along(n)) {  # plot-loop
  # empty canvas; Y is on the horizontal axis, X on the vertical axis
  plot(NULL, NULL, xlim = c(-10, 10), ylim = c(-10, 10),
       xaxt = "n", yaxt = "n", xlab = "", ylab = "")

  # sample-draws: one faint black regression line per trial
  for (i in seq_len(trials)) {
    idx <- sample.int(Pop.N, n[j])
    x.Stpr <- X[idx]
    y.Stpr <- Y[idx]
    abline(lm(x.Stpr ~ y.Stpr), col = rgb(0, 0, 0, 0.05))
  }

  # one highlighted sample with its regression line and its data points
  idx <- sample.int(Pop.N, n[j])
  x.Stpr <- X[idx]
  y.Stpr <- Y[idx]
  abline(lm(x.Stpr ~ y.Stpr), col = "green", lwd = 2)
  points(y.Stpr, x.Stpr, col = "green", pch = 19, cex = 0.6)

  # true regression line of the population
  abline(pop.fit, col = "red", lwd = 2)
  mtext(paste("n = ", n[j]), 1, line = -3, cex = 0.9)
}

# restore the graphics parameters that were changed above
par(old_par)