# code for # C. Kooperberg, T. G. Fazzio, J. J. Delrow, and T. Tskuiyama (2002) # "Improved background correction for spotted DNA microarrays" # Journal of Computational Biology, 9, 55-66. # Copyright (C) 2001--2002 Charles Kooperberg # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # The text of the GNU General Public License, version 2, is available # as http://www.gnu.org/copyleft or by writing to the Free Software # Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # 1) rewrite organize.data to fit your own data structure # 2) run # > orgdata <- organize.data(mydata) # > bcorrecteddata <- bground(orgdata) # 3) column 1 of bcorrecteddata has the background corrected data # on a log(2) scale (if log2=TRUE) or a regular scale (if log2=FALSE) # for the 635nm channel; column 2 has the same data for the 532nm channel # column 3 contains the quantity in equation (5) of Kooperberg et al. (2002) # for whichever channel this quantity was largest # 4) note that the log2 option doesn't just take the log-base 2 of the data # from when log2=FALSE, as log2(E(X)) does not equal E(log2(X)) # # Charles Kooperberg, 12/11/2002, clk@fhcrc.org # bground <- function(x,log2=TRUE) { dx <- mtopfunct(x,mvarfactor(x),log2)[,c(4,11,18)] if(log2)dx[,1:2] <- dx[,1:2]/log(2) dx <- data.frame(dx) names(dx) <- c("x635","x532","warning") dx } organize.data <- function(datafile) { xx <- datafile[,c(1,2,3,9,11,12,14,18,20,21,23,34,35)] #1#column 1 - block #2#column 2 - column nr #3#column 3 - row nr #4#column 9 - foreground 635nm - median #5#column 11 - foreground 635nm - SD #6#column 12 - background 635nm - median #7#column 14 - background 635nm - SD #8#column 18 - foreground 532nm - median #9#column 20 - foreground 532nm - SD #10#column 21 - background 532nm - median #11#column 23 - background 532nm - SD #12#column 34 - forground pixels #13#column 35 - background pixels yy <- is.na(xx[,1]) for(i in 2:13) yy <- yy+is.na(xx[,i]) xx <- xx[yy==0,] xx } mvarfactor <- function(x) { xmm <- max(x[,1]) b1 <- varaux1(x,6,xmm) b2 <- varaux1(x,10,xmm) c1 <- x[, 7]/sqrt(x[, 13]) c2 <- x[, 11]/sqrt(x[, 13]) m1 <- lm(b1 ~ c1 - 1, weights = 1/(c1 + 1)) m2 <- lm(b2 ~ c2 - 1, weights = 1/(c2 + 1)) c(m1$coef,m2$coef) } varaux1 <- function(x, j,xmm) { uu <- varaux2(x, 1, j) for(i in 2:xmm) uu <- c(uu, varaux2(x, i, j)) uu } varaux2 <- function(x, i, j) { v1 <- x[x[, 1] == i, c(2, 3, j)] mm <- max(c(x[,2],x[,3]))+5 v2 <- rep(-100, mm*mm) v3 <- 1 + v1[, 1] + v1[, 2] * mm v2[v3] <- v1[, 3] mm1 <- mm-1 v4 <- matrix(v2, ncol = mm) v4a <- v4[c(-1, -mm), c(-1, -mm)] v4b <- v4[c(-1, -2), c(-1, -mm)] v4c <- v4[c(-1, -mm), c(-1, -2)] v4d <- v4[c(-mm1, -mm), c(-1, -mm)] v4e <- v4[c(-1, -mm), c(-mm1, -mm)] v4x <- cbind(as.vector(v4a), as.vector(v4b), as.vector(v4c), as.vector(v4d), as.vector(v4e)) v4x[v4x > -100] <- v4x[v4x > -100] + 1 v4x[v4x == -100] <- 0 v5 <- apply(v4x, 1, sum) v6 <- apply(v4x != 0, 1, sum) v5 <- v5/v6 v7 <- v4x - v5 v7[v4x == 0] <- 0 v8 <- apply(v7^2, 1, sum)/(v6 - 1) v9 <- matrix(0, ncol = mm, nrow = mm) v9[2:mm1, 2:mm1] <- v8 v9[v4 <= 0] <- 0 v10 <- as.vector(v9[v4 > 0]) sqrt(v10) } mtopfunct <- function(x,mx,log2) { y1 <- x[, c(4, 5, 12, 6, 7, 13)] y2 <- x[, c(8, 9, 12, 10, 11, 13)] y1[, c(2, 5)] <- y1[, c(2, 5)] * mx[1] y2[, c(2, 5)] <- y2[, c(2, 5)] * mx[2] t1 <- vtopaux1(y1,log2) t2 <- vtopaux1(y2,log2) x2 <- cbind(t1, t2) vv <- x2[, 7] vv[x2[, 14] > vv] <- x2[x2[, 14] > vv, 14] vv[vv < 0] <- 0 x2 <- cbind(x2, x2[, 4] - x2[, 11], sqrt(x2[, 5]^2 + x2[, 12]^2), 1 * (x2[ , 6] + x2[, 13] > 0), vv) x2 } vtopaux1 <- function(x,log2) { ww7 <- rep(0,32) yy7 <- rep(0,32) ww7[1 ]<- 0.00178328072169643; yy7[1 ]<- 0.99930504173577217; ww7[2 ]<- 0.00414703326056247; yy7[2 ]<- 0.99634011677195533; ww7[3 ]<- 0.00650445796897836; yy7[3 ]<- 0.99101337147674429; ww7[4 ]<- 0.00884675982636395; yy7[4 ]<- 0.98333625388462598; ww7[5 ]<- 0.01116813946013113; yy7[5 ]<- 0.97332682778991098; ww7[6 ]<- 0.01346304789671864; yy7[6 ]<- 0.96100879965205377; ww7[7 ]<- 0.01572603047602472; yy7[7 ]<- 0.94641137485840277; ww7[8 ]<- 0.01795171577569734; yy7[8 ]<- 0.92956917213193957; ww7[9 ]<- 0.02013482315353021; yy7[9 ]<- 0.91052213707850282; ww7[10]<- 0.02227017380838325; yy7[10]<- 0.88931544599511414; ww7[11]<- 0.02435270256871087; yy7[11]<- 0.86599939815409277; ww7[12]<- 0.02637746971505466; yy7[12]<- 0.84062929625258032; ww7[13]<- 0.02833967261425948; yy7[13]<- 0.81326531512279754; ww7[14]<- 0.03023465707240248; yy7[14]<- 0.78397235894334139; ww7[15]<- 0.03205792835485155; yy7[15]<- 0.75281990726053194; ww7[16]<- 0.03380516183714161; yy7[16]<- 0.71988185017161088; ww7[17]<- 0.03547221325688239; yy7[17]<- 0.68523631305423327; ww7[18]<- 0.03705512854024005; yy7[18]<- 0.64896547125465731; ww7[19]<- 0.03855015317861563; yy7[19]<- 0.61115535517239328; ww7[20]<- 0.03995374113272034; yy7[20]<- 0.57189564620263400; ww7[21]<- 0.04126256324262353; yy7[21]<- 0.53127946401989457; ww7[22]<- 0.04247351512365359; yy7[22]<- 0.48940314570705296; ww7[23]<- 0.04358372452932345; yy7[23]<- 0.44636601725346409; ww7[24]<- 0.04459055816375657; yy7[24]<- 0.40227015796399163; ww7[25]<- 0.04549162792741814; yy7[25]<- 0.35722015833766813; ww7[26]<- 0.04628479658131442; yy7[26]<- 0.31132287199021097; ww7[27]<- 0.04696818281621002; yy7[27]<- 0.26468716220876742; ww7[28]<- 0.04754016571483031; yy7[28]<- 0.21742364374000708; ww7[29]<- 0.04799938859645831; yy7[29]<- 0.16964442042399283; ww7[30]<- 0.04834476223480295; yy7[30]<- 0.12146281929612056; ww7[31]<- 0.04857546744150343; yy7[31]<- 0.07299312178779904; ww7[32]<- 0.04869095700913972; yy7[32]<- 0.02435029266342443; ww7 <- c(ww7,ww7) yy7 <- (c(yy7,-yy7))/2+0.5 yy700 <- yy7 for(i in 1:99)yy700 <- c(yy700,yy7+i) xx <- x[, 1] xy <- x[, 4] xs1 <- x[, 2]/sqrt(x[, 3]) xs2 <- x[, 5]/sqrt(x[, 6]) t1 <- rep(0, length(xx)) t1 <- cbind(t1, t1, t1, t1, t1, t1, t1) for(i in 1:length(xx)) { t1[i, ] <- vtopaux2(xx[i], xy[i], xs1[i], xs2[i],ww7,yy700,log2) if(i/1000 == floor(i/1000)) print(i) } t1 } vtopaux2 <- function(x, y, sig1, sig2,ww7,yy7x,log2) { upp <- 10 * sig1 + x nl <- 11 wwh <- rep(ww7,nl) yyh <- (yy7x[1:(nl*length(ww7))])/nl b1 <- yyh * upp x1 <- vtopaux4(x, y, sig1, sig2) c1 <- (vtopaux3(b1, x, y, sig1, sig2) * upp)/x1 c1w <- c1*wwh mwwh <- mean(wwh) m1 <- mean(c1w)/mwwh m1w <- m1*mwwh m2 <- mean(c1w * b1)/m1w tmp <- b1-m2 m3 <- mean(c1w * tmp * tmp)/m1w if(log2){ b2 <- log(b1) } else { b2 <- b1 } m4 <- mean(c1w * b2)/(m1*mwwh) tmp <- b2-m4 m5 <- sqrt(mean(c1w * tmp * tmp)/m1w) m6 <- 1 * ((abs(m1) - 1) > 0.05) m7 <- (y - x)/sqrt(sig1^2 + sig2^2) c(m1, m2, sqrt(m3), m4, m5, m6, m7) } vtopaux3 <- function(a, x, y, sig1, sig2) { u <- x - a v <- y s <- sig1^2 t <- sig2^2 d10 <- 1/sqrt(s + t) * dnorm((u - v)/sqrt(s + t)) d4 <- (u * t + v * s)/(s + t) d5 <- sqrt((s * t)/(s + t)) d6 <- d10 * (1 - pnorm(0, d4, d5)) d6 } vtopaux4 <- function(x, y, sig1, sig2) { upp <- 4 * sig2 + y b1 <- (((1:10000) - 0.5) * upp)/10000 m1 <- mean(vtopaux5(b1, x, y, sig1, sig2)) * upp m1 } vtopaux5 <- function(x, y1, y2, sig1, sig2) { (1 - pnorm((x - y1)/sig1)) * dnorm((x - y2), sd = sig2) }