Download Discriminant Analysis: Linear and Quadratic Methods with R and more Study notes Descriptive statistics in PDF only on Docsity! R15: Discriminant Analysis 1. Linear Discriminant Analysis > library(MASS) > > C1 <- mvrnorm(30, c(0,-1), matrix(c(1,.8,.8,1),2)) > C2 <- mvrnorm(30, c(0,1), matrix(c(1,.8,.8,1),2)) > C3 <- rbind(C1,C2) > C <- data.frame(X1=C3[,1], X2=C3[,2] ) > C.class <- as.factor(rep(c("a","b"),each=30)) > plot(X2~X1,pch=unclass(C.class),col=unclass(C.class), data=C) > C.lda <- lda(C, C.class) > C.lda Call: lda(C, C.class) Prior probabilities of groups: a b 0.5 0.5 Group means: X1 X2 a 0.2855102 -0.8219685 b 0.2097435 1.1948177 Coefficients of linear discriminants: LD1 X1 -1.178503 X2 1.689796 > C.pred <- predict(C.lda, C) > plot(X2~X1,pch=unclass(C.class),col=unclass(C.pred$class),data=C) > source("E:\\Stat 5600\\predplot.txt") > predplot(C.lda, C, C.class) > table(Actual=C.class, Classified=C.pred$class) Classified Actual a b a 28 2 b 0 30 > C.lda$scaling LD1 X1 -1.178503 X2 1.689796 > C.disc <- as.matrix(C) %*% C.lda$scaling > stripchart(C.disc~C.class, "jitter") > boxplot(C.disc~C.class) 2. Different Variance Matrices > D1 <- mvrnorm(30, c(0,-1), matrix(c(1,.8,.8,1),2)) > D2 <- mvrnorm(30, c(-1,2), matrix(c(.25,-.2,-.2,.25),2)) > D3 <- rbind(D1,D2) > D <- data.frame(X1=D3[,1], X2=D3[,2] ) > plot(cush.ltp,pch=unclass(cush.class),col=unclass(cush.pred.l$class)) > predplot(cush.lda, cush.ltp, cush.class) > cush.qda <- qda(cush.ltp, cush.class) > cush.pred.q <- predict(cush.qda, cush.ltp) > plot(cush.ltp,pch=unclass(cush.class),col=unclass(cush.pred.q$class)) > predplot(cush.qda, cush.ltp, cush.class) > table(Actual=cush.class, Classified=cush.pred.q$class) Classified Actual a b c a 6 0 0 b 0 9 1 c 0 1 4 5. Prediction – Unknown Cases > cush.ltp.u <- log(Cushings[unk, 1:2]) > cush.pred.u.q <- predict(cush.qda, cush.ltp.u) > predplot(cush.qda, cush.ltp, cush.class) > points(cush.ltp.u, pch="u", col=unclass(cush.pred.u.q$class)) 6. Misclassification Rates: Cross-Validation > cush.qda.cv <- qda(cush.ltp, cush.class, CV=T) > > table(Actual=cush.class, Classified=cush.qda.cv$class) Classified Actual a b c a 5 0 1 b 1 8 1 c 0 2 3 7. Crabs Data 5 physical measurements on 2 forms (species? “blue” / “orange”), both sexes > data(crabs) > crabs[sample(200,10),] sp sex index FL RW CL CW BD 64 B F 14 11.6 11.0 24.6 28.5 10.4 187 O F 37 19.9 16.6 39.4 43.9 17.9 71 B F 21 12.8 11.7 27.1 31.2 11.9 48 B M 48 19.8 14.2 43.2 49.7 18.6 103 O M 3 10.7 8.6 20.7 22.7 9.2 135 O M 35 18.6 13.4 37.8 41.9 17.3 63 B F 13 11.5 11.0 24.7 29.2 10.1 22 B M 22 14.6 11.3 31.9 36.4 13.7 121 O M 21 15.1 11.5 30.9 34.0 13.9 61 B F 11 11.0 9.8 22.5 25.7 8.2 > > sp <- crabs$sp > sex <- crabs$sex > crabs <- crabs[,4:8] > pairs(crabs) > pairs(crabs, col=c("blue","orange")[unclass(sp)], pch=unclass(sp)) 7.1 Principal Components > crabs.pca <- prcomp(crabs,scale=T) > plot(crabs.pca) > pairs(crabs.pca$x) > plot(crabs.pca$x[,2:3], pch=as.character(sex), col=c("blue","orange")[unclass(sp)]) > sex.lda <- lda(crabs,sex) > sex.pred.l <- predict(sex.lda, crabs) > table(Actual = sex, Classified = sex.pred.l$class) Classified Actual F M F 95 5 M 3 97 > sex.lda Call: lda(crabs, sex) Prior probabilities of groups: F M 0.5 0.5 Group means: FL RW CL CW BD F 15.432 13.487 31.360 35.830 13.724 M 15.734 11.990 32.851 36.999 14.337 Coefficients of linear discriminants: LD1 FL -0.17509926 RW -1.61455655 CL 0.90033985 CW -0.27294518 BD 0.08261892 > sex.disc <- as.matrix(crabs) %*% sex.lda$scaling > stripchart(sex.disc~sex, "jitter") > boxplot(sex.disc~sex) 7.4 LDA for all four categories (with holdout validation) > spsex <- factor(paste(sp, sex, sep="")) > spsex [1] BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM : : : [176] OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF Levels: BF BM OF OM > spsex.lda <- lda(crabs,spsex) > spsex.pred.l <- predict(spsex.lda, crabs) > table(Actual = spsex, Classified = spsex.pred.l$class) Classified Actual BF BM OF OM BF 50 0 0 0 BM 5 45 0 0 OF 0 0 47 3 OM 0 0 0 50 > spsex.lda Call: lda(crabs, spsex) Prior probabilities of groups: BF BM OF OM 0.25 0.25 0.25 0.25 Group means: FL RW CL CW BD BF 13.270 12.138 28.102 32.624 11.816 BM 14.842 11.718 32.014 36.810 13.350 OF 17.594 14.836 34.618 39.036 15.632 OM 16.626 12.262 33.688 37.188 15.324 Coefficients of linear discriminants: LD1 LD2 LD3 FL -1.5543139 -0.1951885 1.6667377 RW -0.6247546 -1.5394972 -0.4558782 CL -0.1875489 1.0953923 -0.6807588 CW 1.5156077 -0.6435178 0.6548549 BD -1.3551090 0.5153193 -1.2859743 Proportion of trace: LD1 LD2 LD3 0.6861 0.2995 0.0144 > spsex.disc <- as.matrix(crabs) %*% spsex.lda$scaling > plot(spsex.disc[,1:2], pch=as.character(sex), col=c("cyan","blue","orange","red")[unclass(spsex)]) > plot(spsex.disc[,1:2], pch=unclass(spsex), col=c("cyan","blue","orange","red")[unclass(spsex.pred.l$class)]) > spsex.lda.cv <- lda(crabs, spsex, CV=T) > table(Actual=spsex, Classified=spsex.lda.cv$class) Classified Actual BF BM OF OM BF 49 1 0 0 BM 5 45 0 0 OF 0 0 46 4 OM 0 0 0 50 > spsex.t <- spsex[training] > spsex.c <- spsex[-training] > spsex.lda.t <- lda(crabs.t, spsex.t) > spsex.pred.lt <- predict(spsex.lda.t, crabs.t, prior=c(.25,.25,.25,.25)) > table(Actual = spsex.t, Classified = spsex.pred.lt$class) Classified Actual BF BM OF OM BF 20 0 0 0 BM 2 25 0 0 OF 0 0 25 3 OM 0 0 0 25 > spsex.pred.lc <- predict(spsex.lda.t, crabs.c, prior=c(.25,.25,.25,.25)) > table(Actual = spsex.c, Classified = spsex.pred.lc$class) Classified Actual BF BM OF OM BF 30 0 0 0 BM 4 19 0 0 OF 0 0 21 1 OM 0 0 0 25 8. The Importance of Verification Purely random - no structure > A <- matrix(rnorm(200),ncol=10) > A.class <- rep(c(1,2), each=10) > A.lda <- lda(A, A.class) > A.pred.l <- predict(A.lda, A) > table(Actual = A.class, Classified = A.pred.l$class) Classified Actual 1 2 1 10 0 2 1 9 > A.lda.cv <- lda(A, A.class, CV=T) > table(Actual=A.class, Classified=A.lda.cv$class) Classified Actual 1 2 1 8 2 2 5 5