Docsity
Docsity

Prepare for your exams
Prepare for your exams

Study with the several resources on Docsity


Earn points to download
Earn points to download

Earn points by helping other students or get them with a premium plan


Guidelines and tips
Guidelines and tips

Discriminant Analysis: Linear and Quadratic Methods with R, Study notes of Descriptive statistics

An r code example for discriminant analysis, including linear discriminant analysis (lda) and quadratic discriminant analysis (qda). The analysis is performed on synthetic data and real-world data from cushings syndrome and crabs species classification. Data visualization, prediction, and misclassification rates.

Typology: Study notes

Pre 2010

Uploaded on 07/30/2009

koofers-user-7t5
koofers-user-7t5 🇺🇸

10 documents

1 / 12

Toggle sidebar

Related documents


Partial preview of the text

Download Discriminant Analysis: Linear and Quadratic Methods with R and more Study notes Descriptive statistics in PDF only on Docsity! R15: Discriminant Analysis 1. Linear Discriminant Analysis > library(MASS) > > C1 <- mvrnorm(30, c(0,-1), matrix(c(1,.8,.8,1),2)) > C2 <- mvrnorm(30, c(0,1), matrix(c(1,.8,.8,1),2)) > C3 <- rbind(C1,C2) > C <- data.frame(X1=C3[,1], X2=C3[,2] ) > C.class <- as.factor(rep(c("a","b"),each=30)) > plot(X2~X1,pch=unclass(C.class),col=unclass(C.class), data=C) > C.lda <- lda(C, C.class) > C.lda Call: lda(C, C.class) Prior probabilities of groups: a b 0.5 0.5 Group means: X1 X2 a 0.2855102 -0.8219685 b 0.2097435 1.1948177 Coefficients of linear discriminants: LD1 X1 -1.178503 X2 1.689796 > C.pred <- predict(C.lda, C) > plot(X2~X1,pch=unclass(C.class),col=unclass(C.pred$class),data=C) > source("E:\\Stat 5600\\predplot.txt") > predplot(C.lda, C, C.class) > table(Actual=C.class, Classified=C.pred$class) Classified Actual a b a 28 2 b 0 30 > C.lda$scaling LD1 X1 -1.178503 X2 1.689796 > C.disc <- as.matrix(C) %*% C.lda$scaling > stripchart(C.disc~C.class, "jitter") > boxplot(C.disc~C.class) 2. Different Variance Matrices > D1 <- mvrnorm(30, c(0,-1), matrix(c(1,.8,.8,1),2)) > D2 <- mvrnorm(30, c(-1,2), matrix(c(.25,-.2,-.2,.25),2)) > D3 <- rbind(D1,D2) > D <- data.frame(X1=D3[,1], X2=D3[,2] ) > plot(cush.ltp,pch=unclass(cush.class),col=unclass(cush.pred.l$class)) > predplot(cush.lda, cush.ltp, cush.class) > cush.qda <- qda(cush.ltp, cush.class) > cush.pred.q <- predict(cush.qda, cush.ltp) > plot(cush.ltp,pch=unclass(cush.class),col=unclass(cush.pred.q$class)) > predplot(cush.qda, cush.ltp, cush.class) > table(Actual=cush.class, Classified=cush.pred.q$class) Classified Actual a b c a 6 0 0 b 0 9 1 c 0 1 4 5. Prediction – Unknown Cases > cush.ltp.u <- log(Cushings[unk, 1:2]) > cush.pred.u.q <- predict(cush.qda, cush.ltp.u) > predplot(cush.qda, cush.ltp, cush.class) > points(cush.ltp.u, pch="u", col=unclass(cush.pred.u.q$class)) 6. Misclassification Rates: Cross-Validation > cush.qda.cv <- qda(cush.ltp, cush.class, CV=T) > > table(Actual=cush.class, Classified=cush.qda.cv$class) Classified Actual a b c a 5 0 1 b 1 8 1 c 0 2 3 7. Crabs Data 5 physical measurements on 2 forms (species? “blue” / “orange”), both sexes > data(crabs) > crabs[sample(200,10),] sp sex index FL RW CL CW BD 64 B F 14 11.6 11.0 24.6 28.5 10.4 187 O F 37 19.9 16.6 39.4 43.9 17.9 71 B F 21 12.8 11.7 27.1 31.2 11.9 48 B M 48 19.8 14.2 43.2 49.7 18.6 103 O M 3 10.7 8.6 20.7 22.7 9.2 135 O M 35 18.6 13.4 37.8 41.9 17.3 63 B F 13 11.5 11.0 24.7 29.2 10.1 22 B M 22 14.6 11.3 31.9 36.4 13.7 121 O M 21 15.1 11.5 30.9 34.0 13.9 61 B F 11 11.0 9.8 22.5 25.7 8.2 > > sp <- crabs$sp > sex <- crabs$sex > crabs <- crabs[,4:8] > pairs(crabs) > pairs(crabs, col=c("blue","orange")[unclass(sp)], pch=unclass(sp)) 7.1 Principal Components > crabs.pca <- prcomp(crabs,scale=T) > plot(crabs.pca) > pairs(crabs.pca$x) > plot(crabs.pca$x[,2:3], pch=as.character(sex), col=c("blue","orange")[unclass(sp)]) > sex.lda <- lda(crabs,sex) > sex.pred.l <- predict(sex.lda, crabs) > table(Actual = sex, Classified = sex.pred.l$class) Classified Actual F M F 95 5 M 3 97 > sex.lda Call: lda(crabs, sex) Prior probabilities of groups: F M 0.5 0.5 Group means: FL RW CL CW BD F 15.432 13.487 31.360 35.830 13.724 M 15.734 11.990 32.851 36.999 14.337 Coefficients of linear discriminants: LD1 FL -0.17509926 RW -1.61455655 CL 0.90033985 CW -0.27294518 BD 0.08261892 > sex.disc <- as.matrix(crabs) %*% sex.lda$scaling > stripchart(sex.disc~sex, "jitter") > boxplot(sex.disc~sex) 7.4 LDA for all four categories (with holdout validation) > spsex <- factor(paste(sp, sex, sep="")) > spsex [1] BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM BM : : : [176] OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF OF Levels: BF BM OF OM > spsex.lda <- lda(crabs,spsex) > spsex.pred.l <- predict(spsex.lda, crabs) > table(Actual = spsex, Classified = spsex.pred.l$class) Classified Actual BF BM OF OM BF 50 0 0 0 BM 5 45 0 0 OF 0 0 47 3 OM 0 0 0 50 > spsex.lda Call: lda(crabs, spsex) Prior probabilities of groups: BF BM OF OM 0.25 0.25 0.25 0.25 Group means: FL RW CL CW BD BF 13.270 12.138 28.102 32.624 11.816 BM 14.842 11.718 32.014 36.810 13.350 OF 17.594 14.836 34.618 39.036 15.632 OM 16.626 12.262 33.688 37.188 15.324 Coefficients of linear discriminants: LD1 LD2 LD3 FL -1.5543139 -0.1951885 1.6667377 RW -0.6247546 -1.5394972 -0.4558782 CL -0.1875489 1.0953923 -0.6807588 CW 1.5156077 -0.6435178 0.6548549 BD -1.3551090 0.5153193 -1.2859743 Proportion of trace: LD1 LD2 LD3 0.6861 0.2995 0.0144 > spsex.disc <- as.matrix(crabs) %*% spsex.lda$scaling > plot(spsex.disc[,1:2], pch=as.character(sex), col=c("cyan","blue","orange","red")[unclass(spsex)]) > plot(spsex.disc[,1:2], pch=unclass(spsex), col=c("cyan","blue","orange","red")[unclass(spsex.pred.l$class)]) > spsex.lda.cv <- lda(crabs, spsex, CV=T) > table(Actual=spsex, Classified=spsex.lda.cv$class) Classified Actual BF BM OF OM BF 49 1 0 0 BM 5 45 0 0 OF 0 0 46 4 OM 0 0 0 50 > spsex.t <- spsex[training] > spsex.c <- spsex[-training] > spsex.lda.t <- lda(crabs.t, spsex.t) > spsex.pred.lt <- predict(spsex.lda.t, crabs.t, prior=c(.25,.25,.25,.25)) > table(Actual = spsex.t, Classified = spsex.pred.lt$class) Classified Actual BF BM OF OM BF 20 0 0 0 BM 2 25 0 0 OF 0 0 25 3 OM 0 0 0 25 > spsex.pred.lc <- predict(spsex.lda.t, crabs.c, prior=c(.25,.25,.25,.25)) > table(Actual = spsex.c, Classified = spsex.pred.lc$class) Classified Actual BF BM OF OM BF 30 0 0 0 BM 4 19 0 0 OF 0 0 21 1 OM 0 0 0 25 8. The Importance of Verification Purely random - no structure > A <- matrix(rnorm(200),ncol=10) > A.class <- rep(c(1,2), each=10) > A.lda <- lda(A, A.class) > A.pred.l <- predict(A.lda, A) > table(Actual = A.class, Classified = A.pred.l$class) Classified Actual 1 2 1 10 0 2 1 9 > A.lda.cv <- lda(A, A.class, CV=T) > table(Actual=A.class, Classified=A.lda.cv$class) Classified Actual 1 2 1 8 2 2 5 5
Docsity logo



Copyright © 2024 Ladybird Srl - Via Leonardo da Vinci 16, 10126, Torino, Italy - VAT 10816460017 - All rights reserved