########################################################################### # evaluation-script.R # # # # Comparing Anomaly Detectors for Keystroke Biometrics # # Evaluation Proceedure # # R Script # # # # by: Kevin Killourhy # # date: May 19, 2009 # ########################################################################### # This script demonstrates how the DSL-StrongPassword data set can be # used to train and evaluate three anomaly detectors, reproducing the # results for those anomaly detectors reported in: # K.S. Killourhy and R.A. Maxion. "Comparing Anomaly Detectors for # Keystroke Biometrics," in Proceedings of the 39th Annual # Dependable Systems and Networks Conference, pages 125-134, # Estoril, Lisbon, Portugal, June 29-July 2, 2009. IEEE Computer # Society Press, Los Alamitos, California. 2009. # The script is shared in order to promote open scientific discourse, # and to encourage other researchers to reproduce and expand upon our # results (e.g., by evaluating their own, novel anomaly detectors) # using our data and procedures. ########################################################################### # Table of Contents ########################################################################### # 1. Anomaly detectors # 2. Evaluation functions # 3. Main procedure library( MASS ); library( ROCR ); library( stats ); ########################################################################### # 1. Anomaly detectors ########################################################################### # The euclideanTrain and euclideanScore functions comprise the # Euclidean anomaly detector. During training, the detector takes a # set of password-timing vectors (encoded as rows in a matrix) and # calculates the mean vector. This mean vector is returned as the # detection model. During scoring, the detector takes the detection # model and a new set of password-timing vectors (also encoded as rows # in a matrix) and calculates the squared Euclidean distance between # the mean vector and each of the new password-timing vectors. These # scores are returned in a vector whose length is equal to the number # of password-timing vectors in the scoring matrix. euclideanTrain <- function( YTrain ) { dmod <- list ( mean = colMeans( YTrain ) ); return( dmod ); } euclideanScore <- function( dmod, YScore ) { p <- length( dmod$mean ); n <- nrow( YScore ); if( ncol(YScore) != p ) stop("Training/test feature length mismatch "); meanMatrix <- matrix( dmod$mean, byrow=TRUE, nrow=n, ncol=p ); scores <- rowSums( ( YScore - meanMatrix )^2 ); return( scores ); } # The manhattanTrain and manhattanScore functions comprise the # Manhattan anomaly detector. During training, the detector takes a # set of password-timing vectors (encoded as rows in a matrix) and # calculates the mean vector. This mean vector is returned as the # detection model. During scoring, the detector takes the detection # model and a new set of password-timing vectors (also encoded as rows # in a matrix) and calculates the Manhattan distance between the mean # vector and each of the new password-timing vectors. These scores # are returned in a vector whose length is equal to the number of # password-timing vectors in the scoring matrix. manhattanTrain <- function( YTrain ) { dmod <- list ( mean = colMeans( YTrain ) ); return( dmod ); } manhattanScore <- function( dmod, YScore ) { p <- length( dmod$mean ); n <- nrow( YScore ); if( ncol(YScore) != p ) stop("Training/test feature length mismatch "); meanMatrix <- matrix( dmod$mean, byrow=TRUE, nrow=n, ncol=p ); scores <- rowSums( abs( YScore - meanMatrix ) ); return( scores ); } # The mahalanobisTrain and mahalanobisScore functions comprise the # Mahalanobis anomaly detector. During training, the detector takes a # set of password-timing vectors (encoded as rows in a matrix) and # calculates the mean vector and also the inverse of the covariance # matrix. This vector and matrix are returned as the detection model. # During scoring, the detector takes the detection model and a new set # of password-timing vectors (also encoded as rows in a matrix) and # calculates the squared Mahalanobis distance between the mean vector # and each of the new password-timing vectors. These scores are # returned in a vector whose length is equal to the number of # password-timing vectors in the scoring matrix. mahalanobisTrain <- function( YTrain ) { dmod <- list( mean = colMeans( YTrain ), covInv = ginv( cov( YTrain ) ) ); return( dmod ); } mahalanobisScore <- function( dmod, YScore ) { p <- length( dmod$mean ); n <- nrow( YScore ); if( ncol(YScore) != p ) stop("Training/test feature length mismatch "); scores <- mahalanobis( YScore, dmod$mean, dmod$covInv, inverted=TRUE ); return( scores ); } # The detectorSet data structure compiles the train and score # functions for each detector into a named list, with the name # corresponding to the name of the detector. detectorSet = list( Euclidean = list( train = euclideanTrain, score = euclideanScore ), Manhattan = list( train = manhattanTrain, score = manhattanScore ), Mahalanobis = list( train = mahalanobisTrain, score = mahalanobisScore ) ); ########################################################################### # 2. Evaluation procedures ########################################################################### # The calculateEqualError function takes a set of user scores and # impostor scores, makes an ROC curve using the ROCR functionality, # and then geometrically calculates the point at which the miss and # false-alarm (i.e., false-negative and false-positive) rates are # equal. calculateEqualError <- function( userScores, impostorScores ) { predictions <- c( userScores, impostorScores ); labels <- c( rep( 0, length( userScores ) ), rep( 1, length( impostorScores ) ) ); pred <- prediction( predictions, labels ); missrates <- pred@fn[[1]] / pred@n.pos[[1]]; farates <- pred@fp[[1]] / pred@n.neg[[1]]; # Find the point on the ROC with miss slightly >= fa, and the point # next to it with miss slightly < fa. dists <- missrates - farates; idx1 <- which( dists == min( dists[ dists >= 0 ] ) ); idx2 <- which( dists == max( dists[ dists < 0 ] ) ); stopifnot( length( idx1 ) == 1 ); stopifnot( length( idx2 ) == 1 ); stopifnot( abs( idx1 - idx2 ) == 1 ); # Extract the two points as (x) and (y), and find the point on the # line between x and y where the first and second elements of the # vector are equal. Specifically, the line through x and y is: # x + a*(y-x) for all a, and we want a such that # x[1] + a*(y[1]-x[1]) = x[2] + a*(y[2]-x[2]) so # a = (x[1] - x[2]) / (y[2]-x[2]-y[1]+x[1]) x <- c( missrates[idx1], farates[idx1] ); y <- c( missrates[idx2], farates[idx2] ); a <- ( x[1] - x[2] ) / ( y[2] - x[2] - y[1] + x[1] ); eer <- x[1] + a * ( y[1] - x[1] ); return( eer ); } # The evaluateSubject function takes a password-timing data frame, an # ID for one of the subjects, and an anomaly detector's training and # scoring functions. It performs the training/scoring procedure for # that subject, extracting the appropriate password-timing matrices, # running the training and scoring functions, and then running the # analysis to calculate the equal-error rates. The equal-error rate # for the subject is returned. evaluateSubject <- function( X, evalSubject, detectorTrain, detectorScore ) { # Extract the training, user scoring, and impostor scoring matricies # for the subject. The training matrix is the first 200 password # repetitions for the subject, corresponding to the first 4 sessions # of passwords. The user scoring matrix is the last 200 password # repetitions, and the impostor scoring matrix is the first 5 # repetitions from all the other subjects. YTrain <- as.matrix( subset( X, subset = ( subject == evalSubject & sessionIndex <= 4 ), select = -c( subject, sessionIndex, rep ) ) ); YScore0 <- as.matrix( subset( X, subset = ( subject == evalSubject & sessionIndex > 4 ), select = -c( subject, sessionIndex, rep ) ) ); YScore1 <- as.matrix( subset( X, subset = ( subject != evalSubject & sessionIndex == 1 & rep <= 5 ), select = -c( subject, sessionIndex, rep ) ) ); # Run the training and scoring procedures on the appropriate matrices # to obtain the user and impostor scores. dmod <- detectorTrain( YTrain ); userScores <- detectorScore( dmod, YScore0 ); impostorScores <- detectorScore( dmod, YScore1 ); # Use the user and impostor scores to calculate the detector's equal # error rate for the evaluation subject. eer <- calculateEqualError( userScores, impostorScores ); return( eer ); } ########################################################################### # 3. Main procedure ########################################################################### ####################### cat("Loading the data file\n"); datafile <- 'DSL-StrongPasswordData.txt'; if( ! file.exists(datafile) ) stop( "Password data file ",datafile," does not exist"); # Retrieve the data and the list of subjects X <- read.table( datafile, header = TRUE ); subjects <- sort( levels( X$subject ) ); # For each of the detectors, evaluate the detector on each subject, # and record the equal-error rates in a data frame. eers <- list(); for( detectorName in names( detectorSet ) ) { ####################### cat("Evaluating the",detectorName,"detector\n"); detectorTrain = detectorSet[[ detectorName ]]$train; detectorScore = detectorSet[[ detectorName ]]$score; eers[[ detectorName ]] <- rep( NA, length(subjects) ); n <- length(subjects); for( i in 1:n ) { eer <- evaluateSubject( X, subjects[i], detectorTrain = detectorTrain, detectorScore = detectorScore ); eers[[ detectorName ]][i] <- eer; cat("\r ",i,"/",n,":",eer); } cat("\r average equal-error:",mean(eers[[detectorName]]),"\n"); } ####################### cat("Tabulating results:\n"); eers <- data.frame( eers ); rownames( eers ) <- subjects; res <- data.frame(eer.mean = colMeans(eers), eer.sd = apply( eers, 2, sd )); print( round( res, digits = 3 ) );