-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathknn.r
42 lines (39 loc) · 1.27 KB
/
knn.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# On-disk cache locations for the KNN helpers in this file. Both paths are
# relative, so they resolve against the current working directory.
#
# knnCNCacheFile: file that getKNNConfusionMatrix() saves its `cm` object to
# and loads it back from.
knnCNCacheFile <- "./cache/knnCNCacheFile.rdata"
# knnModelCacheFile: file that getKNNTrainedSentimentAnalysisModel() saves its
# `fit` object to and loads it back from.
knnModelCacheFile <- "./cache/knnModelCacheFile.rdata"
#' Return the KNN sentiment model, training it only when no usable cache exists.
#'
#' On a cache miss the model is trained on `getTrainingData()` with column `s`
#' as the response and every other column as a predictor, then saved to
#' `knnModelCacheFile` under the object name `fit`.
#'
#' @param useCache If TRUE and `knnModelCacheFile` exists, the `fit` object
#'   stored in that file is returned without retraining. If FALSE, the model is
#'   retrained and the cache file is overwritten.
#' @return The fitted model returned by `train()` (presumably caret's `train`;
#'   confirm against the file's library calls), or the cached copy of it.
getKNNTrainedSentimentAnalysisModel <- function(useCache = TRUE) {
  if (useCache && file.exists(knnModelCacheFile)) {
    # Load into a private environment: a bare load() would drop every object
    # from the file into this function's frame, and a cache file lacking `fit`
    # would make the lookup fall through to an unrelated `fit` defined in an
    # enclosing environment.
    cached <- new.env(parent = emptyenv())
    load(knnModelCacheFile, envir = cached)
    if (exists("fit", envir = cached, inherits = FALSE)) {
      return(get("fit", envir = cached, inherits = FALSE))
    }
    warning(
      "Cache file ", knnModelCacheFile,
      " does not contain a 'fit' object; retraining.",
      call. = FALSE
    )
  }

  df <- getTrainingData()
  fit <- train(s ~ ., df, method = "knn")

  # save() does not create missing directories; without this the whole
  # training run is lost to an error when the cache folder is absent.
  dir.create(dirname(knnModelCacheFile), recursive = TRUE, showWarnings = FALSE)
  save(fit, file = knnModelCacheFile)
  fit
}
#' Run the KNN sentiment model on a piece of text.
#'
#' The text is passed through `cleanText()`, wrapped in a corpus, converted to
#' a document-term matrix using the dictionary from `getDictionary()`, and
#' handed to `predict()` together with the (cached) KNN model.
#'
#' @param text Text to classify.
#' @return Whatever `predict()` returns for the fitted model and the feature
#'   matrix built from `text`.
knnPredict <- function(text) {
  cleaned <- cleanText(text)
  docs <- VCorpus(VectorSource(c(cleaned)))

  # The dictionary from getDictionary() is passed as a control option when
  # building the document-term matrix.
  termCounts <- DocumentTermMatrix(
    docs,
    control = list(dictionary = getDictionary())
  )
  features <- as.matrix(termCounts)

  model <- getKNNTrainedSentimentAnalysisModel(useCache = TRUE)
  predict(model, newdata = features)
}
#' Return the confusion matrix of a KNN model evaluated on a held-out split.
#'
#' On a cache miss, `getTrainingData()` is split 60/40 on column `s`, a KNN
#' model is trained on the 60% part, predictions are made for the 40% part, and
#' the resulting confusion matrix is saved to `knnCNCacheFile` under the object
#' name `cm`.
#'
#' Side effect: calls `set.seed(1337)`, which resets the session's RNG state.
#'
#' @param useCache If TRUE and `knnCNCacheFile` exists, the stored `cm` object
#'   is returned. If FALSE, any existing cache file is removed and the matrix
#'   is recomputed and saved again.
#' @return The object returned by `confusionMatrix()` (presumably caret's;
#'   confirm against the file's library calls), or the cached copy of it.
getKNNConfusionMatrix <- function(useCache = TRUE) {
  if (file.exists(knnCNCacheFile)) {
    if (useCache) {
      # Load into a private environment so a cache file lacking `cm` cannot
      # silently resolve to an unrelated `cm` from an enclosing environment.
      cached <- new.env(parent = emptyenv())
      load(knnCNCacheFile, envir = cached)
      if (exists("cm", envir = cached, inherits = FALSE)) {
        return(get("cm", envir = cached, inherits = FALSE))
      }
      warning(
        "Cache file ", knnCNCacheFile,
        " does not contain a 'cm' object; recomputing.",
        call. = FALSE
      )
    } else {
      file.remove(knnCNCacheFile)
    }
  }

  df <- getTrainingData()

  # Seed BEFORE the split: the partition is drawn at random, so seeding only
  # after it (as before) left the train/test split, and therefore the whole
  # confusion matrix, different on every run.
  set.seed(1337)
  trainDataIndex <- createDataPartition(df$s, p = 0.6, list = FALSE)
  trainDf <- df[trainDataIndex, ]
  testDf <- df[-trainDataIndex, ]

  fit <- train(s ~ ., trainDf, method = "knn")
  predictions <- predict(fit, testDf)
  cm <- confusionMatrix(predictions, testDf$s)

  # save() does not create missing directories.
  dir.create(dirname(knnCNCacheFile), recursive = TRUE, showWarnings = FALSE)
  save(cm, file = knnCNCacheFile)
  cm
}