# 3_mergetables.R
# Merges the tables stored in dfnews3.rda, dftweet3.rda and dfblog3.rda
# and saves the combined table to word3.Rda.
library(sqldf)
library(data.table)
#' Merge two word tables, adding the second column for words present in both
#'
#' Rows of `df2` whose `word` already occurs in `df1` are folded into the
#' matching `df1` row by adding their second-column values; the remaining
#' `df2` rows are appended below `df1`.
#'
#' Rows are aligned by word with `match()`, so the result does not depend on
#' the two tables listing their shared words in the same order.
#'
#' @param df1 Table with a `word` column and a numeric second column.
#' @param df2 Table with the same layout as `df1`.
#' @return A data.table holding the rows of `df1` (second column updated for
#'   shared words) followed by the rows of `df2` whose word is not in `df1`.
#' @note Assumes each word occurs at most once per table: with duplicates,
#'   `match()` targets only the first occurrence in `df1`, and repeated
#'   shared words in `df2` would not all be accumulated.
mergewords <- function(df1, df2) {
  # Row of df1 holding each df2 word; NA when df1 does not contain the word.
  pos <- match(df2$word, df1$word)
  shared <- !is.na(pos)

  if (any(shared)) {
    # Add each shared df2 value to the df1 row carrying the same word.
    df1[[2]][pos[shared]] <- df1[[2]][pos[shared]] + df2[[2]][shared]
  }

  # Logical indexing stays correct when nothing (or everything) is shared,
  # so no separate empty-overlap branch is needed.
  keep <- !shared
  mergeDF <- rbind(df1, df2[keep, ])

  # Convert to data.table by reference.
  setDT(mergeDF)
  mergeDF
}
# Combine the news, tweet and blog tables ----
# Every .rda file is expected to provide an object called `dffile`; it is
# copied to a source-specific name and removed before the next load().

load("dfnews3.rda")
dfnews3 <- dffile
rm(dffile)

load("dftweet3.rda")
dftweet3 <- dffile
rm(dffile)

gc(reset = TRUE)

# Step 1: news + tweets.
mergedf3 <- mergewords(dfnews3, dftweet3)
setDT(mergedf3)
uniquewords <- unique(mergedf3, by = "word")

# The inputs are no longer needed once merged; drop them before the blog
# table is loaded.
rm(dfnews3, dftweet3)

load("dfblog3.rda")
dfblog3 <- dffile
rm(dffile)

gc()

# Step 2: (news + tweets) + blogs, then write the combined table to disk.
word3 <- mergewords(mergedf3, dfblog3)
setDT(word3)
uniquewords <- unique(word3, by = "word")

save(word3, file = "word3.Rda")