forked from leisurelicht/WordCloud-CN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
61 lines (51 loc) · 1.77 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# coding: utf-8
from os import path
import numpy as np
import matplotlib.pyplot as plt
# matplotlib.use('qt4agg')
from wordcloud import WordCloud, STOPWORDS
import jieba
class WordCloud_CN:
    """Build and display a word cloud for Chinese text.

    The text is segmented with ``jieba``, filtered against a stop-word
    list, and rendered with the ``wordcloud`` package through matplotlib.

    Args:
        stopwords_file: Path to a UTF-8 text file with one stop word per line.
        text_file: Path to the UTF-8 text file to visualise. When omitted,
            a module-level variable named ``text_file`` is used if one
            exists (the behaviour older callers relied on).
    """

    def __init__(self, stopwords_file, text_file=None):
        self.stopwords_file = stopwords_file
        if text_file is None:
            # Legacy fallback: earlier versions read a module-level
            # ``text_file`` implicitly instead of taking an argument.
            text_file = globals().get('text_file')
        self.text_file = text_file

    @staticmethod
    def _open_utf8(file_path):
        """Open ``file_path`` for reading as UTF-8 text (Python 2 and 3)."""
        # Imported here so the class does not depend on a file-level import.
        import io
        return io.open(file_path, 'r', encoding='utf-8')

    @property
    def get_stopwords(self):
        """Read the stop-word file and return the words as a dict.

        Each non-blank line, with trailing whitespace removed, becomes a
        key mapped to ``1``. The result is also stored on ``self.stopwords``.
        The file is re-read on every access.

        Returns:
            dict: stop word -> 1.
        """
        stopwords = {}
        with self._open_utf8(self.stopwords_file) as f:
            for line in f:
                word = line.rstrip()
                # Skip blank lines instead of treating the first one as
                # end-of-file, which used to truncate the list.
                if word:
                    stopwords[word] = 1
        self.stopwords = stopwords
        return stopwords

    @property
    def seg_text(self):
        """Segment the text file and return the space-joined tokens.

        Tokens found in the stop-word list and tokens that are a single
        space are dropped. The result is also stored on ``self.seg_list``.

        Returns:
            The remaining tokens joined by single spaces.

        Raises:
            ValueError: If no text file was supplied.
        """
        if self.text_file is None:
            raise ValueError('text_file was not provided')
        with self._open_utf8(self.text_file) as f:
            text = u' '.join(f.readlines())
        # Load the stop words once; ``get_stopwords`` re-reads the file on
        # every access, so it must not be evaluated per token.
        stopwords = self.get_stopwords
        self.seg_list = u' '.join(
            token for token in jieba.cut(text)
            if token not in stopwords and token != u' ')
        return self.seg_list

    def show(self, font_path=u'./static/simheittf/simhei.ttf'):
        """Generate the word cloud from ``seg_text`` and display it.

        Args:
            font_path: Font file handed to ``WordCloud``. Defaults to the
                path this script has always used.
        """
        cloud = WordCloud(font_path=font_path,
                          background_color="black", margin=5,
                          width=1800, height=800)
        cloud = cloud.generate(self.seg_text)
        plt.figure()
        plt.imshow(cloud)
        plt.axis("off")
        plt.show()
if __name__ == '__main__':
    # Demo run: render a word cloud for the bundled sample text.
    # NOTE: ``text_file`` has to stay a module-level name assigned before
    # the constructor call, because WordCloud_CN is given only the
    # stop-word path here.
    text_file = u'./demo/情书(Baidu).txt'
    stopwords_file = u'./static/stopwords.txt'
    WordCloud_CN(stopwords_file).show()