# (The line-number gutter "1 ... 125" of the original listing was here; it is not part of the program.)
def _textrank_scores(words, window, damping, iterations):
    """Score each distinct word with a TextRank-style power iteration.

    Args:
        words: Sequence of tokens in document order.
        window: Co-occurrence window size; two positions are linked when
            they are fewer than ``window`` positions apart.
        damping: Damping factor of the score update.
        iterations: Number of update steps to run.

    Returns:
        A dict mapping every distinct word to its score. Keys are in order
        of first appearance, which makes tie-breaking in a later stable
        sort reproducible between runs.
    """
    # dict.fromkeys de-duplicates while keeping first-appearance order,
    # unlike set(), whose order for strings varies from run to run.
    vocab = list(dict.fromkeys(words))
    position = {word: i for i, word in enumerate(vocab)}
    size = len(vocab)
    total = len(words)

    adjacency = np.zeros((size, size))
    # Visit every occurrence, not just the first one of each word. Edges
    # are undirected, so looking forward from each position is enough.
    # A word repeated inside its own window produces a self-edge.
    for index, word in enumerate(words):
        row = position[word]
        for other in range(index + 1, min(index + window, total)):
            col = position[words[other]]
            adjacency[row, col] = 1
            adjacency[col, row] = 1

    # Normalise each column by its OWN sum. Columns without any edge are
    # left as zeros so an isolated word does not produce NaN scores.
    col_sums = adjacency.sum(axis=0)
    connected = col_sums > 0
    adjacency[:, connected] /= col_sums[connected]

    scores = np.ones((size, 1))
    for _ in range(iterations):
        scores = (1 - damping) + damping * adjacency.dot(scores)

    return {word: scores[i, 0] for i, word in enumerate(vocab)}


def TextRank(window=3, d=0.85, iter_num=700, keyword=10):
    """Print the top-ranked keywords of the module-level ``text``.

    The text is passed through ``Filter_word`` (defined elsewhere in this
    file; presumably it returns a list of tokens - it is iterated and
    indexed here), a co-occurrence graph is built over the result, and the
    highest scoring words are printed separated by ``/``.

    Args:
        window: Co-occurrence window size.
        d: Damping factor of the score update.
        iter_num: Number of update iterations.
        keyword: How many top-scoring words to print.

    Returns:
        None. The result is written to standard output.
    """
    filter_word = Filter_word(text)
    print(filter_word)

    word_textrank = _textrank_scores(filter_word, window, d, iter_num)

    print('------------------------------')
    print('textrank模型结果:')
    ranked = sorted(word_textrank.items(), key=operator.itemgetter(1),
                    reverse=True)
    for key, _ in ranked[:keyword]:
        print(key + '/', end='')