From 0dd9368d7ef6ba136e83ace932716bbd1d3ab6ea Mon Sep 17 00:00:00 2001 From: sudoskys Date: Wed, 6 Sep 2023 20:01:19 +0800 Subject: [PATCH] jieba -> cjieba --- middleware/filter/reduce.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/middleware/filter/reduce.py b/middleware/filter/reduce.py index 78950ff..092153c 100644 --- a/middleware/filter/reduce.py +++ b/middleware/filter/reduce.py @@ -5,7 +5,7 @@ # @Software: PyCharm from typing import List, Any -import jieba +import cjieba import numpy as np from sklearn.cluster import Birch from sklearn.feature_extraction.text import CountVectorizer @@ -26,7 +26,7 @@ def init(self, sentence_list): for line in sentence_list: title = line.strip() self.title_dict[index] = title - output = ' '.join(['%s' % x for x in list(jieba.cut(title, cut_all=False))]).encode('utf-8') # 空格拼接 + output = ' '.join(['%s' % x for x in list(cjieba.cut(title, cut_all=False))]).encode('utf-8') # 空格拼接 index += 1 corpus.append(output.strip()) _vectorizer = CountVectorizer()