多个句子词频统计
使用 collections.Counter 统计所有句子的累计词频，并顺便去除在全部句子中只出现 1 次的单词。
from collections import Counter

# Corpus-wide token frequencies, accumulated by get_count() across every row.
cc = Counter()

# Errors raised when a cell is not usable text: AttributeError for values
# without .strip() (None, numbers), TypeError for bytes split on a text
# separator, UnicodeError for implicit decode failures on legacy byte strings.
# Catching only these keeps the best-effort fallback without hiding
# KeyboardInterrupt/SystemExit or unrelated bugs the way a bare except does.
_BAD_TEXT_ERRORS = (AttributeError, TypeError, UnicodeError)


def get_count(text):
    """Tokenize *text* on single spaces and add its tokens to the global ``cc``.

    Args:
        text: One sentence. Leading/trailing whitespace is stripped, then the
            string is split on the single-space character, so consecutive
            spaces yield empty-string tokens (which are counted as well).

    Returns:
        The list of tokens, or *text* itself unchanged when it cannot be
        tokenized (in which case ``cc`` is left untouched).
    """
    try:
        text_split = text.strip().split(' ')
    except _BAD_TEXT_ERRORS:
        return text
    # Counter.update() on an iterable counts its elements, so no intermediate
    # per-sentence Counter is needed.
    cc.update(text_split)
    return text_split


def remove_one(text):
    """Drop every token of *text* whose global count in ``cc`` is 1 or less.

    Must be called only after ``get_count`` has processed all sentences;
    otherwise the counts in ``cc`` are incomplete.

    Args:
        text: One sentence, tokenized the same way as in ``get_count``.

    Returns:
        The surviving tokens joined by single spaces, or *text* itself
        unchanged when it cannot be tokenized.
    """
    try:
        tokens = text.strip().split(u" ")
        # A Counter returns 0 for unseen keys, so unknown tokens are dropped
        # too and the lookup never inserts new entries.
        return u" ".join([tok for tok in tokens if cc[tok] > 1])
    except _BAD_TEXT_ERRORS:
        return text


# NOTE(review): `df` is not defined in this snippet; it is presumably a pandas
# DataFrame with a 'text' column created earlier -- confirm against the caller.
# Order matters: the first pass fills `cc` from every row, and only then can
# the second pass decide which tokens occur more than once overall.
df['text_split'] = df['text'].apply(get_count)
df['text'] = df['text'].apply(remove_one)