import string


def read_file(file):
    """Read a text file and return its normalized, western-only content.

    The file is decoded as UTF-8. Every character whose code point is 256
    or above (for example Chinese text and full-width punctuation) is
    dropped, the remaining text is lower-cased, and each character found
    in ``string.punctuation`` is replaced by a single space.

    Args:
        file: Path of the text file to read.

    Returns:
        The cleaned, lower-cased text as one string.
    """
    # An explicit encoding keeps the result independent of the platform's
    # default locale encoding.
    with open(file, 'r', encoding='utf-8') as f:
        txt = f.read()
    western_only = ''.join(ch for ch in txt if ord(ch) < 256).lower()
    # A single translate pass swaps every punctuation mark for a space.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    return western_only.translate(to_space)


if __name__ == '__main__':
    filename = 'Who Moved My Cheese.txt'  # input file name
    content = read_file(filename)  # cleaned text, returned as a string
    n = int(input())
    print(content[:n])
第2关 统计单词数量
本关任务:编写一个能计算单词数量的小程序。
import string


def count_of_words(txt):
    """Count the total and the distinct words in a cleaned text.

    Args:
        txt: Text in which punctuation has already been replaced by
            spaces; words are its whitespace-separated tokens.

    Returns:
        A tuple ``(total_words, distinct_words)``.
    """
    words = txt.split()
    # A set is enough here: only the number of distinct words is needed,
    # not how often each one occurs.
    return len(words), len(set(words))


def read_file(file):
    """Read a text file and return its normalized, western-only content.

    The file is decoded as UTF-8. Every character whose code point is 256
    or above (for example Chinese text) is dropped, the remaining text is
    lower-cased, and each character found in ``string.punctuation`` is
    replaced by a single space.

    Args:
        file: Path of the text file to read.

    Returns:
        The cleaned, lower-cased text as one string.
    """
    with open(file, 'r', encoding='utf-8') as novel:
        txt = novel.read()
    english_only_txt = ''.join(x for x in txt if ord(x) < 256).lower()
    # A single translate pass swaps every punctuation mark for a space.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    return english_only_txt.translate(to_space)


if __name__ == '__main__':
    filename = 'Who Moved My Cheese.txt'  # input file name
    content = read_file(filename)  # cleaned text, returned as a string
    amount_results = count_of_words(content)
    print('文章共有单词{}个,其中不重复单词{}个'.format(*amount_results))
第3关 统计单词出现的次数
预期输出:
the 369
he 337
to 333
and 312
cheese 214
it 187
they 166
of 158
a 146
had 142
import string
from collections import Counter


def word_frequency(txt):
    """Count how many times each word occurs in a cleaned text.

    Args:
        txt: Text in which punctuation has already been replaced by
            spaces; words are its whitespace-separated tokens.

    Returns:
        A plain ``dict`` mapping each word to its number of occurrences,
        with keys in order of first appearance.
    """
    # Convert back to dict so callers keep receiving the same type.
    return dict(Counter(txt.split()))


def top_ten_words(frequency, cnt):
    """Print the ``cnt`` most frequent words with their counts.

    Each output line has the form ``word count``. Words with equal counts
    keep the order they have in ``frequency`` because the sort is stable.

    Args:
        frequency: Mapping of word to occurrence count.
        cnt: Number of leading entries of the ranking to print.
    """
    ranked = sorted(frequency.items(), key=lambda item: item[1], reverse=True)
    for word, times in ranked[:cnt]:
        print(word, times)


def read_file(file):
    """Read a text file and return its normalized, western-only content.

    The file is decoded as UTF-8. Every character whose code point is 256
    or above (for example Chinese text) is dropped, the remaining text is
    lower-cased, and each character found in ``string.punctuation`` is
    replaced by a single space.

    Args:
        file: Path of the text file to read.

    Returns:
        The cleaned, lower-cased text as one string.
    """
    with open(file, 'r', encoding='utf-8') as novel:
        txt = novel.read()
    english_only_txt = ''.join(x for x in txt if ord(x) < 256).lower()
    # A single translate pass swaps every punctuation mark for a space.
    to_space = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    return english_only_txt.translate(to_space)


if __name__ == '__main__':
    filename = 'Who Moved My Cheese.txt'  # input file name
    content = read_file(filename)  # cleaned text, returned as a string
    frequency_result = word_frequency(content)  # word -> occurrence count
    n = int(input())
    top_ten_words(frequency_result, n)