Find all the Chinese characters in a file containing a mix of Chinese, Japanese, and English text
# coding: UTF-8
"""Extract runs of Chinese (Han) characters from a text file."""
import re

# One or more consecutive code points in the CJK Unified Ideographs block
# (U+4E00-U+9FFF).  Compiled once at import time instead of on every call.
# NOTE: Han unification means Japanese kanji live in this same range, so
# kanji inside Japanese text are matched as well; kana (hiragana/katakana)
# and Latin letters are not.
_HAN_RUN = re.compile('[\u4e00-\u9fff]+')


def find_chinese_characters(file_path, encoding='utf-8'):
    """Return every run of consecutive Han characters found in a file.

    Args:
        file_path: Path of the text file to scan.
        encoding: Text encoding used to decode the file.  Defaults to
            UTF-8 so the result does not depend on the platform's locale
            encoding; pass e.g. ``'gbk'`` for legacy Chinese files.

    Returns:
        A list of strings, one per maximal run of characters in the range
        U+4E00-U+9FFF, in order of appearance.  Empty if there are none.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    # An explicit encoding avoids mis-decoding (or crashing) on systems
    # whose default locale encoding is not UTF-8.
    with open(file_path, 'r', encoding=encoding) as file:
        text = file.read()
    return _HAN_RUN.findall(text)


if __name__ == '__main__':
    # Demo invocation; guarded so importing this module has no side effects.
    print(find_chinese_characters('text.txt'))