# Export all of the Japanese characters in the HTML file into a text file.
import re
from urllib.request import urlopen

from bs4 import BeautifulSoup


def export(url):
    """Save the text of every ``<div class="text">`` on a page to a file.

    The page at *url* is fetched, parsed with the ``html.parser`` backend,
    and the text content of each matching ``div`` is written to
    ``japanese.txt`` (relative to the current working directory), with each
    entry followed by a blank line. An existing file is overwritten.

    Note that no character-level filtering is done: whatever text the
    matching elements contain is written as-is.

    Args:
        url: Address of the HTML document to fetch; passed straight to
            ``urllib.request.urlopen``.

    Returns:
        None. The result is the ``japanese.txt`` file on disk.

    Raises:
        Nothing is caught here, so any error raised while fetching the
        page or writing the file propagates to the caller.
    """
    # Close the HTTP response deterministically once parsing is done;
    # BeautifulSoup consumes the stream inside the constructor.
    with urlopen(url) as response:
        soup = BeautifulSoup(response, 'html.parser')

    text_divs = soup.find_all('div', {'class': 'text'})

    # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
    # cannot represent Japanese text and would raise UnicodeEncodeError.
    with open('japanese.txt', 'w', encoding='utf-8') as f:
        for div in text_divs:
            f.write(div.get_text())
            f.write('\n\n')