def get_needed_posts(query):
    """Collect article snippets from the search page for every term in *query*.

    For each term a GET request is sent to ``parseurl + 'search/'`` with the
    term as the ``q`` parameter. Every ``<article>`` element with class
    ``tm-articles-list__item`` in the response contributes one row.

    Args:
        query: Iterable of search terms.

    Returns:
        A ``pandas.DataFrame`` with the columns ``date``, ``title`` and
        ``link`` and a fresh 0..n-1 index. ``link`` is the snippet's href
        prefixed with ``https://habr.com``. The frame is empty (no columns)
        when nothing was found. Rows are NOT de-duplicated: an article
        matched by several terms appears once per match.

    Note:
        ``parseurl``, ``requests``, ``BeautifulSoup`` and ``pd`` must be
        available at module level; ``parseurl`` is presumably the site's base
        URL ending in a slash (TODO confirm against its definition).
    """
    rows = []
    for q in query:
        URL = parseurl + 'search/'
        params = {'q': q}
        req = requests.get(URL, params=params)
        soup = BeautifulSoup(req.text)
        articles = soup.find_all('article', class_='tm-articles-list__item')
        for article in articles:
            try:
                heading = article.find('h2', class_='tm-article-snippet__title')
                title = heading.text
                date = article.find(
                    'span', class_='tm-article-snippet__datetime-published'
                ).text.strip()
                link = heading.find('a').get('href')
            except AttributeError:
                # A required element is missing (find() returned None). Skip
                # this snippet instead of falling through and re-adding the
                # previous article's values (or hitting unbound names).
                continue
            if link is None:
                # Anchor without an href: there is no URL to report.
                continue
            rows.append({'date': date, 'title': title, 'link': 'https://habr.com' + link})
    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(rows).reset_index(drop=True)


# Modify the function above so that if it finds a repeated title or link, it will not add it to the return
def get_needed_posts(query): site = pd.DataFrame() links = [] titles = [] for q in query: URL = parseurl+'search/' params = { 'q': q } req = requests.get(URL, params=params) soup = BeautifulSoup(req.text) articles = soup.find_all('article', class_='tm-articles-list__item') for article in articles: try: title = article.find('h2', class_='tm-article-snippet__title').text date = article.find('span', class_='tm-article-snippet__datetime-published').text.strip() link = article.find('h2', class_='tm-article-snippet__title').find('a').get('href') if link not in links and title not in titles: titles.append(title) links.append(link) row = {'date': date, '