def get_needed_posts(query):
    """Search habr.com for each query string and collect matching articles.

    For every term in *query*, requests the site search page, parses the
    result articles, and returns one row per unique article.

    Args:
        query: iterable of search-term strings.

    Returns:
        pandas.DataFrame with columns ['date', 'title', 'link'].
        An article whose title OR link was already seen is skipped, so the
        result contains no repeated titles or links.

    Note:
        Relies on module-level `parseurl`, `requests`, and `BeautifulSoup`.
    """
    rows = []
    seen_titles = set()  # O(1) membership tests instead of scanning a DataFrame
    seen_links = set()
    for q in query:
        url = parseurl + 'search/'
        req = requests.get(url, params={'q': q})
        # Explicit parser: BeautifulSoup without one guesses and warns,
        # and can pick different parsers on different machines.
        soup = BeautifulSoup(req.text, 'html.parser')
        for article in soup.find_all('article', class_='tm-articles'):
            try:
                title = article.find('h2', class_='tm-article').text
                date = article.find('span', class_='tm-article').text.strip()
                link = article.find('h2', class_='tm-article').find('a').get('href')
            except AttributeError:
                # A malformed article is skipped entirely. The original
                # `except: pass` fell through and re-used the previous
                # iteration's title/date/link, appending a corrupt duplicate row.
                continue
            if title in seen_titles or link in seen_links:
                continue  # requested behavior: drop repeated titles/links
            seen_titles.add(title)
            seen_links.add(link)
            rows.append({'date': date, 'title': title, 'link': 'https://habr.com' + link})
    # Build the frame once at the end; per-row pd.concat in a loop is O(n^2).
    # Explicit columns guarantee the schema even when no articles matched.
    return pd.DataFrame(rows, columns=['date', 'title', 'link']).reset_index(drop=True)
def get_needed_posts(query): site = pd.DataFrame(columns=['date', 'title', 'link']) for q in query: URL = parseurl+'search/' params = { 'q': q } req = requests.get(URL, params=params) soup = BeautifulSoup(req.text) articles = soup.find_all('article', class_='tm-articles') for article in articles: try: title = article.find('h2', class_='tm-article').text date = article.find('span', class_='tm-article').text.strip() link = article.find('h2', class_='tm-article').find('a').get('href') except: pass if link not in site.link.values: row = {'date': date, 'title': title, 'link': 'https://habr.com'+link} site = pd.concat([site, pd