There is a function for a pandas DataFrame: dfp = df.copy() dfp.sort_values(by='performer', inplace=True) def group_by_performer(data): dfp['hits'] = dfp.groupby('performer')['song'].transform(lambda x: ', '.join(x.unique())) dfp['time_on_chart'] = dfp.groupby('performer')['time_on_chart'].transform(lambda x: x.sum()) dfp['consecutive_weeks'] = dfp.groupby('performer')['consecutive_weeks'].transform(lambda x: x.sum()) dfp.drop_duplicates(subset='performer', inplace=True) dfp.reset_index(inplace=True) return dfp group_by_performer(dfp) Add a function that converts the values in the chart_debut column from the format '1966-06-25' to the format 1966, that is, just the year.
def get_year(data):
    """Replace the 'chart_debut' column with the year of each date.

    The column is parsed as dates in the '%Y-%m-%d' layout
    (e.g. '1966-06-25') and overwritten with the year component
    (e.g. 1966).

    Args:
        data: DataFrame holding a 'chart_debut' column.

    Returns:
        The same DataFrame object that was passed in; the column is
        modified in place, not on a copy.
    """
    debut_dates = pd.to_datetime(data['chart_debut'], format='%Y-%m-%d')
    data['chart_debut'] = debut_dates.dt.year
    return data


# Apply the conversion to the working frame.
dfp = get_year(dfp)