There is a pandas DataFrame named df_new:

   id   route  operator        group_name  district             injury_result  incident_type     victim_category  victim_age
2  3.0  1      London General  Go-Ahead    Southwark            NaN            Onboard Injuries  7                10
3  4.0  4      Metroline       Metroline   Islington            NaN            Onboard Injuries  7                2
4  5.0  5      East London     Stagecoach  Havering             NaN            Onboard Injuries  7                8
5  6.0  5      East London     Stagecoach  None London Borough  NaN            Onboard Injuries  7                8

Count the incident_type entries for each route and find the routes with the most incidents, keeping only those whose count is >= 5. Sort the result in descending order by the incident_type column.
def df_incident_type(df, min_count=5):
    """Return the routes with the most recorded incidents.

    Incidents are tallied per (route, operator, group_name) combination by
    counting the non-null ``incident_type`` entries. Combinations with fewer
    than ``min_count`` incidents are discarded, the rest are sorted from the
    highest count to the lowest, and only the top combination of each route
    is kept.

    Args:
        df: DataFrame containing at least the columns ``route``,
            ``operator``, ``group_name`` and ``incident_type``.
        min_count: Minimum number of incidents a combination needs in order
            to be reported. Defaults to 5.

    Returns:
        A new DataFrame with the columns ``route``, ``operator``,
        ``group_name`` and ``incident_type``, where ``incident_type`` holds
        the incident count. Rows are ordered by that count, descending, and
        the index is renumbered from 0. The input frame is not modified.
    """
    group_keys = ['route', 'operator', 'group_name']

    # 'incident_type' must not be a grouping key as well as the counted
    # column: that makes the name both an index level and a column label,
    # and sort_values/reset_index then raise ValueError.
    counts = df.groupby(group_keys)['incident_type'].count().reset_index()

    counts = counts[counts['incident_type'] >= min_count]

    # A stable sort keeps tied rows in group-key order, so the row that
    # survives drop_duplicates below is deterministic.
    counts = counts.sort_values(
        'incident_type', ascending=False, kind='mergesort'
    )

    # After the descending sort the first row of each route is its maximum.
    counts = counts.drop_duplicates(subset=['route'], keep='first')
    return counts.reset_index(drop=True)