The code below raises a ValueError: "Input variables with an inconsistent number of samples were found: [8082, 5572]". Fix it!

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(tfidf, cats, test_size=0.3, random_state=42)
import pandas as pd


def split_data(data):
    """Split paired samples into train and test lists.

    The pairs are loaded into a single two-column DataFrame and that one
    frame is split, so every ``X`` value stays attached to its ``y`` value
    and both sides of the split always contain the same number of samples.

    Args:
        data: A list of ``(X, y)`` tuples, one tuple per sample.

    Returns:
        A tuple of four lists ``(X_train, X_test, y_train, y_test)``.
        30% of the rows go to the test side; ``random_state=42`` makes the
        shuffle reproducible between calls.
    """
    # Imported locally so this snippet is self-contained: its own import
    # section only brings in pandas.
    from sklearn.model_selection import train_test_split

    df = pd.DataFrame(data, columns=['X', 'y'])
    # Splitting the rows of one frame (rather than two separate arrays)
    # is what keeps features and labels aligned.
    train, test = train_test_split(df, test_size=0.3, random_state=42)
    return (
        train['X'].tolist(),
        test['X'].tolist(),
        train['y'].tolist(),
        test['y'].tolist(),
    )


if __name__ == "__main__":
    # Example invocation; guarded so importing this module does no work.
    split_data([(1, 2), (3, 4), (5, 6)])