# Train/Test split (manual)
def train_test_split(X, y, split_size=0.2):
    # Generate shuffled indices
    num_data_points = X.shape[0]
    shuffled_indices = np.random.permutation(num_data_points)

    # Split indices into test and train
    test_set_size = int(num_data_points * split_size)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]

    # Use the indices to split the data
    X_train = X.iloc[train_indices]
    X_test = X.iloc[test_indices]
    y_train = y.iloc[train_indices]
    y_test = y.iloc[test_indices]

    return X_train, X_test, y_train, y_test
# Z-score standardization
def standardize(X_train, X_test):
    # Calculate the mean and standard deviation of the training set
    mean = np.mean(X_train, axis=0)
    std = np.std(X_train, axis=0)

    # To prevent division by zero, set std=1 where std is zero
    std[std == 0] = 1

    # Standardize the training set
    X_train_scaled = (X_train - mean) / std

    # Use the training set's mean and std to standardize the test set
    X_test_scaled = (X_test - mean) / std

    return X_train_scaled, X_test_scaled