Bias & variance
![]() |
# Bias and variance diagnostics lab script.
#
# Each section below loads a CSV through the project-local ``utils`` module,
# prints a quick summary of the split, and then calls one of the
# ``utils.train_plot_*`` helpers. Those helpers live in utils.py, which is not
# part of this file; what they fit and plot is inferred from their names and
# arguments only -- confirm against utils.py.

# for building linear regression models
from sklearn.linear_model import LinearRegression, Ridge

# import lab utility functions in utils.py
import utils

# --- Dataset 1: one input feature -------------------------------------------

# Split the dataset into train, cv, and test
x_train, y_train, x_cv, y_cv, x_test, y_test = utils.prepare_dataset(
    'data/c2w3_lab2_data1.csv'
)

print(f"the shape of the training set (input) is: {x_train.shape}")
print(f"the shape of the training set (target) is: {y_train.shape}\n")
print(f"the shape of the cross validation set (input) is: {x_cv.shape}")
print(f"the shape of the cross validation set (target) is: {y_cv.shape}\n")

# Preview the first 5 rows
print(f"first 5 rows of the training inputs (1 feature):\n {x_train[:5]}\n")

# Instantiate the regression model class
model = LinearRegression()

# Train and plot polynomial regression models
utils.train_plot_poly(
    model, x_train, y_train, x_cv, y_cv, max_degree=10, baseline=400
)

# Train and plot polynomial regression models again, this time with a lower
# baseline (250 instead of 400).
utils.train_plot_poly(
    model, x_train, y_train, x_cv, y_cv, max_degree=10, baseline=250
)

# --- Dataset 2: two input features ------------------------------------------

x_train, y_train, x_cv, y_cv, x_test, y_test = utils.prepare_dataset(
    'data/c2w3_lab2_data2.csv'
)

print(f"the shape of the training set (input) is: {x_train.shape}")
print(f"the shape of the training set (target) is: {y_train.shape}\n")
print(f"the shape of the cross validation set (input) is: {x_cv.shape}")
print(f"the shape of the cross validation set (target) is: {y_cv.shape}\n")

# Preview the first 5 rows
print(f"first 5 rows of the training inputs (2 features):\n {x_train[:5]}\n")

# Instantiate the model class
model = LinearRegression()

# Train and plot polynomial regression models. Dataset used has two features.
utils.train_plot_poly(
    model, x_train, y_train, x_cv, y_cv, max_degree=6, baseline=250
)

# --- Regularization sweeps (still on dataset 2) -----------------------------

# Define lambdas to plot (listed from largest to smallest)
reg_params = [10, 5, 2, 1, 0.5, 0.2, 0.1]

# Define degree of polynomial and train for each value of lambda
utils.train_plot_reg_params(
    reg_params, x_train, y_train, x_cv, y_cv, degree=4, baseline=250
)

# Define lambdas to plot (listed from smallest to largest)
reg_params = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1]

# Define degree of polynomial and train for each value of lambda
utils.train_plot_reg_params(
    reg_params, x_train, y_train, x_cv, y_cv, degree=4, baseline=250
)

# --- Comparing feature sets: dataset 2 vs. dataset 3 ------------------------

# Reload the two-feature dataset so its rows can be previewed next to the
# three-feature one below.
x_train, y_train, x_cv, y_cv, x_test, y_test = utils.prepare_dataset(
    'data/c2w3_lab2_data2.csv'
)

# Preview the first 5 rows
print(f"first 5 rows of the training set with 2 features:\n {x_train[:5]}\n")

# Prepare dataset with randomID feature
x_train, y_train, x_cv, y_cv, x_test, y_test = utils.prepare_dataset(
    'data/c2w3_lab2_data3.csv'
)

# Preview the first 5 rows
print(f"first 5 rows of the training set with 3 features (1st column is a random ID):\n {x_train[:5]}\n")

# Define the model
model = LinearRegression()

# Define properties of the 2 datasets
file1 = {'filename':'data/c2w3_lab2_data3.csv', 'label': '3 features', 'linestyle': 'dotted'}
file2 = {'filename':'data/c2w3_lab2_data2.csv', 'label': '2 features', 'linestyle': 'solid'}
files = [file1, file2]

# Train and plot for each dataset
utils.train_plot_diff_datasets(model, files, max_degree=4, baseline=250)

# --- Dataset 4: learning curve ----------------------------------------------

# Prepare the dataset
x_train, y_train, x_cv, y_cv, x_test, y_test = utils.prepare_dataset(
    'data/c2w3_lab2_data4.csv'
)

print(f"the shape of the entire training set (input) is: {x_train.shape}")
print(f"the shape of the entire training set (target) is: {y_train.shape}\n")
print(f"the shape of the entire cross validation set (input) is: {x_cv.shape}")
print(f"the shape of the entire cross validation set (target) is: {y_cv.shape}\n")

# Instantiate the model class
model = LinearRegression()

# Define the degree of polynomial and train the model using subsets of the
# dataset.
utils.train_plot_learning_curve(
    model, x_train, y_train, x_cv, y_cv, degree=4, baseline=250
)















Yorumlar
Yorum Gönder