Source code for WPT_Transfer_Learning_2case

"""
Transfer Learning On Two Cases Using WPT
----------------------------------------

This fuction implement transfer learning by training a classifier on two different 
data sets and testing it on remaining two different data sets. Stickout lengths of 
each data set should be determined by user. It asks for file paths to training and test set files. 
It assumes that reconstructed wavelet packets and frequency domain features are available in the specified data file folder.
Algorithm performs classfication based on chosen classifier and returns the results in an array. 
It also prints the total elapsed time.  

"""
import time
import numpy as np
import scipy.io as sio
from scipy.stats import skew
from sklearn.feature_selection import RFE
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from matplotlib import rc
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
import matplotlib
import os
matplotlib.rcParams.update({'font.size': 14})
rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)
#%%
[docs]def WPT_Transfer_Learning_2case(stickout_lengths,WPT_Level,Classifier): """ :param str (stickout_lengths): Stickout length for the training and test set in a np.array([]) format.First two stickout length are considered as training set data and the remaining ones are test set data. * if stickout length is 2 inch, '2' * if stickout length is 2.5 inch, '2p5' * if stickout length is 3.5 inch, '3p5' * if stickout length is 4.5 inch, '4p5' :param int (WPT_Level): Level of Wavelet Packet Decomposition :param str (Classifier): Classifier defined by user * Support Vector Machine: 'SVC' * Logistic Regression: 'LR' * Random Forest Classification: 'RF' * Gradient Boosting: 'GB' :Returns: :results: (np.array([])) Classification results for training and test set for all combination of ranked features and devition for both set. * first column: mean accuracies for training set * second column: deviation for training set accuracies * third column: mean accuracies for test set * fourth column: deviation for test set accuracies :time: (str) Elapsed time during feature matrix generation and classification :Example: .. doctest:: >>> from WPT_Transfer_Learning_2case import WPT_Transfer_Learning_2case #parameters >>> stickout_lengths = ['2','2p5','3p5','4p5'] >>> WPT_Level=4 >>> Classifier='SVC' >>> results = WPT_Transfer_Learning_2case(stickout_lengths, >>> WPT_Level, Classifier) Enter the path of first training set data files: >>> D\...\cutting_tests_processed\data_2inch_stickout Enter the path of second training set data files: >>> D\...\cutting_tests_processed\data_2p5inch_stickout Enter the path of first test set data files: >>> D\...\cutting_tests_processed\data_3p5inch_stickout Enter the path of second test set data files: >>> D\...\cutting_tests_processed\data_4p5inch_stickout """ #%% get the path to data files from user user_input_train1 = input("Enter the path of first training set data files: ") assert os.path.exists(user_input_train1), "Specified file does not exist at, "+str(user_input_train1) user_input_train2 = input("Enter the path of second training set data files: ") assert os.path.exists(user_input_train2), "Specified file does not exist at, "+str(user_input_train2) user_input_test1 = input("Enter the path of first test set data files: ") assert os.path.exists(user_input_test1), "Specified file does not exist at, "+str(user_input_test1) user_input_test2 = input("Enter the path of second test set data files: ") assert os.path.exists(user_input_test2), "Specified file does not exist at, "+str(user_input_test2) folderToLoad1 = os.path.join(user_input_train1) folderToLoad2 = os.path.join(user_input_train2) folderToLoad3 = os.path.join(user_input_test1) folderToLoad4 = os.path.join(user_input_test2) #%% start timer start2 = time.time() #%% load data files compute time domain features and combine them with frequency domain features computed in Matlab previously n_feature=14 # import the list including the name of the time series of the chosen case label={} feature_mat={} for i in range(0,4): if i==0: folderToLoad = folderToLoad1 elif i==1: folderToLoad = folderToLoad2 elif i==2: folderToLoad = folderToLoad3 elif i==3: folderToLoad = folderToLoad4 file_name = 'time_series_name_'+stickout_lengths[i]+'inch.txt' # folderToLoad_name = 'folderToLoad'+str(i+1) # exec("folderToLoad = %s" %(folderToLoad_name),globals()) file_path = os.path.join(folderToLoad, file_name) f = open(file_path,'r',newline='\n') #save the time series name into a list namets = [] for line in f: names = line.split("\r\n") namets.append(names[0]) #import the classification labels label_file_name = stickout_lengths[i]+'_inch_Labels_2Class.npy' label_path = os.path.join(folderToLoad, label_file_name) label[i] = np.load(label_path) # load datasets numberofcase = len(namets) feature_mat[i]=np.zeros((numberofcase,10)) for j in range(numberofcase): nameofdata = 'WPT_Level%s_Recon_%sinch_%s' %(str(WPT_Level),stickout_lengths[i],namets[j]) pathofdata = os.path.join(folderToLoad, nameofdata) ts = sio.loadmat(pathofdata) ts= ts["recon"] feature_mat[i][j,0] = np.average(ts) feature_mat[i][j,1] = np.std(ts) feature_mat[i][j,2] = np.sqrt(np.mean(ts**2)) feature_mat[i][j,3] = max(abs(ts)) feature_mat[i][j,4] = skew(ts) L=len(ts) feature_mat[i][j,5] = sum(np.power(ts-feature_mat[i][j,0],4)) / ((L-1)*np.power(feature_mat[i][j,1],4)) feature_mat[i][j,6] = feature_mat[i][j,3]/feature_mat[i][j,2] feature_mat[i][j,7] = feature_mat[i][j,3]/np.power((np.average(np.sqrt(abs(ts)))),2) feature_mat[i][j,8] = feature_mat[i][j,2]/(np.average((abs(ts)))) feature_mat[i][j,9] = feature_mat[i][j,3]/(np.average((abs(ts)))) freq_feature_file_name = 'WPT_Level%d_Freq_Features_%sinch.mat'%(WPT_Level,stickout_lengths[i]) file_path_Ff = os.path.join(folderToLoad, freq_feature_file_name) freq_features = sio.loadmat(file_path_Ff) freq_features = freq_features['Freq_Features'] #concatanate the frequency and time domain features feature_mat[i]=np.concatenate((feature_mat[i], freq_features),axis = 1) #concatanate the frequency and time domain features featuremat_training = np.concatenate((feature_mat[0], feature_mat[1]),axis = 0) featuremat_test = np.concatenate((feature_mat[2], feature_mat[3]),axis = 0) # labels of training set and test sets label_train = np.concatenate((label[0],label[1]),axis=0) label_test = np.concatenate((label[2],label[3]),axis=0) #%% #creating train, test, accuracy, meanscore and deviation matrices accuracy1 = np.zeros((n_feature,10)) accuracy2 = np.zeros((n_feature,10)) deviation1 = np.zeros((n_feature,1)) deviation2 = np.zeros((n_feature,1)) meanscore1 = np.zeros((n_feature,1)) meanscore2 = np.zeros((n_feature,1)) duration1 = np.zeros((n_feature,10)) meanduration = np.zeros((n_feature,1)) #repeat the procedure ten times Rank=[] RankedList=[] for o in range(0,10): #split into test and train set F_Training_Train,F_Training_Test,Label_Training_Train,Label_Training_Test= train_test_split(featuremat_training, label_train, test_size=0.33) F_Test_Train,F_Test_Test,Label_Test_Train,Label_Test_Test= train_test_split(featuremat_test,label_test, test_size=0.70) #classifier if Classifier=='SVC': clf = SVC(kernel='linear') elif Classifier=='LR': clf = LogisticRegression() elif Classifier=='RF': clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0) elif Classifier=='GB': clf = GradientBoostingClassifier() #recursive feature elimination selector = RFE(clf, 1, step=1) Label_train=np.ravel(Label_Training_Train) Label_test =np.ravel(Label_Test_Test) selector = selector.fit(F_Training_Train, Label_train) rank = selector.ranking_ Rank.append(rank) rank = np.asarray(rank) #create a list that contains index number of ranked features rankedlist = np.zeros((14,1)) #finding index of the ranked features and creating new training and test sets with respect to this ranking for m in range (1,15): k=np.where(rank==m) rankedlist[m-1]=k[0][0] F_Training_Train[:,m-1] = F_Training_Train[:,int(rankedlist[m-1][0])] F_Test_Test[:,m-1] = F_Test_Test[:,int(rankedlist[m-1][0])] RankedList.append(rankedlist) #trying various combinations of ranked features such as ([1],[1,2],[1,2,3]...) for p in range(0,14): start1 = time.time() clf.fit(F_Training_Train[:,0:p+1],Label_train) score1=clf.score(F_Test_Test[:,0:p+1],Label_test) score2=clf.score(F_Training_Train[:,0:p+1],Label_train) accuracy1[p,o]=score1 accuracy2[p,o]=score2 end1=time.time() duration1[p,o] = end1 - start1 #computing mean score and deviation for each combination tried above for n in range(0,14): deviation1[n,0]=np.std(accuracy1[n,:]) deviation2[n,0]=np.std(accuracy2[n,:]) meanscore1[n,0]=np.mean(accuracy1[n,:]) meanscore2[n,0]=np.mean(accuracy2[n,:]) meanduration[n,0]=np.mean(duration1[n,:]) results = np.concatenate((meanscore1,deviation1,meanscore2,deviation2),axis=1) results = 100*results #total duration for algorithm end2 = time.time() duration2 = end2-start2 return results,print('Total elapsed time: {}'.format(duration2)) ,featuremat_training, featuremat_test