-
728x90
# 4/14
# Shape before de-duplication (displayed when evaluated as a notebook cell).
train.shape
# drop_duplicates() returns a new DataFrame instead of modifying `train` in
# place, so the result has to be assigned back; without the assignment the
# shape shown below could never differ from the one above.
train = train.drop_duplicates()
# Shape after de-duplication; compare with the value above.
train.shape

# Undersample the target == 0 rows so that target == 1 rows make up roughly
# `desired_apriori` of the remaining 0/1 rows.
desired_apriori = 0.10

# Index labels of the rows belonging to each class.
idx_0 = train[train.target == 0].index
idx_1 = train[train.target == 1].index

# Number of rows per class.
nb_0 = len(train.loc[idx_0])
nb_1 = len(train.loc[idx_1])

# Fraction of target == 0 rows to keep. Solving
#   nb_1 / (nb_1 + rate * nb_0) == desired_apriori
# for `rate` gives the expression below.
undersampling_rate = ((1 - desired_apriori) * nb_1) / (nb_0 * desired_apriori)
undersampled_nb_0 = int(undersampling_rate * nb_0)

# Pick `undersampled_nb_0` of the target == 0 labels with a fixed random_state.
# NOTE(review): `shuffle` is not defined in this snippet - presumably
# sklearn.utils.shuffle; confirm against the notebook's imports.
undersampled_idx = shuffle(idx_0, random_state=37, n_samples=undersampled_nb_0)

# Keep the sampled target == 0 rows plus every target == 1 row, then renumber
# the index from 0.
idx_list = list(undersampled_idx) + list(idx_1)
train = train.loc[idx_list].reset_index(drop=True)

# Three views of the rows whose 'target' equals -1; each assignment rebinds
# `missings`, so only the last value (the count) remains afterwards.
missings = train[train['target'] == -1]  # DataFrame of the matching rows
missings = train[train['target'] == -1]['target']  # Series: the 'target' column of those rows
missings = train[train['target'] == -1]['target'].count()  # number of non-NA values in that Series
dict.items() # groups each entry into its own (key, value) pair
모델 -
df -> array -> kfold