Commit b499dbd5 authored by Ngan Thi Dong

add evaluation code for epmda

parent fd9f6e75
This diff is collapsed.
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
import numpy as np
from sklearn import metrics
import argparse
from utility.utils import *
# Seed NumPy's global random number generator at import time
# (presumably so that repeated runs are reproducible).
np.random.seed(1337)
# Names of the classifiers evaluated on every fold; eval_dir iterates over this
# list and each name is forwarded to clf_eval_fold through its `classifier=` argument.
classifiers = ['MLP', 'linear']
import os
def eval_fold(featue_directory, foldIdx, train_pair_list, train_lbl_list, test_pair_list, test_lbl_list, clf, score_save_path=None, numFeatures=7):
    """Evaluate one classifier on one cross-validation fold.

    The fold's feature array is read from
    ``<featue_directory>feature<foldIdx>.npy``. For every pair in the train and
    test lists, the entry ``features[pair[0]][pair[1]]`` is looked up and its
    first ``numFeatures`` values are kept as that sample's feature vector.

    Args:
        featue_directory: Directory prefix (concatenated as-is, so it must end
            with a path separator) that holds the per-fold ``.npy`` files.
        foldIdx: Fold number used to build the feature file name.
        train_pair_list: Index pairs selecting the training samples.
        train_lbl_list: Labels for the training samples.
        test_pair_list: Index pairs selecting the test samples.
        test_lbl_list: Labels for the test samples.
        clf: Classifier name, forwarded to ``clf_eval_fold`` as ``classifier``.
        score_save_path: Forwarded unchanged to ``clf_eval_fold``.
        numFeatures: How many leading features to keep for each pair.

    Returns:
        ``None`` when the fold's feature file does not exist; otherwise
        whatever ``clf_eval_fold`` returns.
    """
    feature_file = featue_directory + "feature" + str(foldIdx) + ".npy"
    if not os.path.exists(feature_file):
        return None

    features = np.load(feature_file)
    print('feature.shape: ', features.shape)

    def gather(pair_list):
        # One row per pair: the leading `numFeatures` values stored for it.
        return np.array([features[pair[0]][pair[1]][:numFeatures] for pair in pair_list])

    X_train = gather(train_pair_list)
    X_test = gather(test_pair_list)
    return clf_eval_fold(X_train, train_lbl_list, X_test, test_lbl_list, score_save_path, classifier=clf)
def eval_dir(args):
    """Run every classifier on every cross-validation fold and write result CSVs.

    For each name in ``classifiers`` one CSV file is created in the result
    directory with the header ``Fold,AUC,AUPR`` and one row per evaluated fold.
    Fold data comes from ``load_fold_data`` and the per-fold features are read
    by ``eval_fold`` from ``<data_dir><randseed>features5FoldCV/``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``data_dir``, ``fold_dir``, ``randseed``, ``result_dir``,
            ``sim_type``, ``faulty``, ``save_score`` and ``numFeatures``.

    Notes:
        * When ``args.faulty`` is set, the ``epmda`` tag in the output file
          names is replaced by ``_faulty``. Only the file name is changed, so
          a directory that happens to contain ``epmda`` is left alone.
        * A fold whose feature file is missing (``eval_fold`` returns ``None``)
          is reported and skipped instead of aborting the whole run.
    """
    data_dir, fold_dir, ranseed = args.data_dir, args.fold_dir, args.randseed
    result_dir = standardize_dir(args.result_dir)
    feature_dir = data_dir + str(ranseed) + 'features5FoldCV/'
    foldsData = load_fold_data(data_dir, fold_dir, ranseed)

    # Tag embedded in every output file name. str.replace returns a new string,
    # so the previous in-place call on result_dir had no effect; choosing the
    # tag up front makes the faulty/regular naming explicit for both outputs.
    method_tag = '_faulty' if args.faulty else 'epmda'

    writers = dict()
    try:
        for clf in classifiers:
            result_path = result_dir + str(ranseed) + '_' + clf + '_' + method_tag + '_' + args.sim_type + '_balance_results.csv'
            writers[clf] = open(result_path, 'w')
            writers[clf].write('Fold,AUC,AUPR\n')

        # Folds are numbered from 1, matching the feature<foldIdx>.npy file names.
        for foldIdx, (train_pair_list, train_lbl_list, test_pair_list, test_lbl_list, _, _) in enumerate(foldsData, start=1):
            for clf in classifiers:
                # Stays None unless score saving was requested, so it is never
                # post-processed as a string when it is None.
                score_save_path = None
                if args.save_score:
                    score_save_path = standardize_dir(result_dir + str(args.randseed)) + str(foldIdx) + '_' + clf + '_' + method_tag + '_' + args.sim_type + '_balance.csv'

                fold_result = eval_fold(feature_dir, foldIdx, train_pair_list, train_lbl_list, test_pair_list, test_lbl_list, clf,
                                        score_save_path=score_save_path, numFeatures=args.numFeatures)
                if fold_result is None:
                    # eval_fold returns None when the fold's feature file is absent.
                    print('skip fold ' + str(foldIdx) + ' for ' + clf + ': no result (feature file missing?)')
                    continue

                auc, aupr = fold_result
                # NOTE(review): the Fold column is written empty, as before;
                # confirm whether foldIdx was meant to be emitted here.
                writers[clf].write(',' + str(round(auc, 4)) + ',' + str(round(aupr, 4)) + '\n')
    finally:
        # Close every file that was opened, even if an evaluation step raised.
        for writer in writers.values():
            writer.close()
if __name__ == '__main__':
    # Command-line entry point: parse the options, normalise the flag and
    # directory arguments, then run the evaluation over all folds.
    parser = argparse.ArgumentParser(description='EPMDA original evaluation method')
    parser.add_argument('--data_dir', default='/home/dong/Desktop/miRNA-disease/mirna-disease/epmda/data/hmdd2_numpy/', help='dataset directory')
    parser.add_argument('--fold_dir', default='data/hmdd2/folds/', help='directory containing the cross-validation folds')
    parser.add_argument('--result_dir', default='data/hmdd2/results/', help='saved result directory')
    parser.add_argument('--save_score', default=False, help='whether to save the predicted score or not')
    # Left untyped on purpose: the value is embedded via str() in directory and
    # file names, so it is kept exactly as typed on the command line.
    parser.add_argument('--randseed', default=123, help='the random seed')
    # Typed as int because it is used as a slice bound on the feature vectors;
    # without a type a value given on the command line would arrive as str.
    parser.add_argument('--numFeatures', type=int, default=7, help='number of leading features used for each pair')
    parser.add_argument('--numFold', type=int, default=5, help='value of K for K-foldCV, default is 5')
    parser.add_argument('--neg_rate', type=float, default=1.0, help='the negative sampling rate')
    parser.add_argument('--faulty', default=False, help='Faulty calculation or not')
    parser.add_argument('--sim_type', default='gip', help='Type of input similarities')
    args = parser.parse_args()

    # Boolean options are passed as text; only the exact string 'True' enables them.
    args.save_score = str(args.save_score) == 'True'
    args.faulty = str(args.faulty) == 'True'

    args.data_dir = standardize_dir(args.data_dir)
    args.result_dir = standardize_dir(args.result_dir)
    eval_dir(args)
# hmdd2_dir_path = '/home/dong/EPMDA/data/hmdd2_numpy/'
# faulty_hmdd2_dir_path = '/home/dong/EPMDA/data/faultyHmdd2_numpy/'
# hmdd2_fold_path = '/home/dong/mirna-disease/folds_hmdd2/'
# hmdd3_dir_path = '/home/dong/EPMDA/data/hmdd3_numpy/'
# hmdd3_fold_path = '/home/dong/mirna-disease/folds/'
# hmdd2_func_path = '/home/dong/EPMDA/data/hmdd2_func/'
# hmdd2_seq_path = 'epmda/data/hmdd2_seq/'
# eval_dir(hmdd2_dir_path.replace('/home/dong/EPMDA/', './'), hmdd2_fold_path.replace('mirna-disease', 'Desktop/miRNA-disease/mirna-disease'))
# # eval_dir(hmdd2_func_path.replace('/home/dong/EPMDA/', './'), hmdd2_fold_path.replace('mirna-disease', 'Desktop/miRNA-disease/mirna-disease'))
# print('finish hmd2 func')
# # eval_dir(hmdd2_seq_path, hmdd2_fold_path.replace('mirna-disease', 'Desktop/miRNA-disease/mirna-disease'))
# print('finish HMDD2 seq')
# # eval_dir(hmdd3_dir_path.replace('/home/dong/EPMDA/', './'), hmdd3_fold_path.replace('mirna-disease', 'Desktop/miRNA-disease/mirna-disease'))
# print('finish hmdd3')
# # eval_dir(faulty_hmdd2_dir_path.replace('/home/dong/EPMDA/', './'), hmdd2_fold_path.replace('mirna-disease', 'Desktop/miRNA-disease/mirna-disease'))
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment