Commit f906f1d6 authored by Ngan Thi Dong

Renaming, add code to generate fold data

parent ef908513
import pandas as pd
import numpy as np
import numpy.linalg as LA
def _gip_kernel(profiles, sq_norm):
    """Return the Gaussian interaction profile kernel between the rows of *profiles*."""
    n_profiles = profiles.shape[0]
    # Bandwidth: number of profiles divided by the squared Frobenius norm of
    # the association matrix.  With an all-zero matrix every pairwise distance
    # is zero, so any finite bandwidth gives a kernel of ones; use 0.0 rather
    # than dividing by zero and producing NaNs.
    gamma = n_profiles / sq_norm if sq_norm > 0 else 0.0
    gram = np.dot(profiles, profiles.T)
    sq_len = np.diag(gram)
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b, for every pair at once.
    sq_dist = sq_len[:, None] + sq_len[None, :] - 2.0 * gram
    return np.exp(-gamma * sq_dist)


def calculate_gip(matrix):
    """
    Calculate the Gaussian interaction profile (GIP) kernel for the rows and
    for the columns of an association matrix.

    When the rows of the matrix are miRNAs and the columns are diseases, the
    first returned kernel is the miRNA GIP and the second is the disease GIP.

    :param matrix: 2-D numpy array (or array-like) of associations
    :return: tuple (row kernel of shape (n_rows, n_rows),
             column kernel of shape (n_columns, n_columns)), both numpy arrays
    """
    A = np.asarray(matrix, dtype=float)
    sq_norm = LA.norm(A, 'fro') ** 2
    KM = _gip_kernel(A, sq_norm)
    KD = _gip_kernel(A.T, sq_norm)
    return KM, KD
from epmda.GIP import gen_gip
import argparse
import os
from utility import Paths
def apply_to_hmdd3():
    """Run gen_gip on the HMDD3 files whose locations are stored in Paths.hmdd3."""
    locations = Paths.hmdd3
    # gen_gip takes the miRNA output path before the disease output path.
    gen_gip(
        locations['adj'],
        locations['gip_out'],
        locations['gip_mirna'],
        locations['gip_disease'],
    )
if __name__ == '__main__':
    # Command-line entry point: four positional paths, forwarded to gen_gip.
    cli = argparse.ArgumentParser()
    positional_arguments = (
        ("ADJ", "adjacent matrix path"),
        ("OUT", "output path"),
        ("DISEASE_OUT", "disease output path"),
        ("MIRNA_OUT", "miRNA output path"),
    )
    for arg_name, arg_help in positional_arguments:
        cli.add_argument(arg_name, type=str, help=arg_help)
    parsed = cli.parse_args()
    # Note the order: gen_gip receives the miRNA path before the disease path.
    gen_gip(parsed.ADJ, parsed.OUT, parsed.MIRNA_OUT, parsed.DISEASE_OUT)
\ No newline at end of file
import pandas as pd
import random
import os
import numpy as np
from GIP import *
from miRNA_sim import *
def adjMatrix2list(csv_path):
    """
    Read an adjacency matrix from a CSV file and list its 1-cells and 0-cells.

    The file is read with pandas' default settings, so its first line is
    consumed as the header and is not part of the matrix.

    :param csv_path: path of the CSV file holding the adjacency matrix
    :return: (pos_assoc, neg_assoc, n_miRNA, n_disease) where pos_assoc and
             neg_assoc are lists of (row, column) tuples of the cells equal to
             1.0 and 0.0, n_miRNA is the row count and n_disease the column count
    """
    adj = pd.read_csv(csv_path).values
    n_miRNA, n_disease = adj.shape[0], adj.shape[1]

    def _cells_equal_to(value):
        # (row, column) coordinates, in row-major order, of cells equal to value
        row_idx, col_idx = np.nonzero(adj == value)
        return list(zip(row_idx, col_idx))

    # known positive associations
    pos_assoc = _cells_equal_to(1.0)
    # every zero cell goes to the negative list
    neg_assoc = _cells_equal_to(0.0)
    return pos_assoc, neg_assoc, n_miRNA, n_disease
def assocList2adjMat(pos_assoc, n_miRNA, n_disease):
    """
    Build a dense adjacency matrix from a list of positive associations.

    :param pos_assoc: iterable of (miRNA index, disease index) pairs
    :param n_miRNA: number of rows of the matrix
    :param n_disease: number of columns of the matrix
    :return: numpy array of shape (n_miRNA, n_disease) holding 1.0 at every
             listed pair and 0.0 elsewhere
    """
    mat = np.zeros((n_miRNA, n_disease))
    for pair in pos_assoc:
        # Index the matrix with the pair's two coordinates (the pair itself
        # must not be indexed with a tuple, which raises TypeError).
        mat[pair[0], pair[1]] = 1.0
    return mat
def gen_fold(data_dir, save_dir, numFold=5, negative_rate=1.0, randomseed=123):
    """
    Split the known associations into cross-validation folds and write the
    per-fold data to CSV files.

    For every fold the held-out positives are removed from the adjacency
    matrix, the miRNA functional similarities and the GIP kernels are
    recomputed from that training-only matrix, and the train/test pair lists
    with their labels are saved.  Every output file is named
    ``<save_dir>/<randomseed>_<fold index>_<suffix>``.

    :param data_dir: directory containing 'm-d.csv', 'disease_sim.csv',
        'disease_sim2.csv' and 'disease_not_found_list.csv'
    :param save_dir: directory receiving the fold files (created if missing)
    :param numFold: number of folds
    :param negative_rate: forwarded to negative_sampling for the training set
    :param randomseed: seed for the fold shuffle and for negative sampling
    """
    adj_path = os.path.join(data_dir, 'm-d.csv')
    disease_sim_path = os.path.join(data_dir, 'disease_sim.csv')
    disease_sim_path2 = os.path.join(data_dir, 'disease_sim2.csv')
    disease_not_found_path = os.path.join(data_dir, 'disease_not_found_list.csv')
    mirna_func_path = 'mirna_func.csv'
    mirna_func_path2 = 'mirna_func2.csv'
    mirna_gip_path = 'mirna_gip.csv'
    disease_gip_path = 'disease_gip.csv'

    pos_assoc, neg_assoc, n_miRNA, n_disease = adjMatrix2list(adj_path)
    print('len(pos_assoc:', len(pos_assoc), 'len(neg_assoc):', len(neg_assoc))
    N = len(pos_assoc)
    randomIndex = list(range(N))
    print('Number of known association: ', N)
    random.seed(randomseed)
    # negative_sampling draws from NumPy's global generator, so it has to be
    # seeded as well for the folds to be reproducible from randomseed.
    np.random.seed(randomseed)
    random.shuffle(randomIndex)
    neglist_lbl = [0] * len(neg_assoc)

    # make the necessary folder (and any missing parents) if it does not exist
    os.makedirs(save_dir, exist_ok=True)

    # Slice boundaries of the shuffled index; the last fold runs to the end so
    # that no association is dropped when N is not a multiple of numFold.
    fold_bounds = [int(N / numFold * k) for k in range(numFold)] + [N]

    for i in range(numFold):
        # A set gives O(1) membership tests while partitioning the positives.
        test_index = set(randomIndex[fold_bounds[i]:fold_bounds[i + 1]])
        pos_train_pair_list = [edge for idx, edge in enumerate(pos_assoc) if idx not in test_index]
        pos_test_pair_list = [edge for idx, edge in enumerate(pos_assoc) if idx in test_index]
        test_lbl_list = [1] * len(pos_test_pair_list)

        # the saving prefix: randomseed_foldIdx_
        saving_prefix = os.path.join(save_dir, str(randomseed) + '_' + str(i) + '_')

        # generate the similarities from the training associations only
        train_adj = assocList2adjMat(pos_train_pair_list, n_miRNA, n_disease)

        # NOTE(review): train_adj is miRNA x disease here; confirm that this is
        # the orientation cal_miRNA_func_sim expects.
        train_miRNA_func_sim = cal_miRNA_func_sim(train_adj, disease_sim_path, disease_not_found_path, disease_sim_path)
        save2File(train_miRNA_func_sim, saving_prefix + mirna_func_path)

        train_miRNA_func_sim2 = cal_miRNA_func_sim(train_adj, disease_sim_path2, disease_not_found_path, disease_sim_path)
        save2File(train_miRNA_func_sim2, saving_prefix + mirna_func_path2)

        train_miRNA_gip, train_disease_gip = calculate_gip(train_adj)
        save2File(train_miRNA_gip, saving_prefix + mirna_gip_path)
        save2File(train_disease_gip, saving_prefix + disease_gip_path)

        # Training set: training positives plus sampled negatives.
        # NOTE(review): only the training positives are passed as exclusions,
        # so a held-out positive can be sampled as a training negative.
        train_pair_list, train_pair_lbl = negative_sampling(pos_train_pair_list, n_miRNA, n_disease, negative_rate)

        # Test set: held-out positives followed by every zero cell of the matrix.
        test_pair_list = pos_test_pair_list + list(neg_assoc)
        test_pair_lbl = test_lbl_list + neglist_lbl

        save2File(train_pair_list, saving_prefix + 'train.csv')
        save2File(train_pair_lbl, saving_prefix + 'train_lbl.csv')
        save2File(test_pair_list, saving_prefix + 'test.csv')
        save2File(test_pair_lbl, saving_prefix + 'test_lbl.csv')
def save2File(input, out_path):
    """
    Write *input* to *out_path* as a CSV file without header or index column.

    :param input: a list (converted to a numpy array first) or any other
        object accepted by the pandas DataFrame constructor
    :param out_path: destination file path
    """
    if type(input) is list:
        table = pd.DataFrame(np.array(input))
    else:
        table = pd.DataFrame(input)
    table.to_csv(out_path, header=False, index=False)
def negative_sampling(pos_samples, n_miRNA, n_disease, negative_rate):
    """
    Append negative (miRNA, disease) pairs to a list of positive pairs.

    :param pos_samples: sequence of positive (miRNA index, disease index)
        pairs; it is not modified
    :param n_miRNA: number of miRNAs (exclusive upper bound of the first index)
    :param n_disease: number of diseases (exclusive upper bound of the second index)
    :param negative_rate: if > 0, ``int(len(pos_samples) * negative_rate)``
        distinct negatives are drawn at random with numpy's global generator
        (fewer only when not that many non-positive pairs exist); otherwise
        every pair that is not in pos_samples is used as a negative
    :return: (pairs, labels) as numpy arrays; the positives come first with
        label 1, followed by the negatives with label 0
    """
    size_of_batch = len(pos_samples)
    print(size_of_batch)

    # Normalise the positives to tuples of plain ints so that membership tests
    # work whether the caller passed tuples, lists or numpy integers.
    pos_set = {(int(pair[0]), int(pair[1])) for pair in pos_samples}
    neg_samples = list()

    if negative_rate > 0:
        num_to_generate = int(size_of_batch * negative_rate)
        in_range_pos = sum(1 for m, d in pos_set if 0 <= m < n_miRNA and 0 <= d < n_disease)
        available = n_miRNA * n_disease - in_range_pos
        target = min(num_to_generate, available)

        if target >= available:
            # Every non-positive pair is needed: enumerate instead of sampling.
            neg_samples = [[i, j] for i in range(n_miRNA) for j in range(n_disease)
                           if (i, j) not in pos_set]
        else:
            drawn = set()
            # Draw in slightly oversized batches until enough distinct
            # negatives are collected; duplicates and positives are rejected.
            while len(neg_samples) < target:
                batch = max(int((target - len(neg_samples)) * 1.2), 1)
                values1 = np.random.randint(n_miRNA, size=batch).tolist()
                values2 = np.random.randint(n_disease, size=batch).tolist()
                for pair in zip(values1, values2):
                    if pair in drawn or pair in pos_set:
                        continue
                    drawn.add(pair)
                    neg_samples.append(list(pair))
                    if len(neg_samples) == target:
                        break
    else:
        neg_samples = [[i, j] for i in range(n_miRNA) for j in range(n_disease)
                       if (i, j) not in pos_set]

    labels = [1] * size_of_batch + [0] * len(neg_samples)
    # Build a new list so the caller's pos_samples is left untouched.
    new_pair_list = list(pos_samples) + neg_samples
    print('len new_pair_list: ', len(new_pair_list))
    return np.asarray(new_pair_list), np.asarray(labels)
# Script section: generates the fold files for both datasets when this module
# is executed (or imported, since there is no __main__ guard).
# NOTE(review): random_seeds is defined but never used below; both gen_fold
# calls run with the default randomseed only - confirm whether folds for every
# listed seed were intended.
random_seeds=[123,456,789,101,112]
# Dataset directories, relative to the current working directory.
hmdd2_dir = '../hmdd2/'
hmdd3_dir = '../hmdd3/'
# Fold files are written to a 'folds/' subdirectory of each dataset directory.
hmdd2_savedir = hmdd2_dir + 'folds/'
hmdd3_savedir = hmdd3_dir + 'folds/'
gen_fold(hmdd2_dir, hmdd2_savedir)
gen_fold(hmdd3_dir, hmdd3_savedir)
\ No newline at end of file
......@@ -34,7 +34,7 @@ def get_dgroup_dist(disease_sim, group1, group2):
# The adj matrix is in the form of disease x miRNA
def cal_sim(adj_matrix, disease_semantic_sim_path, disease_semantic_missing_list, ori_disease_sim_path):
def cal_miRNA_func_sim(adj_matrix, disease_semantic_sim_path, disease_semantic_missing_list, ori_disease_sim_path):
GD, GM = calculate_gip(adj_matrix)
disease_sim = pd.read_csv(disease_semantic_sim_path, header=None).values
ori_disease_sim = pd.read_csv(ori_disease_sim_path, header=None).values
......@@ -79,4 +79,4 @@ if __name__ == '__main__':
args = parser.parse_args()
cal_sim(args.ADJ, args.DISEASE_SEMANTIC_SIM, args.NOT_FOUND_LIST, args.DISEASE_SIM)
cal_miRNA_func_sim(args.ADJ, args.DISEASE_SEMANTIC_SIM, args.NOT_FOUND_LIST, args.DISEASE_SIM)
import torch as t
from torch import nn, optim
from utility.utils import *
from data.hmdd2.miRNA_sim import *
import numpy as np
from nimcgcn.code.model2 import *
from multiprocess import Process, Queue
import pandas as pd
import os
from os import path
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(1337)
# Seeds iterated by eval_nimcgcn; fold data is loaded once per seed.
random_seeds = [123, 456, 789, 101, 112]
# torch device: CUDA when available, otherwise the CPU.
sysdevice = t.device('cuda' if t.cuda.is_available() else 'cpu')
class Sizes(object):
    """Bundle of dimension settings passed to the model constructor."""

    def __init__(self, n_miRNA, n_disease):
        # Entity counts supplied by the caller.
        self.m, self.d = n_miRNA, n_disease
        # Fixed sizes; presumably hidden/feature dimensions of the model -
        # TODO confirm against the model definition.
        self.fg = self.fd = 256
        self.k = 32
def get_edge_index(matrix):
    """
    Return the coordinates of the non-zero cells of *matrix* as a 2 x E
    LongTensor (first row: row indices, second row: column indices), in
    row-major order.  The tensor is moved to the GPU when CUDA is available.
    """
    nonzero_cells = [
        (row, col)
        for row in range(matrix.shape[0])
        for col in range(matrix.shape[1])
        if matrix[row][col] != 0
    ]
    sources = [row for row, _ in nonzero_cells]
    targets = [col for _, col in nonzero_cells]
    edge_tensor = t.LongTensor([sources, targets])
    return edge_tensor.cuda() if t.cuda.is_available() else edge_tensor
def eval_fold(foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl, sizes, epochs,
              auc_queue,
              auprc_queue, method, score_save_path, disease_sim, miRNA_sim):
    """
    Train a LinearLogistic model on one fold, score the test pairs, save the
    scores and report AUC / AUPRC through the two queues.

    :param foldIdx: fold number, used only in the progress messages
    :param n_miRNA: not used by this function
    :param n_disease: not used by this function
    :param train_pair_list: training pairs, converted to a LongTensor
    :param train_pair_lbl: training labels, converted to a float column vector
    :param test_pair_list: test pairs, converted to a LongTensor
    :param test_pair_lbl: test labels, passed to get_score and saved next to
        the predicted scores
    :param sizes: object passed to the LinearLogistic constructor
    :param epochs: the training loop runs ``epochs + 1`` full-batch steps
    :param auc_queue: queue receiving the fold's AUC
    :param auprc_queue: queue receiving the fold's AUPRC
    :param method: not used by this function (the model is always LinearLogistic)
    :param score_save_path: CSV path for the (score, label) rows
    :param disease_sim: disease similarity matrix, converted to a FloatTensor
    :param miRNA_sim: miRNA similarity matrix, converted to a FloatTensor
    """
    print('Starting fold: ', foldIdx)
    device = t.device('cuda' if t.cuda.is_available() else 'cpu')

    dataset = dict()
    dataset['dd'] = {'data': t.FloatTensor(disease_sim).to(device)}
    dataset['mm'] = {'data': t.FloatTensor(miRNA_sim).to(device)}
    model_input = [dataset['dd'], dataset['mm']]

    model = LinearLogistic(sizes).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    regression_crit = nn.BCEWithLogitsLoss().to(device)

    # The training data does not change between epochs: build the tensors (and
    # copy them to the device) once instead of on every iteration.
    train_tensor = t.LongTensor(train_pair_list).to(device)
    train_target = t.FloatTensor(train_pair_lbl).view(-1, 1).to(device)

    model.train()
    for epoch in range(0, epochs + 1):
        model.zero_grad()
        score = model(input=model_input, pairs=train_tensor)
        loss = regression_crit(score, train_target)
        loss.backward()
        optimizer.step()

    # eval
    model.eval()
    test_tensor = t.LongTensor(test_pair_list).to(device)
    with t.no_grad():  # no gradients are needed for scoring
        test_score = model(input=model_input, pairs=test_tensor).cpu().numpy()
    test_pred_lbl = test_score.tolist()

    # The loss above requires the model output to have the same shape as the
    # (N, 1) target, so tolist() yields one-element lists.  Flatten the scores
    # for the saved table so that each row is a plain "score,label" pair.
    flat_scores = test_score.reshape(-1).tolist()
    df = pd.DataFrame(list(zip(flat_scores, test_pair_lbl)))
    df.to_csv(score_save_path, header=False, index=False)  # save the predicted scores

    auc_score, auprc_score = get_score(test_pair_lbl, test_pred_lbl)
    print('foldIdx: ', foldIdx, 'auc_score: ', auc_score, 'auprc_score: ', auprc_score)
    auc_queue.put(auc_score)
    auprc_queue.put(auprc_score)
# evaluate nimcgcn model
def eval_nimcgcn(adj_path='/home/dong/mirna-disease/m-d.csv', disease_semantic_sim_path = '/home/dong/mirna-disease/disease_sim2.csv', epochs=3000, method='nimgcn'):
    """
    Run eval_fold on every fold of every seed in random_seeds and write the
    AUC / AUPRC summaries.

    One summary file is written for the whole run and one more per seed; the
    per-fold score files are written by eval_fold.  Folds are evaluated
    one after another when CUDA is available and in parallel processes otherwise.

    :param adj_path: not used by this function
    :param disease_semantic_sim_path: passed to load_fold_data2; its file name
        is also appended to every result file name
    :param epochs: forwarded to eval_fold
    :param method: forwarded to eval_fold and used in the result file names
    """
    results_root = '/home/dong/mirna-disease/results_hmdd2'
    sim_file_name = disease_semantic_sim_path[disease_semantic_sim_path.rfind('/') + 1:]
    result_suffix = '_hmdd2_' + method + '_' + sim_file_name
    all_result_path = results_root + '/hmdd2_' + method + '_' + sim_file_name
    folds_data_dir = '/home/dong/mirna-disease/folds_hmdd2/'
    numFold = 5
    use_cuda = t.cuda.is_available()

    # The results directory must exist before the summary file is opened in it.
    os.makedirs(results_root, exist_ok=True)
    with open(all_result_path, 'w') as writer:
        for randseed in random_seeds:
            folds_data = load_fold_data2(folds_data_dir, randseed, disease_semantic_sim_path)
            save_dir = results_root + '/' + str(randseed) + '/'
            os.makedirs(save_dir, exist_ok=True)

            foldIdx = 0
            auc_queue = Queue(numFold)
            aupr_queue = Queue(numFold)
            processList = list()
            for train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl, disease_sim, miRNA_sim in folds_data:
                n_miRNA = miRNA_sim.shape[0]
                n_disease = disease_sim.shape[0]
                sizes = Sizes(n_miRNA, n_disease)
                foldIdx += 1
                save_result_path = save_dir + str(foldIdx) + result_suffix
                fold_args = (foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl,
                             sizes, epochs, auc_queue, aupr_queue, method, save_result_path, disease_sim, miRNA_sim)
                if use_cuda:
                    eval_fold(*fold_args)
                else:
                    process = Process(target=eval_fold, args=fold_args)
                    processList.append(process)
                    process.start()
            # Empty on the CUDA path, where the folds already ran in this process.
            for process in processList:
                process.join()

            # NOTE: in the multi-process case results arrive in completion
            # order, so a row cannot be attributed to a particular fold.
            auc_list = [auc_queue.get() for i in range(numFold)]
            auprc_list = [aupr_queue.get() for i in range(numFold)]
            avg_auc = sum(auc_list) / len(auc_list)
            avg_auprc = sum(auprc_list) / len(auprc_list)
            print('NIMCGCN average performance: auc:', avg_auc, 'auprc: ', avg_auprc)

            summary_lines = ['Fold,auc,auprc\n']
            for i in range(numFold):
                summary_lines.append(',' + str(auc_list[i]) + ',' + str(auprc_list[i]) + '\n')
            summary_lines.append('Average,' + str(avg_auc) + ',' + str(avg_auprc) + '\n')

            save_score_path = save_dir + str(randseed) + result_suffix
            with open(save_score_path, 'a+') as f:
                f.writelines(summary_lines)
            writer.writelines(summary_lines)
# Script section: runs the evaluation for every listed method, once with the
# default disease similarity file of eval_nimcgcn and once with disease_sim.csv.
# It executes on import as well, since there is no __main__ guard.
methods = ['logistic']#, 'nimgcn_variance1', 'nimgcn_variance2','nimgcn_variance3'] # 'nimgcn', 'nimgcn_variance1', 'nimgcn_variance2',
for method in methods:
eval_nimcgcn(method=method)
eval_nimcgcn(disease_semantic_sim_path='/home/dong/mirna-disease/disease_sim.csv', method=method)
# correct way of calculation: AUC: 0.8688, AUPRC: 0.1667
import torch as t
from torch import nn, optim
from utility.utils import *
from data.hmdd2.miRNA_sim import *
import numpy as np
from nimcgcn.code.model2 import *
from multiprocess import Process, Queue
import pandas as pd
import os
from os import path
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(1337)
# Seeds iterated by eval_nimcgcn; fold data is loaded once per seed.
random_seeds = [123, 456, 789, 101, 112]
# torch device: CUDA when available, otherwise the CPU.
sysdevice = t.device('cuda' if t.cuda.is_available() else 'cpu')
class Sizes(object):
    """Bundle of dimension settings passed to the model constructor."""

    def __init__(self, n_miRNA, n_disease):
        # Entity counts supplied by the caller.
        self.m, self.d = n_miRNA, n_disease
        # Fixed sizes; presumably hidden/feature dimensions of the model -
        # TODO confirm against the model definition.
        self.fg = self.fd = 256
        self.k = 32
def get_edge_index(matrix):
    """
    Return the coordinates of the non-zero cells of *matrix* as a 2 x E
    LongTensor (first row: row indices, second row: column indices), in
    row-major order.  The tensor is moved to the GPU when CUDA is available.
    """
    nonzero_cells = [
        (row, col)
        for row in range(matrix.shape[0])
        for col in range(matrix.shape[1])
        if matrix[row][col] != 0
    ]
    sources = [row for row, _ in nonzero_cells]
    targets = [col for _, col in nonzero_cells]
    edge_tensor = t.LongTensor([sources, targets])
    return edge_tensor.cuda() if t.cuda.is_available() else edge_tensor
def eval_fold(foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl, sizes, epochs,
              auc_queue,
              auprc_queue, method, score_save_path, disease_sim, miRNA_sim):
    """
    Train a LinearLogistic model on one fold, score the test pairs, save the
    scores and report AUC / AUPRC through the two queues.

    :param foldIdx: fold number, used only in the progress messages
    :param n_miRNA: not used by this function
    :param n_disease: not used by this function
    :param train_pair_list: training pairs, converted to a LongTensor
    :param train_pair_lbl: training labels, converted to a float column vector
    :param test_pair_list: test pairs, converted to a LongTensor
    :param test_pair_lbl: test labels, passed to get_score and saved next to
        the predicted scores
    :param sizes: object passed to the LinearLogistic constructor
    :param epochs: the training loop runs ``epochs + 1`` full-batch steps
    :param auc_queue: queue receiving the fold's AUC
    :param auprc_queue: queue receiving the fold's AUPRC
    :param method: not used by this function (the model is always LinearLogistic)
    :param score_save_path: CSV path for the (score, label) rows
    :param disease_sim: disease similarity matrix, converted to a FloatTensor
    :param miRNA_sim: miRNA similarity matrix, converted to a FloatTensor
    """
    print('Starting fold: ', foldIdx)
    device = t.device('cuda' if t.cuda.is_available() else 'cpu')

    dataset = dict()
    dataset['dd'] = {'data': t.FloatTensor(disease_sim).to(device)}
    dataset['mm'] = {'data': t.FloatTensor(miRNA_sim).to(device)}
    model_input = [dataset['dd'], dataset['mm']]

    model = LinearLogistic(sizes).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    regression_crit = nn.BCEWithLogitsLoss().to(device)

    # The training data does not change between epochs: build the tensors (and
    # copy them to the device) once instead of on every iteration.
    train_tensor = t.LongTensor(train_pair_list).to(device)
    train_target = t.FloatTensor(train_pair_lbl).view(-1, 1).to(device)

    model.train()
    for epoch in range(0, epochs + 1):
        model.zero_grad()
        score = model(input=model_input, pairs=train_tensor)
        loss = regression_crit(score, train_target)
        loss.backward()
        optimizer.step()

    # eval
    model.eval()
    test_tensor = t.LongTensor(test_pair_list).to(device)
    with t.no_grad():  # no gradients are needed for scoring
        test_score = model(input=model_input, pairs=test_tensor).cpu().numpy()
    test_pred_lbl = test_score.tolist()

    # The loss above requires the model output to have the same shape as the
    # (N, 1) target, so tolist() yields one-element lists.  Flatten the scores
    # for the saved table so that each row is a plain "score,label" pair.
    flat_scores = test_score.reshape(-1).tolist()
    df = pd.DataFrame(list(zip(flat_scores, test_pair_lbl)))
    df.to_csv(score_save_path, header=False, index=False)  # save the predicted scores

    auc_score, auprc_score = get_score(test_pair_lbl, test_pred_lbl)
    print('foldIdx: ', foldIdx, 'auc_score: ', auc_score, 'auprc_score: ', auprc_score)
    auc_queue.put(auc_score)
    auprc_queue.put(auprc_score)
# evaluate nimcgcn model
def eval_nimcgcn(adj_path='/home/dong/mirna-disease/m-d.csv', disease_semantic_sim_path = '/home/dong/mirna-disease/disease_sim2.csv', epochs=3000, method='nimgcn'):
    """
    Run eval_fold on every fold of every seed in random_seeds, using
    similarity matrices computed once from the complete adjacency matrix, and
    write the AUC / AUPRC summaries.

    One summary file is written for the whole run and one more per seed; the
    per-fold score files are written by eval_fold.  Folds are evaluated
    one after another when CUDA is available and in parallel processes otherwise.

    :param adj_path: adjacency matrix file read through load_data
    :param disease_semantic_sim_path: passed to cal_sim and load_fold_data2;
        its file name is also appended to every result file name
    :param epochs: forwarded to eval_fold
    :param method: forwarded to eval_fold and used in the result file names
    """
    results_root = '/home/dong/mirna-disease/results_hmdd2_fault'
    sim_file_name = disease_semantic_sim_path[disease_semantic_sim_path.rfind('/') + 1:]
    result_suffix = '_hmdd2_' + method + '_' + sim_file_name
    all_result_path = results_root + '/hmdd2_' + method + '_' + sim_file_name
    folds_data_dir = '/home/dong/mirna-disease/folds_hmdd2/'
    numFold = 5
    use_cuda = t.cuda.is_available()

    # The adjacency data is loaded once and shared by every seed and fold.
    n_miRNA, n_disease, one_edge_list, zero_edge_list = load_data(adj_path)
    sizes = Sizes(n_miRNA, n_disease)
    adj, zero_index, one_index = assoc_list_to_adj(n_disease, n_miRNA, one_edge_list, [1] * len(one_edge_list))
    # Similarities come from ALL known associations, test pairs included.
    # NOTE(review): the 'results_hmdd2_fault' output directory suggests this
    # is a deliberate comparison baseline - confirm.
    disease_sim, miRNA_sim = cal_sim(adj.T, disease_semantic_sim_path=disease_semantic_sim_path)

    # The results directory must exist before the summary file is opened in it.
    os.makedirs(results_root, exist_ok=True)
    with open(all_result_path, 'w') as writer:
        for randseed in random_seeds:
            folds_data = load_fold_data2(folds_data_dir, randseed, disease_semantic_sim_path)
            save_dir = results_root + '/' + str(randseed) + '/'
            os.makedirs(save_dir, exist_ok=True)

            foldIdx = 0
            auc_queue = Queue(numFold)
            aupr_queue = Queue(numFold)
            processList = list()
            # The per-fold similarity matrices stored with the fold data are
            # unpacked but not used; the full-data matrices are passed instead.
            for train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl, disease_sim_correct, miRNA_sim_correct in folds_data:
                foldIdx += 1
                save_result_path = save_dir + str(foldIdx) + result_suffix
                fold_args = (foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl,
                             sizes, epochs, auc_queue, aupr_queue, method, save_result_path, disease_sim, miRNA_sim)
                if use_cuda:
                    eval_fold(*fold_args)
                else:
                    process = Process(target=eval_fold, args=fold_args)
                    processList.append(process)
                    process.start()
            # Empty on the CUDA path, where the folds already ran in this process.
            for process in processList:
                process.join()

            # NOTE: in the multi-process case results arrive in completion
            # order, so a row cannot be attributed to a particular fold.
            auc_list = [auc_queue.get() for i in range(numFold)]
            auprc_list = [aupr_queue.get() for i in range(numFold)]
            avg_auc = sum(auc_list) / len(auc_list)
            avg_auprc = sum(auprc_list) / len(auprc_list)
            print('NIMCGCN average performance: auc:', avg_auc, 'auprc: ', avg_auprc)

            summary_lines = ['Fold,auc,auprc\n']
            for i in range(numFold):
                summary_lines.append(',' + str(auc_list[i]) + ',' + str(auprc_list[i]) + '\n')
            summary_lines.append('Average,' + str(avg_auc) + ',' + str(avg_auprc) + '\n')

            save_score_path = save_dir + str(randseed) + result_suffix
            with open(save_score_path, 'a+') as f:
                f.writelines(summary_lines)
            writer.writelines(summary_lines)
# Script section: runs the evaluation for every listed method, once with the
# default disease similarity file of eval_nimcgcn and once with disease_sim.csv.
# It executes on import as well, since there is no __main__ guard.
methods = ['logistic']#, 'nimgcn_variance1', 'nimgcn_variance2','nimgcn_variance3'] # 'nimgcn', 'nimgcn_variance1', 'nimgcn_variance2',
for method in methods:
eval_nimcgcn(method=method)
eval_nimcgcn(disease_semantic_sim_path='/home/dong/mirna-disease/disease_sim.csv', method=method)
# correct way of calculation: AUC: 0.8688, AUPRC: 0.1667
import torch as t
from torch import optim
from utility.utils import *
from data.hmdd2.miRNA_sim import *
import numpy as np
from nimcgcn.code.model2 import *
from multiprocess import Process, Queue
import pandas as pd
import os
from os import path
from nimcgcn.code.loss import *
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(1337)
# List of seeds; presumably iterated by the evaluation routine further down
# in this file (not visible here) - TODO confirm.
random_seeds = [123, 456, 789, 101, 112]
# torch device: CUDA when available, otherwise the CPU.
sysdevice = t.device('cuda' if t.cuda.is_available() else 'cpu')
class Sizes(object):
    """Bundle of dimension settings passed to the model constructor."""

    def __init__(self, n_miRNA, n_disease):
        # Entity counts supplied by the caller.
        self.m, self.d = n_miRNA, n_disease
        # Fixed sizes; presumably hidden/feature dimensions of the model -
        # TODO confirm against the model definition.
        self.fg = self.fd = 256
        self.k = 32
def get_edge_index(matrix):
    """
    Return the coordinates of the non-zero cells of *matrix* as a 2 x E
    LongTensor (first row: row indices, second row: column indices), in
    row-major order.  The tensor is moved to the GPU when CUDA is available.
    """
    nonzero_cells = [
        (row, col)
        for row in range(matrix.shape[0])
        for col in range(matrix.shape[1])
        if matrix[row][col] != 0
    ]
    sources = [row for row, _ in nonzero_cells]
    targets = [col for _, col in nonzero_cells]
    edge_tensor = t.LongTensor([sources, targets])
    return edge_tensor.cuda() if t.cuda.is_available() else edge_tensor
def eval_fold(foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl, sizes, epochs,
auc_queue,
auprc_queue, method, score_save_path, disease_sim, miRNA_sim):
print('Starting fold: ', foldIdx)
dataset = dict()