Commit 0b32da43 authored by Ngan Thi Dong's avatar Ngan Thi Dong
Browse files

refactor and remove redundant

parent fd451819
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from GIP import * from data.preparation.GIP import *
import argparse import argparse
...@@ -70,13 +70,13 @@ def cal_miRNA_func_sim(adj_matrix, disease_semantic_sim_path, disease_semantic_m ...@@ -70,13 +70,13 @@ def cal_miRNA_func_sim(adj_matrix, disease_semantic_sim_path, disease_semantic_m
return disease_sim, miRNA_sim return disease_sim, miRNA_sim
if __name__ == '__main__': # if __name__ == '__main__':
parser = argparse.ArgumentParser() # parser = argparse.ArgumentParser()
parser.add_argument('ADJ', type=str, help="adjacent matrix disease x miRNA format") # parser.add_argument('ADJ', type=str, help="adjacent matrix disease x miRNA format")
parser.add_argument('DISEASE_SEMANTIC_SIM', type=str, help="disease semantic sim path (disease sim2)") # parser.add_argument('DISEASE_SEMANTIC_SIM', type=str, help="disease semantic sim path (disease sim2)")
parser.add_argument('DISEASE_SIM', type=str, help="disease sim path") # parser.add_argument('DISEASE_SIM', type=str, help="disease sim path")
parser.add_argument('NOT_FOUND_LIST', type=str, help="disease semantic sim missing list path") # parser.add_argument('NOT_FOUND_LIST', type=str, help="disease semantic sim missing list path")
#
args = parser.parse_args() # args = parser.parse_args()
#
cal_miRNA_func_sim(args.ADJ, args.DISEASE_SEMANTIC_SIM, args.NOT_FOUND_LIST, args.DISEASE_SIM) # cal_miRNA_func_sim(args.ADJ, args.DISEASE_SEMANTIC_SIM, args.NOT_FOUND_LIST, args.DISEASE_SIM)
...@@ -2,7 +2,7 @@ import torch as t ...@@ -2,7 +2,7 @@ import torch as t
from torch import nn, optim from torch import nn, optim
from utility.utils import * from utility.utils import *
import numpy as np import numpy as np
from nimcgcn.code.model import * from nimcgcn.model import *
from multiprocess import Process, Queue from multiprocess import Process, Queue
import argparse import argparse
......
...@@ -4,8 +4,11 @@ import random ...@@ -4,8 +4,11 @@ import random
import os import os
import numpy as np import numpy as np
from GIP import * from utility.utils import *
from miRNA_sim import * from data.preparation.GIP import *
from data.preparation.miRNA_sim import *
import argparse
def adjMatrix2list(csv_path): def adjMatrix2list(csv_path):
vals = pd.read_csv(csv_path).values vals = pd.read_csv(csv_path).values
...@@ -29,7 +32,8 @@ def assocList2adjMat(pos_assoc, n_miRNA, n_disease): ...@@ -29,7 +32,8 @@ def assocList2adjMat(pos_assoc, n_miRNA, n_disease):
def gen_fold(data_dir, save_dir, numFold=5, negative_rate=1.0, randomseed=123): def gen_fold(data_dir, save_dir, numFold=5, negative_rate=1.0, randomseed=123):
# assoc_list, neglist data_dir = standardize_dir(data_dir)
save_dir = standardize_dir(save_dir)
adj_path = data_dir + 'm-d.csv' adj_path = data_dir + 'm-d.csv'
onto_disease_sim_path = data_dir + 'disease_sim.csv' onto_disease_sim_path = data_dir + 'disease_sim.csv'
onto_disease_sim_path2 = data_dir + 'disease_sim2.csv' onto_disease_sim_path2 = data_dir + 'disease_sim2.csv'
...@@ -147,11 +151,22 @@ def negative_sampling(pos_samples, n_miRNA, n_disease, negative_rate): ...@@ -147,11 +151,22 @@ def negative_sampling(pos_samples, n_miRNA, n_disease, negative_rate):
return np.asarray(new_pair_list), np.asarray(labels) return np.asarray(new_pair_list), np.asarray(labels)
# random_seeds=[123,456,789,101,112] # random_seeds=[123,456,789,101,112]
random_seeds = [123] # random_seeds = [123]
hmdd2_dir = '../hmdd2/' # hmdd2_dir = 'data/hmdd2/'
hmdd3_dir = '../hmdd3/' # hmdd3_dir = 'data/hmdd3/'
hmdd2_savedir = hmdd2_dir + 'folds/' # hmdd2_savedir = hmdd2_dir + 'folds/'
hmdd3_savedir = hmdd3_dir + 'folds/' # hmdd3_savedir = hmdd3_dir + 'folds/'
for randseed in random_seeds: # for randseed in random_seeds:
gen_fold(hmdd2_dir, hmdd2_savedir, randomseed=randseed) # gen_fold(hmdd2_dir, hmdd2_savedir, randomseed=randseed)
# gen_fold(hmdd3_dir, hmdd3_savedir, randomseed=randseed) # gen_fold(hmdd3_dir, hmdd3_savedir, randomseed=randseed)
# Command-line entry point: parse the options below and hand them to gen_fold.
# Numeric options carry an explicit ``type=`` so values supplied on the command
# line arrive as int/float rather than as strings (argparse leaves them as str
# when no type is given; only the defaults were numeric before).
parser = argparse.ArgumentParser(description='Generate K-fold cross-validation splits for miRNA-disease association data')
parser.add_argument('--data_dir', default='/home/dong/simplifying_mirna_disease/hmdd2/', help='dataset directory')
parser.add_argument('--save_dir', default='/home/dong/simplifying_mirna_disease/hmdd2/folds/', help='directory where the generated folds are saved')
parser.add_argument('--randseed', type=int, default=456, help='the random seed')
parser.add_argument('--numFold', type=int, default=5, help='value of K for K-foldCV, default is 5')
parser.add_argument('--neg_rate', type=float, default=1.0, help='the negative sampling rate')
args = parser.parse_args()
gen_fold(args.data_dir, args.save_dir, args.numFold, args.neg_rate, args.randseed)
Copyright (C) 2019 Jin Li(lijin@ynu.edu.cn)
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, see http://www.gnu.org/licenses/.
Jin Li(lijin@ynu.edu.cn) School of Software, Yunnan University Kunming CHINA, 650000
NIMCGCN
NIMCGCN: Neural inductive matrix completion with graph convolutional networks for miRNA-disease association prediction (Bioinformatics).
Requirements
Pytorch (tested on version 1.1.1)
numpy (tested on version 1.16.2)
sklearn (tested on version 0.20.3)
Quick start
To reproduce our results:
Unzip data.zip in ./data.
Run main.py to run NIMCGCN.
Data description
d-d.csv: disease-disease similarity matrix.
m-m.csv: miRNA-miRNA similarity matrix.
disease name.csv: list of disease names.
miRNA name.csv: list of miRNA names
m-d.csv: miRNA-disease association matrix
# NIMCGCN
Copyright (C) 2019 Jin Li(lijin@ynu.edu.cn)
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, see http://www.gnu.org/licenses/.
Jin Li(lijin@ynu.edu.cn) School of Software, Yunnan University, Kunming CHINA, 650000
NIMCGCN
NIMCGCN: Neural inductive matrix completion with graph convolutional networks for miRNA-disease association prediction (Bioinformatics).
Requirements
Pytorch (tested on version 1.1.1)
numpy (tested on version 1.16.2)
sklearn (tested on version 0.20.3)
Quick start
To reproduce our results:
Unzip data.zip in ./data.
Run main.py to run NIMCGCN.
Data description
d-d.csv: disease-disease similarity matrix.
m-m.csv: miRNA-miRNA similarity matrix.
disease name.csv: list of disease names.
miRNA name.csv: list of miRNA names
m-d.csv: miRNA-disease association matrix
Citation information :
Jin Li, Sai Zhang, Tao Liu, Chenxi Ning, Zhuoxuan Zhang, Wei Zhou, Neural Inductive Matrix Completion with Graph Convolutional Networks for miRNA-disease Association Prediction, Bioinformatics, btz965, https://doi.org/10.1093/bioinformatics/btz965
# Convert the two Python literals stored on the first two lines of result.txt
# into two one-column CSV files (fpr_model.csv and tpr_model.csv), writing one
# element per row.
import csv
from ast import literal_eval as make_list

# Read inside a context manager so the input handle is always closed
# (the original never closed it).
with open("F:/GCN-IMC-MD/code/result.txt", "r") as a:
    strstr = a.readlines()

# literal_eval only accepts Python literals, so the file content is not executed.
str1 = make_list(strstr[0])
str2 = make_list(strstr[1])

# newline='' lets the csv module control line endings itself.
with open('fpr_model.csv', 'w', newline='') as out1:
    csv_write1 = csv.writer(out1, dialect="excel")
    for i in str1:
        csv_write1.writerow([str(i)])

with open('tpr_model.csv', 'w', newline='') as out2:
    csv_write2 = csv.writer(out2, dialect="excel")
    for i in str2:
        csv_write2.writerow([str(i)])
\ No newline at end of file
import torch as t
from torch import nn, optim
from utility.utils import *
from data.hmdd2.miRNA_sim import *
import numpy as np
from nimcgcn.code.model import *
from multiprocess import Process, Queue
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(1337)
class Sizes(object):
    """Bundle of dimension settings that is passed to the model constructors.

    Attributes:
        m: the ``n_miRNA`` value given to the constructor.
        d: the ``n_disease`` value given to the constructor.
        fg, fd: both fixed at 256.
        k: fixed at 32.
    """

    def __init__(self, n_miRNA, n_disease):
        # Caller-supplied counts.
        self.m, self.d = n_miRNA, n_disease
        # Hard-coded settings.
        self.fg = self.fd = 256
        self.k = 32
def get_edge_index(matrix):
    """Return the coordinates of all non-zero entries of a 2-D matrix.

    Args:
        matrix: 2-D array-like (e.g. a NumPy array).

    Returns:
        A 64-bit integer tensor of shape ``(2, E)``; row 0 holds the row
        indices and row 1 the column indices of every entry that is ``!= 0``,
        listed in row-major order. ``E`` is 0 when the matrix is all zeros.
    """
    # np.nonzero walks the array in row-major order, i.e. the same order the
    # nested Python loops produced, but does the scan in native code.
    rows, cols = np.nonzero(np.asarray(matrix))
    return t.as_tensor(np.stack((rows, cols)), dtype=t.long)
class Myloss(nn.Module):
    """Weighted sum of element-wise squared errors over two index sets.

    The entries selected by ``one_index`` are weighted by ``1 - alpha`` and
    the entries selected by ``zero_index`` by ``alpha``.
    """

    def __init__(self, alpha=0.4):
        super().__init__()
        self.alpha = alpha

    def forward(self, one_index, zero_index, target, input):
        # Unreduced squared error, one value per matrix entry.
        squared_err = nn.functional.mse_loss(input, target, reduction='none')
        one_term = squared_err[one_index].sum()
        zero_term = squared_err[zero_index].sum()
        return (1 - self.alpha) * one_term + self.alpha * zero_term
def eval_fold(foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl, sizes, epochs, disease_sim, miRNA_sim,
              auc_queue,
              auprc_queue, method):
    """Train the model selected by ``method`` on one fold and report its test scores.

    The training matrix is built from the training pairs with
    ``assoc_list_to_adj``; the model is optimised with Adam (lr=0.001) and
    ``Myloss``; afterwards every test pair is scored and the two values
    returned by ``get_score`` are pushed onto the queues.

    Args:
        foldIdx: fold label, used only in the log output.
        n_miRNA, n_disease: counts forwarded to ``assoc_list_to_adj``.
        train_pair_list, train_pair_lbl: training pairs and their labels,
            forwarded to ``assoc_list_to_adj``.
        test_pair_list: test pairs; each pair indexes the score matrix as
            ``score[pair[0], pair[1]]``.
        test_pair_lbl: test labels, passed to ``get_score``.
        sizes: settings object passed to the model constructor.
        epochs: the training loop runs ``epochs + 1`` iterations.
        disease_sim, miRNA_sim: 2-D similarity matrices used as model input
            and as the source of the edge indices.
        auc_queue, auprc_queue: queues that receive the two scores.
        method: ``'nimgcn_variance1'``/``'2'``/``'3'`` select
            ``SimpleModel1``/``2``/``3``; any other value selects ``Model``.
    """
    print('Starting fold: ', foldIdx)
    adj, zero_index, one_index = assoc_list_to_adj(n_disease, n_miRNA, train_pair_list, train_pair_lbl)

    # Model inputs: each similarity matrix together with its non-zero coordinates.
    dataset = {
        'dd': {'data': t.FloatTensor(disease_sim), 'edge_index': get_edge_index(disease_sim)},
        'mm': {'data': t.FloatTensor(miRNA_sim), 'edge_index': get_edge_index(miRNA_sim)},
    }

    if method == 'nimgcn_variance1':
        model = SimpleModel1(sizes)
    elif method == 'nimgcn_variance2':
        model = SimpleModel2(sizes)
    elif method == 'nimgcn_variance3':
        model = SimpleModel3(sizes)
    else:
        model = Model(sizes)

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    model.train()
    regression_crit = Myloss()

    # Loop invariants, built once instead of on every epoch.
    adj_tensor = t.FloatTensor(adj)
    n_indexed = len(one_index[0]) + len(zero_index[0])

    # NOTE: range(0, epochs + 1) performs epochs + 1 updates (kept as in the original).
    for epoch in range(0, epochs + 1):
        model.zero_grad()
        score = model([dataset['dd'], dataset['mm']])
        loss = regression_crit(one_index, zero_index, adj_tensor, score)
        loss.backward()
        optimizer.step()
        print('Epoch: ', epoch, ' loss: ', loss.item()/n_indexed)

    # eval
    model.eval()
    test_score = model([dataset['dd'], dataset['mm']]).detach().numpy()
    test_pred_lbl = [test_score[pair[0], pair[1]] for pair in test_pair_list]
    auc_score, auprc_score = get_score(test_pair_lbl, test_pred_lbl)
    print('foldIdx: ', foldIdx, 'auc_score: ', auc_score, 'auprc_score: ', auprc_score)
    auc_queue.put(auc_score)
    auprc_queue.put(auprc_score)
# evaluate nimcgcn model
def eval_nimcgcn(adj_path='./m-d.csv', disease_semantic_sim_path='./disease_sim2.csv', negrate=1.0, epochs=300, method='nimcgcn'):
    """Run a 5-fold evaluation with one process per fold and write a CSV summary.

    Similarities are computed once from the full association matrix via
    ``cal_sim`` and shared by every fold. Each fold is trained in its own
    process by ``eval_fold``; the per-fold scores are collected through two
    queues, averaged, printed, and written to
    ``<method>Fault_<basename of disease_semantic_sim_path>`` in the current
    working directory.

    Args:
        adj_path: path passed to ``load_data``.
        disease_semantic_sim_path: path passed to ``cal_sim``; its base name
            also becomes part of the output file name.
        negrate: forwarded to ``randomSplitTrainTestArray``.
        epochs: forwarded to ``eval_fold``.
        method: model selector forwarded to ``eval_fold``; also the prefix of
            the output file name.
    """
    import os.path  # local import: `os` is not among this module's visible imports

    n_miRNA, n_disease, one_edge_list, zero_edge_list = load_data(adj_path)
    adj, _, _ = assoc_list_to_adj(n_disease, n_miRNA, one_edge_list, [1] * len(one_edge_list))
    # The original comment notes that adj has miRNA as rows, hence the transpose.
    disease_sim, miRNA_sim = cal_sim(adj.T, disease_semantic_sim_path=disease_semantic_sim_path)
    sizes = Sizes(n_miRNA, n_disease)

    numFold = 5
    # The literal 5 is presumably the fold count expected by the splitter - confirm.
    folds_data = randomSplitTrainTestArray(one_edge_list, zero_edge_list, n_disease, n_miRNA, 5, negrate)
    auc_queue = Queue(numFold)
    aupr_queue = Queue(numFold)
    processList = list()
    for foldIdx, (train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl) in enumerate(folds_data, start=1):
        process = Process(target=eval_fold, args=(foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl,
                                                  sizes, epochs, disease_sim, miRNA_sim, auc_queue, aupr_queue, method))
        processList.append(process)
        process.start()

    # Drain the queues BEFORE joining: a child that still has queued items
    # buffered may not terminate until they are consumed, so join-then-get
    # can deadlock.
    auc_list = [auc_queue.get() for _ in processList]
    auprc_list = [aupr_queue.get() for _ in processList]
    for process in processList:
        process.join()

    avg_auc = sum(auc_list)/len(auc_list)
    avg_auprc = sum(auprc_list)/len(auprc_list)
    print('NIMCGCN average performance: auc:', avg_auc, 'auprc: ', avg_auprc)

    # basename() equals the old `path[2:]` for './name.csv' inputs and also
    # yields a valid file name for paths such as '../../data/x.csv'.
    out_name = method + 'Fault_' + os.path.basename(disease_semantic_sim_path)
    with open(out_name, 'w') as f:
        f.write('Fold,auc,auprc\n')
        # The Fold column stays empty: results arrive in completion order.
        for auc, auprc in zip(auc_list, auprc_list):
            f.write(',' + str(auc) + ',' + str(auprc) + '\n')
        f.write('Average,' + str(avg_auc) + ',' + str(avg_auprc) + '\n')
# Driver: evaluate every listed variant with the default similarity file and
# with disease_sim.csv. Guarded so that child processes which re-import this
# module (spawn start method) do not launch the experiments again.
if __name__ == '__main__':
    methods = ['nimgcn_variance1', 'nimgcn_variance2', 'nimgcn_variance3']  # 'nimgcn'
    for method in methods:
        eval_nimcgcn(method=method)
        eval_nimcgcn(disease_semantic_sim_path='../../../../data/hmdd2/disease_sim.csv', method=method)
# faulty way of calculation NIMCGCN average performance: auc: 0.9103700006895264 auprc: 0.23458280617241423
import torch as t
from torch import nn, optim
from data.preparation.miRNA_sim import *
import numpy as np
from nimcgcn.code.model import *
from multiprocess import Process, Queue
import pandas as pd
import os
import os.path as path
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(1337)
# Seeds iterated over by eval_nimcgcn; fold data is loaded once per seed.
random_seeds = [123, 456, 789, 101, 112]
# Use the CUDA device when one is available, otherwise the CPU.
sysdevice = t.device('cuda' if t.cuda.is_available() else 'cpu')
class Sizes(object):
    """Bundle of dimension settings that is passed to the model constructors.

    Attributes:
        m: the ``n_miRNA`` value given to the constructor.
        d: the ``n_disease`` value given to the constructor.
        fg, fd: both fixed at 256.
        k: fixed at 32.
    """

    def __init__(self, n_miRNA, n_disease):
        # Caller-supplied counts.
        self.m, self.d = n_miRNA, n_disease
        # Hard-coded settings.
        self.fg = self.fd = 256
        self.k = 32
def get_edge_index(matrix):
    """Return the coordinates of all non-zero entries of a 2-D matrix.

    Args:
        matrix: 2-D array-like (e.g. a NumPy array).

    Returns:
        A 64-bit integer tensor of shape ``(2, E)``; row 0 holds the row
        indices and row 1 the column indices of every entry that is ``!= 0``,
        listed in row-major order. The tensor is moved to the GPU when CUDA
        is available.
    """
    # np.nonzero walks the array in row-major order, i.e. the same order the
    # nested Python loops produced, but does the scan in native code.
    rows, cols = np.nonzero(np.asarray(matrix))
    rtensor = t.as_tensor(np.stack((rows, cols)), dtype=t.long)
    if t.cuda.is_available():
        rtensor = rtensor.cuda()
    return rtensor
class Myloss(nn.Module):
    """Weighted sum of element-wise squared errors over two index sets.

    The entries selected by ``one_index`` are weighted by ``1 - alpha`` and
    the entries selected by ``zero_index`` by ``alpha``.
    """

    def __init__(self, alpha=0.4):
        super().__init__()
        self.alpha = alpha

    def forward(self, one_index, zero_index, target, input):
        # Unreduced squared error, one value per matrix entry.
        squared_err = nn.functional.mse_loss(input, target, reduction='none')
        one_term = squared_err[one_index].sum()
        zero_term = squared_err[zero_index].sum()
        return (1 - self.alpha) * one_term + self.alpha * zero_term
def eval_fold(foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl, sizes, epochs,
              auc_queue,
              auprc_queue, method, save_result_path, disease_sim, miRNA_sim):
    """Train the model selected by ``method`` on one fold, save and score its predictions.

    The training matrix is built from the training pairs with
    ``assoc_list_to_adj``; the model is optimised with Adam (lr=0.001) and
    ``Myloss`` on ``sysdevice``. Afterwards every test pair is scored, the
    (prediction, label) rows are written to ``save_result_path`` as CSV, and
    the two values returned by ``get_score`` are pushed onto the queues.

    Args:
        foldIdx: fold label, used only in the log output.
        n_miRNA, n_disease: counts forwarded to ``assoc_list_to_adj``.
        train_pair_list, train_pair_lbl: training pairs and their labels,
            forwarded to ``assoc_list_to_adj``.
        test_pair_list: test pairs; each pair indexes the score matrix as
            ``score[pair[0], pair[1]]``.
        test_pair_lbl: test labels, saved next to the predictions and passed
            to ``get_score``.
        sizes: settings object passed to the model constructor.
        epochs: the training loop runs ``epochs + 1`` iterations.
        auc_queue, auprc_queue: queues that receive the two scores.
        method: ``'nimgcn_variance1'``/``'2'``/``'3'`` select
            ``SimpleModel1``/``2``/``3``; any other value selects ``Model``.
        save_result_path: CSV file receiving one ``prediction,label`` row per
            test pair (no header, no index).
        disease_sim, miRNA_sim: 2-D similarity matrices used as model input
            and as the source of the edge indices.
    """
    print('Starting fold: ', foldIdx)
    adj, zero_index, one_index = assoc_list_to_adj(n_disease, n_miRNA, train_pair_list, train_pair_lbl)

    # Model inputs on the active device. `.to(sysdevice)` is a no-op on CPU and
    # equals the former `.cuda()` calls when CUDA is available.
    dataset = {
        'dd': {'data': t.FloatTensor(disease_sim).to(sysdevice), 'edge_index': get_edge_index(disease_sim)},
        'mm': {'data': t.FloatTensor(miRNA_sim).to(sysdevice), 'edge_index': get_edge_index(miRNA_sim)},
    }

    if method == 'nimgcn_variance1':
        model = SimpleModel1(sizes)
    elif method == 'nimgcn_variance2':
        model = SimpleModel2(sizes)
    elif method == 'nimgcn_variance3':
        model = SimpleModel3(sizes)
    else:
        model = Model(sizes)

    model = model.to(sysdevice)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    regression_crit = Myloss().to(sysdevice)
    model.train()

    # Loop invariants: build (and transfer) the target once, not once per epoch.
    adj_tensor = t.FloatTensor(adj).to(sysdevice)
    n_indexed = len(one_index[0]) + len(zero_index[0])

    # NOTE: range(0, epochs + 1) performs epochs + 1 updates (kept as in the original).
    for epoch in range(0, epochs + 1):
        model.zero_grad()
        score = model(input=[dataset['dd'], dataset['mm']], device=sysdevice)
        loss = regression_crit(one_index, zero_index, adj_tensor, score)
        loss.backward()
        optimizer.step()
        # .item() copies the scalar to the host on both CPU and GPU.
        print('Epoch: ', epoch, ' loss: ', loss.item()/n_indexed)

    # eval
    model.eval()
    # .cpu() is a no-op for CPU tensors, so one expression serves both devices.
    test_score = model([dataset['dd'], dataset['mm']], sysdevice).detach().cpu().numpy()
    test_pred_lbl = [test_score[pair[0], pair[1]] for pair in test_pair_list]

    # save the predicted scores
    join_list = [[pred, target] for pred, target in zip(test_pred_lbl, test_pair_lbl)]
    df = pd.DataFrame(np.array(join_list))
    df.to_csv(save_result_path, header=False, index=False)

    auc_score, auprc_score = get_score(test_pair_lbl, test_pred_lbl)
    print('foldIdx: ', foldIdx, 'auc_score: ', auc_score, 'auprc_score: ', auprc_score)
    auc_queue.put(auc_score)
    auprc_queue.put(auprc_score)
# evaluate nimcgcn model
def eval_nimcgcn(adj_path='/home/dong/mirna-disease/hmdd3/hmdd3assoc.csv', disease_semantic_sim_path='/home/dong/mirna-disease/hmdd3/hmdd3_disease_sim2.csv', epochs=3000, method='nimgcn'):
    """Evaluate one model variant over every seed in ``random_seeds`` and log the scores.

    For each seed the pre-generated folds are loaded with ``load_fold_data2``
    and each fold is handed to ``eval_fold``: in-process when CUDA is
    available, otherwise in one child process per fold. Per-seed scores are
    appended to a file inside that seed's result directory and also written
    to one summary file covering all seeds.

    Args:
        adj_path: path passed to ``load_data``.
        disease_semantic_sim_path: path passed to ``cal_sim`` and
            ``load_fold_data2``; its base name is part of every output name.
        epochs: forwarded to ``eval_fold``.
        method: model selector forwarded to ``eval_fold``; also part of every
            output file name.
    """
    result_root = '/home/dong/mirna-disease/results_hmdd3_fault/'
    folds_data_dir = '/home/dong/mirna-disease/folds/'
    sim_name = disease_semantic_sim_path[disease_semantic_sim_path.rfind('/') + 1:]
    all_result_path = result_root + 'hmdd3_' + method + '_' + sim_name

    # load_data was previously called twice with the same argument; once is enough.
    n_miRNA, n_disease, one_edge_list, zero_edge_list = load_data(adj_path)
    sizes = Sizes(n_miRNA, n_disease)
    adj, _, _ = assoc_list_to_adj(n_disease, n_miRNA, one_edge_list, [1] * len(one_edge_list))
    disease_sim, miRNA_sim = cal_sim(adj.T, disease_semantic_sim_path=disease_semantic_sim_path)

    # The result directory must exist BEFORE the summary file inside it is
    # opened (the original opened the file first).
    os.makedirs(result_root, exist_ok=True)

    numFold = 5
    with open(all_result_path, 'w') as writer:
        for randseed in random_seeds:
            folds_data = load_fold_data2(folds_data_dir, randseed, disease_semantic_sim_path)
            save_dir = result_root + str(randseed) + '/'
            os.makedirs(save_dir, exist_ok=True)

            foldIdx = 0
            auc_queue = Queue(numFold)
            aupr_queue = Queue(numFold)
            processList = list()
            # NOTE(review): the per-fold similarity matrices yielded by the
            # loader are not used; the matrices computed from the full
            # adjacency above are passed instead - presumably intentional for
            # this "fault" experiment; confirm.
            for train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl, disease_sim_correct, miRNA_sim_correct in folds_data:
                foldIdx += 1
                save_result_path = save_dir + str(foldIdx) + '_hmdd3_' + method + '_' + sim_name
                fold_args = (foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl,
                             sizes, epochs, auc_queue, aupr_queue, method, save_result_path, disease_sim, miRNA_sim)
                if t.cuda.is_available():
                    # With a GPU the folds run one after another in this process.
                    eval_fold(*fold_args)
                else:
                    process = Process(target=eval_fold, args=fold_args)
                    processList.append(process)
                    process.start()

            # Drain the queues BEFORE joining: a child that still has queued
            # items buffered may not terminate until they are consumed.
            auc_list = [auc_queue.get() for _ in range(foldIdx)]
            auprc_list = [aupr_queue.get() for _ in range(foldIdx)]
            for process in processList:
                process.join()

            avg_auc = sum(auc_list)/len(auc_list)
            avg_auprc = sum(auprc_list)/len(auprc_list)
            print('NIMCGCN average performance: auc:', avg_auc, 'auprc: ', avg_auprc)

            # Same text goes to the per-seed file and to the overall summary.
            # The Fold column stays empty: results arrive in completion order.
            report = 'Fold,auc,auprc\n'
            report += ''.join(',' + str(auc) + ',' + str(auprc) + '\n' for auc, auprc in zip(auc_list, auprc_list))
            report += 'Average,' + str(avg_auc) + ',' + str(avg_auprc) + '\n'

            save_score_path = save_dir + str(randseed) + '_hmdd3_' + method + '_' + sim_name
            with open(save_score_path, 'a+') as f:
                f.write(report)
            writer.write(report)
# Driver: evaluate every listed variant with the default similarity file and
# with disease_sim.csv. Guarded so that child processes which re-import this
# module (spawn start method) do not launch the experiments again.
if __name__ == '__main__':
    methods = ['nimgcn_variance1', 'nimgcn_variance2', 'nimgcn_variance3']  # 'nimgcn'
    for method in methods:
        eval_nimcgcn(method=method)
        eval_nimcgcn(disease_semantic_sim_path='../../../../data/hmdd2/disease_sim.csv', method=method)
# faulty way of calculation NIMCGCN average performance: auc: 0.9103700006895264 auprc: 0.23458280617241423
import torch as t
from torch import nn, optim
from utility.utils import *
from data.hmdd2.miRNA_sim import *
import numpy as np
from nimcgcn.code.model2 import *
from multiprocess import Process, Queue
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(1337)
class Sizes(object):
    """Bundle of dimension settings.

    Attributes:
        m: the ``n_miRNA`` value given to the constructor.
        d: the ``n_disease`` value given to the constructor.
        fg, fd: both fixed at 256.
        k: fixed at 32.
    """

    def __init__(self, n_miRNA, n_disease):
        # Caller-supplied counts.
        self.m, self.d = n_miRNA, n_disease
        # Hard-coded settings.
        self.fg = self.fd = 256
        self.k = 32
def get_edge_index(matrix):
    """Return the coordinates of all non-zero entries of a 2-D matrix.

    Args:
        matrix: 2-D array-like (e.g. a NumPy array).

    Returns:
        A 64-bit integer tensor of shape ``(2, E)``; row 0 holds the row
        indices and row 1 the column indices of every entry that is ``!= 0``,
        listed in row-major order. ``E`` is 0 when the matrix is all zeros.
    """
    # np.nonzero walks the array in row-major order, i.e. the same order the
    # nested Python loops produced, but does the scan in native code.
    rows, cols = np.nonzero(np.asarray(matrix))
    return t.as_tensor(np.stack((rows, cols)), dtype=t.long)
class Myloss(nn.Module):
    """Weighted sum of element-wise squared errors over two index sets.

    The entries selected by ``one_index`` are weighted by ``1 - alpha`` and
    the entries selected by ``zero_index`` by ``alpha``.
    """

    def __init__(self, alpha=0.4):
        super().__init__()
        self.alpha = alpha

    def forward(self, one_index, zero_index, target, input):
        # Unreduced squared error, one value per matrix entry.
        squared_err = nn.functional.mse_loss(input, target, reduction='none')
        one_term = squared_err[one_index].sum()
        zero_term = squared_err[zero_index].sum()
        return (1 - self.alpha) * one_term + self.alpha * zero_term
def eval_fold(foldIdx, disease_sim, miRNA_sim, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl, sizes, epochs, disease_semantic_sim_path,
auc_queue,
auprc_queue):
print('Starting fold: ', foldIdx)
# as
# disease
dataset = dict()
dd_edge_index = get_edge_index(disease_sim)
dataset['dd'] = {'data': t.FloatTensor(disease_sim), 'edge_index': dd_edge_index}
mm_edge_index = get_edge_index(miRNA_sim)
dataset['mm'] = {'data': t.FloatTensor(miRNA_sim), 'edge_index': mm_edge_index}