Commit 0b32da43 authored by Ngan Thi Dong's avatar Ngan Thi Dong

refactor and remove redundant

parent fd451819
import pandas as pd
import numpy as np
from GIP import *
from data.preparation.GIP import *
import argparse
......@@ -70,13 +70,13 @@ def cal_miRNA_func_sim(adj_matrix, disease_semantic_sim_path, disease_semantic_m
return disease_sim, miRNA_sim
if __name__ == '__main__':
    # Script entry point: read four positional file paths from the command
    # line and forward them to cal_miRNA_func_sim.
    cli = argparse.ArgumentParser()
    positional_args = (
        ('ADJ', "adjacent matrix disease x miRNA format"),
        ('DISEASE_SEMANTIC_SIM', "disease semantic sim path (disease sim2)"),
        ('DISEASE_SIM', "disease sim path"),
        ('NOT_FOUND_LIST', "disease semantic sim missing list path"),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, type=str, help=arg_help)

    opts = cli.parse_args()

    # NOTE: NOT_FOUND_LIST is passed before DISEASE_SIM, i.e. not in the order
    # in which the two are declared on the command line.
    cal_miRNA_func_sim(opts.ADJ, opts.DISEASE_SEMANTIC_SIM, opts.NOT_FOUND_LIST, opts.DISEASE_SIM)
# if __name__ == '__main__':
# parser = argparse.ArgumentParser()
# parser.add_argument('ADJ', type=str, help="adjacent matrix disease x miRNA format")
# parser.add_argument('DISEASE_SEMANTIC_SIM', type=str, help="disease semantic sim path (disease sim2)")
# parser.add_argument('DISEASE_SIM', type=str, help="disease sim path")
# parser.add_argument('NOT_FOUND_LIST', type=str, help="disease semantic sim missing list path")
#
# args = parser.parse_args()
#
# cal_miRNA_func_sim(args.ADJ, args.DISEASE_SEMANTIC_SIM, args.NOT_FOUND_LIST, args.DISEASE_SIM)
......@@ -2,7 +2,7 @@ import torch as t
from torch import nn, optim
from utility.utils import *
import numpy as np
from nimcgcn.code.model import *
from nimcgcn.model import *
from multiprocess import Process, Queue
import argparse
......
......@@ -4,8 +4,11 @@ import random
import os
import numpy as np
from GIP import *
from miRNA_sim import *
from utility.utils import *
from data.preparation.GIP import *
from data.preparation.miRNA_sim import *
import argparse
def adjMatrix2list(csv_path):
vals = pd.read_csv(csv_path).values
......@@ -29,7 +32,8 @@ def assocList2adjMat(pos_assoc, n_miRNA, n_disease):
def gen_fold(data_dir, save_dir, numFold=5, negative_rate=1.0, randomseed=123):
# assoc_list, neglist
data_dir = standardize_dir(data_dir)
save_dir = standardize_dir(save_dir)
adj_path = data_dir + 'm-d.csv'
onto_disease_sim_path = data_dir + 'disease_sim.csv'
onto_disease_sim_path2 = data_dir + 'disease_sim2.csv'
......@@ -147,11 +151,22 @@ def negative_sampling(pos_samples, n_miRNA, n_disease, negative_rate):
return np.asarray(new_pair_list), np.asarray(labels)
# random_seeds=[123,456,789,101,112]
# Hard-coded driver: call gen_fold on the HMDD2 directory once per seed.
# The HMDD3 paths are defined but not used by the loop below.
hmdd2_dir, hmdd3_dir = '../hmdd2/', '../hmdd3/'
hmdd2_savedir = hmdd2_dir + 'folds/'
hmdd3_savedir = hmdd3_dir + 'folds/'
random_seeds = [123]
for randseed in random_seeds:
    gen_fold(hmdd2_dir, hmdd2_savedir, randomseed=randseed)
# random_seeds = [123]
# hmdd2_dir = 'data/hmdd2/'
# hmdd3_dir = 'data/hmdd3/'
# hmdd2_savedir = hmdd2_dir + 'folds/'
# hmdd3_savedir = hmdd3_dir + 'folds/'
# for randseed in random_seeds:
# gen_fold(hmdd2_dir, hmdd2_savedir, randomseed=randseed)
# gen_fold(hmdd3_dir, hmdd3_savedir, randomseed=randseed)
# Command-line driver: generate the folds for one dataset directory.
# NOTE(review): the description string mentions "virus-host PPI", which looks
# like a leftover from another project - confirm before changing it.
parser = argparse.ArgumentParser(description='Neural based matrix completion for virus-host PPI')
parser.add_argument('--data_dir', default='/home/dong/simplifying_mirna_disease/hmdd2/', help='dataset directory')
parser.add_argument('--save_dir', default='/home/dong/simplifying_mirna_disease/hmdd2/folds/', help='directory where the generated folds are saved')
# Explicit types: without them any value given on the command line reaches
# gen_fold as a string, while the defaults are numbers.
parser.add_argument('--randseed', default=456, type=int, help='the random seed')
parser.add_argument('--numFold', default=5, type=int, help='value of K for K-foldCV, default is 5')
parser.add_argument('--neg_rate', default=1.0, type=float, help='the negative sampling rate')
args = parser.parse_args()
gen_fold(args.data_dir, args.save_dir, args.numFold, args.neg_rate, args.randseed)
Copyright (C) 2019 Jin Li(lijin@ynu.edu.cn)
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, see http://www.gnu.org/licenses/.
Jin Li(lijin@ynu.edu.cn) School of Software, Yunnan University Kunming CHINA, 650000
NIMCGCN
NIMCGCN: Neural inductive matrix completion with graph convolutional networks for miRNA-disease association prediction (Bioinformatics).
Requirements
Pytorch (tested on version 1.1.1)
numpy (tested on version 1.16.2)
sklearn (tested on version 0.20.3)
Quick start
To reproduce our results:
Unzip data.zip in ./data.
Run main.py to run NIMCGCN.
Data description
d-d.csv: disease-disease similarity matrix.
m-m.csv: miRNA-miRNA similarity matrix.
disease name.csv: list of disease names.
miRNA name.csv: list of miRNA names
m-d.csv: miRNA-disease association matrix
# NIMCGCN
Copyright (C) 2019 Jin Li(lijin@ynu.edu.cn)
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program; if not, see http://www.gnu.org/licenses/.
Jin Li(lijin@ynu.edu.cn) School of Software, Yunnan University, Kunming CHINA, 650000
NIMCGCN
NIMCGCN: Neural inductive matrix completion with graph convolutional networks for miRNA-disease association prediction (Bioinformatics).
Requirements
Pytorch (tested on version 1.1.1)
numpy (tested on version 1.16.2)
sklearn (tested on version 0.20.3)
Quick start
To reproduce our results:
Unzip data.zip in ./data.
Run main.py to run NIMCGCN.
Data description
d-d.csv: disease-disease similarity matrix.
m-m.csv: miRNA-miRNA similarity matrix.
disease name.csv: list of disease names.
miRNA name.csv: list of miRNA names
m-d.csv: miRNA-disease association matrix
Citation information :
Jin Li, Sai Zhang, Tao Liu, Chenxi Ning, Zhuoxuan Zhang, Wei Zhou, Neural Inductive Matrix Completion with Graph Convolutional Networks for miRNA-disease Association Prediction, Bioinformatics, btz965, https://doi.org/10.1093/bioinformatics/btz965
import csv
from ast import literal_eval as make_list
# Convert the first two lines of result.txt (each a Python list literal) into
# two single-column CSV files: line 1 -> fpr_model.csv, line 2 -> tpr_model.csv.
# Context managers make sure every file handle is closed, including the input
# file, which was previously never closed.
with open("F:/GCN-IMC-MD/code/result.txt", "r") as a:
    strstr = a.readlines()

# literal_eval only accepts Python literals, so the file content is not executed.
str1 = make_list(strstr[0])
str2 = make_list(strstr[1])

with open('fpr_model.csv', 'w', newline='') as out1, \
        open('tpr_model.csv', 'w', newline='') as out2:
    csv_write1 = csv.writer(out1, dialect="excel")
    csv_write2 = csv.writer(out2, dialect="excel")
    # One value per row.
    for i in str1:
        csv_write1.writerow([str(i)])
    for i in str2:
        csv_write2.writerow([str(i)])
\ No newline at end of file
import torch as t
from torch import nn, optim
from utility.utils import *
from data.hmdd2.miRNA_sim import *
import numpy as np
from nimcgcn.code.model import *
from multiprocess import Process, Queue
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(1337)
class Sizes(object):
    """Plain container for the dimensions handed to the model constructors.

    Attributes set by the constructor:
        m:  number of miRNAs (``n_miRNA``).
        d:  number of diseases (``n_disease``).
        fg, fd: fixed at 256.
        k:  fixed at 32.
    """

    def __init__(self, n_miRNA, n_disease):
        self.m, self.d = n_miRNA, n_disease
        # presumably layer/feature widths read by the model classes - TODO confirm
        self.fg = self.fd = 256
        self.k = 32
def get_edge_index(matrix):
    """Return the coordinates of every non-zero entry of a 2-D matrix.

    Args:
        matrix: 2-D array-like (anything ``np.asarray`` accepts).

    Returns:
        A ``LongTensor`` of shape ``(2, n_nonzero)``: row 0 holds the row
        indices and row 1 the column indices, in row-major order (the same
        order a nested ``for i: for j:`` scan produces).
    """
    # np.nonzero scans in C instead of a Python-level loop over every cell.
    rows, cols = np.nonzero(np.asarray(matrix))
    return t.from_numpy(np.vstack((rows, cols))).long()
class Myloss(nn.Module):
    """Weighted sum of squared errors over two groups of matrix entries.

    Entries selected by ``one_index`` are weighted by ``1 - alpha`` and
    entries selected by ``zero_index`` by ``alpha``.
    """

    def __init__(self, alpha=0.4):
        super().__init__()
        self.alpha = alpha

    def forward(self, one_index, zero_index, target, input):
        """Return the scalar weighted loss.

        Args:
            one_index: index object selecting the first group of entries.
            zero_index: index object selecting the second group of entries.
            target: tensor of reference values.
            input: tensor of predictions, compared element-wise to ``target``.
        """
        criterion = nn.MSELoss(reduction='none')
        squared_err = criterion(input, target)
        one_term = squared_err[one_index].sum()
        zero_term = squared_err[zero_index].sum()
        return (1-self.alpha)*one_term + self.alpha*zero_term
def eval_fold(foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl, sizes, epochs, disease_sim, miRNA_sim,
              auc_queue,
              auprc_queue, method):
    """Train one model on a single fold and report its test AUC / AUPRC.

    Args:
        foldIdx: fold number, used only in log output.
        n_miRNA, n_disease: entity counts forwarded to ``assoc_list_to_adj``.
        train_pair_list, train_pair_lbl: training pairs and their labels.
        test_pair_list: test pairs; each pair indexes the score matrix as
            ``score[pair[0], pair[1]]``.
        test_pair_lbl: labels of the test pairs.
        sizes: ``Sizes`` instance passed to the model constructor.
        epochs: the loop runs ``epochs + 1`` optimisation steps.
        disease_sim, miRNA_sim: similarity matrices fed to the model.
        auc_queue, auprc_queue: queues that receive the fold's scores.
        method: 'nimgcn_variance1' / '2' / '3' select SimpleModel1/2/3;
            any other value selects ``Model``.
    """
    print('Starting fold: ', foldIdx)
    adj, zero_index, one_index = assoc_list_to_adj(n_disease, n_miRNA, train_pair_list, train_pair_lbl)

    # The model input is the pair [disease block, miRNA block].
    model_inputs = [
        {'data': t.FloatTensor(disease_sim), 'edge_index': get_edge_index(disease_sim)},
        {'data': t.FloatTensor(miRNA_sim), 'edge_index': get_edge_index(miRNA_sim)},
    ]

    if method == 'nimgcn_variance1':
        model = SimpleModel1(sizes)
    elif method == 'nimgcn_variance2':
        model = SimpleModel2(sizes)
    elif method == 'nimgcn_variance3':
        model = SimpleModel3(sizes)
    else:
        model = Model(sizes)

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    model.train()
    regression_crit = Myloss()

    # Loop invariants: build the target tensor and the normaliser once
    # instead of on every epoch.
    adj_tensor = t.FloatTensor(adj)
    n_train = len(one_index[0]) + len(zero_index[0])
    for epoch in range(0, epochs + 1):
        model.zero_grad()
        score = model(model_inputs)
        loss = regression_crit(one_index, zero_index, adj_tensor, score)
        loss.backward()
        optimizer.step()
        print('Epoch: ', epoch, ' loss: ', loss.item()/n_train)

    # Evaluation: no gradients are needed, so skip graph construction.
    model.eval()
    with t.no_grad():
        test_score = model(model_inputs).detach().numpy()
    test_pred_lbl = [test_score[pair[0], pair[1]] for pair in test_pair_list]

    auc_score, auprc_score = get_score(test_pair_lbl, test_pred_lbl)
    print('foldIdx: ', foldIdx, 'auc_score: ', auc_score, 'auprc_score: ', auprc_score)
    auc_queue.put(auc_score)
    auprc_queue.put(auprc_score)
# evaluate nimcgcn model
def eval_nimcgcn(adj_path='./m-d.csv', disease_semantic_sim_path = './disease_sim2.csv', negrate=1.0, epochs=300, method='nimcgcn'):
    """Run 5-fold evaluation with one process per fold and write a CSV summary.

    The similarity matrices are computed once from the full association
    matrix and shared by all folds (the output file name and the trailing
    comment in this script call this the "fault"/"faulty" calculation).

    Args:
        adj_path: path of the association matrix read by ``load_data``.
        disease_semantic_sim_path: path forwarded to ``cal_sim``; its file
            name is also used to build the output file name.
        negrate: forwarded to ``randomSplitTrainTestArray``.
        epochs: forwarded to ``eval_fold``.
        method: model variant forwarded to ``eval_fold``.
    """
    numFold = 5
    n_miRNA, n_disease, one_edge_list, zero_edge_list = load_data(adj_path)
    adj, _, _ = assoc_list_to_adj(n_disease, n_miRNA, one_edge_list, [1] * len(one_edge_list))
    # adj is transposed before cal_sim; the original comment indicates that
    # cal_sim expects the opposite row/column orientation - TODO confirm.
    disease_sim, miRNA_sim = cal_sim(adj.T, disease_semantic_sim_path=disease_semantic_sim_path)
    sizes = Sizes(n_miRNA, n_disease)
    folds_data = randomSplitTrainTestArray(one_edge_list, zero_edge_list, n_disease, n_miRNA, numFold, negrate)

    auc_queue = Queue(numFold)
    aupr_queue = Queue(numFold)
    processList = list()
    for foldIdx, fold in enumerate(folds_data, start=1):
        train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl = fold
        process = Process(target=eval_fold, args=(foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl,
                                                  sizes, epochs, disease_sim, miRNA_sim, auc_queue, aupr_queue, method))
        processList.append(process)
        process.start()

    # Drain the queues BEFORE joining: a child that has put items on a queue
    # may not terminate until they have been consumed.
    auc_list = [auc_queue.get() for _ in range(numFold)]
    auprc_list = [aupr_queue.get() for _ in range(numFold)]
    for process in processList:
        process.join()

    avg_auc = sum(auc_list)/len(auc_list)
    avg_auprc = sum(auprc_list)/len(auprc_list)
    print('NIMCGCN average performance: auc:', avg_auc, 'auprc: ', avg_auprc)

    # Use only the file name of the similarity path. The previous `[2:]`
    # slice was correct only for paths of the form './name'.
    sim_file_name = disease_semantic_sim_path[disease_semantic_sim_path.rfind('/') + 1:]
    with open(method + 'Fault_' + sim_file_name, 'w') as f:
        f.write('Fold,auc,auprc\n')
        # The Fold column stays empty: scores arrive in completion order, so
        # the fold each one belongs to is not known here.
        for auc, auprc in zip(auc_list, auprc_list):
            f.write(',' + str(auc) + ',' + str(auprc) + '\n')
        f.write('Average,' + str(avg_auc) + ',' + str(avg_auprc) + '\n')
# Driver: evaluate every model variant twice - once with eval_nimcgcn's
# default similarity file and once with the HMDD2 disease_sim.csv.
methods = ['nimgcn_variance1', 'nimgcn_variance2', 'nimgcn_variance3'] # 'nimgcn'
sim_overrides = (
    {},
    {'disease_semantic_sim_path': '../../../../data/hmdd2/disease_sim.csv'},
)
for method in methods:
    for override in sim_overrides:
        eval_nimcgcn(method=method, **override)
# faulty way of calculation NIMCGCN average performance: auc: 0.9103700006895264 auprc: 0.23458280617241423
import torch as t
from torch import nn, optim
from data.preparation.miRNA_sim import *
import numpy as np
from nimcgcn.code.model import *
from multiprocess import Process, Queue
import pandas as pd
import os
import os.path as path
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(1337)
# Seeds iterated by eval_nimcgcn; each one selects a stored set of folds.
random_seeds = [123, 456, 789, 101, 112]
# Device handed to the model's forward call: CUDA when available, else CPU.
sysdevice = t.device('cuda' if t.cuda.is_available() else 'cpu')
class Sizes(object):
    """Plain container for the dimensions handed to the model constructors.

    Attributes set by the constructor:
        m:  number of miRNAs (``n_miRNA``).
        d:  number of diseases (``n_disease``).
        fg, fd: fixed at 256.
        k:  fixed at 32.
    """

    def __init__(self, n_miRNA, n_disease):
        self.m, self.d = n_miRNA, n_disease
        # presumably layer/feature widths read by the model classes - TODO confirm
        self.fg = self.fd = 256
        self.k = 32
def get_edge_index(matrix):
    """Return the coordinates of every non-zero entry of a 2-D matrix.

    Args:
        matrix: 2-D array-like (anything ``np.asarray`` accepts).

    Returns:
        A ``LongTensor`` of shape ``(2, n_nonzero)``: row 0 holds the row
        indices and row 1 the column indices, in row-major order. The tensor
        is moved to the GPU when CUDA is available.
    """
    # np.nonzero scans in C instead of a Python-level loop over every cell.
    rows, cols = np.nonzero(np.asarray(matrix))
    rtensor = t.from_numpy(np.vstack((rows, cols))).long()
    if t.cuda.is_available():
        rtensor = rtensor.cuda()
    return rtensor
class Myloss(nn.Module):
    """Weighted sum of squared errors over two groups of matrix entries.

    Entries selected by ``one_index`` are weighted by ``1 - alpha`` and
    entries selected by ``zero_index`` by ``alpha``.
    """

    def __init__(self, alpha=0.4):
        super().__init__()
        self.alpha = alpha

    def forward(self, one_index, zero_index, target, input):
        """Return the scalar weighted loss.

        Args:
            one_index: index object selecting the first group of entries.
            zero_index: index object selecting the second group of entries.
            target: tensor of reference values.
            input: tensor of predictions, compared element-wise to ``target``.
        """
        criterion = nn.MSELoss(reduction='none')
        squared_err = criterion(input, target)
        one_term = squared_err[one_index].sum()
        zero_term = squared_err[zero_index].sum()
        return (1-self.alpha)*one_term + self.alpha*zero_term
def eval_fold(foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl, sizes, epochs,
              auc_queue,
              auprc_queue, method, save_result_path, disease_sim, miRNA_sim):
    """Train one model on a single fold, save its test predictions, report scores.

    Runs on the GPU when CUDA is available, otherwise on the CPU.

    Args:
        foldIdx: fold number, used only in log output.
        n_miRNA, n_disease: entity counts forwarded to ``assoc_list_to_adj``.
        train_pair_list, train_pair_lbl: training pairs and their labels.
        test_pair_list: test pairs; each pair indexes the score matrix as
            ``score[pair[0], pair[1]]``.
        test_pair_lbl: labels of the test pairs.
        sizes: ``Sizes`` instance passed to the model constructor.
        epochs: the loop runs ``epochs + 1`` optimisation steps.
        auc_queue, auprc_queue: queues that receive the fold's scores.
        method: 'nimgcn_variance1' / '2' / '3' select SimpleModel1/2/3;
            any other value selects ``Model``.
        save_result_path: CSV file (no header, no index) that receives one
            ``predicted score, label`` row per test pair.
        disease_sim, miRNA_sim: similarity matrices fed to the model.
    """
    print('Starting fold: ', foldIdx)
    adj, zero_index, one_index = assoc_list_to_adj(n_disease, n_miRNA, train_pair_list, train_pair_lbl)
    use_cuda = t.cuda.is_available()

    def _to_gpu(obj):
        # Move a tensor or module to the GPU when one is available.
        return obj.cuda() if use_cuda else obj

    # The model input is the pair [disease block, miRNA block].
    model_inputs = [
        {'data': _to_gpu(t.FloatTensor(disease_sim)), 'edge_index': get_edge_index(disease_sim)},
        {'data': _to_gpu(t.FloatTensor(miRNA_sim)), 'edge_index': get_edge_index(miRNA_sim)},
    ]

    if method == 'nimgcn_variance1':
        model = SimpleModel1(sizes)
    elif method == 'nimgcn_variance2':
        model = SimpleModel2(sizes)
    elif method == 'nimgcn_variance3':
        model = SimpleModel3(sizes)
    else:
        model = Model(sizes)

    # Move the model first, then build the optimizer on its parameters.
    model = _to_gpu(model)
    regression_crit = _to_gpu(Myloss())
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Loop invariants: build (and transfer) the target tensor once instead of
    # on every epoch.
    adj_tensor = _to_gpu(t.FloatTensor(adj))
    n_train = len(one_index[0]) + len(zero_index[0])

    model.train()
    for epoch in range(0, epochs + 1):
        model.zero_grad()
        score = model(input=model_inputs, device=sysdevice)
        loss = regression_crit(one_index, zero_index, adj_tensor, score)
        loss.backward()
        optimizer.step()
        print('Epoch: ', epoch, ' loss: ', loss.item()/n_train)

    # Evaluation: no gradients needed; .cpu() is a no-op on CPU tensors.
    model.eval()
    with t.no_grad():
        test_score = model(model_inputs, sysdevice).detach().cpu().numpy()
    test_pred_lbl = [test_score[pair[0], pair[1]] for pair in test_pair_list]

    # Save the predicted scores next to their labels.
    join_list = [[pred, target] for pred, target in zip(test_pred_lbl, test_pair_lbl)]
    pd.DataFrame(np.array(join_list)).to_csv(save_result_path, header=False, index=False)

    auc_score, auprc_score = get_score(test_pair_lbl, test_pred_lbl)
    print('foldIdx: ', foldIdx, 'auc_score: ', auc_score, 'auprc_score: ', auprc_score)
    auc_queue.put(auc_score)
    auprc_queue.put(auprc_score)
# evaluate nimcgcn model
def eval_nimcgcn(adj_path='/home/dong/mirna-disease/hmdd3/hmdd3assoc.csv', disease_semantic_sim_path = '/home/dong/mirna-disease/hmdd3/hmdd3_disease_sim2.csv', epochs=3000, method='nimgcn'):
    """Evaluate one model variant on the stored folds of every seed in ``random_seeds``.

    For each seed the folds are loaded with ``load_fold_data2``, each fold is
    evaluated with ``eval_fold`` (in-process when CUDA is available, otherwise
    one process per fold), and the per-fold and average scores are appended to
    a per-seed CSV and to one overall CSV.

    The similarity matrices are computed once from the full association
    matrix; the per-fold matrices yielded by ``load_fold_data2`` are ignored
    (the output directory name calls this the "fault" setting - presumably
    deliberate, TODO confirm).

    Args:
        adj_path: path of the association matrix read by ``load_data``.
        disease_semantic_sim_path: path forwarded to ``cal_sim`` and
            ``load_fold_data2``; its file name is reused in output names.
        epochs: forwarded to ``eval_fold``.
        method: model variant forwarded to ``eval_fold``.
    """
    results_dir = '/home/dong/mirna-disease/results_hmdd3_fault/'
    folds_data_dir = '/home/dong/mirna-disease/folds/'
    sim_file_name = disease_semantic_sim_path[disease_semantic_sim_path.rfind('/') + 1:]
    numFold = 5
    use_cuda = t.cuda.is_available()

    n_miRNA, n_disease, one_edge_list, zero_edge_list = load_data(adj_path)
    sizes = Sizes(n_miRNA, n_disease)
    adj, _, _ = assoc_list_to_adj(n_disease, n_miRNA, one_edge_list, [1] * len(one_edge_list))
    disease_sim, miRNA_sim = cal_sim(adj.T, disease_semantic_sim_path=disease_semantic_sim_path)

    # The results directory must exist before the summary file is opened.
    os.makedirs(results_dir, exist_ok=True)
    all_result_path = results_dir + 'hmdd3_' + method + '_' + sim_file_name
    with open(all_result_path, 'w') as writer:
        for randseed in random_seeds:
            save_dir = results_dir + str(randseed) + '/'
            os.makedirs(save_dir, exist_ok=True)

            folds_data = load_fold_data2(folds_data_dir, randseed, disease_semantic_sim_path)
            auc_queue = Queue(numFold)
            aupr_queue = Queue(numFold)
            processList = list()
            for foldIdx, fold in enumerate(folds_data, start=1):
                # The last two items (per-fold similarity matrices) are unused.
                train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl, _fold_disease_sim, _fold_miRNA_sim = fold
                save_result_path = save_dir + str(foldIdx) + '_hmdd3_' + method + '_' + sim_file_name
                fold_args = (foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl,
                             sizes, epochs, auc_queue, aupr_queue, method, save_result_path, disease_sim, miRNA_sim)
                if use_cuda:
                    # With a GPU the folds run sequentially in this process.
                    eval_fold(*fold_args)
                else:
                    process = Process(target=eval_fold, args=fold_args)
                    processList.append(process)
                    process.start()

            # Drain the queues BEFORE joining: a child that has put items on a
            # queue may not terminate until they have been consumed.
            auc_list = [auc_queue.get() for _ in range(numFold)]
            auprc_list = [aupr_queue.get() for _ in range(numFold)]
            for process in processList:
                process.join()

            avg_auc = sum(auc_list)/len(auc_list)
            avg_auprc = sum(auprc_list)/len(auprc_list)
            print('NIMCGCN average performance: auc:', avg_auc, 'auprc: ', avg_auprc)

            # The Fold column stays empty: scores arrive in completion order.
            rows = ['Fold,auc,auprc\n']
            rows.extend(',' + str(auc) + ',' + str(auprc) + '\n' for auc, auprc in zip(auc_list, auprc_list))
            rows.append('Average,' + str(avg_auc) + ',' + str(avg_auprc) + '\n')

            save_score_path = save_dir + str(randseed) + '_hmdd3_' + method + '_' + sim_file_name
            with open(save_score_path, 'a+') as f:
                f.writelines(rows)
            writer.writelines(rows)
# Driver: evaluate every model variant twice - once with eval_nimcgcn's
# default similarity file and once with the HMDD2 disease_sim.csv.
methods = ['nimgcn_variance1', 'nimgcn_variance2', 'nimgcn_variance3'] # 'nimgcn'
sim_overrides = (
    {},
    {'disease_semantic_sim_path': '../../../../data/hmdd2/disease_sim.csv'},
)
for method in methods:
    for override in sim_overrides:
        eval_nimcgcn(method=method, **override)
# faulty way of calculation NIMCGCN average performance: auc: 0.9103700006895264 auprc: 0.23458280617241423
import torch as t
from torch import nn, optim
from utility.utils import *
from data.hmdd2.miRNA_sim import *
import numpy as np
from nimcgcn.code.model2 import *
from multiprocess import Process, Queue
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(1337)
class Sizes(object):
    """Plain container for the dimensions handed to the model constructors.

    Attributes set by the constructor:
        m:  number of miRNAs (``n_miRNA``).
        d:  number of diseases (``n_disease``).
        fg, fd: fixed at 256.
        k:  fixed at 32.
    """

    def __init__(self, n_miRNA, n_disease):
        self.m, self.d = n_miRNA, n_disease
        # presumably layer/feature widths read by the model classes - TODO confirm
        self.fg = self.fd = 256
        self.k = 32
def get_edge_index(matrix):
    """Return the coordinates of every non-zero entry of a 2-D matrix.

    Args:
        matrix: 2-D array-like (anything ``np.asarray`` accepts).

    Returns:
        A ``LongTensor`` of shape ``(2, n_nonzero)``: row 0 holds the row
        indices and row 1 the column indices, in row-major order (the same
        order a nested ``for i: for j:`` scan produces).
    """
    # np.nonzero scans in C instead of a Python-level loop over every cell.
    rows, cols = np.nonzero(np.asarray(matrix))
    return t.from_numpy(np.vstack((rows, cols))).long()
class Myloss(nn.Module):
    """Weighted sum of squared errors over two groups of matrix entries.

    Entries selected by ``one_index`` are weighted by ``1 - alpha`` and
    entries selected by ``zero_index`` by ``alpha``.
    """

    def __init__(self, alpha=0.4):
        super().__init__()
        self.alpha = alpha

    def forward(self, one_index, zero_index, target, input):
        """Return the scalar weighted loss.

        Args:
            one_index: index object selecting the first group of entries.
            zero_index: index object selecting the second group of entries.
            target: tensor of reference values.
            input: tensor of predictions, compared element-wise to ``target``.
        """
        criterion = nn.MSELoss(reduction='none')
        squared_err = criterion(input, target)
        one_term = squared_err[one_index].sum()
        zero_term = squared_err[zero_index].sum()
        return (1-self.alpha)*one_term + self.alpha*zero_term
def eval_fold(foldIdx, disease_sim, miRNA_sim, train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl, sizes, epochs, disease_semantic_sim_path,
              auc_queue,
              auprc_queue):
    """Train ``SimpleModel4`` as a pair classifier on one fold and report AUC / AUPRC.

    Args:
        foldIdx: fold number, used only in log output.
        disease_sim, miRNA_sim: similarity matrices fed to the model.
        train_pair_list, train_pair_lbl: training pairs and integer labels.
        test_pair_list, test_pair_lbl: test pairs and their labels.
        sizes: ``Sizes`` instance passed to the model constructor.
        epochs: the loop runs ``epochs + 1`` optimisation steps.
        disease_semantic_sim_path: accepted but not used by this function.
        auc_queue, auprc_queue: queues that receive the fold's scores.
    """
    print('Starting fold: ', foldIdx)

    # The model input is the pair [disease block, miRNA block].
    model_inputs = [
        {'data': t.FloatTensor(disease_sim), 'edge_index': get_edge_index(disease_sim)},
        {'data': t.FloatTensor(miRNA_sim), 'edge_index': get_edge_index(miRNA_sim)},
    ]

    model = SimpleModel4(sizes)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    model.train()
    loss_fnc = nn.CrossEntropyLoss()

    # Loop invariants: convert the training pairs and labels to tensors once.
    train_pairs = t.LongTensor(train_pair_list)
    train_labels = t.LongTensor(train_pair_lbl)
    n_train = len(train_pair_list)
    for epoch in range(0, epochs + 1):
        model.zero_grad()
        score = model(model_inputs, train_pairs)
        loss = loss_fnc(score, train_labels)
        loss.backward()
        optimizer.step()
        # NOTE(review): CrossEntropyLoss already averages over the batch by
        # default, so this printed value is divided by n_train a second time.
        print('Epoch: ', epoch, ' loss: ', loss.item()/n_train)

    # Evaluation: no gradients are needed, so skip graph construction.
    model.eval()
    with t.no_grad():
        test_output = model(model_inputs, t.LongTensor(test_pair_list))
    # Column 1 of the model output is used as the score of each test pair.
    test_pred_lbl = test_output.detach().numpy()[:, 1].tolist()

    auc_score, auprc_score = get_score(test_pair_lbl, test_pred_lbl)
    print('foldIdx: ', foldIdx, 'auc_score: ', auc_score, 'auprc_score: ', auprc_score)
    auc_queue.put(auc_score)
    auprc_queue.put(auprc_score)
# evaluate nimcgcn model
def eval_nimcgcn(adj_path='./m-d.csv', disease_semantic_sim_path = './disease_sim2.csv', negrate=1.0, epochs=300):
n_miRNA, n_disease, one_edge_list, zero_edge_list = load_data(adj_path)
sizes = Sizes(n_miRNA, n_disease)
adj, zero_index, one_index = assoc_list_to_adj(n_disease, n_miRNA, one_edge_list, [1] * len(one_edge_list))
disease_sim, miRNA_sim = cal_sim(adj.T, disease_semantic_sim_path=disease_semantic_sim_path)# our current adj is miRNA: row, but the function take the row
folds_data = randomSplitTrainTestArray(one_edge_list, zero_edge_list, n_disease, n_miRNA, 5, negrate)
foldIdx = 0
numFold = 5
auc_queue = Queue(numFold)
aupr_queue = Queue(numFold)
processList = list()
for train_pair_list, train_pair_lbl, test_pair_list, test_pair_lbl in folds_data:
foldIdx