Commit fe45a213 authored by Ngan Thi Dong's avatar Ngan Thi Dong
Browse files

update README and eval_nimcgcn

parent cb8fd0da
......@@ -9,6 +9,9 @@ The code was tested on python 3.7+, all required packages are put in requirement
`genFoldsData.py --data_dir data/XXX --save_dir data/XXX/folds --randseed RRR`
where XXX is the dataset (either hmdd2 or hmdd3) and RRR is the random seed used.
The script will split the data into five folds according to the given random seed. The training data set is balanced (the negative:positive rate is set to 1). You can also change this rate with the `--neg_rate` argument. The testing data set is the whole test set consisting of the known associations in the test split and all the possible combinations of miRNA-disease pairs (except the known ones in the training data). The program will also calculate miRNA functional, miRNA sequence, and miRNA GIP similarity, as well as disease semantic and disease GIP similarity, according to the training data in each data split.
We use the code given by the EPMDA authors for the GIP kernel similarity calculation. The code for miRNA sequence similarity is the one given by DBMDA authors. For miRNA functional and disease semantic similarity calculation, the code is partly adapted from the one released by MISIM 2.0 authors.
2. To evaluate **nimgcn** model, run:
......@@ -31,11 +34,11 @@ other configurable parameters include:
- _save_score_: should be either True or False, corresponding to whether to save the predicted scores or not
- _use_autoencoder_: whether to use autoencoder or not
- _use_seq_sim_: whether to use seq sim or not
- _imbalanced_: whether to use imbalanced training data or not
- _randseed_: the random seed used to generate train/test split
3. For **EPMDA** since the features took a lot of time to run, we provide all calculated features in epmda/data folder
Please run `eval_epmda_balance.py` with the corresponding arguments for evaluating EPMDA with the balance set up
and `eval_epmda_original.py` with corresponding arguments for the original evaluation set up
Please run `eval_epmda.py` with the corresponding arguments for evaluating EPMDA with the balanced/imbalanced setup
For feature calculation, please refer to the *.py files in epmda folder.
......
......@@ -24,7 +24,10 @@ def get_edge_index(matrix):
if matrix[i][j] != 0:
edge_index[0].append(i)
edge_index[1].append(j)
return t.LongTensor(edge_index)
edge_tensor = t.LongTensor(edge_index)
if t.cuda.is_available():
edge_tensor = edge_tensor.cuda()
return edge_tensor
class Myloss(nn.Module):
......@@ -77,7 +80,10 @@ def eval_fold(foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test
for epoch in range(0, args.epochs+1):
model.zero_grad()
score = model([dataset['dd'], dataset['mm']], device=sysdevice)
loss = regression_crit(one_index, zero_index, t.FloatTensor(adj), score)
adj_tensor = t.FloatTensor(adj)
if t.cuda.is_available():
adj_tensor = adj_tensor.cuda()
loss = regression_crit(one_index, zero_index, adj_tensor, score)
loss.backward()
optimizer.step()
print('Epoch: ', epoch, ' loss: ', loss.item()/(len(one_index[0])+len(zero_index[0])))
......@@ -98,6 +104,8 @@ def eval_fold(foldIdx, n_miRNA, n_disease, train_pair_list, train_pair_lbl, test
score_path = score_save_dir + str(foldIdx) + '_' + args.method + '_' + args.sim_type + '.csv'
if args.faulty:
score_path = score_path.replace('_' + args.sim_type + '.csv', 'Faulty_' + args.sim_type + '.csv')
if args.imbalanced:
score_path = score_path.replace('.csv', '_imbalanced.csv')
save_scores(test_pred_lbl, test_pair_lbl, score_path)
......@@ -148,6 +156,8 @@ def eval(args):
if args.faulty:
save_path += 'Faulty'
save_path += '_' + args.sim_type + '.csv'
if args.imbalanced:
save_path = save_path.replace('.csv', '_imbalanced.csv')
with open(save_path, 'w') as f:
f.write('Fold,auc,auprc\n')
......@@ -160,10 +170,10 @@ def main():
parser = argparse.ArgumentParser(description='Neural based matrix completion for virus-host PPI')
parser.add_argument('--epochs', type=int, default=300, metavar='N',
help='number of epochs to train')
parser.add_argument('--data_dir', default='/home/dong/simplifying_mirna_disease/hmdd2/', help='dataset directory')
parser.add_argument('--fold_dir', default='/home/dong/simplifying_mirna_disease/hmdd2/folds/', help='dataset directory')
parser.add_argument('--result_dir', default='/home/dong/simplifying_mirna_disease/results_hmdd2/', help='saved result directory')
parser.add_argument('--method', default='nimgcn', help='method should be one of nimgcn, nimgcn1, nimgcn2, nimgcn3')
parser.add_argument('--data_dir', default='data/hmdd2/', help='dataset directory')
parser.add_argument('--fold_dir', default='data/hmdd2/folds/', help='dataset directory')
parser.add_argument('--result_dir', default='data/hmdd2/results/', help='saved result directory')
parser.add_argument('--method', default='nimgcn2', help='method should be one of nimgcn, nimgcn1, nimgcn2, nimgcn3')
parser.add_argument('--save_score', default=False, help='whether to save the predicted score or not')
parser.add_argument('--sim_type', default='functional2', help='the miRNA and disease sim, pass in "functional2" for miRNA functional + disease semantic(with phenotype info added),'
'"functional1" for miRNA functional and disease semantic only,'
......@@ -173,11 +183,34 @@ def main():
parser.add_argument('--numFold', default=5, help='value of K for K-foldCV, default is 5')
parser.add_argument('--neg_rate', default=1.0, help='the negative sampling rate')
parser.add_argument('--faulty', default=False, help='Faulty calculation or not')
parser.add_argument('--imbalanced', default=False, help='Faulty calculation or not')
args = parser.parse_args()
args.save_score = True if str(args.save_score) == 'True' else False
args.faulty = True if str(args.faulty) == 'True' else False
args.imbalanced = True if str(args.imbalanced) == 'True' else False
randseeds = [123, 456, 789, 101, 112]
data_dirs = ['data/hmdd2/']
fold_dirs = ['data/hmdd2/folds/']
result_dirs = ['data/hmdd2/results/']
methods = ['nimgcn', 'nimgcn1', 'nimgcn2', 'nimgcn3']
faulties = [True, False]
for randseed in randseeds:
args.randseed = randseed
for method in methods:
args.method = method
for idata, data_dir in enumerate(data_dirs):
args.data_dir = data_dir
args.fold_dir = fold_dirs[idata]
args.result_dir = result_dirs[idata]
args.save_score = True
for vbool in faulties:
args.faulty = vbool
for vbal in faulties:
args.imbalanced = vbal
eval(args)
eval(args)
if __name__ == "__main__":
main()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment