Commit 1d555125 authored by Joshua Ghost

finish GAT

parent dcb1718f
import numpy as np
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
from scipy.sparse.linalg import eigsh  # the old scipy.sparse.linalg.eigen.arpack path is removed in current SciPy
import sys
def preprocess_adj_bias(adj):
    num_nodes = adj.shape[0]
    adj = adj + sp.eye(num_nodes)  # add self-loops
    adj[adj > 0.0] = 1.0  # binarize: keep connectivity only
    if not sp.isspmatrix_coo(adj):
        adj = adj.tocoo()
    adj = adj.astype(np.float32)
    # This is where I made a mistake earlier: I had used (adj.row, adj.col) instead.
    indices = np.vstack((adj.col, adj.row)).transpose()
    # return tf.SparseTensor(indices=indices, values=adj.data, dense_shape=adj.shape)
    return indices, adj.data, adj.shape
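# Usage sketch (illustrative, not part of the original commit): the COO pieces
# returned above can be assembled into a torch sparse attention mask; this
# assumes torch is importable and `adj` is a scipy sparse adjacency matrix.
#
#   import torch
#   indices, values, shape = preprocess_adj_bias(adj)
#   mask = torch.sparse_coo_tensor(torch.LongTensor(indices).t(),
#                                  torch.FloatTensor(values),
#                                  torch.Size(shape))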
def adj_to_bias(adj, sizes, nhood=1):
    """Convert batched adjacency matrices into additive attention masks:
    0 where a node is reachable within nhood hops, -1e9 everywhere else."""
    nb_graphs = adj.shape[0]
    mt = np.empty(adj.shape)
    for g in range(nb_graphs):
        mt[g] = np.eye(adj.shape[1])
        for _ in range(nhood):
            mt[g] = np.matmul(mt[g], (adj[g] + np.eye(adj.shape[1])))
        for i in range(sizes[g]):
            for j in range(sizes[g]):
                if mt[g][i][j] > 0.0:
                    mt[g][i][j] = 1.0
    return -1e9 * (1.0 - mt)
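# Worked example (illustrative): for a batch holding one 2-node graph with a
# single undirected edge, each node reaches the other within one hop, so the
# returned mask is all zeros; entries for unreachable pairs would be -1e9.
#
#   a = np.array([[[0., 1.], [1., 0.]]])  # shape (1, 2, 2): one 2-node graph
#   adj_to_bias(a, [2], nhood=1)          # -> [[[0., 0.], [0., 0.]]]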
def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation."""
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        coords = np.vstack((mx.row, mx.col)).transpose()
        values = mx.data
        shape = mx.shape
        return coords, values, shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)
    return sparse_mx
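# Example (illustrative): a 2x2 identity in CSR form becomes
# coords=[[0, 0], [1, 1]], values=[1., 1.], shape=(2, 2).
#
#   coords, values, shape = sparse_to_tuple(sp.eye(2).tocsr())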
def preprocess_features(features):
    """Row-normalize the feature matrix and convert it to tuple representation."""
    features = sp.lil_matrix(features.numpy())  # expects a torch tensor as input
    rowsum = np.array(features.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.  # guard against all-zero rows
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    return features.todense(), sparse_to_tuple(features)
    # return features, sparse_to_tuple(features)
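# Minimal smoke test (a sketch, not in the original commit): run the module
# directly to check the row normalization; assumes torch is installed, since
# preprocess_features calls features.numpy() on a torch tensor.
if __name__ == "__main__":
    import torch
    dense, _ = preprocess_features(torch.tensor([[1., 1.], [0., 2.]]))
    print(dense)  # each non-zero row now sums to 1 -> [[0.5, 0.5], [0., 1.]]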
@@ -45,39 +45,34 @@ class Sampler:
         #print(type(self.train_adj))
         self.degree_p = None

-    def _preprocess_adj(self, normalization, adj, cuda):
+    def _preprocess_adj(self, normalization, adj, device):
         adj_normalizer = fetch_normalization(normalization)
         r_adj = adj_normalizer(adj)
         r_adj = sparse_mx_to_torch_sparse_tensor(r_adj).float()
-        if cuda:
-            r_adj = r_adj.cuda()
-        return r_adj
+        return r_adj.to(device=device)

-    def _preprocess_fea(self, fea, cuda):
-        if cuda:
-            return fea.cuda()
-        else:
-            return fea
+    def _preprocess_fea(self, fea, device):
+        return fea.to(device=device)

-    def stub_sampler(self, normalization, cuda):
+    def stub_sampler(self, normalization, device):
         """
         The stub sampler. Return the original data.
         """
         if normalization in self.trainadj_cache:
             r_adj = self.trainadj_cache[normalization]
         else:
-            r_adj = self._preprocess_adj(normalization, self.train_adj, cuda)
+            r_adj = self._preprocess_adj(normalization, self.train_adj, device)
             self.trainadj_cache[normalization] = r_adj
-        fea = self._preprocess_fea(self.train_features, cuda)
+        fea = self._preprocess_fea(self.train_features, device)
         return r_adj, fea

-    def randomedge_sampler(self, percent, normalization, cuda):
+    def randomedge_sampler(self, percent, normalization, device):
         """
         Randomly drop edges and preserve percent% of them.
         """
         "Opt here"
         if percent >= 1.0:
-            return self.stub_sampler(normalization, cuda)
+            return self.stub_sampler(normalization, device)

         nnz = self.train_adj.nnz
         perm = np.random.permutation(nnz)
@@ -87,16 +82,16 @@ class Sampler:
                                  (self.train_adj.row[perm],
                                   self.train_adj.col[perm])),
                                 shape=self.train_adj.shape)
-        r_adj = self._preprocess_adj(normalization, r_adj, cuda)
-        fea = self._preprocess_fea(self.train_features, cuda)
+        r_adj = self._preprocess_adj(normalization, r_adj, device)
+        fea = self._preprocess_fea(self.train_features, device)
         return r_adj, fea

-    def vertex_sampler(self, percent, normalization, cuda):
+    def vertex_sampler(self, percent, normalization, device):
         """
         Randomly drop vertices.
         """
         if percent >= 1.0:
-            return self.stub_sampler(normalization, cuda)
+            return self.stub_sampler(normalization, device)
         self.learning_type = "inductive"
         pos_nnz = len(self.pos_train_idx)
         # neg_neighbor_nnz = 0.4 * percent
@@ -117,16 +112,16 @@ class Sampler:
         # print(r_fea.shape)
         # print(r_adj.shape)
         # print(len(all_samples))
-        r_adj = self._preprocess_adj(normalization, r_adj, cuda)
-        r_fea = self._preprocess_fea(r_fea, cuda)
+        r_adj = self._preprocess_adj(normalization, r_adj, device)
+        r_fea = self._preprocess_fea(r_fea, device)
         return r_adj, r_fea, all_samples

-    def degree_sampler(self, percent, normalization, cuda):
+    def degree_sampler(self, percent, normalization, device):
         """
         Randomly drop edges wrt degree (high degree, low probability).
         """
         if percent >= 0:
-            return self.stub_sampler(normalization, cuda)
+            return self.stub_sampler(normalization, device)
         if self.degree_p is None:
             degree_adj = self.train_adj.multiply(self.degree)
             self.degree_p = degree_adj.data / (1.0 * np.sum(degree_adj.data))
@@ -138,37 +133,38 @@ class Sampler:
                                  (self.train_adj.row[perm],
                                   self.train_adj.col[perm])),
                                 shape=self.train_adj.shape)
-        r_adj = self._preprocess_adj(normalization, r_adj, cuda)
-        fea = self._preprocess_fea(self.train_features, cuda)
+        r_adj = self._preprocess_adj(normalization, r_adj, device)
+        fea = self._preprocess_fea(self.train_features, device)
         return r_adj, fea

-    def get_test_set(self, normalization, cuda):
+    def get_test_set(self, normalization, device):
         """
         Return the test set.
         """
         if self.learning_type == "transductive":
-            return self.stub_sampler(normalization, cuda)
+            return self.stub_sampler(normalization, device)
         else:
             if normalization in self.adj_cache:
                 r_adj = self.adj_cache[normalization]
             else:
-                r_adj = self._preprocess_adj(normalization, self.adj, cuda)
+                r_adj = self._preprocess_adj(normalization, self.adj, device)
                 self.adj_cache[normalization] = r_adj
-            fea = self._preprocess_fea(self.features, cuda)
+            fea = self._preprocess_fea(self.features, device)
             return r_adj, fea

-    def get_val_set(self, normalization, cuda):
+    def get_val_set(self, normalization, device):
         """
         Return the validation set. Only for the inductive task.
         Currently behaves the same as get_test_set.
         """
-        return self.get_test_set(normalization, cuda)
+        return self.get_test_set(normalization, device)

-    def get_label_and_idxes(self, cuda):
+    def get_label_and_idxes(self, device):
         """
         Return all labels and indexes.
         """
-        if cuda:
-            return self.labels_torch.cuda(), self.idx_train_torch.cuda(), self.idx_val_torch.cuda(), self.idx_test_torch.cuda()
-        return self.labels_torch, self.idx_train_torch, self.idx_val_torch, self.idx_test_torch
+        return self.labels_torch.to(device=device),\
+               self.idx_train_torch.to(device=device),\
+               self.idx_val_torch.to(device=device),\
+               self.idx_test_torch.to(device=device)
@@ -7,6 +7,10 @@ import numpy as np
 import torch
 import torch.nn.functional as F
+from src.process import adj_to_bias, preprocess_features
+from src.utils import sparse_mx_to_torch_sparse_tensor
+import scipy.sparse as sp
 import torch.optim as optim
 from tensorboardX import SummaryWriter
@@ -25,7 +29,7 @@ from src.sample import Sampler
 # Training settings
 parser = argparse.ArgumentParser()
 # Training parameter
-parser.add_argument('--no_cuda', action='store_true', default=False,
+parser.add_argument('--no_cuda', action='store_true', default=True,
                     help='Disables CUDA training.')
 parser.add_argument('--fastmode', action='store_true', default=False,
                     help='Disable validation during training.')
@@ -83,7 +87,7 @@ args = parser.parse_args()
 if args.debug:
     print(args)
 # pre setting
-args.cuda = not args.no_cuda and torch.cuda.is_available()
+args.device = 'cuda' if (not args.no_cuda and torch.cuda.is_available()) else 'cpu'
 args.mixmode = args.no_cuda and args.mixmode and torch.cuda.is_available()
 if args.aggrmethod == "default":
     if args.type == "resgcn":
@@ -101,33 +105,46 @@ if args.type == "mutigcn":
 # random seed setting
 np.random.seed(args.seed)
 torch.manual_seed(args.seed)
-if args.cuda or args.mixmode:
+if args.device == 'cuda' or args.mixmode:
     torch.cuda.manual_seed(args.seed)
 # should we fix the random seed here?
 sampler = Sampler(args.dataset, args.datapath, args.task_type)

 # get labels and indexes
-labels, idx_train, idx_val, idx_test = sampler.get_label_and_idxes(args.cuda)
+labels, idx_train, idx_val, idx_test = sampler.get_label_and_idxes(device=args.device)
 nfeat = sampler.nfeat
 nclass = sampler.nclass
 print("nclass: %d\tnfea:%d" % (nclass, nfeat))

 # The model
-model = GCNModel(nfeat=nfeat,
-                 nhid=args.hidden,
-                 nclass=nclass,
-                 nhidlayer=args.nhiddenlayer,
-                 dropout=args.dropout,
-                 baseblock=args.type,
-                 inputlayer=args.inputlayer,
-                 outputlayer=args.outputlayer,
-                 nbaselayer=args.nbaseblocklayer,
-                 activation=F.relu,
-                 withbn=args.withbn,
-                 withloop=args.withloop,
-                 aggrmethod=args.aggrmethod,
-                 mixmode=args.mixmode)
+if args.type == "GAT":
+    model = GAT(in_size=nfeat,
+                hidden_sizes=[8],
+                nbclasses=nclass,
+                attn_drop=0.6,
+                p_drop=0.6,
+                n_heads=[8, 1],
+                activation=F.elu,
+                residual=False)
+    args.lr = 0.005
+    # TODO: add l2 regularization, e.g. in TensorFlow:
+    # lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in vars if v.name not
+    #                    in ['bias', 'gamma', 'b', 'g', 'beta']]) * l2_coef
+else:
+    model = GCNModel(nfeat=nfeat,
+                     nhid=args.hidden,
+                     nclass=nclass,
+                     nhidlayer=args.nhiddenlayer,
+                     dropout=args.dropout,
+                     baseblock=args.type,
+                     inputlayer=args.inputlayer,
+                     outputlayer=args.outputlayer,
+                     nbaselayer=args.nbaseblocklayer,
+                     activation=F.relu,
+                     withbn=args.withbn,
+                     withloop=args.withloop,
+                     aggrmethod=args.aggrmethod,
+                     mixmode=args.mixmode)
 print(args.aggrmethod)
 print(model)
 optimizer = optim.Adam(model.parameters(),
@@ -136,11 +153,10 @@ optimizer = optim.Adam(model.parameters(),
 # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=50, factor=0.618)
 scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[200, 300, 400, 500, 600, 700], gamma=0.5)
 # move the model to the selected device
-if args.cuda:
-    model.cuda()
+model = model.to(device=args.device)
 # For the mix mode, labels and indexes are in cuda.
-if args.cuda or args.mixmode:
+if args.device == 'cuda' or args.mixmode:
     labels = labels.cuda()
     idx_train = idx_train.cuda()
     idx_val = idx_val.cuda()
@@ -167,7 +183,6 @@ def get_lr(optimizer):
         return param_group['lr']

-# define the training function.
 def train(epoch, train_adj, train_fea, idx_train, val_adj=None, val_fea=None):
     if val_adj is None:
         val_adj = train_adj
@@ -240,13 +255,28 @@ acc_val = np.zeros((args.epochs,))
 sampling_t = 0

+def gat_preprocess(adj, feature):
+    # row-normalize features, add a batch dimension, and move to the target device
+    feature, _ = preprocess_features(feature)
+    feature = torch.Tensor(np.expand_dims(feature, 0)).to(args.device)
+    nb_nodes = feature.shape[1]
+    # turn the adjacency into an additive attention mask (0 = connected, -1e9 = masked)
+    adj = adj.to_dense().numpy()
+    adj = sp.coo_matrix(adj_to_bias(np.expand_dims(adj, 0), [nb_nodes], nhood=1)[0])
+    adj = torch.unsqueeze(sparse_mx_to_torch_sparse_tensor(adj), dim=0).to(device=args.device)
+    return adj, feature

 for epoch in range(args.epochs):
     input_idx_train = idx_train
     sampling_t = time.time()
     # no sampling
     # randomedge sampling; if args.sampling_percent >= 1.0, it behaves the same as stub_sampler.
     (train_adj, train_fea) = sampler.randomedge_sampler(percent=args.sampling_percent, normalization=args.normalization,
-                                                        cuda=args.cuda)
+                                                        device="cpu")
+    if args.type == 'GAT':
+        train_adj, train_fea = gat_preprocess(train_adj, train_fea)
     if args.mixmode:
         train_adj = train_adj.cuda()
@@ -257,7 +287,9 @@ for epoch in range(args.epochs):
     if False:
         outputs = train(epoch, train_adj, train_fea, input_idx_train)
     else:
-        (val_adj, val_fea) = sampler.get_test_set(normalization=args.normalization, cuda=args.cuda)
+        (val_adj, val_fea) = sampler.get_test_set(normalization=args.normalization, device="cpu")
+        if args.type == 'GAT':
+            val_adj, val_fea = gat_preprocess(val_adj, val_fea)
         if args.mixmode:
             val_adj = val_adj.cuda()
         outputs = train(epoch, train_adj, train_fea, input_idx_train, val_adj, val_fea)
@@ -279,8 +311,7 @@ for epoch in range(args.epochs):
         tb_writer.add_scalar('lr', outputs[4], epoch)
         tb_writer.add_scalars('Time', {'train': outputs[5], 'val': outputs[6]}, epoch)

-    loss_train[epoch], acc_train[epoch], loss_val[epoch], acc_val[epoch] = outputs[0], outputs[1], outputs[2], outputs[
-        3]
+    loss_train[epoch], acc_train[epoch], loss_val[epoch], acc_val[epoch] = outputs[:4]

     if args.early_stopping > 0 and early_stopping.early_stop:
         print("Early stopping.")
@@ -295,7 +326,10 @@ if args.debug:
 print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
 # Testing
-(test_adj, test_fea) = sampler.get_test_set(normalization=args.normalization, cuda=args.cuda)
+(test_adj, test_fea) = sampler.get_test_set(normalization=args.normalization, device="cpu")
+if args.type == 'GAT':
+    test_adj, test_fea = gat_preprocess(test_adj, test_fea)
 if args.mixmode:
     test_adj = test_adj.cuda()
 (loss_test, acc_test) = test(test_adj, test_fea)
...