Commit 7d83362d authored by Oleh Astappiev

vgg16/alexnet

parent d2cec2b7
import sys
sys.path.append("..")

import numpy as np
from src.data.imagenette import *
from src.data.embeddings import *
from utils.common import *
from utils.distance import *
from src.model.alexnet import AlexNetModel
from src.model.siamese import SiameseModel
from tensorflow.keras import layers, Model
from pathlib import Path

model_name = 'imagenette_alexnet'
embeddings_name = model_name + '_embeddings'

train_ds, test_ds = load_dataset()
train_ds_size = tf.data.experimental.cardinality(train_ds).numpy()
fit_ds = train_ds.skip(train_ds_size // 10)  # integer division: skip/take expect an integer count
val_ds = train_ds.take(train_ds_size // 10)

fit_ds_size = tf.data.experimental.cardinality(fit_ds).numpy()
val_ds_size = tf.data.experimental.cardinality(val_ds).numpy()
test_ds_size = tf.data.experimental.cardinality(test_ds).numpy()

print("Training data size:", fit_ds_size)
print("Validation data size:", val_ds_size)
print("Evaluation data size:", test_ds_size)
# load model
# alexnet = models.load_model(get_modeldir(model_name + '.tf'))

@@ -20,47 +33,58 @@
alexnet = AlexNetModel()
alexnet.compile()
alexnet.summary()

# load weights
alexnet.load_weights(get_modeldir(model_name + '.h5'))

# train
# alexnet.fit(fit_ds, validation_data=val_ds)

# save
# alexnet.save_weights(get_modeldir(model_name + '.h5'))
# alexnet.save(get_modeldir(model_name + '.tf'))

# evaluate
print('evaluating...')
alexnet.evaluate(test_ds)

for layer in alexnet.layers:
    layer.trainable = False
# save embeddings
embedding_model = tf.keras.Model(inputs=alexnet.input, outputs=alexnet.layers[-2].output)
embedding_model.summary()

embedding_vds = train_ds.concatenate(test_ds)

# print('calculating embeddings...')
# embeddings = embedding_model.predict(embedding_vds)
# embedding_labels = np.concatenate([y for x, y in embedding_vds], axis=0)
# save_embeddings(embeddings, embedding_labels, embeddings_name)

embeddings, embedding_labels = load_embeddings(embeddings_name)
NUM_CLASSES = np.unique(embedding_labels).shape[0]
"""# Siamese network training
# Model hyperparters Following this tutorial: https://keras.io/examples/vision/siamese_contrastive/
EMBEDDING_VECTOR_DIMENSION = 4096
IMAGE_VECTOR_DIMENSIONS = 512
## Prepare the dataset
alexnet = Model(inputs=alexnet.input, outputs=alexnet.layers[-3].output) We already have the embeddings precomputed in `embeddings` and their matching `labels`. To train the siamese networks, we need to generate random pairs of embeddings, assigning as target `1` if the two come from the same class and `0` otherwise.
In order to keep the training balanced, we can't simply select two random `(embedding, label)` tuples from the dataset, because this is heavily unbalanced towards the negative class. To keep thing simple, we'll randomly select two samples and then use `rejection_resample` to rebalance the classes.
## Training hyperparameters (values selected randomly at the moment, would be easy to set up hyperparameter tuning wth Keras Tuner) **NOTE**: rejection resampling works only if the number of classes is reasonably low: with 10 classes there's a 90% probability that a sample will be rejected, it can get very inefficient very quickly if the number of classes is too great.
TRAIN_BATCH_SIZE = 128 """
# zip together embeddings and their labels, cache in memory (maybe not necessary, or maybe faster this way), shuffle, repeat forever.
embeddings_ds = tf.data.Dataset.zip((
    tf.data.Dataset.from_tensor_slices(embeddings),
    tf.data.Dataset.from_tensor_slices(embedding_labels)
)).cache().shuffle(1000).repeat()

# change for triplet loss implementation
@tf.function
def make_label_for_pair(embeddings, labels):
@@ -79,15 +103,240 @@
train_ds = train_ds.map(make_label_for_pair, num_parallel_calls=tf.data.AUTOTUNE)
# train_ds = train_ds.rejection_resample(lambda embs, target: tf.cast(target, tf.int32), [0.5, 0.5], initial_dist=[0.9, 0.1])
# train_ds = train_ds.map(lambda _, vals: vals) # discard the prepended "selected" class from the rejection resample, since we already have it available
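# The body of make_label_for_pair and the pairing pipeline are collapsed in this
# diff. A minimal sketch of what that step could look like (a hypothetical
# reconstruction, not the actual hidden code): assuming an upstream dataset that
# yields ((emb_1, emb_2), (lbl_1, lbl_2)) pairs, e.g. via
#
#   pairs_ds = tf.data.Dataset.zip((embeddings_ds, embeddings_ds.skip(7)))
#   pairs_ds = pairs_ds.map(lambda a, b: ((a[0], b[0]), (a[1], b[1])))
#
# a mapped function matching the signature above would be:
#
#   @tf.function
#   def make_label_for_pair(embeddings, labels):
#       # target 1 for a same-class pair, 0 otherwise (see the docstring above)
#       return (embeddings[0], embeddings[1]), tf.cast(labels[0] == labels[1], tf.float32)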
"""## Model and loss definition

The `projection_model` is the part of the network that generates the final image vector (currently, a simple Dense layer with tanh activation, but it can be as complex as needed).

The `siamese` model is the one we train. It applies the projection model to two embeddings, calculates the distance between the two generated image vectors (cosine distance, in the code below) and computes the contrastive loss.

As a note, [here](https://towardsdatascience.com/contrastive-loss-explaned-159f2d4a87ec) they mention that cosine distance is preferable to euclidean distance:

> in a large dimensional space, all points tend to be far apart by the euclidian measure. In higher dimensions, the angle between vectors is a more effective measure.

Note that, when using cosine distance, the margin needs to be reduced from its default value of 1 (see below).

__________________

### Contrastive Loss

$Loss = Y \cdot D(v_1, v_2)^2 + (1 - Y) \cdot \max(margin - D(v_1, v_2), 0)^2$

$Y$ is the GT target (1 if $v_1$ and $v_2$ belong to the same class, 0 otherwise) and $D$ is the distance between the projected vectors. If the images are from the same class, use the squared distance as the loss (we want to push the distance towards 0 for same-class couples); otherwise use the squared maximum between 0 and $margin - D$. (Beware: the implementation below, taken from the Keras tutorial, swaps the two terms, i.e. it expects `y_true = 1` for *different*-class pairs.)

For different-class couples, the distance should be pushed towards a high value. The **margin identifies a cone inside which vectors are considered the same**. For cosine distance, whose range is [0,2], **the default margin of 1 is NOT an adequate value**.

**NOTE**: in the loss implementation below, we take the mean of the two terms, though this should not actually be necessary (the minimizer of the loss is the same whether or not the loss is divided by 2).
"""
## Model hyperparameters
EMBEDDING_VECTOR_DIMENSION = 4096
# EMBEDDING_VECTOR_DIMENSION = int(1280/2)
IMAGE_VECTOR_DIMENSIONS = 128
# IMAGE_VECTOR_DIMENSIONS = 3 # use for test visualization on tensorboard
ACTIVATION_FN = 'tanh' # same as in paper
MARGIN = 0.05
## These functions are straight from the Keras tutorial linked above
# Provided two tensors t1 and t2
# Euclidean distance = sqrt(sum(square(t1-t2)))
def euclidean_distance(vects):
"""Find the Euclidean distance between two vectors.
Arguments:
vects: List containing two tensors of same length.
Returns:
Tensor containing euclidean distance
(as floating point value) between vectors.
"""
x, y = vects
sum_square = tf.math.reduce_sum(tf.math.square(x - y), axis=1, keepdims=True)
return tf.math.sqrt(tf.math.maximum(sum_square, tf.keras.backend.epsilon()))
def cosine_distance(vects):
    """Find the Cosine distance between two vectors.

    Arguments:
        vects: List containing two tensors of same length.

    Returns:
        Tensor containing cosine distance
        (as floating point value) between vectors.
    """
    # NOTE: cosine_distance = 1 - cosine_similarity
    # Cosine distance is defined in [0,2], where 0 means vectors with the same
    # direction, 1 means perpendicular vectors and 2 means opposite vectors.
    cosine_similarity = tf.keras.layers.Dot(axes=1, normalize=True)(vects)
    return 1 - cosine_similarity
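# Illustrative sanity check of the two distance ranges (assumes TF2 eager
# execution; not part of the original script and safe to delete):
_u = tf.constant([[1.0, 0.0]])
_v = tf.constant([[-1.0, 0.0]])
assert abs(cosine_distance([_u, _u]).numpy()[0, 0] - 0.0) < 1e-6    # same direction -> 0
assert abs(cosine_distance([_u, _v]).numpy()[0, 0] - 2.0) < 1e-6    # opposite direction -> 2
assert abs(euclidean_distance([_u, _v]).numpy()[0, 0] - 2.0) < 1e-5 # straight-line distance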
def loss(margin=1):
    """Provides 'contrastive_loss' an enclosing scope with variable 'margin'.

    Arguments:
        margin: Float, defines the baseline distance for which pairs
                should be classified as dissimilar (default is 1).

    Returns:
        'contrastive_loss' function with data ('margin') attached.
    """

    # Contrastive loss = mean( (1 - true_value) * square(prediction) +
    #                          true_value * square( max(margin - prediction, 0) ))
    def contrastive_loss(y_true, y_pred):
        """Calculates the contrastive loss.

        Arguments:
            y_true: List of labels (in this implementation: 1 for a
                    different-class pair, 0 for a same-class pair), fp32.
            y_pred: List of predicted distances, fp32.

        Returns:
            A tensor containing contrastive loss as floating point value.
        """
        square_dist = tf.math.square(y_pred)
        margin_square = tf.math.square(tf.math.maximum(margin - y_pred, 0))
        return tf.math.reduce_mean(
            (1 - y_true) * square_dist + y_true * margin_square
        )

    return contrastive_loss
emb_input_1 = layers.Input(EMBEDDING_VECTOR_DIMENSION)
emb_input_2 = layers.Input(EMBEDDING_VECTOR_DIMENSION)
# todo Add more layers here
# projection model is the one to use for queries (put in a sequence after the embedding-generator model above)
projection_model = tf.keras.models.Sequential([
layers.Dense(IMAGE_VECTOR_DIMENSIONS, activation=ACTIVATION_FN, input_shape=(EMBEDDING_VECTOR_DIMENSION,))
# layers.Dense(128, activation='relu', input_shape=(EMBEDDING_VECTOR_DIMENSION,)),
# layers.Dense(IMAGE_VECTOR_DIMENSIONS, activation=None)
# relu on activation, max
])
v1 = projection_model(emb_input_1)
v2 = projection_model(emb_input_2)
computed_distance = layers.Lambda(cosine_distance)([v1, v2])
# siamese is the model we train
siamese = Model(inputs=[emb_input_1, emb_input_2], outputs=computed_distance)
## Training hyperparameters (values selected randomly at the moment, would be easy to set up hyperparameter tuning with Keras Tuner)
## We have 128 pairs per batch and 1000 steps per epoch, so each epoch feeds 128 x 2 x 1000 images to the siamese network
TRAIN_BATCH_SIZE = 128
STEPS_PER_EPOCH = 1000
NUM_EPOCHS = 3
# TODO: If there's a need to adapt the learning rate, explicitly create the optimizer instance here and pass it into compile
siamese.compile(loss=loss(margin=MARGIN), optimizer="RMSprop")
siamese.summary()
"""Select Projector interface for Tensorboard"""
# Commented out IPython magic to ensure Python compatibility.
# %load_ext tensorboard
# %tensorboard --logdir=logs
callbacks = [
tf.keras.callbacks.TensorBoard(get_logdir("inference/fit"), profile_batch=5)
]
# TODO: Would be good to have a validation dataset too.
ds = train_ds.batch(TRAIN_BATCH_SIZE) # .prefetch(tf.data.AUTOTUNE)
history = siamese.fit(
ds,
epochs=NUM_EPOCHS,
steps_per_epoch=STEPS_PER_EPOCH,
callbacks=callbacks,
class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES}
)
# Build full inference model (from image to image vector):
im_input = embedding_model.input
embedding = embedding_model(im_input)
image_vector = projection_model(embedding)
inference_model = Model(inputs=im_input, outputs=image_vector)
inference_model.save(get_modeldir(model_name + '_inference.tf'), save_format='tf', include_optimizer=False)
"""# Validation
To validate the model, we load the validation chunk of the dataset and we feed it into the network. We don't need to repeat the preprocessing steps done to the dataset, because the preprocessing is embedded in the inference model by the `Rescaling` and `Resizing` layers we added above.
____________
## Visualizing embeddings in TensorBoard
In `metadata.tsv` file we list the labels in the same order as they appear in the embeddings list.
We write out the embeddings list as a tf.Variable initialized to the embeddings values, using TensorBoard's writers to specify the metadata file to use and the name of the tensor to display.
Additionally, in the specification of ProjectorConfig's proto message, there is the possibility to pass the values as a second .tsv file (`values.tsv`) instead than having them loaded from the checkpoint file.
I don't know which values are getting loaded at the moment, but since it works I won't change it further and keep both the .tsv and the checkpointed values.
(See https://stackoverflow.com/a/57230031/3214872)
"""
print('visualization')
def write_embeddings_for_tensorboard(image_vectors: list, labels: list, root_dir: Path):
import csv
from tensorboard.plugins import projector
root_dir.mkdir(parents=True, exist_ok=True)
with (root_dir / 'values.tsv').open('w') as fp:
writer = csv.writer(fp, delimiter='\t')
writer.writerows(image_vectors)
with (root_dir / 'metadata.tsv').open('w') as fp:
for lbl in labels:
fp.write(f'{lbl}\n')
image_vectors = np.asarray(image_vectors)
embeddings = tf.Variable(image_vectors, name='embeddings')
CHECKPOINT_FILE = str(root_dir / 'model.ckpt')
ckpt = tf.train.Checkpoint(embeddings=embeddings)
ckpt.save(CHECKPOINT_FILE)
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = "embeddings/.ATTRIBUTES/VARIABLE_VALUE"
embedding.metadata_path = 'metadata.tsv'
embedding.tensor_path = 'values.tsv'
projector.visualize_embeddings(root_dir, config)
# inference_model = tf.keras.models.load_model(get_modeldir(model_name + '_inference.tf'), compile=False)
# NUM_SAMPLES_TO_DISPLAY = 10000
NUM_SAMPLES_TO_DISPLAY = 3000
LOG_DIR = Path('../logs/logs_projection0428_alexnet')
LOG_DIR.mkdir(exist_ok=True, parents=True)
# compute embeddings of the images and their labels, store them in a tsv file for visualization
image_vectors = inference_model.predict(embedding_vds)
labels = embedding_labels
write_embeddings_for_tensorboard(image_vectors, labels, LOG_DIR)

# # Do the same with some of the training data, just to see if it works with that
# ds = embeddings_ds.take(NUM_SAMPLES_TO_DISPLAY).batch(TRAIN_BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
# _image_vectors = []
# _labels = []
# for feats_batch in tqdm(ds):
# ims, lbls = feats_batch
# ims = ims.numpy()
# lbls = lbls.numpy()
# embs = projection_model(ims).numpy()
# _image_vectors.extend(embs.tolist())
# _labels.extend(lbls.tolist())
# write_embeddings_for_tensorboard(_image_vectors, _labels, LOG_DIR / 'train')
import sys
sys.path.append("..")
from src.data.embeddings import *
from utils.common import *
from utils.distance import *
from src.model.alexnet import AlexNetModel
from src.model.siamese import SiameseModel
from tensorflow.keras import layers, Model
model_name = 'imagenette_alexnet'
embeddings_name = model_name + '_embeddings'
from pathlib import Path
import tarfile
import numpy as np
batch_size = 32
img_height = 426
img_width = 320
data_dir = Path('../datasets/imagenette2-320/train')
print(data_dir.absolute())
train_ds = tf.keras.utils.image_dataset_from_directory(
data_dir, image_size=(227, 227), batch_size=batch_size)
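# Only the train split is loaded above, yet the commented-out fit/evaluate calls
# below reference test_ds / validation_ds, which are never defined in this file.
# A minimal sketch of how a validation split could be derived (the
# validation_split/subset/seed arguments are standard Keras; their use here is
# an assumption, not part of this commit):
#
#   train_ds = tf.keras.utils.image_dataset_from_directory(
#       data_dir, validation_split=0.2, subset='training', seed=123,
#       image_size=(227, 227), batch_size=batch_size)
#   test_ds = tf.keras.utils.image_dataset_from_directory(
#       data_dir, validation_split=0.2, subset='validation', seed=123,
#       image_size=(227, 227), batch_size=batch_size)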
# load model
# alexnet = models.load_model(get_modeldir(model_name + '.tf'))
# create model
# alexnet = AlexNetModel()
# alexnet.compile()
# alexnet.summary()
# load weights
# alexnet.load_weights(get_modeldir(model_name + '.h5'))
# train
# alexnet.fit(train_ds, validation_data=test_ds)
# save
# alexnet.save_weights(get_modeldir(model_name + '.h5'))
# alexnet.save(get_modeldir(model_name + '.tf'))
# evaluate
# alexnet.evaluate(validation_ds)
# res = alexnet.predict(validation_ds)
# for layer in alexnet.layers:
# layer.trainable = False
# embeddings, embedding_labels = calc_embeddings(alexnet)
# save_embeddings(embeddings, embedding_labels, embeddings_name)
print("Done.")
@@ -5,11 +5,16 @@
from src.utils.common import get_datadir, process_images, process_images_couple
import tensorflow as tf

def load_dataset():
    (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
    train = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).batch(32)
    val = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(32)
    return train, val

def cifar10_complete():
    train, val = load_dataset()
    return train.concatenate(val)

def cifar10_complete_resized():
@@ -17,146 +22,3 @@
    return ds.map(process_images_couple).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
def shuffle_arrays(arrays, set_seed=-1):
"""Shuffles arrays in-place, in the same order, along axis=0
Parameters:
-----------
arrays : List of NumPy arrays.
set_seed : Seed value if int >= 0, else seed is random.
"""
assert all(len(arr) == len(arrays[0]) for arr in arrays)
seed = np.random.randint(0, 2 ** (32 - 1) - 1) if set_seed < 0 else set_seed
for arr in arrays:
rstate = np.random.RandomState(seed)
rstate.shuffle(arr)
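# Illustrative usage (not part of the original module): the same permutation is
# applied to every array, so rows stay aligned across arrays.
#
#   imgs = np.arange(10).reshape(5, 2)   # row i starts with 2*i
#   lbls = np.arange(5)
#   shuffle_arrays([imgs, lbls], set_seed=42)
#   assert all(imgs[i, 0] // 2 == lbls[i] for i in range(5))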
def produce_tuples():
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
total_labels = 10
images_per_label = 6000
tuples_per_label = int(images_per_label / 3)
total_tuples = int(tuples_per_label * total_labels)
# rearrange the whole dataset by labels
labels_stat = np.zeros((total_labels), dtype='uint16')
labels_train = np.empty((total_labels, images_per_label, 32, 32, 3), dtype='uint8')
for i in range(len(train_images)):
tr_leb = train_labels[i]
tr_img = train_images[i]
labels_train[tr_leb, labels_stat[tr_leb]] = tr_img
labels_stat[tr_leb] += 1
for i in range(len(test_images)):
tr_leb = test_labels[i]
tr_img = test_images[i]
labels_train[tr_leb, labels_stat[tr_leb]] = tr_img
labels_stat[tr_leb] += 1
# shuffle all
for i in range(total_labels):
np.random.shuffle(labels_train[i])
# create tuples
anchor_images = np.empty((total_tuples, 32, 32, 3), dtype='uint8')
anchor_labels = np.empty((total_tuples), dtype='uint8')
for i in range(total_labels):
for j in range(tuples_per_label):
anchor_labels[i * tuples_per_label + j] = i
anchor_images[i * tuples_per_label + j] = labels_train[i, j]
positive_images = np.empty((total_tuples, 32, 32, 3), dtype='uint8')
positive_labels = np.empty((total_tuples), dtype='uint8')
for i in range(total_labels):
for j in range(tuples_per_label):
positive_labels[i * tuples_per_label + j] = i
positive_images[i * tuples_per_label + j] = labels_train[i, tuples_per_label + j]
negative_images = np.empty((total_tuples, 32, 32, 3), dtype='uint8')
negative_labels = np.empty((total_tuples), dtype='uint8')
for i in range(total_labels):
for j in range(tuples_per_label):
negative_labels[i * tuples_per_label + j] = i
negative_images[i * tuples_per_label + j] = labels_train[i, tuples_per_label * 2 + j]
# we need to ensure we use random labels, but without images from anchor label
shuffle_arrays([negative_labels, negative_images])
for i in range(total_labels):
k = 0 if i == total_labels - 1 else (i + 1) * tuples_per_label  # start of the next label's block (wraps to 0 for the last label); used to find swap candidates
for j in range(tuples_per_label):
c = i * tuples_per_label + j
tmp_label = negative_labels[c]
if tmp_label == i:
tmp_image = negative_images[c]
while negative_labels[k] == i:
k += 1
negative_labels[c] = negative_labels[k]
negative_images[c] = negative_images[k]
negative_labels[k] = tmp_label
negative_images[k] = tmp_image
# randomize them one more time
for i in range(total_labels):
shuffle_arrays([
negative_labels[i * tuples_per_label:(i + 1) * tuples_per_label],
negative_images[i * tuples_per_label:(i + 1) * tuples_per_label]
])
return (anchor_images, anchor_labels), (positive_images, positive_labels), (negative_images, negative_labels)
def save_tuples(anchor_images, anchor_labels, positive_images, positive_labels, negative_images, negative_labels):
data = [anchor_images, anchor_labels, positive_images, positive_labels, negative_images, negative_labels]
with open(get_datadir('cifar10_tuples.pkl'), 'wb') as outfile:
pickle.dump(data, outfile, -1)
def load_tuples():
with open(get_datadir('cifar10_tuples.pkl'), 'rb') as infile:
result = pickle.load(infile)
return (result[0], result[1]), (result[2], result[3]), (result[4], result[5])
def prepare_dataset():
(anchor_images, anchor_labels), (positive_images, positive_labels), (negative_images, negative_labels) = produce_tuples()
anchor_ds = tf.data.Dataset.from_tensor_slices(anchor_images)
positive_ds = tf.data.Dataset.from_tensor_slices(positive_images)
negative_ds = tf.data.Dataset.from_tensor_slices(negative_images)
anchor_ds = (anchor_ds.map(process_images).batch(batch_size=32, drop_remainder=True))
positive_ds = (positive_ds.map(process_images).batch(batch_size=32, drop_remainder=True))
negative_ds = (negative_ds.map(process_images).batch(batch_size=32, drop_remainder=True))
dataset = tf.data.Dataset.zip((anchor_ds, positive_ds, negative_ds))
# dataset = dataset.shuffle(buffer_size=1024)
return dataset
def visualize(anchor, positive, negative):
"""Visualize a few triplets from the supplied batches."""
def show(ax, image):
ax.imshow(image)
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
fig = plt.figure(figsize=(9, 9))
axs = fig.subplots(3, 3)
for i in range(3):
show(axs[i, 0], anchor[0][i])
show(axs[i, 1], positive[0][i])
show(axs[i, 2], negative[0][i])
plt.show()