Commit fb09f304 authored by Oleh Astappiev's avatar Oleh Astappiev
Browse files

fix: other datasets and models

parent 8c2437e2
import sys import sys
sys.path.append("..") sys.path.append("..")
import numpy as np
import tensorflow as tf import tensorflow as tf
from src.data.imagenette import load_dataset3, NUM_CLASSES from src.data.imagenette import load_dataset3, NUM_CLASSES
from src.utils.embeddings import save_embeddings, load_embeddings, project_embeddings from src.utils.embeddings import save_embeddings, load_embeddings, project_embeddings, calc_vectors
from src.utils.common import get_modeldir from src.utils.common import get_modeldir
from src.model.alexnet import AlexNetModel, TARGET_SHAPE from src.model.alexnet import AlexNetModel, TARGET_SHAPE
from src.model.siamese import SiameseModel from src.model.siamese import SiameseModel
...@@ -13,6 +12,7 @@ model_name = 'imagenette_alexnet' ...@@ -13,6 +12,7 @@ model_name = 'imagenette_alexnet'
embeddings_name = model_name + '_embeddings' embeddings_name = model_name + '_embeddings'
train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, preprocess_fn=AlexNetModel.preprocess_input) train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, preprocess_fn=AlexNetModel.preprocess_input)
comb_ds = train_ds.concatenate(val_ds).concatenate(test_ds)
# create model # create model
model = AlexNetModel() model = AlexNetModel()
...@@ -33,31 +33,22 @@ model.evaluate(test_ds) ...@@ -33,31 +33,22 @@ model.evaluate(test_ds)
for layer in model.layers: for layer in model.layers:
layer.trainable = False layer.trainable = False
# save embeddings print('calculating embeddings...')
embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-2].output) embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-2].output)
embedding_model.summary() embedding_model.summary()
embedding_vds = train_ds.concatenate(val_ds).concatenate(test_ds) emb_vectors, emb_labels = calc_vectors(comb_ds, embedding_model)
print('calculating embeddings...') save_embeddings(emb_vectors, emb_labels, embeddings_name)
embeddings = embedding_model.predict(embedding_vds)
embedding_labels = np.concatenate([y for x, y in embedding_vds], axis=0)
save_embeddings(embeddings, embedding_labels, embeddings_name)
# embeddings, embedding_labels = load_embeddings(embeddings_name) # emb_vectors, emb_labels = load_embeddings(embeddings_name)
# siamese is the model we train # siamese is the model we train
siamese = SiameseModel(embedding_vector_dimension=4096, image_vector_dimensions=3) siamese = SiameseModel(embedding_vector_dimension=4096, image_vector_dimensions=3)
siamese.compile(loss_margin=0.05) siamese.compile(loss_margin=0.05)
siamese.summary() siamese.summary()
## Training hyperparameters (values selected randomly at the moment, would be easy to set up hyperparameter tuning wth Keras Tuner) ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)
## We have 128 pairs for each epoch, thus in total we will have 128 x 2 x 1000 images to give to the siamese history = siamese.fit(ds, class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES})
NUM_EPOCHS = 3
TRAIN_BATCH_SIZE = 128
STEPS_PER_EPOCH = 1000
ds = SiameseModel.prepare_dataset(embeddings, embedding_labels)
history = siamese.fit(ds, epochs=NUM_EPOCHS, steps_per_epoch=STEPS_PER_EPOCH, class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES})
# Build full inference model (from image to image vector): # Build full inference model (from image to image vector):
inference_model = siamese.get_inference_model(embedding_model) inference_model = siamese.get_inference_model(embedding_model)
...@@ -67,5 +58,8 @@ inference_model.save(get_modeldir(model_name + '_inference.tf'), save_format='tf ...@@ -67,5 +58,8 @@ inference_model.save(get_modeldir(model_name + '_inference.tf'), save_format='tf
print('visualization') print('visualization')
# compute vectors of the images and their labels, store them in a tsv file for visualization # compute vectors of the images and their labels, store them in a tsv file for visualization
image_vectors = inference_model.predict(embedding_vds) siamese_vectors, siamese_labels = calc_vectors(comb_ds, inference_model)
project_embeddings(image_vectors, embedding_labels, model_name) project_embeddings(siamese_vectors, siamese_labels, model_name + '_siamese')
projection_vectors = siamese.get_projection_model().predict(emb_vectors)
project_embeddings(projection_vectors, emb_labels, model_name + '_siamese2')
import sys import sys
sys.path.append("..") sys.path.append("..")
import numpy as np
import tensorflow as tf import tensorflow as tf
from src.data.cifar10 import load_dataset3, NUM_CLASSES from src.data.cifar10 import load_dataset3, NUM_CLASSES
from src.utils.embeddings import save_embeddings, load_embeddings, project_embeddings from src.utils.embeddings import save_embeddings, load_embeddings, project_embeddings, calc_vectors
from src.utils.common import get_modeldir from src.utils.common import get_modeldir
from src.model.alexnet import AlexNetModel, TARGET_SHAPE from src.model.alexnet import AlexNetModel, TARGET_SHAPE
from src.model.siamese import SiameseModel from src.model.siamese import SiameseModel
...@@ -13,6 +12,7 @@ model_name = 'cifar10_alexnet' ...@@ -13,6 +12,7 @@ model_name = 'cifar10_alexnet'
embeddings_name = model_name + '_embeddings' embeddings_name = model_name + '_embeddings'
train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, preprocess_fn=AlexNetModel.preprocess_input) train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, preprocess_fn=AlexNetModel.preprocess_input)
comb_ds = train_ds.concatenate(val_ds).concatenate(test_ds)
# create model # create model
model = AlexNetModel() model = AlexNetModel()
...@@ -33,31 +33,22 @@ model.evaluate(test_ds) ...@@ -33,31 +33,22 @@ model.evaluate(test_ds)
for layer in model.layers: for layer in model.layers:
layer.trainable = False layer.trainable = False
# save embeddings print('calculating embeddings...')
embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-2].output) embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-2].output)
embedding_model.summary() embedding_model.summary()
embedding_vds = train_ds.concatenate(val_ds).concatenate(test_ds) emb_vectors, emb_labels = calc_vectors(comb_ds, embedding_model)
print('calculating embeddings...') save_embeddings(emb_vectors, emb_labels, embeddings_name)
embeddings = embedding_model.predict(embedding_vds)
embedding_labels = np.concatenate([y for x, y in embedding_vds], axis=0)
save_embeddings(embeddings, embedding_labels, embeddings_name)
# embeddings, embedding_labels = load_embeddings(embeddings_name) # emb_vectors, emb_labels = load_embeddings(embeddings_name)
# siamese is the model we train # siamese is the model we train
siamese = SiameseModel(embedding_vector_dimension=4096, image_vector_dimensions=3) siamese = SiameseModel(embedding_vector_dimension=4096, image_vector_dimensions=3)
siamese.compile(loss_margin=0.05) siamese.compile(loss_margin=0.05)
siamese.summary() siamese.summary()
## Training hyperparameters (values selected randomly at the moment, would be easy to set up hyperparameter tuning wth Keras Tuner) ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)
## We have 128 pairs for each epoch, thus in total we will have 128 x 2 x 1000 images to give to the siamese history = siamese.fit(ds, class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES})
NUM_EPOCHS = 3
TRAIN_BATCH_SIZE = 128
STEPS_PER_EPOCH = 1000
ds = SiameseModel.prepare_dataset(embeddings, embedding_labels)
history = siamese.fit(ds, epochs=NUM_EPOCHS, steps_per_epoch=STEPS_PER_EPOCH, class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES})
# Build full inference model (from image to image vector): # Build full inference model (from image to image vector):
inference_model = siamese.get_inference_model(embedding_model) inference_model = siamese.get_inference_model(embedding_model)
...@@ -67,5 +58,5 @@ inference_model.save(get_modeldir(model_name + '_inference.tf'), save_format='tf ...@@ -67,5 +58,5 @@ inference_model.save(get_modeldir(model_name + '_inference.tf'), save_format='tf
print('visualization') print('visualization')
# compute vectors of the images and their labels, store them in a tsv file for visualization # compute vectors of the images and their labels, store them in a tsv file for visualization
image_vectors = inference_model.predict(embedding_vds) siamese_vectors, siamese_labels = calc_vectors(comb_ds, inference_model)
project_embeddings(image_vectors, embedding_labels, model_name) project_embeddings(siamese_vectors, siamese_labels, model_name + '_siamese')
import sys import sys
sys.path.append("..") sys.path.append("..")
import numpy as np
import tensorflow as tf import tensorflow as tf
from src.data.imagenette import load_dataset3, NUM_CLASSES from src.data.imagenette import load_dataset3, NUM_CLASSES
from src.utils.embeddings import save_embeddings, project_embeddings from src.utils.embeddings import save_embeddings, project_embeddings, calc_vectors
from src.utils.common import get_modeldir from src.utils.common import get_modeldir
from src.model.mobilenet import MobileNetModel, PRETRAIN_EPOCHS, TARGET_SHAPE from src.model.mobilenet import MobileNetModel, PRETRAIN_EPOCHS, TARGET_SHAPE
from src.model.siamese import SiameseModel from src.model.siamese import SiameseModel
...@@ -13,6 +12,7 @@ model_name = 'imagenet_mobilenet' ...@@ -13,6 +12,7 @@ model_name = 'imagenet_mobilenet'
embeddings_name = model_name + '_embeddings' embeddings_name = model_name + '_embeddings'
train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, preprocess_fn=MobileNetModel.preprocess_input) train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, preprocess_fn=MobileNetModel.preprocess_input)
comb_ds = train_ds.concatenate(val_ds).concatenate(test_ds)
PRETRAIN_TOTAL_STEPS = PRETRAIN_EPOCHS * len(train_ds) PRETRAIN_TOTAL_STEPS = PRETRAIN_EPOCHS * len(train_ds)
# create model # create model
...@@ -34,36 +34,22 @@ model.evaluate(test_ds) ...@@ -34,36 +34,22 @@ model.evaluate(test_ds)
for layer in model.layers: for layer in model.layers:
layer.trainable = False layer.trainable = False
# save embeddings print('calculating embeddings...')
embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-7].output) embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-7].output)
embedding_model.summary() embedding_model.summary()
embedding_vds = train_ds.concatenate(val_ds).concatenate(test_ds) emb_vectors, emb_labels = calc_vectors(comb_ds, embedding_model)
print('calculating embeddings...') save_embeddings(emb_vectors, emb_labels, embeddings_name)
embeddings = embedding_model.predict(embedding_vds)
embedding_labels = np.concatenate([y for x, y in embedding_vds], axis=0)
save_embeddings(embeddings, embedding_labels, embeddings_name)
# embeddings, embedding_labels = load_embeddings(embeddings_name) # emb_vectors, emb_labels = load_embeddings(embeddings_name)
# siamese is the model we train # siamese is the model we train
siamese = SiameseModel(embedding_vector_dimension=1024, image_vector_dimensions=3) siamese = SiameseModel(embedding_vector_dimension=1024, image_vector_dimensions=3)
siamese.compile(loss_margin=0.05) siamese.compile(loss_margin=0.05)
siamese.summary() siamese.summary()
## Training hyperparameters (values selected randomly at the moment, would be easy to set up hyperparameter tuning wth Keras Tuner) ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)
## We have 128 pairs for each epoch, thus in total we will have 128 x 2 x 1000 images to give to the siamese history = siamese.fit(ds, class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES})
NUM_EPOCHS = 3
TRAIN_BATCH_SIZE = 128
STEPS_PER_EPOCH = 1000
ds = SiameseModel.prepare_dataset(embeddings, embedding_labels)
history = siamese.fit(
ds,
epochs=NUM_EPOCHS,
steps_per_epoch=STEPS_PER_EPOCH,
class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES}
)
# Build full inference model (from image to image vector): # Build full inference model (from image to image vector):
inference_model = siamese.get_inference_model(embedding_model) inference_model = siamese.get_inference_model(embedding_model)
...@@ -74,5 +60,5 @@ inference_model.save(get_modeldir(model_name + '_inference.tf'), save_format='tf ...@@ -74,5 +60,5 @@ inference_model.save(get_modeldir(model_name + '_inference.tf'), save_format='tf
print('visualization') print('visualization')
# compute vectors of the images and their labels, store them in a tsv file for visualization # compute vectors of the images and their labels, store them in a tsv file for visualization
image_vectors = inference_model.predict(embedding_vds) siamese_vectors, siamese_labels = calc_vectors(comb_ds, inference_model)
project_embeddings(image_vectors, embedding_labels, model_name) project_embeddings(siamese_vectors, siamese_labels, model_name + '_siamese')
import sys import sys
sys.path.append("..") sys.path.append("..")
import numpy as np
import tensorflow as tf import tensorflow as tf
from src.data.cifar10 import load_dataset3, NUM_CLASSES from src.data.cifar10 import load_dataset3, NUM_CLASSES
from src.utils.embeddings import save_embeddings, project_embeddings from src.utils.embeddings import save_embeddings, project_embeddings, calc_vectors
from src.utils.common import get_modeldir from src.utils.common import get_modeldir
from src.model.mobilenet import MobileNetModel, PRETRAIN_EPOCHS, TARGET_SHAPE from src.model.mobilenet import MobileNetModel, PRETRAIN_EPOCHS, TARGET_SHAPE
from src.model.siamese import SiameseModel from src.model.siamese import SiameseModel
...@@ -13,6 +12,7 @@ model_name = 'cifar10_mobilenet' ...@@ -13,6 +12,7 @@ model_name = 'cifar10_mobilenet'
embeddings_name = model_name + '_embeddings' embeddings_name = model_name + '_embeddings'
train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, preprocess_fn=MobileNetModel.preprocess_input) train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, preprocess_fn=MobileNetModel.preprocess_input)
comb_ds = train_ds.concatenate(val_ds).concatenate(test_ds)
PRETRAIN_TOTAL_STEPS = PRETRAIN_EPOCHS * len(train_ds) PRETRAIN_TOTAL_STEPS = PRETRAIN_EPOCHS * len(train_ds)
model = MobileNetModel() model = MobileNetModel()
...@@ -33,37 +33,22 @@ model.evaluate(test_ds) ...@@ -33,37 +33,22 @@ model.evaluate(test_ds)
for layer in model.layers: for layer in model.layers:
layer.trainable = False layer.trainable = False
# save embeddings print('calculating embeddings...')
embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-7].output) embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-7].output)
embedding_model.summary() embedding_model.summary()
embedding_vds = train_ds.concatenate(val_ds).concatenate(test_ds) emb_vectors, emb_labels = calc_vectors(comb_ds, embedding_model)
print('calculating embeddings...') save_embeddings(emb_vectors, emb_labels, embeddings_name)
embeddings = embedding_model.predict(embedding_vds)
embedding_labels = np.concatenate([y for x, y in embedding_vds], axis=0)
save_embeddings(embeddings, embedding_labels, embeddings_name)
# embeddings, embedding_labels = load_embeddings(embeddings_name) # emb_vectors, emb_labels = load_embeddings(embeddings_name)
# siamese is the model we train # siamese is the model we train
siamese = SiameseModel(embedding_vector_dimension=1024, image_vector_dimensions=3) siamese = SiameseModel(embedding_vector_dimension=1024, image_vector_dimensions=3)
siamese.compile(loss_margin=0.05) siamese.compile(loss_margin=0.05)
siamese.summary() siamese.summary()
## Training hyperparameters (values selected randomly at the moment, would be easy to set up hyperparameter tuning wth Keras Tuner) ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)
## We have 128 pairs for each epoch, thus in total we will have 128 x 2 x 1000 images to give to the siamese history = siamese.fit(ds, class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES})
NUM_EPOCHS = 3
TRAIN_BATCH_SIZE = 128
STEPS_PER_EPOCH = 1000
ds = SiameseModel.prepare_dataset(embeddings, embedding_labels)
history = siamese.fit(
ds,
epochs=NUM_EPOCHS,
steps_per_epoch=STEPS_PER_EPOCH,
class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES}
)
# Build full inference model (from image to image vector): # Build full inference model (from image to image vector):
inference_model = siamese.get_inference_model(embedding_model) inference_model = siamese.get_inference_model(embedding_model)
...@@ -74,5 +59,5 @@ inference_model.save(get_modeldir(model_name + '_inference.tf'), save_format='tf ...@@ -74,5 +59,5 @@ inference_model.save(get_modeldir(model_name + '_inference.tf'), save_format='tf
print('visualization') print('visualization')
# compute embeddings of the images and their labels, store them in a tsv file for visualization # compute embeddings of the images and their labels, store them in a tsv file for visualization
image_vectors = inference_model.predict(embedding_vds) siamese_vectors, siamese_labels = calc_vectors(comb_ds, inference_model)
project_embeddings(image_vectors, embedding_labels, model_name) project_embeddings(siamese_vectors, siamese_labels, model_name + '_siamese')
...@@ -12,7 +12,7 @@ MARGIN = 0.5 ...@@ -12,7 +12,7 @@ MARGIN = 0.5
NUM_EPOCHS = 3 NUM_EPOCHS = 3
TRAIN_BATCH_SIZE = 128 TRAIN_BATCH_SIZE = 128
STEPS_PER_EPOCH = 100 # 1000 STEPS_PER_EPOCH = 100 # TODO: try restore 1000
@tf.function @tf.function
...@@ -57,6 +57,10 @@ class SiameseModel(Model): ...@@ -57,6 +57,10 @@ class SiameseModel(Model):
super(SiameseModel, self).__init__(inputs=[emb_input_1, emb_input_2], outputs=computed_distance) super(SiameseModel, self).__init__(inputs=[emb_input_1, emb_input_2], outputs=computed_distance)
# def call(self, inputs):
# """ Projection model is a model from embeddings to image vector """
# return self.projection_model(inputs)
def get_projection_model(self): def get_projection_model(self):
""" Projection model is a model from embeddings to image vector """ """ Projection model is a model from embeddings to image vector """
return self.projection_model return self.projection_model
......
...@@ -6,13 +6,11 @@ import tensorflow as tf ...@@ -6,13 +6,11 @@ import tensorflow as tf
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from scipy.spatial import distance_matrix from scipy.spatial import distance_matrix
from src.data.simple3 import load_dataset3, NUM_CLASSES from src.data.simple3 import load_dataset3, NUM_CLASSES
from src.utils.embeddings import project_embeddings, calc_vectors from src.utils.embeddings import project_embeddings, calc_vectors, save_embeddings
from src.utils.common import get_modeldir from src.utils.common import get_modeldir
from src.model.alexnet import AlexNetModel, TARGET_SHAPE from src.model.alexnet import AlexNetModel, TARGET_SHAPE
from src.model.siamese import SiameseModel from src.model.siamese import SiameseModel
# tf.config.run_functions_eagerly(True)
model_name = 'simple3_alexnet' model_name = 'simple3_alexnet'
embeddings_name = model_name + '_embeddings' embeddings_name = model_name + '_embeddings'
...@@ -35,9 +33,6 @@ model.load_weights(get_modeldir(model_name + '.h5')) ...@@ -35,9 +33,6 @@ model.load_weights(get_modeldir(model_name + '.h5'))
# print('evaluating...') # print('evaluating...')
# model.evaluate(test_ds) # model.evaluate(test_ds)
# alexnet_vectors, alexnet_labels = calc_vectors(comb_ds, model)
# project_embeddings(alexnet_vectors, alexnet_labels, model_name + '_alexnet')
for layer in model.layers: for layer in model.layers:
layer.trainable = False layer.trainable = False
...@@ -46,7 +41,10 @@ embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-2].ou ...@@ -46,7 +41,10 @@ embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-2].ou
embedding_model.summary() embedding_model.summary()
emb_vectors, emb_labels = calc_vectors(comb_ds, embedding_model) emb_vectors, emb_labels = calc_vectors(comb_ds, embedding_model)
project_embeddings(emb_vectors, emb_labels, model_name + '_emb') # project_embeddings(emb_vectors, emb_labels, model_name + '_emb')
save_embeddings(emb_vectors, emb_labels, embeddings_name)
# emb_vectors, emb_labels = load_embeddings(embeddings_name)
# siamese is the model we train # siamese is the model we train
siamese = SiameseModel(embedding_vector_dimension=4096, image_vector_dimensions=3) siamese = SiameseModel(embedding_vector_dimension=4096, image_vector_dimensions=3)
......
import sys import sys
sys.path.append("..") sys.path.append("..")
import numpy as np
import tensorflow as tf import tensorflow as tf
from src.data.imagenette import load_dataset3, NUM_CLASSES from src.data.imagenette import load_dataset3, NUM_CLASSES
from src.utils.embeddings import save_embeddings, project_embeddings from src.utils.embeddings import save_embeddings, project_embeddings, calc_vectors
from src.utils.common import get_modeldir from src.utils.common import get_modeldir
from src.model.vgg16 import VGG16Model, PRETRAIN_EPOCHS, TARGET_SHAPE from src.model.vgg16 import VGG16Model, PRETRAIN_EPOCHS, TARGET_SHAPE
from src.model.siamese import SiameseModel from src.model.siamese import SiameseModel
...@@ -13,6 +12,7 @@ model_name = 'imagenet_vgg16' ...@@ -13,6 +12,7 @@ model_name = 'imagenet_vgg16'
embeddings_name = model_name + '_embeddings' embeddings_name = model_name + '_embeddings'
train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, preprocess_fn=VGG16Model.preprocess_input) train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, preprocess_fn=VGG16Model.preprocess_input)
comb_ds = train_ds.concatenate(val_ds).concatenate(test_ds)
PRETRAIN_TOTAL_STEPS = PRETRAIN_EPOCHS * len(train_ds) PRETRAIN_TOTAL_STEPS = PRETRAIN_EPOCHS * len(train_ds)
# create model # create model
...@@ -34,36 +34,22 @@ model.evaluate(test_ds) ...@@ -34,36 +34,22 @@ model.evaluate(test_ds)
for layer in model.layers: for layer in model.layers:
layer.trainable = False layer.trainable = False
# save embeddings print('calculating embeddings...')
embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-2].output) embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-2].output)
embedding_model.summary() embedding_model.summary()
embedding_vds = train_ds.concatenate(val_ds).concatenate(test_ds) emb_vectors, emb_labels = calc_vectors(comb_ds, embedding_model)
print('calculating embeddings...') save_embeddings(emb_vectors, emb_labels, embeddings_name)
embeddings = embedding_model.predict(embedding_vds)
embedding_labels = np.concatenate([y for x, y in embedding_vds], axis=0)
save_embeddings(embeddings, embedding_labels, embeddings_name)
# embeddings, embedding_labels = load_embeddings(embeddings_name) # emb_vectors, emb_labels = load_embeddings(embeddings_name)
# siamese is the model we train # siamese is the model we train
siamese = SiameseModel(embedding_vector_dimension=4096, image_vector_dimensions=3) siamese = SiameseModel(embedding_vector_dimension=4096, image_vector_dimensions=3)
siamese.compile(loss_margin=0.05) siamese.compile(loss_margin=0.05)
siamese.summary() siamese.summary()
## Training hyperparameters (values selected randomly at the moment, would be easy to set up hyperparameter tuning wth Keras Tuner) ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)
## We have 128 pairs for each epoch, thus in total we will have 128 x 2 x 1000 images to give to the siamese history = siamese.fit(ds, class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES})
NUM_EPOCHS = 3
TRAIN_BATCH_SIZE = 128
STEPS_PER_EPOCH = 1000
ds = SiameseModel.prepare_dataset(embeddings, embedding_labels)
history = siamese.fit(
ds,
epochs=NUM_EPOCHS,
steps_per_epoch=STEPS_PER_EPOCH,
class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES}
)
# Build full inference model (from image to image vector): # Build full inference model (from image to image vector):
inference_model = siamese.get_inference_model(embedding_model) inference_model = siamese.get_inference_model(embedding_model)
...@@ -74,5 +60,5 @@ inference_model.save(get_modeldir(model_name + '_inference.tf'), save_format='tf ...@@ -74,5 +60,5 @@ inference_model.save(get_modeldir(model_name + '_inference.tf'), save_format='tf
print('visualization') print('visualization')
# compute vectors of the images and their labels, store them in a tsv file for visualization # compute vectors of the images and their labels, store them in a tsv file for visualization
image_vectors = inference_model.predict(embedding_vds) siamese_vectors, siamese_labels = calc_vectors(comb_ds, inference_model)
project_embeddings(image_vectors, embedding_labels, model_name) project_embeddings(siamese_vectors, siamese_labels, model_name + '_siamese')
import sys import sys
sys.path.append("..") sys.path.append("..")
import numpy as np
import tensorflow as tf import tensorflow as tf
from src.data.cifar10 import load_dataset3, NUM_CLASSES from src.data.cifar10 import load_dataset3, NUM_CLASSES
from src.utils.embeddings import save_embeddings, project_embeddings from src.utils.embeddings import save_embeddings, project_embeddings, calc_vectors
from src.utils.common import get_modeldir from src.utils.common import get_modeldir
from src.model.vgg16 import VGG16Model, PRETRAIN_EPOCHS from src.model.vgg16 import VGG16Model, PRETRAIN_EPOCHS
from src.model.siamese import SiameseModel from src.model.siamese import SiameseModel
...@@ -15,6 +14,7 @@ embeddings_name = model_name + '_embeddings' ...@@ -15,6 +14,7 @@ embeddings_name = model_name + '_embeddings'
TARGET_SHAPE = (32, 32) TARGET_SHAPE = (32, 32)
train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, preprocess_fn=VGG16Model.preprocess_input) train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, preprocess_fn=VGG16Model.preprocess_input)
comb_ds = train_ds.concatenate(val_ds).concatenate(test_ds)
PRETRAIN_TOTAL_STEPS = PRETRAIN_EPOCHS * len(train_ds) PRETRAIN_TOTAL_STEPS = PRETRAIN_EPOCHS * len(train_ds)
# create model # create model
...@@ -36,36 +36,22 @@ model.evaluate(test_ds) ...@@ -36,36 +36,22 @@ model.evaluate(test_ds)
for layer in model.layers: for layer in model.layers:
layer.trainable = False layer.trainable = False
# save embeddings print('calculating embeddings...')
embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-2].output) embedding_model = tf.keras.Model(inputs=model.input, outputs=model.layers[-2].output)
embedding_model.summary() embedding_model.summary()
embedding_vds = train_ds.concatenate(val_ds).concatenate(test_ds) emb_vectors, emb_labels = calc_vectors(comb_ds, embedding_model)
print('calculating embeddings...') save_embeddings(emb_vectors, emb_labels, embeddings_name)
embeddings = embedding_model.predict(embedding_vds)
embedding_labels = np.concatenate([y for x, y in embedding_vds], axis=0)
save_embeddings(embeddings, embedding_labels, embeddings_name)
# embeddings, embedding_labels = load_embeddings(embeddings_name) # emb_vectors, emb_labels = load_embeddings(embeddings_name)
# siamese is the model we train # siamese is the model we train
siamese = SiameseModel(embedding_vector_dimension=4096, image_vector_dimensions=3) siamese = SiameseModel(embedding_vector_dimension=4096, image_vector_dimensions=3)
siamese.compile(loss_margin=0.05) siamese.compile(loss_margin=0.05)
siamese.summary() siamese.summary()