Commit ffa4e7d0 authored by Oleh Astappiev's avatar Oleh Astappiev
Browse files

fix: efficientnet and vit models

parent fb09f304
# Train a Siamese head on top of frozen EfficientNet V2 S embeddings of CIFAR-10.
# (Reconstructed post-commit version; the scraped diff had old/new text fused per line.)
import sys
sys.path.append("..")

from src.data.cifar10 import load_dataset3, NUM_CLASSES
from src.utils.embeddings import project_embeddings, calc_vectors, save_embeddings
from src.utils.common import get_modeldir, get_datadir
from src.model.efficientnet import EfficientNetModel, TARGET_SHAPE, BATCH_SIZE
from src.model.siamese import SiameseModel

model_name = 'cifar10_efficientnet'
embeddings_name = model_name + '_embeddings'

# Embed the *entire* dataset (train + val + test): the siamese head is trained
# on embedding pairs, so all images contribute embedding vectors.
train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, batch_size=BATCH_SIZE,
                                          preprocess_fn=EfficientNetModel.preprocess_input)
comb_ds = train_ds.concatenate(val_ds).concatenate(test_ds)

model = EfficientNetModel()
model.summary()

print('calculating embeddings...')
emb_vectors, emb_labels = calc_vectors(comb_ds, model)
save_embeddings(emb_vectors, emb_labels, embeddings_name)
# emb_vectors, emb_labels = load_embeddings(embeddings_name)

# siamese is the model we train.
# 1280 = output width of the EfficientNet V2 S feature vector.
siamese = SiameseModel(embedding_vector_dimension=1280, image_vector_dimensions=128)
siamese.compile(loss_margin=0.005)
siamese.summary()

# Class weights compensate for the positive/negative pair imbalance produced
# by prepare_dataset (1 positive class vs NUM_CLASSES - 1 negative pairings).
ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)
history = siamese.fit(ds, class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES})

# Build full inference model (from image to image vector):
inference_model = siamese.get_inference_model(model)
inference_model.save(get_modeldir(model_name + '_inference.tf'), save_format='tf', include_optimizer=False)
# inference_model = tf.keras.models.load_model(get_modeldir(model_name + '_inference.tf'), compile=False)

print('visualization')
# compute vectors of the images and their labels, store them in a tsv file for visualization
projection_vectors = siamese.get_projection_model().predict(emb_vectors)
project_embeddings(projection_vectors, emb_labels, model_name + '_siamese')
tensorboard_cb = callbacks.TensorBoard(get_logdir("efficientnet/fit"))

# Shared constants: training scripts import these alongside the model so the
# dataset pipeline and the backbone always agree on batch size and resolution.
BATCH_SIZE = 256
TARGET_SHAPE = (384, 384)  # spatial resolution expected by the EfficientNet V2 S hub module

MODEL_URL = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/feature_vector/2"


class EfficientNetModel(Sequential):
    """Frozen EfficientNet V2 S feature extractor loaded from TF-Hub.

    Maps a batch of float32 RGB images in [0, 1] of shape TARGET_SHAPE + (3,)
    to 1280-dimensional feature vectors.
    """

    def __init__(self):
        super(EfficientNetModel, self).__init__([
            hub.KerasLayer(MODEL_URL, trainable=False)  # EfficientNet V2 S backbone, frozen weights
        ])
        # Model.build() takes the full input shape *including* the batch
        # dimension; without the leading None, 384 would be interpreted as
        # the batch size rather than the image height.
        self.build((None,) + TARGET_SHAPE + (3,))

    def compile(self, metrics=None, **kwargs):
        # Default to accuracy; use a None sentinel instead of a mutable default list.
        super().compile(metrics=['accuracy'] if metrics is None else metrics, **kwargs)

    def fit(self, x=None, y=None, callbacks=None, **kwargs):
        # Log to TensorBoard by default; None sentinel avoids a mutable default.
        if callbacks is None:
            callbacks = [tensorboard_cb]
        return super().fit(x=x, y=y, callbacks=callbacks, **kwargs)

    @staticmethod
    def preprocess_input(image, label):
        """Convert an image to float32 in [0, 1] and resize to TARGET_SHAPE.

        Matches the TF-Hub common image input signature (values in [0.0, 1.0]).
        Returns the (image, label) pair so it can be mapped over a tf.data
        dataset of (image, label) tuples.
        """
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, TARGET_SHAPE, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        return image, label
tensorboard_cb = callbacks.TensorBoard(get_logdir("vit/fit"))

# Shared constants: training scripts import these alongside the model so the
# dataset pipeline and the backbone always agree on batch size and resolution.
BATCH_SIZE = 256
TARGET_SHAPE = (224, 224)  # spatial resolution expected by the ViT-S/16 hub module

MODEL_URL = "https://tfhub.dev/sayakpaul/vit_s16_fe/1"


class VitModel(Sequential):
    """Frozen ViT-S/16 feature extractor loaded from TF-Hub.

    Maps a batch of float32 RGB images in [-1, 1] of shape TARGET_SHAPE + (3,)
    to 384-dimensional feature vectors.
    """

    def __init__(self):
        super(VitModel, self).__init__([
            hub.KerasLayer(MODEL_URL, trainable=False)  # ViT-S/16 backbone, frozen weights
        ])
        # Model.build() takes the full input shape *including* the batch
        # dimension; without the leading None, 224 would be interpreted as
        # the batch size rather than the image height.
        self.build((None,) + TARGET_SHAPE + (3,))

    def compile(self, metrics=None, **kwargs):
        # Default to accuracy; use a None sentinel instead of a mutable default list.
        super().compile(metrics=['accuracy'] if metrics is None else metrics, **kwargs)

    def fit(self, x=None, y=None, callbacks=None, **kwargs):
        # Log to TensorBoard by default; None sentinel avoids a mutable default.
        if callbacks is None:
            callbacks = [tensorboard_cb]
        return super().fit(x=x, y=y, callbacks=callbacks, **kwargs)

    @staticmethod
    def preprocess_input(image, label):
        """Convert an image to float32, resize to TARGET_SHAPE, scale to [-1, 1].

        Returns the (image, label) pair so it can be mapped over a tf.data
        dataset of (image, label) tuples.
        """
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, TARGET_SHAPE, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        image = (image - 0.5) * 2  # ViT requires images in range [-1,1]
        return image, label
# Train a Siamese head on top of frozen ViT-S/16 embeddings of CIFAR-10.
# (Reconstructed post-commit version; the scraped diff had old/new text fused per line.)
import sys
sys.path.append("..")

import tensorflow as tf
from src.data.cifar10 import load_dataset3, NUM_CLASSES
from src.utils.embeddings import project_embeddings, calc_vectors, save_embeddings
from src.utils.common import get_modeldir, get_datadir
from src.model.vit import VitModel, TARGET_SHAPE, BATCH_SIZE
from src.model.siamese import SiameseModel

model_name = 'cifar10_vit'
embeddings_name = model_name + '_embeddings'

# Embed the *entire* dataset (train + val + test): the siamese head is trained
# on embedding pairs, so all images contribute embedding vectors.
train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, batch_size=BATCH_SIZE,
                                          preprocess_fn=VitModel.preprocess_input)
comb_ds = train_ds.concatenate(val_ds).concatenate(test_ds)

model = VitModel()
model.summary()

print('calculating embeddings...')
emb_vectors, emb_labels = calc_vectors(comb_ds, model)
save_embeddings(emb_vectors, emb_labels, embeddings_name)
# emb_vectors, emb_labels = load_embeddings(embeddings_name)

# siamese is the model we train.
# 384 = output width of the ViT-S/16 feature vector.
siamese = SiameseModel(embedding_vector_dimension=384, image_vector_dimensions=512)
siamese.compile(loss_margin=0.005, optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
siamese.summary()

# Class weights compensate for the positive/negative pair imbalance produced
# by prepare_dataset (1 positive class vs NUM_CLASSES - 1 negative pairings).
ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)
history = siamese.fit(ds, class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES})

# Build full inference model (from image to image vector):
inference_model = siamese.get_inference_model(model)
inference_model.save(get_modeldir(model_name + '_inference.tf'), save_format='tf', include_optimizer=False)
# inference_model = tf.keras.models.load_model(get_modeldir(model_name + '_inference.tf'), compile=False)

print('visualization')
# compute vectors of the images and their labels, store them in a tsv file for visualization
projection_vectors = siamese.get_projection_model().predict(emb_vectors)
project_embeddings(projection_vectors, emb_labels, model_name + '_siamese')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment