Commit bc971be1 authored by Oleh Astappiev

feat: export all datasets

parent f7082cdb
import numpy as np
import _pickle as pickle
import matplotlib.pyplot as plt
from src.utils.common import get_datadir, process_images, process_images_couple
from tensorflow.keras import datasets
from tensorflow import data
import tensorflow as tf
def cifar10_complete():
    (train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
    images = np.concatenate([train_images, test_images])
    labels = np.concatenate([train_labels, test_labels])
    return tf.data.Dataset.from_tensor_slices((images, labels))
def cifar10_complete_resized():
    ds = cifar10_complete()
    return ds.map(process_images_couple).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
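# Quick sanity check (illustrative sketch, not part of this commit): iterate one
# element to confirm the pipeline runs end to end; the exact image shape after
# resizing depends on process_images_couple.
# for image, label in cifar10_complete_resized().take(1):
#     print(image.shape, label.numpy())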
def shuffle_arrays(arrays, set_seed=-1):
    """Shuffles arrays in-place, in the same order, along axis=0
...
import numpy as np
import _pickle as pickle
from keras import Model
from src.data.cifar10 import cifar10_complete_resized
from src.utils.common import get_datadir
def calc_embeddings(alexnet):
    # remove the last layer
    embedding_model = Model(inputs=alexnet.input, outputs=alexnet.layers[-2].output)
    embedding_vds = cifar10_complete_resized().batch(batch_size=32, drop_remainder=False)
    print('predicting embeddings')
    embeddings = embedding_model.predict(embedding_vds)
    embedding_labels = np.concatenate([y for x, y in embedding_vds], axis=0)
    return embeddings, embedding_labels
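# Usage sketch (illustrative only; assumes AlexNetModel and get_modeldir are
# imported and a trained checkpoint exists, as in the training script below):
# alexnet = AlexNetModel()
# alexnet.compile()
# alexnet.load_weights(get_modeldir('alexnet_cifar10.h5'))
# embeddings, labels = calc_embeddings(alexnet)
# save_embeddings(embeddings, labels)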
# # zip together embeddings and their labels, cache in memory (maybe not necessay or maybe faster this way), shuffle, repeat forever.
# embeddings_ds = tf.data.Dataset.zip((
# tf.data.Dataset.from_tensor_slices(embeddings),
# tf.data.Dataset.from_tensor_slices(embedding_labels)
# ))
def save_embeddings(embeddings, labels):
    data = [embeddings, labels]
...
@@ -4,44 +4,35 @@ sys.path.append("..")
import csv
from src.utils.hsv import *
from src.utils.sift import *
from utils.common import *
from src.data.cifar10 import *
from tensorflow.keras import models
cifar10_vds = cifar10_complete_resized()
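# all 60,000 CIFAR-10 images (50,000 train + 10,000 test) go through each exporter below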
def export_hsv(bin0=256, bin1=256, bin2=256):
    header = ['ID', 'Label', 'HSV vector']
    with open('../data/hsv_' + str(bin0) + '.csv', 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f, delimiter=";")
        # write the header
        writer.writerow(header)
        for i, (image, label) in enumerate(cifar10_vds):
            a, b, c, hist_array = extract_hsv(image.numpy(), bin0, bin1, bin2)
            label_str = ','.join(map(str, label.numpy()))
            value_str = ','.join(map(str, hist_array))
            writer.writerow([i, label_str, value_str])
def export_sift(nfeatures=8):
    header = ['ID', 'Label', 'SIFT descriptors']
    with open('../data/sift_' + str(nfeatures) + '.csv', 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f, delimiter=";")
        # write the header
        writer.writerow(header)
        for i, (image, label) in enumerate(cifar10_vds):
            keypoints, features = extract_sift(image.numpy(), nfeatures)
            label_str = ','.join(map(str, label.numpy()))
            if features is not None:
                value_str = ','.join(map(str, features.flatten()))
@@ -58,21 +49,30 @@ def export_embeddings():
        # write the header
        writer.writerow(header)
        seamese = models.load_model(get_modeldir('seamese_cifar10_512.tf'))
        embedding_vds = (cifar10_vds.batch(batch_size=32, drop_remainder=False))
        print('predicting embeddings')
        embeddings = seamese.predict(embedding_vds)
        embeddings_labels = np.concatenate([y for x, y in embedding_vds], axis=0)
        print('embeddings done')
        for i, (label) in enumerate(embeddings_labels):
            label_str = ','.join(map(str, label))
            value_str = ','.join(map(str, embeddings[i]))
            writer.writerow([i, label_str, value_str])
# HSV
# export_hsv(170, 171, 171) # 512
# export_hsv(340, 342, 342) # 1024
# export_hsv(682, 683, 683) # 2048
# export_hsv(1366, 1365, 1365) # 4096
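# (the per-channel bin counts sum to the exported vector length, e.g. 170+171+171 = 512)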
# SIFT
# export_sift(4)
# export_sift(8)
# export_sift(16)
# export_sift(32)
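# (each SIFT keypoint yields a 128-dim descriptor, so nfeatures=8 gives at most 8*128 = 1024 values per image)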
# Siamese Embeddings
export_embeddings()
print('done')
import sys
sys.path.append("..")
from src.utils.sift import *
from src.data.cifar10 import *
import tensorflow as tf
# Load dataset
cifar10_vds = cifar10_complete()
def print_resized(dataset):
    plt.figure(figsize=(20, 20))
...
@@ -10,6 +10,7 @@ from tensorflow.keras import layers, Model
alexnet = AlexNetModel()
alexnet.compile()
alexnet.load_weights(get_modeldir('alexnet_cifar10.h5'))
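# re-wrap AlexNet without its classifier head so the frozen backbone emits penultimate-layer embeddings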
alexnet = Model(inputs=alexnet.input, outputs=alexnet.layers[-2].output)
for layer in alexnet.layers:
    layer.trainable = False
@@ -79,4 +80,4 @@ embedding = alexnet(im_input)
image_vector = projection_model(embedding)
inference_model = Model(inputs=im_input, outputs=image_vector)
inference_model.save(get_modeldir('seamese_cifar10_' + str(IMAGE_VECTOR_DIMENSIONS) + '.tf'), save_format='tf', include_optimizer=False)
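# the vector size in the filename (e.g. seamese_cifar10_512.tf) lets differently-sized embedding models coexist on disk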
@@ -6,11 +6,9 @@ from tqdm import tqdm
sys.path.append("..")
from utils.common import *
from src.data.cifar10 import *
from src.data.embeddings import *
from tensorflow.keras import layers
def write_embeddings_for_tensorboard(image_vectors: list, labels: list, root_dir: Path):
    import csv
@@ -37,17 +35,13 @@ def write_embeddings_for_tensorboard(image_vectors: list, labels: list, root_dir: Path):
    embedding.tensor_path = 'values.tsv'
    projector.visualize_embeddings(root_dir, config)
inference_model = tf.keras.models.load_model(get_modeldir('seamese_cifar10_512.tf'), compile=False)
NUM_SAMPLES_TO_DISPLAY = 10000
LOG_DIR = Path('../logs')
LOG_DIR.mkdir(exist_ok=True, parents=True)
embedding_vds = cifar10_complete()
val_ds = (embedding_vds
    .shuffle(500, seed=42)
    .take(NUM_SAMPLES_TO_DISPLAY)
...
@@ -9,7 +9,6 @@ def extract_hsv(image, bin0=256, bin1=256, bin2=256):
"""Extract a 3 color channels histogram from the HSV""" """Extract a 3 color channels histogram from the HSV"""
hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV) hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
# The ranges of the 3 HSV channels in opencv are 0-180, 0-256, 0-256 respectively # The ranges of the 3 HSV channels in opencv are 0-180, 0-256, 0-256 respectively
# Bins is set to 1365, so that each picture can be represented by a 4000-dimensional vector
histh = cv2.calcHist([hsv], [0], None, [bin0], [0, 180]) histh = cv2.calcHist([hsv], [0], None, [bin0], [0, 180])
hists = cv2.calcHist([hsv], [1], None, [bin1], [0, 256]) hists = cv2.calcHist([hsv], [1], None, [bin1], [0, 256])
histv = cv2.calcHist([hsv], [2], None, [bin2], [0, 256]) histv = cv2.calcHist([hsv], [2], None, [bin2], [0, 256])
......
@@ -4,9 +4,9 @@ import cv2
from src.utils.common import *
def extract_sift(image, nfeatures=None):
    # the resulting number of features is the number of keypoints * 128
    # cv2.SIFT_create expects an int, so fall back to the default (keep all keypoints) when nfeatures is None
    sift = cv2.SIFT_create(nfeatures) if nfeatures else cv2.SIFT_create()
    # calculate the keypoints and each point's descriptor for the image
    keypoints, features = sift.detectAndCompute(image, None)
    return keypoints, features
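# Usage sketch (illustrative): descriptors come back as a (num_keypoints, 128)
# array, or None when no keypoints are detected, hence the None check in export_sift.
# keypoints, descriptors = extract_sift(img, nfeatures=8)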
...