Oleh Astappiev · Near-Similar Image Recognition · Commits

Commit aa738388, authored May 25, 2022 by Oleh Astappiev

feat: use classes for datasets

parent 195641ef
Showing 20 changed files with 333 additions and 567 deletions (+333 −567):
src/alexnet.py            +15  −47
src/alexnet_cifar10.py     +0  −62
src/data/base.py          +64   −0
src/data/cifar10.py       +37  −45
src/data/imagenette.py    +37  −45
src/data/simple3.py       +28  −36
src/efficientnet.py       +15  −30
src/embeddings.py          +4  −10
src/mobilenet.py          +16  −47
src/mobilenet_cifar10.py   +0  −60
src/model/alexnet.py       +1   −1
src/model/mobilenet.py     +1   −1
src/model/siamese.py      +24  −44
src/model/vgg16.py         +1   −1
src/simple.py              +0  −87
src/utils/embeddings.py   +44   −1
src/utils/hsv.py           +1   −1
src/utils/plot.py         +28   −0
src/utils/sift.py          +1   −1
src/vgg16.py              +16  −48
src/alexnet.py (view file @ aa738388)
```diff
 import sys
 sys.path.append("..")

 import tensorflow as tf
-from src.data.imagenette import load_dataset3, NUM_CLASSES
-from src.utils.embeddings import save_embeddings, load_embeddings, project_embeddings, calc_vectors
-from src.utils.common import get_modeldir
-from src.model.alexnet import AlexNetModel, TARGET_SHAPE, EMBEDDING_VECTOR_DIMENSION
+from src.model.alexnet import AlexNetModel, TARGET_SHAPE
+from src.data.imagenette import Imagenette
+from src.data.cifar10 import Cifar10
+from src.utils.embeddings import project_embeddings, load_weights_of, get_embeddings_of, save_embeddings
 from src.model.siamese import SiameseModel

 model_name = 'imagenette_alexnet'
 embeddings_name = model_name + '_embeddings'

+dataset = Imagenette(image_size=TARGET_SHAPE, map_fn=AlexNetModel.preprocess_input)
+# dataset = Cifar10(image_size=TARGET_SHAPE, map_fn=AlexNetModel.preprocess_input)
-train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, map_fn=AlexNetModel.preprocess_input)
-comb_ds = train_ds.concatenate(val_ds).concatenate(test_ds)

 # create model
 model = AlexNetModel()
 model.compile()
 model.summary()

-# load weights
-# model.load_weights(get_modeldir(model_name + '.h5'))
-
-# train & save model
-model.fit(train_ds, validation_data=val_ds)
-model.save_weights(get_modeldir(model_name + '.h5'))
-
-# evaluate
-print('evaluating...')
-model.evaluate(test_ds)
-
-print('calculating embeddings...')
-embedding_model = model.get_embedding_model()
-embedding_model.summary()
+load_weights_of(model, dataset)

-emb_vectors, emb_labels = calc_vectors(comb_ds, embedding_model)
-save_embeddings(emb_vectors, emb_labels, embeddings_name)
+emb_vectors, emb_labels = get_embeddings_of(model.get_embedding_model(), dataset)
+emb_ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)
-# emb_vectors, emb_labels = load_embeddings(embeddings_name)

-# siamese is the model we train
-siamese = SiameseModel(embedding_vector_dimension=EMBEDDING_VECTOR_DIMENSION, image_vector_dimensions=3)
+siamese = SiameseModel(embedding_model=model.get_embedding_model(), image_vector_dimensions=512)
 siamese.compile(loss_margin=0.05)
 siamese.summary()

-ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)
-history = siamese.fit(ds, class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES})
-
-# Build full inference model (from image to image vector):
-inference_model = siamese.get_inference_model(embedding_model)
-inference_model.save(get_modeldir(model_name + '_inference.tf'))
-# inference_model = tf.keras.models.load_model(get_modeldir(model_name + '_inference.tf'), compile=False)
+siamese.fit(emb_ds, num_classes=dataset.num_classes)

 print('visualization')
 # compute vectors of the images and their labels, store them in a tsv file for visualization
-projection_vectors = siamese.get_projection_model().predict(emb_vectors)
-project_embeddings(projection_vectors, emb_labels, model_name + '_siamese')
+projection_vectors = siamese.projection_model.predict(emb_vectors)
+save_embeddings(projection_vectors, emb_labels, dataset.name + '_' + siamese.name + '_vectors')
+project_embeddings(projection_vectors, emb_labels, siamese.name + '_' + dataset.name)
+print('Done!')
```
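The class_weight passed to siamese.fit in the removed lines balances the contrastive pair labels: assuming label 1 marks a same-class pair, which under uniform pair sampling occurs with probability 1/NUM_CLASSES, each label is weighted by the other label's expected frequency, so both labels contribute equally to the loss. A quick standalone check of that arithmetic (a sketch, not part of the commit):

```python
# Pair-balancing arithmetic behind class_weight={0: 1/N, 1: (N-1)/N},
# assuming label 1 = same-class pair (the rare case).
NUM_CLASSES = 10

p_same = 1 / NUM_CLASSES                   # expected share of positive pairs: 0.1
p_diff = (NUM_CLASSES - 1) / NUM_CLASSES   # expected share of negative pairs: 0.9

class_weight = {0: p_same, 1: p_diff}      # the rare label gets the larger weight

# Each label's total loss contribution is frequency * weight, equal for both:
assert abs(p_diff * class_weight[0] - p_same * class_weight[1]) < 1e-12
```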
src/alexnet_cifar10.py (deleted, 100644 → 0; view file @ 195641ef)
```python
import sys
sys.path.append("..")

import tensorflow as tf
from src.data.cifar10 import load_dataset3, NUM_CLASSES
from src.utils.embeddings import save_embeddings, load_embeddings, project_embeddings, calc_vectors
from src.utils.common import get_modeldir
from src.model.alexnet import AlexNetModel, TARGET_SHAPE, EMBEDDING_VECTOR_DIMENSION
from src.model.siamese import SiameseModel

model_name = 'cifar10_alexnet'
embeddings_name = model_name + '_embeddings'

train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, map_fn=AlexNetModel.preprocess_input)
comb_ds = train_ds.concatenate(val_ds).concatenate(test_ds)

# create model
model = AlexNetModel()
model.compile()
model.summary()

# load weights
# model.load_weights(get_modeldir(model_name + '.h5'))

# train & save model
model.fit(train_ds, validation_data=val_ds)
model.save_weights(get_modeldir(model_name + '.h5'))

# evaluate
print('evaluating...')
model.evaluate(test_ds)

print('calculating embeddings...')
embedding_model = model.get_embedding_model()
embedding_model.summary()

emb_vectors, emb_labels = calc_vectors(comb_ds, embedding_model)
save_embeddings(emb_vectors, emb_labels, embeddings_name)
# emb_vectors, emb_labels = load_embeddings(embeddings_name)

# siamese is the model we train
siamese = SiameseModel(embedding_vector_dimension=EMBEDDING_VECTOR_DIMENSION, image_vector_dimensions=3)
siamese.compile(loss_margin=0.05)
siamese.summary()

ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)
history = siamese.fit(ds, class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES})

# Build full inference model (from image to image vector):
inference_model = siamese.get_inference_model(embedding_model)
inference_model.save(get_modeldir(model_name + '_inference.tf'))
# inference_model = tf.keras.models.load_model(get_modeldir(model_name + '_inference.tf'), compile=False)

print('visualization')
# compute vectors of the images and their labels, store them in a tsv file for visualization
siamese_vectors, siamese_labels = calc_vectors(comb_ds, inference_model)
project_embeddings(siamese_vectors, siamese_labels, model_name + '_siamese')

projection_vectors = siamese.get_projection_model().predict(emb_vectors)
project_embeddings(projection_vectors, emb_labels, model_name + '_siamese2')
```
src/data/base.py (new file, 0 → 100644; view file @ aa738388)
```python
from abc import ABC, abstractmethod
from typing import Tuple, Callable, List

PRINT_SIZE = True
DEFAULT_BATCH_SIZE = 32


class BaseDataset(ABC):
    def __init__(self, name: str, classes: List[str], image_size: Tuple[int, int],
                 batch_size: int = DEFAULT_BATCH_SIZE, map_fn: Callable = None):
        self.name = name
        self.classes = classes
        self.num_classes = len(classes)
        self._image_size = image_size
        self._batch_size = batch_size
        self._map_fn = map_fn
        self._train_ds = None
        self._val_ds = None
        self._test_ds = None

    def get_classes(self):
        return self.classes

    def get_num_classes(self):
        return len(self.classes)

    def get_train(self):
        if self._train_ds is None:
            self.__load()
        return self._train_ds

    def get_val(self):
        if self._val_ds is None:
            self.__load()
        return self._val_ds

    def get_test(self):
        if self._test_ds is None:
            self.__load()
        return self._test_ds

    def get_combined(self):
        return self.get_train().concatenate(self.get_val()).concatenate(self.get_test())

    def __load(self):
        args = self._load_dataset(self._image_size, self._batch_size, self._map_fn)
        train_ds, val_ds, test_ds = self._split_dataset(*args)
        self._train_ds = train_ds
        self._val_ds = val_ds
        self._test_ds = test_ds

        if PRINT_SIZE:
            print(self.name, "dataset loaded")
            print("Training size:", train_ds.cardinality().numpy())
            print("Validation size:", val_ds.cardinality().numpy())
            print("Evaluation size:", test_ds.cardinality().numpy())

    @abstractmethod
    def _load_dataset(self, image_size, batch_size, map_fn):
        pass

    @abstractmethod
    def _split_dataset(self, *args):
        pass
```
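For orientation, this is how the refactored scripts in this commit consume a BaseDataset subclass: loading is lazy, so nothing is read from disk until the first accessor call, and later calls reuse the cached splits. A minimal sketch using names from the diff:

```python
# Sketch (not part of the commit): consuming a BaseDataset subclass.
from src.data.imagenette import Imagenette
from src.model.alexnet import AlexNetModel, TARGET_SHAPE

dataset = Imagenette(image_size=TARGET_SHAPE, map_fn=AlexNetModel.preprocess_input)

train_ds = dataset.get_train()    # first access triggers _load_dataset + _split_dataset
val_ds = dataset.get_val()        # cached, no reload
comb_ds = dataset.get_combined()  # train + val + test concatenated
print(dataset.name, dataset.num_classes)
```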
src/data/cifar10.py (view file @ aa738388)
```diff
 import tensorflow as tf
+from src.data.base import BaseDataset, DEFAULT_BATCH_SIZE

-BATCH_SIZE = 32
-IMAGE_SIZE = (32, 32)
-NUM_CLASSES = 10
+DEFAULT_IMAGE_SIZE = (32, 32)
+CLASS_NAMES = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

-def load_dataset(image_size=IMAGE_SIZE, batch_size=BATCH_SIZE, map_fn=None):
-    train_ds = tf.keras.utils.image_dataset_from_directory(
-        directory='../datasets/cifar10/train/',
-        labels='inferred',
-        label_mode='int',
-        batch_size=batch_size,
-        image_size=image_size,
-        interpolation='nearest')
-    test_ds = tf.keras.utils.image_dataset_from_directory(
-        directory='../datasets/cifar10/test/',
-        labels='inferred',
-        label_mode='int',
-        batch_size=batch_size,
-        image_size=image_size,
-        shuffle=False,
-        interpolation='nearest')
-    if map_fn is not None:
-        train_ds = train_ds.map(map_fn).prefetch(tf.data.AUTOTUNE)
-        test_ds = test_ds.map(map_fn).prefetch(tf.data.AUTOTUNE)
-    return train_ds, test_ds
-
-def load_dataset3(image_size=IMAGE_SIZE, batch_size=BATCH_SIZE, map_fn=None):
-    train_ds, test_ds = load_dataset(image_size=image_size, batch_size=batch_size, map_fn=map_fn)
-    train_ds_size = tf.data.experimental.cardinality(train_ds).numpy()
-    train_ds = train_ds.skip(train_ds_size / 10)
-    val_ds = train_ds.take(train_ds_size / 10)
-    if True:
-        print("CIFAR10 dataset loaded")
-        print("Training data size:", tf.data.experimental.cardinality(train_ds).numpy())
-        print("Validation data size:", tf.data.experimental.cardinality(val_ds).numpy())
-        print("Evaluation data size:", tf.data.experimental.cardinality(test_ds).numpy())
-    return train_ds, val_ds, test_ds
+class Cifar10(BaseDataset):
+    def __init__(self, image_size=DEFAULT_IMAGE_SIZE, batch_size=DEFAULT_BATCH_SIZE, map_fn=None):
+        super(Cifar10, self).__init__(name='cifar10', classes=CLASS_NAMES, image_size=image_size,
+                                      batch_size=batch_size, map_fn=map_fn)
+
+    def _load_dataset(self, image_size, batch_size, map_fn):
+        train_ds = tf.keras.utils.image_dataset_from_directory(
+            directory='../datasets/cifar10/train/',
+            labels='inferred',
+            label_mode='int',
+            batch_size=batch_size,
+            image_size=image_size,
+            interpolation='nearest')
+        test_ds = tf.keras.utils.image_dataset_from_directory(
+            directory='../datasets/cifar10/test/',
+            labels='inferred',
+            label_mode='int',
+            batch_size=batch_size,
+            image_size=image_size,
+            shuffle=False,
+            interpolation='nearest')
+        if map_fn is not None:
+            train_ds = train_ds.map(map_fn).prefetch(tf.data.AUTOTUNE)
+            test_ds = test_ds.map(map_fn).prefetch(tf.data.AUTOTUNE)
+        return train_ds, test_ds
+
+    def _split_dataset(self, train_ds, test_ds):
+        train_ds_size = train_ds.cardinality().numpy()
+        train_ds = train_ds.skip(train_ds_size / 10)
+        val_ds = train_ds.take(train_ds_size / 10)
+        return train_ds, val_ds, test_ds
\ No newline at end of file
```
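Two caveats in `_split_dataset`, carried over unchanged from the old `load_dataset3`: `skip`/`take` expect integer counts while `train_ds_size / 10` is a float, and `val_ds` is taken from the already-skipped `train_ds`, so the validation batches are a subset of the training batches rather than a disjoint hold-out. A corrected sketch of the presumable intent (an assumption, not part of the commit):

```python
def split_train_val(train_ds, holdout_fraction=10):
    """Hypothetical fix: integer batch counts and disjoint train/val splits."""
    n_val = int(train_ds.cardinality().numpy()) // holdout_fraction
    val_ds = train_ds.take(n_val)    # first tenth of the batches -> validation
    train_ds = train_ds.skip(n_val)  # remaining nine tenths -> training
    return train_ds, val_ds
```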
src/data/imagenette.py (view file @ aa738388)
```diff
 import tensorflow as tf
+from src.data.base import BaseDataset, DEFAULT_BATCH_SIZE

-BATCH_SIZE = 32
-IMAGE_SIZE = (224, 224)
-NUM_CLASSES = 10
+DEFAULT_IMAGE_SIZE = (400, 320)
+CLASS_NAMES = ['fish', 'dog', 'player', 'saw', 'building', 'music', 'truck', 'gas', 'ball', 'parachute']

-def load_dataset(image_size=IMAGE_SIZE, batch_size=BATCH_SIZE, map_fn=None):
-    train_ds = tf.keras.utils.image_dataset_from_directory(
-        directory='../datasets/imagenette2/train/',
-        labels='inferred',
-        label_mode='int',
-        batch_size=batch_size,
-        image_size=image_size,
-        interpolation='nearest')
-    test_ds = tf.keras.utils.image_dataset_from_directory(
-        directory='../datasets/imagenette2/val/',
-        labels='inferred',
-        label_mode='int',
-        batch_size=batch_size,
-        image_size=image_size,
-        shuffle=False,
-        interpolation='nearest')
-    if map_fn is not None:
-        train_ds = train_ds.map(map_fn).prefetch(tf.data.AUTOTUNE)
-        test_ds = test_ds.map(map_fn).prefetch(tf.data.AUTOTUNE)
-    return train_ds, test_ds
-
-def load_dataset3(image_size=IMAGE_SIZE, batch_size=BATCH_SIZE, map_fn=None):
-    train_ds, test_ds = load_dataset(image_size=image_size, batch_size=batch_size, map_fn=map_fn)
-    test_ds_size = tf.data.experimental.cardinality(test_ds).numpy()
-    val_ds = test_ds.take(test_ds_size / 2)
-    test_ds = test_ds.skip(test_ds_size / 2)
-    if True:
-        print("Imagenette dataset loaded")
-        print("Training data size:", tf.data.experimental.cardinality(train_ds).numpy())
-        print("Validation data size:", tf.data.experimental.cardinality(val_ds).numpy())
-        print("Evaluation data size:", tf.data.experimental.cardinality(test_ds).numpy())
-    return train_ds, val_ds, test_ds
+class Imagenette(BaseDataset):
+    def __init__(self, image_size=DEFAULT_IMAGE_SIZE, batch_size=DEFAULT_BATCH_SIZE, map_fn=None):
+        super(Imagenette, self).__init__(name='imagenette', classes=CLASS_NAMES, image_size=image_size,
+                                         batch_size=batch_size, map_fn=map_fn)
+
+    def _load_dataset(self, image_size, batch_size, map_fn):
+        train_ds = tf.keras.utils.image_dataset_from_directory(
+            directory='../datasets/imagenette2/train/',
+            labels='inferred',
+            label_mode='int',
+            batch_size=batch_size,
+            image_size=image_size,
+            interpolation='nearest')
+        test_ds = tf.keras.utils.image_dataset_from_directory(
+            directory='../datasets/imagenette2/val/',
+            labels='inferred',
+            label_mode='int',
+            batch_size=batch_size,
+            image_size=image_size,
+            shuffle=False,
+            interpolation='nearest')
+        if map_fn is not None:
+            train_ds = train_ds.map(map_fn).prefetch(tf.data.AUTOTUNE)
+            test_ds = test_ds.map(map_fn).prefetch(tf.data.AUTOTUNE)
+        return train_ds, test_ds
+
+    def _split_dataset(self, train_ds, test_ds):
+        test_ds_size = test_ds.cardinality().numpy()
+        val_ds = test_ds.take(test_ds_size / 2)
+        test_ds = test_ds.skip(test_ds_size / 2)
+        return train_ds, val_ds, test_ds
```
src/data/simple3.py (view file @ aa738388)
```diff
 import tensorflow as tf
+from src.data.base import BaseDataset

-BATCH_SIZE = 6
-IMAGE_SIZE = (400, 320)
-NUM_CLASSES = 3
+DEFAULT_BATCH_SIZE = 6
+DEFAULT_IMAGE_SIZE = (400, 320)
+CLASS_NAMES = ['building', 'dog', 'player']

-def load_dataset(image_size=IMAGE_SIZE, batch_size=BATCH_SIZE, map_fn=None):
-    ds = tf.keras.utils.image_dataset_from_directory(
-        directory='../datasets/simple3/',
-        labels='inferred',
-        label_mode='int',
-        batch_size=batch_size,
-        image_size=image_size,
-        interpolation='nearest')
-    if map_fn is not None:
-        ds = ds.map(map_fn).prefetch(tf.data.AUTOTUNE)
-    return ds
-
-def load_dataset3(image_size=IMAGE_SIZE, batch_size=BATCH_SIZE, map_fn=None):
-    ds = load_dataset(image_size=image_size, batch_size=batch_size, map_fn=map_fn)
-    ds_size = tf.data.experimental.cardinality(ds).numpy()
-    train_ds = ds.take(ds_size * 0.6)
-    val_ds = ds.skip(ds_size * 0.6).take(ds_size * 0.2)
-    test_ds = ds.skip(ds_size * 0.6).skip(ds_size * 0.2)
-    if True:
-        print("Simple 3 dataset loaded")
-        print("Total dataset size:", ds_size)
-        print("Training data size:", tf.data.experimental.cardinality(train_ds).numpy())
-        print("Validation data size:", tf.data.experimental.cardinality(val_ds).numpy())
-        print("Evaluation data size:", tf.data.experimental.cardinality(test_ds).numpy())
-    return train_ds, val_ds, test_ds
+class Simple3(BaseDataset):
+    def __init__(self, image_size=DEFAULT_IMAGE_SIZE, batch_size=DEFAULT_BATCH_SIZE, map_fn=None):
+        super(Simple3, self).__init__(name='simple3', classes=CLASS_NAMES, image_size=image_size,
+                                      batch_size=batch_size, map_fn=map_fn)
+
+    def _load_dataset(self, image_size, batch_size, map_fn):
+        ds = tf.keras.utils.image_dataset_from_directory(
+            directory='../datasets/simple3/',
+            labels='inferred',
+            label_mode='int',
+            batch_size=batch_size,
+            image_size=image_size,
+            interpolation='nearest')
+        if map_fn is not None:
+            ds = ds.map(map_fn).prefetch(tf.data.AUTOTUNE)
+        return ds
+
+    def _split_dataset(self, ds):
+        ds_size = ds.cardinality().numpy()
+        train_ds = ds.take(ds_size * 0.6)
+        val_ds = ds.skip(ds_size * 0.6).take(ds_size * 0.2)
+        test_ds = ds.skip(ds_size * 0.6).skip(ds_size * 0.2)
+        return train_ds, val_ds, test_ds
```
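The 60/20/20 split above has the same float issue as the Cifar10 split (`ds_size * 0.6` is a float where `take`/`skip` want an integer), and the chained `skip(...).skip(...)` for the test split obscures the arithmetic. A sketch of the presumable intent with integer counts (an assumption, not part of the commit):

```python
def split_60_20_20(ds):
    """Hypothetical three-way split with integer batch counts."""
    n = int(ds.cardinality().numpy())
    n_train, n_val = int(n * 0.6), int(n * 0.2)
    train_ds = ds.take(n_train)
    val_ds = ds.skip(n_train).take(n_val)
    test_ds = ds.skip(n_train + n_val)  # whatever remains (~20%)
    return train_ds, val_ds, test_ds
```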
src/efficientnet.py (view file @ aa738388)
```diff
 import sys
 sys.path.append("..")

-from src.data.cifar10 import load_dataset3, NUM_CLASSES
-from src.utils.embeddings import project_embeddings, calc_vectors, save_embeddings
-from src.utils.common import get_modeldir, get_datadir
 from src.model.efficientnet import EfficientNetModel, TARGET_SHAPE, BATCH_SIZE
+from src.data.imagenette import Imagenette
+from src.data.cifar10 import Cifar10
+from src.utils.embeddings import project_embeddings, load_weights_of, get_embeddings_of, save_embeddings
 from src.model.siamese import SiameseModel

 model_name = 'cifar10_efficientnet'
 embeddings_name = model_name + '_embeddings'

-train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, batch_size=BATCH_SIZE, map_fn=EfficientNetModel.preprocess_input)
-comb_ds = train_ds.concatenate(val_ds).concatenate(test_ds)
+dataset = Imagenette(image_size=TARGET_SHAPE, batch_size=BATCH_SIZE, map_fn=EfficientNetModel.preprocess_input)
+# dataset = Cifar10(image_size=TARGET_SHAPE, batch_size=BATCH_SIZE, map_fn=EfficientNetModel.preprocess_input)

 model = EfficientNetModel()
 model.summary()

-print('calculating embeddings...')
-emb_vectors, emb_labels = calc_vectors(comb_ds, model)
-save_embeddings(emb_vectors, emb_labels, embeddings_name)
+model.compile()
+load_weights_of(model, dataset)

-# emb_vectors, emb_labels = load_embeddings(embeddings_name)
+emb_vectors, emb_labels = get_embeddings_of(model, dataset)
+emb_ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)

-# siamese is the model we train
-siamese = SiameseModel(embedding_vector_dimension=1280, image_vector_dimensions=128)
+siamese = SiameseModel(embedding_model=model, image_vector_dimensions=512)
 siamese.compile(loss_margin=0.05)
 siamese.summary()

-ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)
-history = siamese.fit(ds, class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES})
-
-# Build full inference model (from image to image vector):
-inference_model = siamese.get_inference_model(model)
-inference_model.save(get_modeldir(model_name + '_inference.tf'))
-# inference_model = tf.keras.models.load_model(get_modeldir(model_name + '_inference.tf'), compile=False)
+siamese.fit(emb_ds, num_classes=dataset.num_classes)

 print('visualization')
 # compute vectors of the images and their labels, store them in a tsv file for visualization
-projection_vectors = siamese.get_projection_model().predict(emb_vectors)
-project_embeddings(projection_vectors, emb_labels, model_name + '_siamese')
+projection_vectors = siamese.projection_model.predict(emb_vectors)
+save_embeddings(projection_vectors, emb_labels, dataset.name + '_' + siamese.name + '_vectors')
+project_embeddings(projection_vectors, emb_labels, siamese.name + '_' + dataset.name)
+print('Done!')
```
src/embeddings.py (view file @ aa738388)
```diff
 import sys
 sys.path.append("..")

 import csv
 from src.utils.hsv import *
 from src.utils.sift import *
 from src.utils.embeddings import *
-from src.data.cifar10 import *
+from src.data.cifar10 import Cifar10

-train_ds, test_ds = load_dataset()
+train_ds, test_ds = Cifar10()
 cifar10_vds = train_ds.concatenate(test_ds)
@@ -41,11 +41,6 @@ def export_sift(nfeatures=8):
             writer.writerow([i, label_str, value_str])

-def export_embeddings():
-    embeddings, embeddings_labels = load_embeddings(name='cifar10_alexnet_embeddings')
-    export_embeddings(embeddings, embeddings_labels, 'alexnet')

 # HSV
 # export_hsv(170, 171, 171) # 512
 # export_hsv(340, 342, 342) # 1024
@@ -59,5 +54,4 @@ def export_embeddings():
 # export_sift(32)

 # Siamese Embeddings
-export_embeddings()
-print('done')
+print('Done!')
```
src/mobilenet.py (view file @ aa738388)
```diff
@@ -2,59 +2,28 @@ import sys
 sys.path.append("..")

 import tensorflow as tf
-from src.data.imagenette import load_dataset3, NUM_CLASSES
-from src.utils.embeddings import save_embeddings, project_embeddings, calc_vectors
-from src.utils.common import get_modeldir
-from src.model.mobilenet import MobileNetModel, PRETRAIN_EPOCHS, TARGET_SHAPE, EMBEDDING_VECTOR_DIMENSION
+from src.model.mobilenet import MobileNetModel, PRETRAIN_EPOCHS, TARGET_SHAPE
+from src.data.imagenette import Imagenette
+from src.data.cifar10 import Cifar10
+from src.utils.embeddings import project_embeddings, load_weights_of, get_embeddings_of, save_embeddings
 from src.model.siamese import SiameseModel

 model_name = 'imagenette_mobilenet'
 embeddings_name = model_name + '_embeddings'

-train_ds, val_ds, test_ds = load_dataset3(image_size=TARGET_SHAPE, map_fn=MobileNetModel.preprocess_input)
-comb_ds = train_ds.concatenate(val_ds).concatenate(test_ds)
-
-PRETRAIN_TOTAL_STEPS = PRETRAIN_EPOCHS * len(train_ds)
+dataset = Imagenette(image_size=TARGET_SHAPE, map_fn=MobileNetModel.preprocess_input)
+# dataset = Cifar10(image_size=TARGET_SHAPE, map_fn=MobileNetModel.preprocess_input)
+
+PRETRAIN_TOTAL_STEPS = PRETRAIN_EPOCHS * len(dataset.get_train())

 model = MobileNetModel()
 model.compile(optimizer=tf.keras.optimizers.RMSprop(tf.keras.optimizers.schedules.CosineDecay(1e-3, PRETRAIN_TOTAL_STEPS)))
 model.summary()

-# load weights
-# model.load_weights(get_modeldir(model_name + '.h5'))
-
-# train & save model
-model.fit(train_ds, epochs=PRETRAIN_EPOCHS, validation_data=val_ds)
-model.save_weights(get_modeldir(model_name + '.h5'))
-
-# evaluate
-print('evaluating...')
-model.evaluate(test_ds)
-
-print('calculating embeddings...')
-embedding_model = model.get_embedding_model()
-embedding_model.summary()
+load_weights_of(model, dataset)

-emb_vectors, emb_labels = calc_vectors(comb_ds, embedding_model)
-save_embeddings(emb_vectors, emb_labels, embeddings_name)
+emb_vectors, emb_labels = get_embeddings_of(model.get_embedding_model(), dataset)
+emb_ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)

-# emb_vectors, emb_labels = load_embeddings(embeddings_name)
-
-# siamese is the model we train
-siamese = SiameseModel(embedding_vector_dimension=EMBEDDING_VECTOR_DIMENSION, image_vector_dimensions=3)
+siamese = SiameseModel(embedding_model=model.get_embedding_model(), image_vector_dimensions=512)
 siamese.compile(loss_margin=0.05)
 siamese.summary()

-ds = SiameseModel.prepare_dataset(emb_vectors, emb_labels)
-history = siamese.fit(ds, class_weight={0: 1 / NUM_CLASSES, 1: (NUM_CLASSES - 1) / NUM_CLASSES})
```
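For context on the learning-rate schedule kept in this file: `CosineDecay(1e-3, PRETRAIN_TOTAL_STEPS)` anneals the learning rate per optimizer step, so the decay horizon must be epochs × batches-per-epoch, which is what `PRETRAIN_EPOCHS * len(dataset.get_train())` computes (`len` of a batched tf.data dataset is its batch count, provided the cardinality is known). A standalone sketch with assumed figures:

```python
import tensorflow as tf

# Assumed numbers for illustration only: 10 epochs over 295 training batches.
PRETRAIN_EPOCHS = 10
batches_per_epoch = 295                   # stands in for len(dataset.get_train())
total_steps = PRETRAIN_EPOCHS * batches_per_epoch  # 2950 optimizer steps

schedule = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=1e-3,  # starting LR, as in the commit
    decay_steps=total_steps,     # cosine-anneal toward 0 by the final step
)
optimizer = tf.keras.optimizers.RMSprop(schedule)
print(float(schedule(0)), float(schedule(total_steps)))  # 0.001 -> ~0.0
```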