Near-Similar Image Recognition

Commit bc971be1, authored Feb 01, 2022 by Oleh Astappiev
Parent: f7082cdb

feat: export all datasets

Showing 8 changed files, with 53 additions and 63 deletions:
- src/data/cifar10.py (+14, -1)
- src/data/embeddings.py (+4, -17)
- src/export_dataset.py (+25, -25)
- src/images.py (+2, -6)
- src/siamese.py (+2, -1)
- src/siamese_visualize.py (+4, -10)
- src/utils/hsv.py (+0, -1)
- src/utils/sift.py (+2, -2)
src/data/cifar10_tuples.py → src/data/cifar10.py

```diff
 import numpy as np
 import _pickle as pickle
 import matplotlib.pyplot as plt
-from src.utils.common import get_datadir, process_images
+from src.utils.common import get_datadir, process_images, process_images_couple
+from tensorflow.keras import datasets
+from tensorflow import data
+import tensorflow as tf
+
+
+def cifar10_complete():
+    (train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
+    images = np.concatenate([train_images, test_images])
+    labels = np.concatenate([train_labels, test_labels])
+    return tf.data.Dataset.from_tensor_slices((images, labels))
+
+
+def cifar10_complete_resized():
+    ds = cifar10_complete()
+    return ds.map(process_images_couple).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
 
 
 def shuffle_arrays(arrays, set_seed=-1):
     """Shuffles arrays in-place, in the same order, along axis=0
     ...
```
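The two helpers added above replace ad-hoc dataset construction across the repository: cifar10_complete concatenates the train and test splits into one 60,000-image tf.data.Dataset, and cifar10_complete_resized maps process_images_couple over it. A minimal usage sketch (the resized image shape depends on process_images_couple in src.utils.common, which this diff does not show):

```python
import tensorflow as tf

from src.data.cifar10 import cifar10_complete, cifar10_complete_resized

# Full corpus: 50,000 train + 10,000 test images in one pipeline.
ds = cifar10_complete()
print(ds.cardinality().numpy())  # 60000

# Resized and prefetched variant, batched the way the exporters consume it.
for images, labels in cifar10_complete_resized().batch(32).take(1):
    print(images.shape, labels.shape)
```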
src/data/embeddings.py

```diff
 import numpy as np
 import _pickle as pickle
 from keras import Model
 import tensorflow as tf
-from tensorflow.keras import datasets
-from src.utils.common import process_images_couple, get_datadir
+from src.data.cifar10 import cifar10_complete_resized
+from src.utils.common import get_datadir
 
 
 def calc_embeddings(alexnet):
     # remove the last layer
     embedding_model = Model(inputs=alexnet.input, outputs=alexnet.layers[-2].output)
 
-    (train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
-    embedding_images = np.concatenate([train_images, test_images])
-    embedding_labels = np.concatenate([train_labels, test_labels])
-    embedding_vds = tf.data.Dataset.from_tensor_slices((embedding_images, embedding_labels))
-    embedding_vds = (embedding_vds.map(process_images_couple).batch(batch_size=32, drop_remainder=False))
+    embedding_vds = cifar10_complete_resized().batch(batch_size=32, drop_remainder=False)
 
     print('predicting embeddings')
     embeddings = embedding_model.predict(embedding_vds)
     print('done')
     embedding_labels = np.concatenate([y for x, y in embedding_vds], axis=0)
     return embeddings, embedding_labels
 
 # # zip together embeddings and their labels, cache in memory (maybe not necessary or maybe faster this way), shuffle, repeat forever.
 # embeddings_ds = tf.data.Dataset.zip((
 #     tf.data.Dataset.from_tensor_slices(embeddings),
 #     tf.data.Dataset.from_tensor_slices(embedding_labels)
 # ))
 
 
 def save_embeddings(embeddings, labels):
     data = [embeddings, labels]
     ...
```
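calc_embeddings now builds its input pipeline from cifar10_complete_resized instead of duplicating the dataset code. A hedged driver sketch, mirroring how src/siamese.py loads the trained AlexNet (get_modeldir is assumed to live in src.utils.common alongside get_datadir):

```python
from src.data.embeddings import calc_embeddings, save_embeddings
from src.model.alexnet import AlexNetModel
from src.utils.common import get_modeldir  # assumed location

# Load the trained AlexNet as src/siamese.py does, then compute and
# persist penultimate-layer embeddings for the full dataset.
alexnet = AlexNetModel()
alexnet.compile()
alexnet.load_weights(get_modeldir('alexnet_cifar10.h5'))

embeddings, labels = calc_embeddings(alexnet)
save_embeddings(embeddings, labels)
```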
src/export_dataset.py

```diff
@@ -4,44 +4,35 @@ sys.path.append("..")
 import csv
 from src.utils.hsv import *
 from src.utils.sift import *
 import tensorflow as tf
-from utils.common import *
-from utils.distance import *
-from src.data.embeddings import *
-from src.model.alexnet import AlexNetModel
-from tensorflow.keras import layers, Model, models, datasets
+from src.data.cifar10 import *
+from tensorflow.keras import models
 
 # Load dataset
-(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
-cifar10_images = np.concatenate([train_images, test_images])
-cifar10_labels = np.concatenate([train_labels, test_labels])
-cifar10_vds = tf.data.Dataset.from_tensor_slices((cifar10_images, cifar10_labels))
+cifar10_vds = cifar10_complete_resized()
 
 
 def export_hsv(bin0=256, bin1=256, bin2=256):
     header = ['ID', 'Label', 'HSV vector']
-    with open('../data/hsv_' + str(features) + '.csv', 'w', encoding='UTF8', newline='') as f:
+    with open('../data/hsv_' + str(bin0) + '.csv', 'w', encoding='UTF8', newline='') as f:
         writer = csv.writer(f, delimiter=";")
         # write the header
         writer.writerow(header)
 
         for i, (image, label) in enumerate(cifar10_vds):
-            img = process_images(image).numpy()
-            a, b, c, hist_array = extract_hsv(img, bin0, bin1, bin2)
+            a, b, c, hist_array = extract_hsv(image.numpy(), bin0, bin1, bin2)
             label_str = ','.join(map(str, label.numpy()))
             value_str = ','.join(map(str, hist_array))
             writer.writerow([i, label_str, value_str])
 
 
-def export_sift(features=8):
+def export_sift(nfeatures=8):
     header = ['ID', 'Label', 'SIFT descriptors']
-    with open('../data/sift_' + str(features) + '.csv', 'w', encoding='UTF8', newline='') as f:
+    with open('../data/sift_' + str(nfeatures) + '.csv', 'w', encoding='UTF8', newline='') as f:
         writer = csv.writer(f, delimiter=";")
         # write the header
         writer.writerow(header)
 
         for i, (image, label) in enumerate(cifar10_vds):
-            img = process_images(image).numpy()
-            keypoints, features = extract_sift(img, features)
+            keypoints, features = extract_sift(image.numpy(), nfeatures)
             label_str = ','.join(map(str, label.numpy()))
             if features is not None:
                 value_str = ','.join(map(str, features.flatten()))
...
@@ -58,21 +49,30 @@ def export_embeddings():
         # write the header
         writer.writerow(header)
 
-        seamese = models.load_model(get_modeldir('seamese_cifar10.tf'))
-        embedding_vds = (cifar10_vds.map(process_images_couple).batch(batch_size=32, drop_remainder=False))
+        seamese = models.load_model(get_modeldir('seamese_cifar10_512.tf'))
+        embedding_vds = (cifar10_vds.batch(batch_size=32, drop_remainder=False))
 
         print('predicting embeddings')
         embeddings = seamese.predict(embedding_vds)
         embeddings_labels = np.concatenate([y for x, y in embedding_vds], axis=0)
         print('embeddings done')
 
-        for i, (label) in enumerate(cifar10_labels):
+        for i, (label) in enumerate(embeddings_labels):
             label_str = ','.join(map(str, label))
             value_str = ','.join(map(str, embeddings[i]))
             writer.writerow([i, label_str, value_str])
 
-# hsv 170, 171, 171
-# 512, 1024, 2048, 4096
-# export_hsv()
-# export_sift()
+# HSV
+# export_hsv(170, 171, 171)    # 512
+# export_hsv(340, 342, 342)    # 1024
+# export_hsv(682, 683, 683)    # 2048
+# export_hsv(1366, 1365, 1365) # 4096
+
+# SIFT
+# export_sift(4)
+# export_sift(8)
+# export_sift(16)
+# export_sift(32)
+
+# Siamese Embeddings
 export_embeddings()
 print('done')
```
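All three exporters share one row format: a numeric ID, the label, and the feature vector flattened to a comma-joined string, with fields separated by semicolons. A sketch of reading one row back (the file name assumes a prior export_hsv(170, 171, 171) run):

```python
import csv

import numpy as np

with open('../data/hsv_170.csv', newline='') as f:
    reader = csv.reader(f, delimiter=';')
    next(reader)                      # skip the ID;Label;vector header
    row_id, label_str, value_str = next(reader)

label = int(label_str)                # CIFAR-10 labels are single integers
vector = np.array(value_str.split(','), dtype=np.float32)
print(row_id, label, vector.shape)
```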
src/images.py

```diff
 import sys
 sys.path.append("..")
 
 from src.utils.hsv import *
 from src.utils.sift import *
+from src.data.cifar10 import *
 import tensorflow as tf
-from tensorflow.keras import datasets
 
-# Load dataset
-(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
-cifar10_images = np.concatenate([train_images, test_images])
-cifar10_labels = np.concatenate([train_labels, test_labels])
-cifar10_vds = tf.data.Dataset.from_tensor_slices((cifar10_images, cifar10_labels))
+cifar10_vds = cifar10_complete()
 
 
 def print_resized(dataset):
     plt.figure(figsize=(20, 20))
     ...
```
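print_resized is truncated in this view. A hypothetical completion, assuming it tiles a sample of the dataset with matplotlib (the 5×5 grid and take(25) are illustrative, not from the source):

```python
import matplotlib.pyplot as plt

def print_resized(dataset):
    plt.figure(figsize=(20, 20))
    # Show the first 25 (image, label) pairs as a grid.
    for i, (image, label) in enumerate(dataset.take(25)):
        plt.subplot(5, 5, i + 1)
        plt.imshow(image.numpy().astype('uint8'))
        plt.title(str(label.numpy()))
        plt.axis('off')
    plt.show()
```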
src/siamese.py

```diff
@@ -10,6 +10,7 @@ from tensorflow.keras import layers, Model
 alexnet = AlexNetModel()
 alexnet.compile()
 alexnet.load_weights(get_modeldir('alexnet_cifar10.h5'))
+alexnet = Model(inputs=alexnet.input, outputs=alexnet.layers[-2].output)
 
 for layer in alexnet.layers:
     layer.trainable = False
...
@@ -79,4 +80,4 @@ embedding = alexnet(im_input)
 image_vector = projection_model(embedding)
 inference_model = Model(inputs=im_input, outputs=image_vector)
-inference_model.save(get_modeldir('seamese_cifar10.tf'), save_format='tf', include_optimizer=False)
+inference_model.save(get_modeldir('seamese_cifar10_' + str(IMAGE_VECTOR_DIMENSIONS) + '.tf'), save_format='tf', include_optimizer=False)
```
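The saved model name now encodes the projection width, so several variants can coexist (hence seamese_cifar10_512.tf elsewhere in this commit). A loading sketch; the value of IMAGE_VECTOR_DIMENSIONS and the 227×227 AlexNet input size are assumptions not shown in this diff:

```python
import tensorflow as tf

from src.utils.common import get_modeldir  # assumed location

IMAGE_VECTOR_DIMENSIONS = 512  # assumed; defined earlier in src/siamese.py

inference_model = tf.keras.models.load_model(
    get_modeldir('seamese_cifar10_' + str(IMAGE_VECTOR_DIMENSIONS) + '.tf'),
    compile=False,  # the model was saved with include_optimizer=False
)
dummy = tf.zeros((1, 227, 227, 3))   # AlexNet-style input, assumed
print(inference_model(dummy).shape)  # (1, IMAGE_VECTOR_DIMENSIONS)
```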
src/siamese_visualize.py

```diff
@@ -6,11 +6,9 @@ from tqdm import tqdm
 sys.path.append("..")
 
-from utils.common import *
-from data.cifar10_tuples import *
-from utils.distance import *
+from src.data.cifar10 import *
+from src.data.embeddings import *
 from src.model.alexnet import AlexNetModel
-from tensorflow.keras import layers, Model
+from tensorflow.keras import layers
 
 
 def write_embeddings_for_tensorboard(image_vectors: list, labels: list, root_dir: Path):
     import csv
...
@@ -37,17 +35,13 @@ def write_embeddings_for_tensorboard(image_vectors: list, labels: list, root_dir: Path):
     embedding.tensor_path = 'values.tsv'
     projector.visualize_embeddings(root_dir, config)
 
-inference_model = tf.keras.models.load_model(get_modeldir('seamese_cifar10.tf'), compile=False)
+inference_model = tf.keras.models.load_model(get_modeldir('seamese_cifar10_512.tf'), compile=False)
 
 NUM_SAMPLES_TO_DISPLAY = 10000
 LOG_DIR = Path('../logs')
 LOG_DIR.mkdir(exist_ok=True, parents=True)
 
-(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()
-embedding_images = np.concatenate([train_images, test_images])
-embedding_labels = np.concatenate([train_labels, test_labels])
-embedding_vds = tf.data.Dataset.from_tensor_slices((embedding_images, embedding_labels))
+embedding_vds = cifar10_complete()
 
 val_ds = (embedding_vds
           .shuffle(500, seed=42)
           .take(NUM_SAMPLES_TO_DISPLAY)
           ...
```
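A usage sketch tying this file together; it reuses inference_model, val_ds, and LOG_DIR defined above, and assumes the elided tail of the val_ds pipeline maps images to the model's input size:

```python
# Predict vectors for the sampled images and export them for the
# TensorBoard embedding projector.
image_vectors = inference_model.predict(val_ds.batch(32))
labels = [label.numpy()[0] for _, label in val_ds]
write_embeddings_for_tensorboard(image_vectors, labels, LOG_DIR)
# Inspect with: tensorboard --logdir ../logs
```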
src/utils/hsv.py

```diff
@@ -9,7 +9,6 @@ def extract_hsv(image, bin0=256, bin1=256, bin2=256):
     """Extract a 3 color channels histogram from the HSV"""
     hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
     # The ranges of the 3 HSV channels in opencv are 0-180, 0-256, 0-256 respectively
-    # Bins is set to 1365, so that each picture can be represented by a 4000-dimensional vector
     histh = cv2.calcHist([hsv], [0], None, [bin0], [0, 180])
     hists = cv2.calcHist([hsv], [1], None, [bin1], [0, 256])
     histv = cv2.calcHist([hsv], [2], None, [bin2], [0, 256])
     ...
```
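The exported HSV vector is the three per-channel histograms concatenated, so its length is bin0 + bin1 + bin2; that is why the commented calls in src/export_dataset.py pass bins summing to 512, 1024, 2048, or 4096. A quick check against a dummy image (assuming hist_array, the fourth return value, is that concatenation):

```python
import numpy as np

from src.utils.hsv import extract_hsv

img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
h, s, v, hist_array = extract_hsv(img, 170, 171, 171)
print(len(hist_array))  # expected: 170 + 171 + 171 = 512
```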
src/utils/sift.py

```diff
@@ -4,9 +4,9 @@ import cv2
 from src.utils.common import *
 
 
-def extract_sift(image, features=500):
+def extract_sift(image, nfeatures=None):
     # the resulting number of features is the number of keypoints * 128
-    sift = cv2.SIFT_create(features)
+    sift = cv2.SIFT_create(nfeatures)
     # Calculate the keypoints and each point's descriptor for the image
     keypoints, features = sift.detectAndCompute(image, None)
     return keypoints, features
```
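Each SIFT keypoint yields a 128-dimensional descriptor, so a flattened export row holds n_keypoints × 128 values. A usage sketch; detectAndCompute returns None descriptors when no keypoints are found, which is why export_sift guards with `if features is not None`:

```python
import numpy as np

from src.utils.sift import extract_sift

img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
keypoints, descriptors = extract_sift(img, nfeatures=8)
if descriptors is not None:
    print(descriptors.shape)  # (n_keypoints, 128)
```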