Skip to content

Commit

Permalink
resolved comments
Browse files Browse the repository at this point in the history
  • Loading branch information
ovysotska committed Nov 1, 2023
1 parent 3924270 commit d77abeb
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 15 deletions.
45 changes: 32 additions & 13 deletions src/python/bow/bow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import cv2
import argparse
from pathlib import Path
import matplotlib.pyplot as plt

from sklearn import preprocessing
from sklearn.cluster import KMeans
Expand All @@ -19,6 +18,14 @@ def listImagesInFolder(folderPath):


def rescaleImageIfNeeded(image):
"""Rescales the image so that its width does not exceed kDefaultWidth, keeping the aspect ratio.
Args:
image (np.array): image
Returns:
np.array: rescaled or original image
"""
height, width = image.shape
if width > kDefaultWidth:
newHeight = (height * kDefaultWidth) / width
Expand All @@ -45,7 +52,7 @@ def extractSiftsFromImage(imageFile):


def computeIDF(descriptorsPerImage, clusters):
"""Compute inverse document frequence (IDF). Here means in how many images does the word occur.
"""Compute inverse document frequency (IDF). In the visual BoW context, IDF means in how many images the word occurs.
Args:
descriptorsByImages (list(list(1xD)): List of descriptors per image
Expand Down Expand Up @@ -79,6 +86,9 @@ def trainVocabulary(imageFiles, outputFile=""):
Args:
imageFiles (list(Path)): paths to images
Returns:
(np.array, np.array): A pair of values: CxD array of computed words and Cx1 inverse word occurrence
"""
descriptorsPerImage = []
for imageFile in imageFiles:
Expand All @@ -87,7 +97,9 @@ def trainVocabulary(imageFiles, outputFile=""):

# flatten the descriptors list
descriptors = [
descriptor for descriptors in descriptorsPerImage for descriptor in descriptors
descriptor
for imageDescriptors in descriptorsPerImage
for descriptor in imageDescriptors
]
descriptors = np.array(descriptors)

Expand All @@ -98,9 +110,6 @@ def trainVocabulary(imageFiles, outputFile=""):

idfs = computeIDF(descriptorsPerImage, words)

plt.bar(range(0, len(idfs)), idfs)
plt.savefig("idf_" + str(kDefaultClusterSize) + ".png")

if outputFile:
np.savez(outputFile, vocabulary=words, idfs=idfs)
print("Vocabulary was saved to", outputFile)
Expand All @@ -112,13 +121,23 @@ def trainVocabularyFromFolder(folderPath, outputFile=""):


def getVocabulary(imageTrainFolder, vocabularyFile):
if vocabularyFile is not None:
"""Trains a vocabulary from images in imageTrainFolder, or loads it if the vocabulary exists under vocabularyFile
Args:
imageTrainFolder (Path): path to folder with images to be used for training
vocabularyFile (Path): a file with vocabulary. If the file doesn't exist, a new vocabulary will be computed
Returns:
(np.array, np.array) | None: A pair of values: CxD array of computed words and Cx1 inverse word occurrence,
or None if it was impossible to read or compute the vocabulary
"""
if vocabularyFile:
if vocabularyFile.exists():
print("Vocabulary exists and will be loaded")
data = np.load(vocabularyFile)
return data["vocabulary"], data["idfs"]
elif imageTrainFolder is None:
print("Vocabulary doesn't exits, please provide images to train on")
print("Vocabulary doesn't exits, please provide images to train on.")
return None
else:
return trainVocabularyFromFolder(imageTrainFolder, vocabularyFile)
Expand Down Expand Up @@ -184,7 +203,7 @@ def main():
help="Path to the image directory for which the histograms should be computed.",
)
parser.add_argument(
"--outputFile",
"--output_file",
required=False,
type=Path,
help="Filename where Bow features will be stored, .csv recommended.",
Expand All @@ -197,7 +216,7 @@ def main():
numberOfWords = vocabulary.shape[0]
vocabularyTree = KDTree(vocabulary)
if args.images:
if not args.outputFile:
if not args.output_file:
print(
"WARNING: The output file is not specified. The features will not be stored."
)
Expand All @@ -213,9 +232,9 @@ def main():
histograms.append(histogram)
print("Processing done")
histograms = np.array(histograms)
if args.outputFile:
np.savetxt(args.outputFile, histograms)
print("Features were saved to", args.outputFile)
if args.output_file:
np.savetxt(args.output_file, histograms)
print("Features were saved to", args.output_file)

return

Expand Down
3 changes: 1 addition & 2 deletions src/python/bow/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
numpy==1.24.4
opencv-python==4.8.1.78
scikit-learn==1.3.1
pytest==7.4.2
matplotlib==3.7.3
pytest==7.4.2

0 comments on commit d77abeb

Please sign in to comment.