
Postprocessor

CountVisualBagOfWords(n_clusters=8, init='k-means++', n_init=10, max_iter=300, tol=0.0001, random_state=None, copy_x=True, algorithm='auto', with_mean=True, with_std=True)

Bases: VisualBagOfWords

Class which implements the count weighting schema: the final representation will contain, for each visual word of the codebook, the number of times it appears in the image's features

Example

codebook = [[0.6, 1.7, 0.3], [0.2, 0.7, 1.8]]

repr = [[0.6, 1.7, 0.3], [0.6, 1.7, 0.3]]

output of weighting schema = [2, 0]
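
As a rough standalone sketch (not part of the library's API), the counts above can be reproduced with scipy's vector quantization, which assigns each feature vector to its closest visual word; the codebook and feature values are the illustrative ones above, and the standardization and KMeans fitting performed by the class are skipped.

import numpy as np
from scipy.cluster.vq import vq

# illustrative codebook (two visual words) and one image's 2D feature array,
# same values as in the example above
codebook = np.array([[0.6, 1.7, 0.3], [0.2, 0.7, 1.8]])
features = np.array([[0.6, 1.7, 0.3], [0.6, 1.7, 0.3]])

# assign each feature vector to its closest visual word...
labels, _ = vq(features, codebook)

# ...and count how many times each visual word was assigned
counts = np.bincount(labels, minlength=len(codebook))
print(counts)  # [2 0]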

NOTE: a preliminary feature extraction step is required. Perform it with one of the provided visual techniques and set this class as a postprocessor for that technique's output, as follows:

import clayrs.content_analyzer as ca
ca.FieldConfig(ca.SkImageCannyEdgeDetector(), postprocessing=[ca.CountVisualBagOfWords()])

ADDITIONAL NOTE: the technique requires a 2D array of features for each image (such as the edges produced by the Canny edge detector). If features of any other dimensionality are provided, a ValueError is raised.

Arguments for SkLearn KMeans

Arguments for SkLearn StandardScaler

NOTE: for this technique the parameter "with_std" must be set to True

Source code in clayrs/content_analyzer/information_processor/postprocessors/postprocessor.py
def __init__(self, n_clusters: Any = 8, init: Any = "k-means++", n_init: Any = 10, max_iter: Any = 300,
             tol: Any = 1e-4, random_state: Any = None, copy_x: Any = True, algorithm: Any = "auto",
             with_mean: bool = True, with_std: bool = True):
    super().__init__(n_clusters=n_clusters, init=init, n_init=n_init, max_iter=max_iter, tol=tol,
                     random_state=random_state, copy_x=copy_x, algorithm=algorithm, with_mean=with_mean,
                     with_std=with_std)
    self._repr_string = autorepr(self, inspect.currentframe())

apply_weights(sparse_matrix)

Apply a count weighting schema to the representations obtained from the vector quantization step

PARAMETER DESCRIPTION
sparse_matrix

scipy sparse csr matrix containing the count of occurrences of each visual word

TYPE: scipy.sparse.csr_matrix

Source code in clayrs/content_analyzer/information_processor/postprocessors/postprocessor.py
def apply_weights(self, sparse_matrix: scipy.sparse.csr_matrix) -> scipy.sparse.csr_matrix:
    """
    Apply a count weighting schema to the representations obtained from the vector quantization step

    Args:
        sparse_matrix: scipy sparse csr matrix containing the count of occurrences of each visual word
    """
    return sparse_matrix

TfIdfVisualBagOfWords(n_clusters=8, init='k-means++', n_init=10, max_iter=300, tol=0.0001, random_state=None, copy_x=True, algorithm='auto', with_mean=True, with_std=True, norm='l2', use_idf=True, smooth_idf=True, sublinear_tf=False)

Bases: VisualBagOfWords

Class which implements the tf-idf weighting schema: the final representation will contain, for each visual word of the codebook, its tf-idf score

Example

codebook = [[0.6, 1.7, 0.3], [0.2, 0.7, 1.8]]

repr1 = [[0.6, 1.7, 0.3], [0.6, 1.7, 0.3]]

repr2 = [[0.6, 1.7, 0.3], [0.2, 0.7, 1.8]]

output of weighting schema = [[2, 0], [1, 1.69]]
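
As a rough standalone check (not part of the library's API), the scores above can be reproduced by applying SkLearn's TfidfTransformer to the count matrix of the two representations with normalization and idf smoothing turned off; the class defaults (norm='l2', smooth_idf=True) would instead produce l2-normalized scores.

import numpy as np
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import TfidfTransformer

# counts of visual-word occurrences for the two representations above:
# repr1 -> [2, 0] (both features fall on the first codebook word)
# repr2 -> [1, 1] (one feature per codebook word)
counts = csr_matrix(np.array([[2, 0], [1, 1]]))

# with norm=None and smooth_idf=False the illustrative scores are obtained
tf_idf = TfidfTransformer(norm=None, smooth_idf=False).fit_transform(counts)
print(tf_idf.toarray().round(2))  # [[2.   0.  ]
                                  #  [1.   1.69]]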

NOTE: a preliminary feature extraction step is required. Perform it with one of the provided visual techniques and set this class as a postprocessor for that technique's output, as follows:

import clayrs.content_analyzer as ca
ca.FieldConfig(ca.SkImageCannyEdgeDetector(), postprocessing=[ca.TfIdfVisualBagOfWords()])

ADDITIONAL NOTE: the technique requires a 2D array of features for each image (such as the edges produced by the Canny edge detector). If features of any other dimensionality are provided, a ValueError is raised.

Arguments for SkLearn KMeans

Arguments for SkLearn StandardScaler

Arguments for SkLearn TfIdf Transformer

NOTE: for this technique the parameter "with_std" must be set to True

Source code in clayrs/content_analyzer/information_processor/postprocessors/postprocessor.py
def __init__(self, n_clusters: Any = 8, init: Any = "k-means++", n_init: Any = 10, max_iter: Any = 300,
             tol: Any = 1e-4, random_state: Any = None, copy_x: Any = True, algorithm: Any = "auto",
             with_mean: bool = True, with_std: bool = True,
             norm: Optional[str] = "l2", use_idf: bool = True,
             smooth_idf: bool = True, sublinear_tf: bool = False):
    super().__init__(n_clusters=n_clusters, init=init, n_init=n_init, max_iter=max_iter, tol=tol,
                     random_state=random_state, copy_x=copy_x, algorithm=algorithm, with_mean=with_mean,
                     with_std=with_std)

    self.tf_idf_params = {"norm": norm,
                          "use_idf": use_idf,
                          "smooth_idf": smooth_idf,
                          "sublinear_tf": sublinear_tf}

    self._repr_string = autorepr(self, inspect.currentframe())

apply_weights(sparse_matrix)

Apply a tf-idf weighting schema to the representations obtained from the vector quantization step

PARAMETER DESCRIPTION
sparse_matrix

scipy sparse csr matrix containing the count of occurrences of each visual word

TYPE: scipy.sparse.csr_matrix

Source code in clayrs/content_analyzer/information_processor/postprocessors/postprocessor.py
def apply_weights(self, sparse_matrix: scipy.sparse.csr_matrix) -> scipy.sparse.csr_matrix:
    """
    Apply a tf-idf weighting schema to the representations obtained from the vector quantization step

    Args:
        sparse_matrix: scipy sparse csr matrix containing the count of occurrences of each visual word
    """
    return TfidfTransformer(**self.tf_idf_params).fit_transform(sparse_matrix.todok())

ScipyVQ(n_clusters=8, init='k-means++', n_init=10, max_iter=300, tol=0.0001, random_state=None, copy_x=True, algorithm='auto', with_mean=True, with_std=True)

Bases: EmbeddingInputPostProcessor

Vector quantization using the Scipy implementation and SkLearn KMeans. The idea behind this technique is to "approximate" feature vectors using only a finite set of prototype vectors from a codebook. The codebook is computed with the SkLearn KMeans implementation; then, for each feature in the representation, the closest prototype is found using Scipy's vector quantization implementation and replaces the original feature in the final representation.

import clayrs.content_analyzer as ca
ca.FieldConfig(ca.SkImageCannyEdgeDetector(), postprocessing=[ca.ScipyVQ()])
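
The snippet below is a minimal standalone sketch of the same idea on random illustrative features (it is not the class's internal code): a codebook is fitted with SkLearn KMeans and each feature vector is then mapped to its nearest prototype with scipy.cluster.vq; the standardization step applied by the class is omitted.

import numpy as np
from scipy.cluster.vq import vq
from sklearn.cluster import KMeans

# illustrative 2D feature array for a single image (e.g. edge features)
features = np.random.rand(100, 3)

# learn a codebook of prototype vectors with KMeans
kmeans = KMeans(n_clusters=8, n_init=10, random_state=42).fit(features)
codebook = kmeans.cluster_centers_

# for each feature, find the closest prototype and use it in place
# of the original vector in the final representation
labels, _ = vq(features, codebook)
quantized = codebook[labels]
print(quantized.shape)  # (100, 3), each row is now one of the 8 prototypes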

Arguments for SkLearn KMeans

Arguments for SkLearn StandardScaler

NOTE: for this technique the parameter "with_std" must be set to True

Source code in clayrs/content_analyzer/information_processor/postprocessors/postprocessor.py
def __init__(self, n_clusters: Any = 8, init: Any = "k-means++", n_init: Any = 10, max_iter: Any = 300,
             tol: Any = 1e-4, random_state: Any = None, copy_x: Any = True, algorithm: Any = "auto",
             with_mean: bool = True, with_std: bool = True):
    self.k_means = KMeans(n_clusters=n_clusters, init=init, n_init=n_init, max_iter=max_iter, tol=tol,
                          random_state=random_state, copy_x=copy_x, algorithm=algorithm)
    self.with_mean = with_mean
    self.with_std = with_std
    self._repr_string = autorepr(self, inspect.currentframe())

SkLearnPCA(n_components=None, copy=True, whiten=False, svd_solver='auto', tol=0.0, iterated_power='auto', random_state=None)

Bases: DimensionalityReduction

Dimensionality reduction using the PCA implementation from SkLearn

Usage example:

import clayrs.content_analyzer as ca
ca.FieldConfig(ca.SkImageCannyEdgeDetector(), postprocessing=[ca.SkLearnPCA()])

Arguments for SkLearn PCA

Source code in clayrs/content_analyzer/information_processor/postprocessors/postprocessor.py
def __init__(self, n_components=None, copy=True, whiten=False, svd_solver='auto', tol=0.0,
             iterated_power='auto', random_state=None):
    super().__init__()
    self.pca = PCA(n_components=n_components, copy=copy, whiten=whiten, svd_solver=svd_solver, tol=tol,
                   iterated_power=iterated_power, random_state=random_state)
    self._repr_string = autorepr(self, inspect.currentframe())

SkLearnGaussianRandomProjections(n_components='auto', eps=0.1, random_state=None)

Bases: DimensionalityReduction

Dimensionality reduction using the Gaussian Random Projections implementation from SkLearn

Usage example:

import clayrs.content_analyzer as ca
ca.FieldConfig(ca.SkImageCannyEdgeDetector(), postprocessing=[ca.SkLearnGaussianRandomProjections()])

Arguments for SkLearn Gaussian Random Projection

Source code in clayrs/content_analyzer/information_processor/postprocessors/postprocessor.py
def __init__(self, n_components='auto', eps=0.1, random_state=None):
    super().__init__()
    self.random_proj = GaussianRandomProjection(n_components=n_components, eps=eps, random_state=random_state)
    self._repr_string = autorepr(self, inspect.currentframe())

SkLearnFeatureAgglomeration(n_clusters=2, affinity='euclidean', memory=None, connectivity=None, compute_full_tree='auto', linkage='ward', pooling_func=np.mean, distance_threshold=None, compute_distances=False)

Bases: DimensionalityReduction

Dimensionality reduction using the Feature Agglomeration implementation from SkLearn

Usage example:

import clayrs.content_analyzer as ca
ca.FieldConfig(ca.SkImageCannyEdgeDetector(), postprocessing=[ca.SkLearnFeatureAgglomeration()])

Arguments for SkLearn Feature Agglomeration

Source code in clayrs/content_analyzer/information_processor/postprocessors/postprocessor.py
def __init__(self, n_clusters=2, affinity='euclidean', memory=None, connectivity=None, compute_full_tree='auto',
             linkage='ward', pooling_func=np.mean, distance_threshold=None, compute_distances=False):
    super().__init__()
    self.feature_agg = FeatureAgglomeration(n_clusters=n_clusters, affinity=affinity, memory=memory,
                                            connectivity=connectivity, compute_full_tree=compute_full_tree,
                                            linkage=linkage, pooling_func=pooling_func,
                                            distance_threshold=distance_threshold,
                                            compute_distances=compute_distances)
    self._repr_string = autorepr(self, inspect.currentframe())