Source code for vissl.utils.instance_retrieval_utils.pca

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import logging

import numpy as np
import torch
from vissl.utils.io import load_file, save_file


# Credits: https://github.com/facebookresearch/deepcluster/blob/master/eval_retrieval.py    # NOQA
class PCA(object):
    """
    Fits and applies PCA whitening.

    ``fit`` learns the projection from a NumPy matrix and stores it as two
    torch tensors (``self.mean`` and ``self.DVt``); ``apply`` then uses those
    tensors to whiten torch inputs.
    """

    def __init__(self, n_components):
        """
        Args:
            n_components: number of leading principal components to keep.
                Any value <= 0 (e.g. -1) keeps all components.
        """
        self.n_components = n_components

    def fit(self, X):
        """
        Learn the whitening projection from ``X``.

        Sets ``self.mean`` (torch tensor of shape ``(1, n_features)``) and
        ``self.DVt`` (torch tensor with one row per kept component), where
        ``DVt = diag(1 / sqrt(d)) @ V.T`` for the kept eigenpairs ``(d, V)``
        of the unnormalized scatter matrix ``Xc.T @ Xc`` of the centered data.

        Args:
            X: NumPy array, reduced over axis 0 for the mean
                (assumed layout: rows are samples, columns are features).
                The array is NOT modified.
        """
        mean = X.mean(axis=0)
        # Center into a new array rather than ``X -= mean``: the in-place
        # form overwrote the caller's features and raised a casting error
        # for integer inputs. This costs one extra copy of X.
        X = X - mean
        self.mean = torch.from_numpy(mean).view(1, -1)
        Xcov = np.dot(X.T, X)
        d, V = np.linalg.eigh(Xcov)

        # Clamp near-zero eigenvalues (relative to the largest one) so that
        # the 1 / sqrt(d) scaling below stays finite.
        eps = d.max() * 1e-5
        n_0 = (d < eps).sum()
        if n_0 > 0:
            logging.info(f"{n_0} / {d.size} singular values are 0")
            d[d < eps] = eps
        totenergy = d.sum()
        if self.n_components > 0:
            # if we want to retain all components, n_components = -1
            idx = np.argsort(d)[::-1][: self.n_components]
        else:
            idx = np.argsort(d)[::-1]
        # Reorder eigenpairs by decreasing eigenvalue and drop the rest.
        d = d[idx]
        V = V[:, idx]

        logging.info("keeping {} % of the energy".format((d.sum() / totenergy * 100.0)))

        D = np.diag(1.0 / np.sqrt(d))
        self.DVt = torch.from_numpy(np.dot(D, V.T))

    def to_cuda(self):
        """Move the fitted ``mean`` and ``DVt`` tensors to the CUDA device."""
        self.mean = self.mean.cuda()
        self.DVt = self.DVt.cuda()

    def apply(self, X):
        """
        Whiten ``X`` with the projection learned by ``fit``.

        Args:
            X: 2D torch tensor whose last dimension matches the fitted
                feature dimension (it is centered with ``self.mean`` and
                multiplied by ``self.DVt`` via ``torch.mm``).

        Returns:
            torch tensor with one row per input row and one column per kept
            component. ``X`` itself is not modified.
        """
        logging.info("Applying PCA...")
        X = X - self.mean
        num = torch.mm(self.DVt, X.transpose(0, 1)).transpose(0, 1)
        return num


def load_pca(pca_out_fname):
    """
    Read a previously saved PCA object back from disk.

    Args:
        pca_out_fname: path handed to ``load_file``.

    Returns:
        Whatever ``load_file`` yields for that path (presumably the object
        written by ``train_and_save_pca`` -- confirm against the caller).
    """
    return load_file(pca_out_fname)


def train_and_save_pca(features, n_pca, pca_out_fname):
    """
    Fit a ``PCA`` on ``features`` and persist the fitted object.

    Args:
        features: training matrix passed straight to ``PCA.fit``.
        n_pca: number of components, forwarded to the ``PCA`` constructor.
        pca_out_fname: destination path handed to ``save_file``.

    Returns:
        The fitted ``PCA`` instance (the same object that was saved).
    """
    fitted = PCA(n_components=n_pca)
    fitted.fit(features)

    logging.info(f"Saving PCA features to: {pca_out_fname}")
    save_file(fitted, pca_out_fname)
    logging.info(f"Saved PCA features to: {pca_out_fname}")

    return fitted