Source code for vissl.utils.instance_retrieval_utils.data_util

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import logging
import math
import os
import subprocess
from collections import OrderedDict

import numpy as np
import scipy.io
import torch
import torchvision.transforms.functional as TF
from fvcore.common.file_io import PathManager
from PIL import Image, ImageFile
from torch.nn import functional as F
from torchvision import transforms
from vissl.utils.instance_retrieval_utils.evaluate import (
    compute_map,
    score_ap_from_ranks_1,
)
from vissl.utils.io import load_file


[docs]def is_revisited_dataset(dataset_name: str): """ Computes whether the specified dataseet name is a revisited version of the oxford and paris datasets. simply looks for pattern "roxford5k" and "rparis6k" in specified dataset_name. """ if dataset_name in ["roxford5k", "rparis6k"]: return True return False
[docs]def is_instre_dataset(dataset_name: str): """ Returns True if the dataset name is "instre". Helper function used in code at several places. """ if dataset_name == "instre": return True return False
[docs]def is_whiten_dataset(dataset_name: str): """ Returns if the dataset specified has name "whitening". User can use any dataset they want for whitening. """ if dataset_name == "whitening": return True return False
# pooling + whitening # Credits: Matthijs Douze
[docs]def add_bias_channel(x, dim: int = 1): """ Adds a bias channel useful during pooling + whitening operation. """ bias_size = list(x.size()) bias_size[dim] = 1 one = x.new_ones(bias_size) return torch.cat((x, one), dim)
# Credits: Matthijs Douze
[docs]def flatten(x: torch.Tensor, keepdims: bool = False): """ Flattens B C H W input to B C*H*W output, optionally retains trailing dimensions. """ y = x.view(x.size(0), -1) if keepdims: for _ in range(y.dim(), x.dim()): y = y.unsqueeze(-1) return y
# Credits: Matthijs Douze
[docs]def gem( x: torch.Tensor, p: int = 3, eps: float = 1e-6, clamp: bool = True, add_bias: bool = False, keepdims: bool = False, ): """ Gem pooling on the given tensor. Args: x (torch.Tensor): tensor on which the pooling should be done p (int): pooling number. If p=inf then simply perform max_pool2d If p=1 and x tensor has grad, simply perform avg_pool2d else, perform Gem pooling for specified p eps (float): if clamping the x tensor, use the eps for clamping clamp (float): whether to clamp the tensor add_bias (bool): whether to add the biad channel keepdims (bool): whether to flatten or keep the dimensions as is Returns: x (torch.Tensor): Gem pooled tensor """ if p == math.inf or p == "inf": x = F.max_pool2d(x, (x.size(-2), x.size(-1))) elif p == 1 and not (torch.is_tensor(p) and p.requires_grad): x = F.avg_pool2d(x, (x.size(-2), x.size(-1))) else: if clamp: x = x.clamp(min=eps) x = F.avg_pool2d(x.pow(p), (x.size(-2), x.size(-1))).pow(1.0 / p) if add_bias: x = add_bias_channel(x) if not keepdims: x = flatten(x) return x
# Credits: Matthijs Douze
[docs]def l2n(x: torch.Tensor, eps: float = 1e-6, dim: int = 1): """ L2 normalize the input tensor along the specified dimension Args: x (torch.Tensor): the tensor to normalize eps (float): epsilon to use to normalize to avoid the inf output dim (int): along which dimension to L2 normalize Returns: x (torch.Tensor): L2 normalized tensor """ x = x / (torch.norm(x, p=2, dim=dim, keepdim=True) + eps).expand_as(x) return x
# Credits: Matthijs Douze
[docs]class MultigrainResize(transforms.Resize): """ Resize with a `largest=False` argument allowing to resize to a common largest side without cropping Approach used in the Multigrain paper https://arxiv.org/pdf/1902.05509.pdf """ def __init__(self, size: int, largest: bool = False, **kwargs): super().__init__(size, **kwargs) self.largest = largest
[docs] @staticmethod def target_size(w: int, h: int, size: int, largest: bool = False): if (h < w) == largest: w, h = size, int(size * h / w) else: w, h = int(size * w / h), size size = (h, w) return size
def __call__(self, img): size = self.size w, h = img.size target_size = self.target_size(w, h, size, self.largest) return TF.resize(img, target_size, self.interpolation) def __repr__(self): r = super().__repr__() return r[:-1] + f", largest={self.largest})"
# Credits: Matthijs Douze
[docs]class WhiteningTrainingImageDataset: """ A set of training images for whitening """ def __init__(self, base_dir: str, image_list_file: str, num_samples: int = 0): with PathManager.open(image_list_file) as fopen: self.image_list = fopen.readlines() if num_samples > 0: self.image_list = self.image_list[:num_samples] self.root = base_dir self.N_images = len(self.image_list) logging.info(f"Loaded whitening data: {self.N_images}...")
[docs] def get_num_images(self): return self.N_images
[docs] def get_filename(self, i: int): return f"{self.root}/{self.image_list[i][:-1]}"
[docs]class InstreDataset: """ A dataset class that reads and parses the Instre Dataset so it's ready to be used in the code for retrieval evaluations """ def __init__(self, dataset_path: str, num_samples: int = 0): self.base_dir = dataset_path gnd_instre = scipy.io.loadmat(f"{self.base_dir}/gnd_instre.mat") self.gnd = gnd_instre["gnd"][0] self.qimlist = [fname[0] for fname in gnd_instre["qimlist"][0]] self.db_imlist = [fname[0] for fname in gnd_instre["imlist"][0]] if num_samples > 0: self.qimlist = self.qimlist[:num_samples] self.db_imlist = self.db_imlist[:num_samples] self.N_images = len(self.db_imlist) self.N_queries = len(self.qimlist) rs = np.random.RandomState(123) nq = self.N_queries self.val_subset = set(rs.choice(nq, nq // 10)) logging.info( f"Loaded INSTRE dataset: {self.N_images}, queries: {self.N_queries}" )
[docs] def get_num_images(self): """ Number of images in the dataset """ return self.N_images
[docs] def get_num_query_images(self): """ Number of query images in the dataset """ return self.N_queries
[docs] def get_filename(self, i: int): """ Return the image filepath for the db image """ return f"{self.base_dir}/{self.db_imlist[i]}"
[docs] def get_query_filename(self, i: int): """ Reutrn the image filepath for the query image """ return f"{self.base_dir}/{self.qimlist[i]}"
[docs] def get_query_roi(self, i: int): """ INSTRE dataset has no notion of ROI so we return None. """ return None
[docs] def eval_from_ranks(self, ranks): """ Return the mean average precision value or the train and validation both provided the ranks (scores of the model). """ nq, nb = ranks.shape gnd = self.gnd sum_ap = 0 sum_ap_val = 0 for i in range(nq): positives = gnd[i][0][0] - 1 ok = np.zeros(nb, dtype=bool) ok[positives] = True pos = np.where(ok[ranks[i]])[0] ap = score_ap_from_ranks_1(pos, len(positives)) sum_ap += ap if i in self.val_subset: sum_ap_val += ap return sum_ap / nq, sum_ap_val / len(self.val_subset)
[docs] def score(self, scores, temp_dir, verbose=True): """ For the input scores of the model, calculate the AP metric """ ranks = scores.argsort(axis=1)[:, ::-1] mAP, mAP_val = self.eval_from_ranks(ranks) if verbose: logging.info(f"INSTRE mAP={mAP} val {mAP_val}") return mAP, mAP_val
[docs]class RevisitedInstanceRetrievalDataset: """ A dataset class used for the Revisited Instance retrieval datasets: Revisited Oxford and Revisited Paris. The object reads and parses the datasets so it's ready to be used in the code for retrieval evaluations. """ def __init__(self, dataset: str, dir_main: str): # Credits: https://github.com/filipradenovic/revisitop/blob/master/python/dataset.py#L6 # NOQA self.DATASETS = ["roxford5k", "rparis6k"] dataset = dataset.lower() assert is_revisited_dataset(dataset), f"Unknown dataset: {dataset}!" # loading imlist, qimlist, and gnd, in cfg as a dict gnd_fname = f"{dir_main}/{dataset}/gnd_{dataset}.pkl" cfg = load_file(gnd_fname) cfg["gnd_fname"] = gnd_fname cfg["ext"] = ".jpg" cfg["qext"] = ".jpg" cfg["dir_data"] = f"{dir_main}/{dataset}" cfg["dir_images"] = f"{cfg['dir_data']}/jpg" cfg["n"] = len(cfg["imlist"]) cfg["nq"] = len(cfg["qimlist"]) cfg["dataset"] = dataset self.cfg = cfg logging.info( f"Dataset: {dataset}, images: {self.get_num_images()}, " f"queries: {self.get_num_query_images()}" )
[docs] def get_filename(self, i: int): """ Return the image filepath for the db image """ return f"{self.cfg['dir_images']}/{self.cfg['imlist'][i] + self.cfg['ext']}"
[docs] def get_query_filename(self, i: int): """ Reutrn the image filepath for the query image """ return f"{self.cfg['dir_images']}/{self.cfg['qimlist'][i] + self.cfg['qext']}"
[docs] def get_num_images(self): """ Number of images in the dataset """ return self.cfg["n"]
[docs] def get_num_query_images(self): """ Number of query images in the dataset """ return self.cfg["nq"]
[docs] def get_query_roi(self, i: int): """ Get the ROI for the query image that we want to test retrieval """ return self.cfg["gnd"][i]["bbx"]
[docs] def score(self, sim, temp_dir: str): """ For the input similarity scores of the model, calculate the mean AP metric and mean Precision@k metrics. """ sim = sim.T # Credits: https://github.com/filipradenovic/revisitop/blob/master/python/example_evaluate.py # NOQA ranks = np.argsort(-sim, axis=0) # revisited evaluation gnd = self.cfg["gnd"] # evaluate ranks ks = [1, 5, 10] # search for easy gnd_t = [] for i in range(len(gnd)): g = {} g["ok"] = np.concatenate([gnd[i]["easy"]]) g["junk"] = np.concatenate([gnd[i]["junk"], gnd[i]["hard"]]) gnd_t.append(g) mapE, apsE, mprE, prsE = compute_map(ranks, gnd_t, ks) # search for easy & hard gnd_t = [] for i in range(len(gnd)): g = {} g["ok"] = np.concatenate([gnd[i]["easy"], gnd[i]["hard"]]) g["junk"] = np.concatenate([gnd[i]["junk"]]) gnd_t.append(g) mapM, apsM, mprM, prsM = compute_map(ranks, gnd_t, ks) # search for hard gnd_t = [] for i in range(len(gnd)): g = {} g["ok"] = np.concatenate([gnd[i]["hard"]]) g["junk"] = np.concatenate([gnd[i]["junk"], gnd[i]["easy"]]) gnd_t.append(g) mapH, apsH, mprH, prsH = compute_map(ranks, gnd_t, ks) logging.info( ">> {}: mAP E: {}, M: {}, H: {}".format( self.cfg["dataset"], np.around(mapE * 100, decimals=2), np.around(mapM * 100, decimals=2), np.around(mapH * 100, decimals=2), ) ) logging.info( ">> {}: mP@k{} E: {}, M: {}, H: {}".format( self.cfg["dataset"], np.array(ks), np.around(mprE * 100, decimals=2), np.around(mprM * 100, decimals=2), np.around(mprH * 100, decimals=2), ) )
# Credits: https://github.com/facebookresearch/deepcluster/blob/master/eval_retrieval.py # NOQA # Adapted by: Priya Goyal (prigoyal@fb.com)
[docs]class InstanceRetrievalImageLoader: """ The custom loader for the Paris and Oxford Instance Retrieval datasets. """ def __init__(self, S, transforms): self.S = S self.transforms = transforms
[docs] def apply_img_transform(self, im): """ Apply the pre-defined transforms on the image. """ im_size_hw = np.array((im.size[1], im.size[0])) if self.S == -1: ratio = 1.0 elif self.S == -2: if np.max(im_size_hw) > 124: ratio = 1024.0 / np.max(im_size_hw) else: ratio = -1 else: ratio = float(self.S) / np.max(im_size_hw) new_size = tuple(np.round(im_size_hw * ratio).astype(np.int32)) im_resized = self.transforms( im.resize((new_size[1], new_size[0]), Image.BILINEAR) ) return im_resized, ratio
[docs] def load_and_prepare_whitening_image(self, fname): """ from the filename, load the whitening image and prepare it to be used by applying data transforms """ with PathManager.open(fname, "rb") as f: im = Image.open(f) if im.mode != "RGB": im = im.convert(mode="RGB") if self.transforms is not None: im = self.transforms(im) return im
[docs] def load_and_prepare_instre_image(self, fname): """ from the filename, load the db or query image and prepare it to be used by applying data transforms """ with PathManager.open(fname, "rb") as f: im = Image.open(f) if self.transforms is not None: im = self.transforms(im) return im
[docs] def load_and_prepare_image(self, fname, roi=None): """ Read image, get aspect ratio, and resize such as the largest side equals S. If there is a roi, adapt the roi to the new size and crop. Do not rescale the image once again. ROI format is (xmin,ymin,xmax,ymax) """ # Read image, get aspect ratio, and resize such as the largest side equals S with PathManager.open(fname, "rb") as f: im = Image.open(f).convert(mode="RGB") im_resized, ratio = self.apply_img_transform(im) # If there is a roi, adapt the roi to the new size and crop. Do not rescale # the image once again if roi is not None: # ROI format is (xmin,ymin,xmax,ymax) roi = np.round(roi * ratio).astype(np.int32) im_resized = im_resized[:, roi[1] : roi[3], roi[0] : roi[2]] return im_resized
[docs] def load_and_prepare_revisited_image(self, img_path, roi=None): """ Load the image, crop the roi from the image if the roi is not None, apply the image transforms. """ # to avoid crashing for truncated (corrupted images) ImageFile.LOAD_TRUNCATED_IMAGES = True # open path as file to avoid ResourceWarning # (https://github.com/python-pillow/Pillow/issues/835) with PathManager.open(img_path, "rb") as f: img = Image.open(f).convert("RGB") if roi is not None: im_resized = img.crop(roi) im_resized, _ = self.apply_img_transform(img) return im_resized
[docs]class InstanceRetrievalDataset: """ A dataset class used for the Instance retrieval datasets: Oxford and Paris. The object reads and parses the datasets so it's ready to be used in the code for retrieval evaluations. Credits: https://github.com/facebookresearch/deepcluster/blob/master/eval_retrieval.py # NOQA Adapted by: Priya Goyal (prigoyal@fb.com) """ def __init__(self, path, eval_binary_path, num_samples=None): self.path = path self.eval_binary_path = eval_binary_path # Some images from the Paris dataset are corrupted. Standard practice is # to ignore them. # See: https://www.robots.ox.ac.uk/~vgg/data/parisbuildings/corrupt.txt self.blacklisted_images = [ "paris_louvre_000136", "paris_louvre_000146", "paris_moulinrouge_000422", "paris_museedorsay_001059", "paris_notredame_000188", "paris_pantheon_000284", "paris_pantheon_000960", "paris_pantheon_000974", "paris_pompidou_000195", "paris_pompidou_000196", "paris_pompidou_000201", "paris_pompidou_000467", "paris_pompidou_000640", "paris_sacrecoeur_000299", "paris_sacrecoeur_000330", "paris_sacrecoeur_000353", "paris_triomphe_000662", "paris_triomphe_000833", "paris_triomphe_000863", "paris_triomphe_000867", ] self.blacklisted = set(self.blacklisted_images) self.q_names = None self.q_index = None self.N_images = None self.N_queries = None self.q_roi = None self.load(num_samples=num_samples)
[docs] def get_num_images(self): """ Number of images in the dataset """ return self.N_images
[docs] def get_num_query_images(self): """ Number of query images in the dataset """ return self.N_queries
[docs] def load(self, num_samples=None): """ Load the data ground truth and parse the data so it's ready to be used. """ # Load the dataset GT self.lab_root = f"{self.path}/lab/" self.img_root = f"{self.path}/jpg/" logging.info(f"Loading data: {self.path}") lab_filenames = np.sort(os.listdir(self.lab_root)) # Get the filenames without the extension self.img_filenames = [ e[:-4] for e in np.sort(os.listdir(self.img_root)) if e[:-4] not in self.blacklisted ] # Parse the label files. Some challenges as filenames do not correspond # exactly to query names. Go through all the labels to: # i) map names to filenames and vice versa # ii) get the relevant regions of interest of the queries, # iii) get the indexes of the dataset images that are queries # iv) get the relevants / non-relevants list self.relevants = {} self.junk = {} self.non_relevants = {} self.filename_to_name = {} self.name_to_filename = OrderedDict() self.q_roi = {} for e in lab_filenames: if e.endswith("_query.txt"): q_name = e[: -len("_query.txt")] with PathManager.open(f"{self.lab_root}/{e}") as fopen: q_data = fopen.readline().split(" ") if q_data[0].startswith("oxc1_"): q_filename = q_data[0][5:] else: q_filename = q_data[0] self.filename_to_name[q_filename] = q_name self.name_to_filename[q_name] = q_filename with PathManager.open(f"{self.lab_root}/{q_name}_ok.txt") as fopen: good = {e.strip() for e in fopen} with PathManager.open(f"{self.lab_root}/{q_name}_good.txt") as fopen: good = good.union({e.strip() for e in fopen}) with PathManager.open(f"{self.lab_root}/{q_name}_junk.txt") as fopen: junk = {e.strip() for e in fopen} good_plus_junk = good.union(junk) self.relevants[q_name] = [ i for i in range(len(self.img_filenames)) if self.img_filenames[i] in good ] self.junk[q_name] = [ i for i in range(len(self.img_filenames)) if self.img_filenames[i] in junk ] self.non_relevants[q_name] = [ i for i in range(len(self.img_filenames)) if self.img_filenames[i] not in good_plus_junk ] self.q_roi[q_name] = np.array( [float(q) for q in q_data[1:]], dtype=np.float32 ) self.q_names = list(self.name_to_filename.keys()) self.q_index = np.array( [self.img_filenames.index(self.name_to_filename[qn]) for qn in self.q_names] ) self.N_images = len(self.img_filenames) self.N_queries = len(self.q_index) if num_samples is not None: self.N_queries = min(self.N_queries, num_samples) self.N_images = min(self.N_images, num_samples)
[docs] def score(self, sim, temp_dir): """ From the input similarity score, compute the mean average precision """ if not os.path.exists(temp_dir): os.makedirs(temp_dir) idx = np.argsort(sim, axis=1)[:, ::-1] maps = [ self.score_rnk_partial(i, idx[i], temp_dir) for i in range(self.N_queries) ] for i in range(self.N_queries): logging.info("{0}: {1:.2f}".format(self.q_names[i], 100 * maps[i])) logging.info(20 * "-") logging.info("Mean: {0:.2f}".format(100 * np.mean(maps)))
[docs] def score_rnk_partial(self, i, idx, temp_dir): """ Compute the mean AP for a given single query """ rnk = np.array(self.img_filenames[: self.N_images])[idx] with PathManager.open(f"{temp_dir}/{self.q_names[i]}.rnk", "w") as f: f.write("\n".join(rnk) + "\n") cmd = ( f"{self.eval_binary_path} {self.lab_root}{self.q_names[i]} " f"{temp_dir}/{self.q_names[i]}.rnk" ) p = subprocess.Popen( cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT ) map_ = float(p.stdout.readlines()[0]) p.wait() return map_
[docs] def get_filename(self, i): """ Return the image filepath for the db image """ return os.path.normpath( "{0}/{1}.jpg".format(self.img_root, self.img_filenames[i]) )
[docs] def get_query_filename(self, i): """ Reutrn the image filepath for the query image """ return os.path.normpath( f"{self.img_root}/{self.img_filenames[self.q_index[i]]}.jpg" )
[docs] def get_query_roi(self, i): """ Get the ROI for the query image that we want to test retrieval """ return self.q_roi[self.q_names[i]]