Source code for sdofmv2.core.pca_analysis

import joblib
import numpy as np
from sklearn.decomposition import PCA
from .attention_map import patch_id_to_xy



[docs]
def mapping_dense_to_rgb(
    feature_map,
    visible_patch_ids,
    n_components,
    img_size,
    grid_size,
    patch_size,
    pretrained,
):
    """
    feature_map: torch.Tensor of shape [n_patches, dim]
    visible_patch_ids: masked token ids (which go to encoder network)
    n_components: output dimension after PCA
    img_size: input dimension
    grid_size: size of grid after patchfying
    patch_size: size of patch
    returns: RGB image (H, W, 3) in [0, 1]
    """
    # first check feature map dimension
    # assert feature_map.shape[0] == grid_size**2, \
    #     f"feature_map has {feature_map.shape[0]} patches, but expected {grid_size**2}"

    # Apply PCA to latent
    feature_map = feature_map.detach().cpu().numpy()

    if pretrained:
        pca = joblib.load(pretrained)
        X_pca = pca.transform(feature_map)
    else:
        pca = PCA(n_components=n_components)
        X_pca = pca.fit_transform(feature_map)  # (n_patches, 3)

    # Normalize each component to [0, 1]
    X_pca -= X_pca.min(axis=0, keepdims=True)
    X_pca /= X_pca.max(axis=0, keepdims=True) + 1e-8

    # convert flatten patches to height and width
    full_rgb = np.ones((img_size, img_size, n_components), dtype=np.float32) * 0.5
    for w, patch_id in zip(X_pca, visible_patch_ids):
        x, y = patch_id_to_xy(patch_id, patch_size, grid=grid_size)
        full_rgb[y : y + patch_size, x : x + patch_size] = w

    return full_rgb