# transformer.py

import numpy as np
import nibabel as nib
from scipy.io import loadmat, savemat
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

def load_nii(filename):
    """Read a NIfTI file and return its image data as a NumPy array.

    Parameters:
        filename: Path of the image; handed unchanged to ``nib.load``.

    Returns:
        numpy.ndarray: The array produced by the image's ``get_fdata()``.
    """
    image = nib.load(filename)
    voxels = image.get_fdata()
    return np.array(voxels)

def save_nii(filename, data):
    """Write ``data`` to ``filename`` as a NIfTI-1 image.

    The image is created with an identity affine (``np.eye(4)``) and
    ``dtype=np.uint8``.

    Parameters:
        filename: Destination path, handed unchanged to ``nib.save``.
        data: Array-like image data passed to ``nib.Nifti1Image``.
    """
    identity_affine = np.eye(4)
    image = nib.Nifti1Image(data, identity_affine, dtype=np.uint8)
    nib.save(image, filename)

def load_rve_encoding(ve_parameter):
    """Assemble the four encoding arrays of ``ve_parameter`` into one tensor.

    Parameters:
        ve_parameter: Mapping that provides the keys ``"gx1"``, ``"gy1"``,
            ``"gx2"`` and ``"gy2"``.

    Returns:
        torch.Tensor: The four entries concatenated along ``dim=1`` in the
        order ``gy1, gx1, gy2, gx2``.
    """
    # NOTE(review): the x and y entries are crossed on purpose here to mirror
    # the existing behaviour (the "gx*" slots are filled from the "gy*" keys
    # and vice versa) - confirm this matches the convention of the data files.
    column_keys = ("gy1", "gx1", "gy2", "gx2")
    raw_columns = [ve_parameter[key] for key in column_keys]
    tensor_columns = [torch.tensor(column) for column in raw_columns]
    return torch.cat(tensor_columns, dim=1)


def alpha_func_pytorch(phase):
    """
    Tagging efficiency for unipolar pcasl as a function of the phase rotation per TR.

    Parameters:
        phase (torch.Tensor): Phase rotation per TR in radians. It is passed
            to ``torch.cos``, so it must be a tensor (any shape).

    Returns:
        torch.Tensor: Tagging efficiency, same shape as ``phase``.
    """
    # Coefficients derived from FT of simulated response across v=5:5:40
    scale = -1 / 36 / 1.8239
    harmonics = 75.33 * torch.cos(phase) - 11.6 * torch.cos(3 * phase) + 1.93 * torch.cos(5 * phase)
    return scale * harmonics


def compute_labeling_efficiencies_pytorch(pred, rve_encoding):
    """
    Labeling efficiency of every row of ``pred`` under every encoding row.

    Parameters:
        pred: Tensor indexed as ``pred[:, 0]``, ``pred[:, 1]``, ``pred[:, 2]``;
            the three columns are used as x, y and df.
        rve_encoding: Tensor whose columns 0..3 are used as gx1, gy1, gx2, gy2.
            Each column is moved to the device of ``pred``.

    Returns:
        torch.Tensor: ``alpha_func_pytorch`` applied to the phase; the
        ``pred`` columns are broadcast against the encoding columns, so for
        2-D inputs the result has one row per ``pred`` row and one column per
        encoding row.
    """
    # Per-sample quantities as column vectors so they broadcast against the
    # per-encoding row vectors below.
    x, y, df = (pred[:, col][:, None] for col in range(3))

    target_device = x.device
    gx1, gy1, gx2, gy2 = (
        rve_encoding[:, col].to(target_device)[None, :] for col in range(4)
    )

    projection = (x - gx1) * (gx2 - gx1) + (y - gy1) * (gy2 - gy1)
    squared_length = (gx1 - gx2) ** 2 + (gy1 - gy2) ** 2
    phase = projection / squared_length * torch.pi + df

    return alpha_func_pytorch(phase)

def one_hot_encode(labels, num_classes):
    """
    Turn a vector of class indices into one-hot rows.

    Parameters:
    - labels: Integer tensor of shape (batch_size,) holding class indices.
    - num_classes: Total number of classes (width of the result).

    Returns:
    - A dense float32 tensor of shape (batch_size, num_classes) on the same
      device as ``labels``, with a single 1 per row.
    """
    encoded = torch.zeros(
        (labels.shape[0], num_classes),
        dtype=torch.float32,
        device=labels.device,
    )
    # Write a 1 into the column named by each label.
    column_index = labels[:, None]
    encoded.scatter_(1, column_index, 1)
    return encoded

# Transformer
class TransformerClassifier(nn.Module):
    """Transformer-encoder classifier over a signal of ``input_dim`` samples.

    Every sample of the input is embedded as one token, the token sequence is
    run through a transformer encoder, and the last token is fed to a
    three-layer MLP head that produces ``num_classes`` logits.

    The module also keeps a lookup table (``self.dictionary``) with one row
    per class. ``compute_constraint_loss`` correlates the input with the row
    of the predicted class.
    """

    def __init__(self, input_dim, num_classes, d_model=64, num_heads=8, num_layers=6, ff_dim=256,
                 dictionary=None):
        """
        Parameters:
            input_dim: Number of samples (tokens) per input signal.
            num_classes: Number of output classes.
            d_model: Token embedding width.
            num_heads: Attention heads per encoder layer.
            num_layers: Number of encoder layers.
            ff_dim: Width of the encoder feed-forward sub-layer.
            dictionary: Optional array-like / tensor with one row per class.
                When omitted, "theoretical_efficiency.nii" is loaded with
                ``load_nii`` (the previous, only, behaviour).
        """
        super().__init__()

        if dictionary is None:
            dictionary = load_nii("theoretical_efficiency.nii")
        # Registered as a buffer so ``model.to(device)`` moves it together
        # with the weights; the constructor no longer depends on a module
        # level ``device`` variable. ``persistent=False`` keeps the
        # state_dict keys exactly as before, so existing checkpoints load.
        # The table is stored as float32: it is used as floating point data
        # below, and an integer cast would drop any fractional part.
        self.register_buffer(
            "dictionary",
            torch.as_tensor(dictionary, dtype=torch.float32),
            persistent=False,
        )

        self.embedding = nn.Linear(1, d_model)
        # Learned positional encoding, one vector per token, initialised to 0.
        self.positional_encoding = nn.Parameter(torch.zeros(1, input_dim, d_model))

        encoder_layers = nn.TransformerEncoderLayer(d_model=d_model, nhead=num_heads, dim_feedforward=ff_dim)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)

        self.fc1 = nn.Linear(d_model, 512)
        self.fc2 = nn.Linear(512, 1024)
        self.fc3 = nn.Linear(1024, num_classes)

        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class logits for a batch of signals.

        Parameters:
            x: Tensor of shape (batch, input_dim).

        Returns:
            torch.Tensor: Logits of shape (batch, num_classes).
        """
        x = x.unsqueeze(-1)                                # (batch, input_dim, 1)
        x = self.embedding(x) + self.positional_encoding   # (batch, input_dim, d_model)
        # The encoder layers are built with the default sequence-first layout.
        x = x.permute(1, 0, 2)                             # (input_dim, batch, d_model)
        x = self.transformer_encoder(x)
        x = x[-1, :, :]                                    # keep the last token only
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        logits = self.fc3(x)
        return logits

    def compute_constraint_loss(self, logits, x):
        """
        Correlation-based consistency score between ``x`` and the dictionary
        rows of the predicted classes.

        1. Convert logits to probabilities and take the arg-max class.
        2. Look up the dictionary row of that class.
        3. Compute the per-sample Pearson correlation between ``x`` and
           that row.
        4. Average the correlations that exceed 0.6 (0 if there are none).

        Parameters:
            logits: Tensor of shape (batch, num_classes).
            x: Tensor of shape (batch, n) where n is the dictionary row length.

        Returns:
            float: ``1 - mean(correlations > 0.6)``; 1.0 when no sample passes
            the threshold.

        NOTE: the result is a plain Python float built from an arg-max, so it
        carries no gradient - adding it to a training loss changes the
        reported value but not the parameter update.
        """
        probabilities = self.softmax(logits)
        predicted_indices = torch.argmax(probabilities, dim=1)

        y_pred = self.dictionary[predicted_indices].float()

        # Per-sample Pearson correlation (population statistics).
        x_centered = x - x.mean(dim=1, keepdim=True)
        y_centered = y_pred - y_pred.mean(dim=1, keepdim=True)
        covariance = (x_centered * y_centered).mean(dim=1)
        std_x = x.std(dim=1, unbiased=False)
        std_y = y_pred.std(dim=1, unbiased=False)
        # A zero standard deviation yields NaN/inf here; NaN fails the
        # threshold comparison below and is therefore dropped.
        correlation = covariance / (std_x * std_y)

        selected = correlation[correlation > 0.6]
        if selected.numel() == 0:
            return 1.0
        return 1.0 - selected.mean().item()


# Loss functions
def supervised_loss(y_pred, y_true):
    """Mean squared error between ``y_pred`` and ``y_true`` (both tensors)."""
    residual = y_pred - y_true
    return torch.mean(residual ** 2)

# Define the plot function for 2D histograms
def plot_2d_histogram(x, y, xlabel, ylabel, title, bins=100, ax=None):
    """Draw a 2-D histogram of ``x`` against ``y``.

    Parameters:
        x, y: Equal-length 1-D sequences of values.
        xlabel, ylabel: Axis labels. If either label is 'f' the histogram
            range is [[-64, 64], [-2, 2]], otherwise [[-64, 64], [-64, 64]].
        title: Plot title.
        bins: Bin specification forwarded to ``hist2d``.
        ax: Optional matplotlib Axes to draw into. When omitted, a new 8x6
            figure is created and shown immediately. When given, nothing is
            shown, so the caller can fill several panels and call
            ``plt.show()`` once.
    """
    if xlabel != 'f' and ylabel != 'f':
        hist_range = [[-64, 64], [-64, 64]]
    else:
        hist_range = [[-64, 64], [-2, 2]]

    standalone = ax is None
    if standalone:
        fig, ax = plt.subplots(figsize=(8, 6))
    else:
        fig = ax.figure

    # hist2d returns (counts, xedges, yedges, image); the image feeds the colorbar.
    *_, image = ax.hist2d(x, y, bins=bins, range=hist_range)
    fig.colorbar(image, ax=ax, label='Density')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

    if standalone:
        plt.show()


if __name__ == "__main__":
    import os  # only the script entry point needs it (output directories)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # (x, y, f) lookup table, one row per class. Row order: f varies slowest,
    # then x, then y (same order as the nested loops this replaces).
    x_values = np.arange(64, -65, -2)  # 64 down to -64 in steps of 2
    y_values = np.arange(64, -65, -2)  # 64 down to -64 in steps of 2
    f_values = np.arange(-2, 2.1, 0.1)
    f_grid, x_grid, y_grid = np.meshgrid(f_values, x_values, y_values, indexing="ij")
    dictionary = np.stack([x_grid.ravel(), y_grid.ravel(), f_grid.ravel()], axis=1)

    x = load_nii("dMs_vectors.nii")
    # Encoding Steps
    # x = x[:,:22]

    y = np.squeeze(load_nii("indices_ccmax.nii"))

    input_dim = 62
    num_classes = dictionary.shape[0]  # 65 * 65 * 41 = 173225
    batch_size = 1024  # Reduce for efficiency
    num_epochs = 200
    # Weight of the constraint term. 0.5 is the value the training loop has
    # always applied (the previously declared 0.4 was never used).
    lambda_constraint = 0.5
    model_save_path = "./model/Trans_50.pth"
    results_path = "./results/trans_50.mat"

    # The stored indices are 1-based (the ground-truth lookup has always
    # subtracted 1). Convert once and use the same zero-based labels for both
    # the lookup and training, so a predicted class k means dictionary row k.
    labels = y.astype(np.int64) - 1
    if labels.min() < 0 or labels.max() >= num_classes:
        raise ValueError(
            f"indices_ccmax.nii must hold 1-based indices in [1, {num_classes}]; "
            f"got range [{labels.min() + 1}, {labels.max() + 1}]"
        )
    # Ground-truth (x, y, f) triplets for every sample (no hard-coded count).
    y0 = dictionary[labels]

    # Create the output directories up front so a missing folder fails now,
    # not after training has finished.
    for out_path in (model_save_path, results_path):
        os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)

    # Dataset tensors
    X = torch.tensor(x).float().to(device)
    Y = torch.from_numpy(labels).to(device)

    # Use DataLoader
    dataset = TensorDataset(X, Y)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Initialize Model & Optimizer
    model = TransformerClassifier(input_dim, num_classes).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    cross_entropy_loss = nn.CrossEntropyLoss()

    # Training Loop
    model.train()
    num_batches = len(data_loader)
    for epoch in range(num_epochs):
        total_loss = 0.0
        total_cls_loss = 0.0
        total_constraint_loss = 0.0

        for x_batch, y_batch in data_loader:
            optimizer.zero_grad()

            logits = model(x_batch)  # Forward pass
            cls_loss = cross_entropy_loss(logits, y_batch)  # Classification loss
            constraint_loss = model.compute_constraint_loss(logits, x_batch)  # Constraint loss

            # Total loss with balance factor
            loss = cls_loss + lambda_constraint * constraint_loss
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            total_cls_loss += cls_loss.item()
            total_constraint_loss += float(constraint_loss)

        avg_loss = total_loss / num_batches
        avg_cls_loss = total_cls_loss / num_batches
        # Report the unweighted constraint loss itself, not the weighted
        # remainder of the total.
        avg_constraint_loss = total_constraint_loss / num_batches

        print(
            f"Epoch {epoch + 1}/{num_epochs}, Loss: {avg_loss}, Cls Loss: {avg_cls_loss}, Constraint Loss: {avg_constraint_loss}")

    # Save trained model
    torch.save(model.state_dict(), model_save_path)
    print(f"Model saved to {model_save_path}")

    # Load the trained model for inference
    model.load_state_dict(torch.load(model_save_path, map_location=device))
    model.eval()  # Set model to evaluation mode
    # Float table: an integer cast would truncate the fractional f values.
    xyf_table = torch.tensor(dictionary, dtype=torch.float32).to(device)

    # Define mini-batch size for evaluation (to prevent memory overflow)
    eval_batch_size = 512  # Adjust based on available memory

    # Create DataLoader for evaluation
    eval_dataset = TensorDataset(X)
    eval_loader = DataLoader(eval_dataset, batch_size=eval_batch_size, shuffle=False)

    predicted_xyf_batches = []
    predicted_index_batches = []

    # Perform mini-batch inference on the entire dataset
    with torch.no_grad():  # No gradient computation to save memory
        for batch in eval_loader:
            x_batch = batch[0].to(device)
            logits = model(x_batch)  # Forward pass

            # Softmax is monotonic, so the arg-max of the logits is the
            # most probable class.
            predicted_indices = torch.argmax(logits, dim=1)

            predicted_xyf_batches.append(xyf_table[predicted_indices].cpu())
            predicted_index_batches.append(predicted_indices.cpu())

    # Concatenate all predictions
    final_predictions = torch.cat(predicted_xyf_batches).numpy()
    final_predictions2 = torch.cat(predicted_index_batches).numpy()

    # Stored 1-based, i.e. in the same convention as indices_ccmax.nii.
    savemat(results_path, {"distribution_XYF_DL": final_predictions2 + 1})

    # Extract x, y, f values
    x_pred_1, y_pred_1, f_pred_1 = final_predictions[:, 0], final_predictions[:, 1], final_predictions[:, 2]
    x_pred_2, y_pred_2, f_pred_2 = y0[:, 0], y0[:, 1], y0[:, 2]

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # First row: model predictions (final_predictions)
    plot_2d_histogram(x_pred_1, y_pred_1, 'x', 'y', '2D Distribution of x and y (final)', ax=axes[0, 0])
    plot_2d_histogram(x_pred_1, f_pred_1, 'x', 'f', '2D Distribution of x and f (final)', ax=axes[0, 1])
    plot_2d_histogram(y_pred_1, f_pred_1, 'y', 'f', '2D Distribution of y and f (final)', ax=axes[0, 2])

    # Second row: ground truth (y0)
    plot_2d_histogram(x_pred_2, y_pred_2, 'x', 'y', '2D Distribution of x and y (y0)', ax=axes[1, 0])
    plot_2d_histogram(x_pred_2, f_pred_2, 'x', 'f', '2D Distribution of x and f (y0)', ax=axes[1, 1])
    plot_2d_histogram(y_pred_2, f_pred_2, 'y', 'f', '2D Distribution of y and f (y0)', ax=axes[1, 2])

    plt.tight_layout()  # Adjust the layout
    plt.show()  # Make sure the figure is displayed