Commit fa2211d

do not apply the straight-through estimator nor the rotation trick if the input does not require grad, to make Genie2 cleaner

1 parent c243e83
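
For context, the straight-through estimator (STE) being gated here returns the quantized values in the forward pass while copying gradients onto the input, which only matters when the input participates in autograd. A minimal standalone sketch of that behavior (not the library's code path):

import torch

x = torch.randn(4, 8, requires_grad = True)
quantize = torch.randn(4, 8)    # stand-in for the nearest codebook vectors

# straight-through estimator: the forward value is `quantize`, but the
# Jacobian w.r.t. `x` is the identity, so gradients skip the
# non-differentiable codebook lookup
out = x + (quantize - x).detach()
out.sum().backward()

assert torch.allclose(out, quantize)
assert torch.allclose(x.grad, torch.ones_like(x))

When `x.requires_grad` is False, this reparameterization (and the heavier rotation trick) buys nothing, which is what the change below exploits.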

File tree

3 files changed: +17 −8 lines

pyproject.toml

+1 −1

@@ -1,6 +1,6 @@
 [project]
 name = "vector-quantize-pytorch"
-version = "1.21.0"
+version = "1.21.1"
 description = "Vector Quantization - Pytorch"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }

tests/test_readme.py

+7 −1

@@ -6,9 +6,11 @@ def exists(v):
 
 @pytest.mark.parametrize('use_cosine_sim', (True, False))
 @pytest.mark.parametrize('rotation_trick', (True, False))
+@pytest.mark.parametrize('input_requires_grad', (True, False))
 def test_vq(
     use_cosine_sim,
-    rotation_trick
+    rotation_trick,
+    input_requires_grad
 ):
     from vector_quantize_pytorch import VectorQuantize
 
@@ -22,6 +24,10 @@ def test_vq(
     )
 
     x = torch.randn(1, 1024, 256)
+
+    if input_requires_grad:
+        x.requires_grad_()
+
     quantized, indices, commit_loss = vq(x)
 
 def test_vq_eval():
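
The new parametrization exercises both the grad and no-grad paths. A minimal standalone reproduction of the grad case (the `rotation_trick` constructor argument is assumed from the parametrize names above):

import torch
from vector_quantize_pytorch import VectorQuantize

# `rotation_trick` mirrors the parametrize flag above (assumed constructor
# argument); `dim` and `codebook_size` follow the repo's README usage
vq = VectorQuantize(dim = 256, codebook_size = 512, rotation_trick = True)

x = torch.randn(1, 1024, 256)
x.requires_grad_()    # the newly tested case

quantized, indices, commit_loss = vq(x)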

vector_quantize_pytorch/vector_quantize_pytorch.py

+9 −6

@@ -1023,7 +1023,7 @@ def forward(
         return_loss_breakdown = False,
         codebook_transform_fn: Callable | None = None
     ):
-        orig_input = x
+        orig_input, input_requires_grad = x, x.requires_grad
 
         # handle masking, either passed in as `mask` or `lens`
 
@@ -1117,11 +1117,14 @@ def forward(
 
         commit_quantize = maybe_detach(quantize)
 
-        if self.rotation_trick:
-            quantize = rotate_to(x, quantize)
-        else:
-            # standard STE to get gradients through VQ layer.
-            quantize = x + (quantize - x).detach()
+        # spare rotation trick calculation if inputs do not need gradients
+
+        if input_requires_grad:
+            if self.rotation_trick:
+                quantize = rotate_to(x, quantize)
+            else:
+                # standard STE to get gradients through VQ layer.
+                quantize = x + (quantize - x).detach()
 
         if self.sync_update_v > 0.:
             # (21) in https://minyoungg.github.io/vqtorch/assets/draft_050523.pdf
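
The gate is safe for inference because, when the input does not require grad, the STE branch is a value-level no-op: `x + (quantize - x)` is just `quantize`, and no autograd graph is built. A quick standalone check of that claim:

import torch

x = torch.randn(4, 8)           # inference-style input, requires_grad == False
quantize = torch.randn(4, 8)    # stand-in for the nearest codebook vectors

ste = x + (quantize - x).detach()

assert torch.allclose(ste, quantize)    # same values as skipping the STE entirely
assert not ste.requires_grad            # and no graph to build or free

The rotation trick branch likewise only changes gradient flow (its forward value is the quantized vector by construction), so skipping both saves work without changing inference outputs.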
