```diff
@@ -14,6 +14,7 @@
 from torch import Tensor, int32
 from torch.amp import autocast
 
+import einx
 from einops import rearrange, pack, unpack
 
 import random
@@ -45,11 +46,15 @@ def unpack_one(t, ps, pattern):
 
 # tensor helpers
 
-def round_ste(z: Tensor) -> Tensor:
+def round_ste(z):
     """Round with straight through gradients."""
     zhat = z.round()
     return z + (zhat - z).detach()
 
+def floor_ste(z):
+    zhat = z.floor()
+    return z + (zhat - z).detach()
+
 # main class
 
 class FSQ(Module):
```
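Note on the new helper: `floor_ste` is the floor counterpart of `round_ste`, a straight-through estimator that floors on the forward pass while letting gradients flow through unchanged. A minimal standalone sketch of the behavior (plain PyTorch, helper copied verbatim from the diff):

```python
import torch

def floor_ste(z):
    zhat = z.floor()
    return z + (zhat - z).detach()

z = torch.tensor([0.3, 1.7], requires_grad = True)
out = floor_ste(z)       # forward pass: tensor([0., 1.]), the floored values
out.sum().backward()
print(z.grad)            # tensor([1., 1.]): backward is the identity, as if floor were skipped
```

This is what lets the `bracket = floor_ste(bracket)` call added to `symmetry_preserving_bound` in the next hunk stay differentiable.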
```diff
@@ -127,41 +132,43 @@ def symmetry_preserving_bound(self, z):
         levels_minus_1 = (self._levels - 1)
         scale = 2.0 / levels_minus_1
         bracket = (levels_minus_1 * (torch.tanh(z) + 1) / 2.0) + 0.5
+        bracket = floor_ste(bracket)
         return scale * bracket - 1.0
 
-    def quantize(self, z, preserve_symmetry = False):
+    def quantize(self, z):
         """ Quantizes z, returns quantized zhat, same shape as z. """
 
+        preserve_symmetry = self.preserve_symmetry
         half_width = self._levels // 2
 
-        if self.training:
-            unquantized = z
-
-            # determine where to quantize elementwise
-
-            quantize_mask = torch.bernoulli(
-                torch.full([z.shape[0], 1, 1, 1], self.noise_dropout, device = z.device)
-            ).bool().expand_as(z)
-
-            if preserve_symmetry:
-                quantized = round_ste(self.symmetry_preserving_bound(z)) / half_width
-            else:
-                quantized = round_ste(self.bound(z)) / half_width
-            quantized = torch.where(quantize_mask, unquantized, quantized)
-
-            # determine where to add a random offset elementwise
-
-            offset_mask = torch.bernoulli(
-                torch.full([z.shape[0], 1, 1, 1], self.noise_dropout, device = z.device)
-            ).bool().expand_as(z)
-
-            offset = (torch.rand_like(z) - 0.5) / half_width
-            quantized = torch.where(offset_mask, unquantized + offset, quantized)
-        elif preserve_symmetry:
+        if preserve_symmetry:
             quantized = round_ste(self.symmetry_preserving_bound(z)) / half_width
         else:
             quantized = round_ste(self.bound(z)) / half_width
 
+        if not self.training:
+            return quantized
+
+        batch, device, noise_dropout = z.shape[0], z.device, self.noise_dropout
+        unquantized = z
+
+        # determine where to quantize elementwise
+
+        quantize_mask = torch.bernoulli(
+            torch.full((batch,), noise_dropout, device = device)
+        ).bool()
+
+        quantized = einx.where('b, b ..., b ...', quantize_mask, unquantized, quantized)
+
+        # determine where to add a random offset elementwise
+
+        offset_mask = torch.bernoulli(
+            torch.full((batch,), noise_dropout, device = device)
+        ).bool()
+
+        offset = (torch.rand_like(z) - 0.5) / half_width
+        quantized = einx.where('b, b ..., b ...', offset_mask, unquantized + offset, quantized)
+
         return quantized
 
     def _scale_and_shift(self, zhat_normalized):
```
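The rewritten noise dropout draws its Bernoulli masks per batch sample (shape `(batch,)`) instead of materializing a `[b, 1, 1, 1]` mask and `expand_as`-ing it, which had hard-coded a 4-D input. `einx.where('b, b ..., b ...')` then broadcasts the per-sample mask over arbitrary trailing dimensions (plain `torch.where` would mis-align a `(batch,)` mask, since PyTorch broadcasts from the trailing dimension). A toy sketch of that masking pattern (shapes and values illustrative, not the FSQ module itself):

```python
import torch
import einx

batch, dim = 4, 3
noise_dropout = 0.5

unquantized = torch.randn(batch, dim)
quantized = unquantized.round()

# one Bernoulli draw per sample: True means "leave this sample unquantized"
mask = torch.bernoulli(torch.full((batch,), noise_dropout)).bool()

# broadcast the (batch,) mask over all trailing dims of the (batch, dim) tensors
mixed = einx.where('b, b ..., b ...', mask, unquantized, quantized)
print(mixed.shape)   # torch.Size([4, 3])
```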
```diff
@@ -242,7 +249,7 @@ def forward(self, z):
         if force_f32 and orig_dtype not in self.allowed_dtypes:
             z = z.float()
 
-        codes = self.quantize(z, preserve_symmetry=self.preserve_symmetry)
+        codes = self.quantize(z)
 
         # returning indices could be optional
 
```
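For reference, calling code no longer passes `preserve_symmetry` per call; `quantize` reads it from the module. A minimal usage sketch, assuming this diff is against the `FSQ` class exported by vector-quantize-pytorch (levels and shapes here are illustrative):

```python
import torch
from vector_quantize_pytorch import FSQ  # assumed package; adjust to the actual import path

quantizer = FSQ(levels = [8, 5, 5, 5])   # one quantization level count per code dimension

x = torch.randn(1, 1024, 4)              # (batch, seq, dim) with dim == len(levels)
xhat, indices = quantizer(x)             # forward() returns the codes and their indices

assert xhat.shape == x.shape
```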
|