Skip to content
Snippets Groups Projects
Commit f70d8f40 authored by Gregor Kobsik's avatar Gregor Kobsik
Browse files

Refactor discrete transformation module.

Renames the helper class to TrinaryRepresentation.
Does not encode redundant tokens in the last layer anymore.
parent 3abbd8b2
No related branches found
No related tags found
No related merge requests found
from utils.hsp_loader import load_hsp, load_chair, load_airplane
from utils.kd_tree_utils import RepresentationTransformator, _directions
from utils.kd_tree_utils import TrinaryRepresentation, _directions
from utils.kd_tree import kdTree
__all__ = [
......@@ -7,6 +7,6 @@ __all__ = [
"load_chair",
"load_airplane",
"_directions",
"RepresentationTransformator",
"TrinaryRepresentation",
"kdTree",
]
......@@ -7,7 +7,7 @@ def _directions(spatial_dim):
return np.array(list(itertools.product([-1, 1], repeat=spatial_dim)))
class RepresentationTransformator():
class TrinaryRepresentation():
def __init__(self, spatial_dim=3):
""" Provides a transformation wrapper between the iterative and successive sequence format.
......@@ -36,7 +36,7 @@ class RepresentationTransformator():
repr = [c - 1 for c in seq]
return int("".join(map(str, repr)), base=3) + 1
def encode_trinary(self, value, depth, position):
    """ Encode the deepest layer of the sequence in trinary representation.

    Every 'mixed' token (value 2) in the penultimate layer owns the next
    `self.num_tokens` child tokens of the last layer; those children are
    packed into a single integer via `self.tri_to_dec` and appended to the
    sequence at the parent's position. Tokens 0 (padding), 1 (all free) and
    3 (all full) have no children in the last layer, so nothing is emitted
    for them — redundant tokens are no longer encoded.

    Args:
        value: Numpy array holding the value token sequence with shape (S).
        depth: Numpy array holding the depth token sequence with shape (S).
        position: Numpy array holding the position token sequence with shape (S, spatial_dim).

    Return:
        A tuple of (value, depth, position), where the last layer is encoded in trinary representation.
    """
    max_depth = np.max(depth)
    value_new = []
    pos_new = []

    # extract the two last layers for processing
    last_layer = value[depth == max_depth]
    penultimate_layer = value[depth == max_depth - 1]
    penultimate_pos = position[depth == max_depth - 1]

    # iterate over the penultimate layer to encode the last layer:
    # each 'mixed' parent consumes the leading `num_tokens` children of the
    # last layer and is replaced by their packed trinary integer
    for i, token in enumerate(penultimate_layer):
        if token == 2:
            value_new += [self.tri_to_dec(last_layer[:self.num_tokens])]
            last_layer = last_layer[self.num_tokens:]
            pos_new += [penultimate_pos[i]]

    # discard the last layer, as it is now packed into 'value_new'
    value = value[depth != max_depth]
    position = position[depth != max_depth]
    depth = depth[depth != max_depth]

    # append the packed tokens; they carry the depth of the removed layer
    value = np.concatenate([value, value_new])
    position = np.concatenate([position, pos_new])
    depth = np.concatenate([depth, len(value_new) * [max_depth]])

    return value, depth, position
def encode_trinary_pytorch(self, value, depth, position):
    """ Encode the deepest layer in trinary representation. Provides a wrapper for pytorch tensors.

    Moves the tensors to the CPU, performs the numpy-based encoding and
    returns new long tensors on the original device.

    Args:
        value: Pytorch tensor holding the value token sequence with shape (S).
        depth: Pytorch tensor holding the depth token sequence with shape (S).
        position: Pytorch tensor holding the position token sequence with shape (S, spatial_dim).

    Return:
        A tuple of (value, depth, position), where the last layer is encoded in trinary representation.
    """
    device = value.device

    value = value.cpu().numpy()
    depth = depth.cpu().numpy()
    position = position.cpu().numpy()

    # BUG FIX: the commit renamed `successive_to_iterative` to
    # `encode_trinary`, but this wrapper still called the old name,
    # which would raise an AttributeError.
    value, depth, position = self.encode_trinary(value, depth, position)

    value = torch.tensor(value, dtype=torch.long, device=device)
    depth = torch.tensor(depth, dtype=torch.long, device=device)
    position = torch.tensor(position, dtype=torch.long, device=device)

    return value, depth, position
def decode_trinary(self, value, depth, position):
    """ Decode the trinary-packed last layer of the sequence back into tokens.

    Inverse of `encode_trinary`: every packed integer in the deepest layer is
    expanded via `self.dec_to_tri` into `self.num_tokens` child tokens, whose
    positions are offset from the parent position along `self.dirs`.

    Args:
        value: Numpy array holding the value token sequence with shape (S).
        depth: Numpy array holding the depth token sequence with shape (S).
        position: Numpy array holding the position token sequence with shape (S, spatial_dim).

    Return:
        A tuple of (value, depth, position), where the last layer is decoded from trinary representation.
    """
    max_depth = np.max(depth)
    value_new = np.array([])
    depth_new = np.array([])
    pos_new = np.array([])

    # position delta of a child token relative to its parent
    # (assumes the same resolution for each spatial dimension)
    pos_step = position[0][0] // 2**(max_depth - 1)

    # retrieve values and positions of the packed last layer
    last_layer_value = value[depth == max_depth]
    last_layer_pos = position[depth == max_depth]

    # expand each packed token into its `num_tokens` children
    for i in range(len(last_layer_value)):
        value_new = np.concatenate([value_new, self.dec_to_tri(last_layer_value[i])])
        depth_new = np.concatenate([depth_new, self.num_tokens * [max_depth]])
        n_pos = pos_step * self.dirs + last_layer_pos[i]
        pos_new = np.concatenate([pos_new, n_pos]) if pos_new.size != 0 else n_pos

    # discard the packed layer; it is replaced by the decoded tokens
    value = value[depth != max_depth]
    position = position[depth != max_depth]
    depth = depth[depth != max_depth]

    # concatenate sequences and return
    value = np.concatenate([value, value_new])
    depth = np.concatenate([depth, depth_new])
    position = np.concatenate([position, pos_new])

    return value, depth, position


# Backward-compatible alias: the commit introduced this method under the
# misspelled name 'decode_trianry'; keep it callable under that name too.
decode_trianry = decode_trinary
# NOTE(review): this span is a rendered diff with the +/- markers stripped —
# the next two 'def' lines are the old and the new signature of the same
# method; only the second one exists after this commit. The method's tail
# (position conversion / return) is cut off by the page, so only comments
# are added here.
def iterative_to_successive_pytorch(self, value, depth, position, target):
def decode_trinary_pytorch(self, value, depth, position):
""" Transforms given iterative sequence into an successive sequence representation. Provides a wrapper for
pytorch tensors.
......@@ -152,21 +152,17 @@
value: Pytorch tensor holding the value token sequence with shape (S), with token values in [0, 3].
depth: Pytorch tensor holding the depth token sequence with shape (S).
position: Pytorch tensor holding the position token sequence with shape (S, spatial_dim).
target: Pytorch tensor holding the value token sequence of the next layer with the shape (S), with token
values in [0, 3**2**spatial_dim].
Return:
A tuple of (value, depth, position), where the target sequence is encoded in the last depth layer of the
new sequence.
A tuple of (value, depth, position), where the last layer is decoded from trinary representation.
"""
# remember the original device so results can be moved back after the
# CPU/numpy round-trip
device = value.device
value = value.cpu().numpy()
depth = depth.cpu().numpy()
position = position.cpu().numpy()
target = target.cpu().numpy()
value, depth, position = self.iterative_to_successive(value, depth, position, target)
# NOTE(review): the new line below still calls `iterative_to_successive`,
# but this commit renamed that method to `decode_trianry` — this looks like
# a stale call that would raise AttributeError; confirm against the full file.
value, depth, position = self.iterative_to_successive(value, depth, position)
value = torch.tensor(value, dtype=torch.long, device=device)
depth = torch.tensor(depth, dtype=torch.long, device=device)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment