Breaking Changes. Buggy commit.

d8cbe275 · Sören Toschek · 14f08471 · d8cbe275 · d8cbe275 · d8cbe275
Commit d8cbe275 authored 2 months ago by Sören Toschek
--- a/cascading_transformer_training.py
+++ b/cascading_transformer_training.py
@@ -34,6 +34,7 @@ if train_0:
        5e-4,
        500,
        False,
+        True,
        #load("./hierarchicalTransformer/checkpoints/high_res_lab_1_final.pt",weights_only=True),
        #load("./featureVector/checkpoints/high_res_lab_1_final.pt",weights_only=True),
        batch_size=4
@@ -45,12 +46,13 @@ if train_0:
 if train_1:
    train_layer(
        2,
-        "new_loss_part_2",
+        "new_loss_part_4",
        5e-4,
        100,
        True,
-        load("./hierarchicalTransformer/checkpoints/new_loss_final.pt",weights_only=True),
-        load("./featureVector/checkpoints/new_loss_final.pt",weights_only=True),
+        False,
+        load("./hierarchicalTransformer/checkpoints/new_loss_part_3_final.pt",weights_only=True),
+        load("./featureVector/checkpoints/new_loss_part_3_final.pt",weights_only=True),
        )
    
    

--- a/check.ipynb
+++ b/check.ipynb
@@ -38,14 +38,17 @@
   "metadata": {},
   "outputs": [
    {
-     "ename": "NameError",
-     "evalue": "name 'model' is not defined",
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: './hierarchicalTransformer/checkpoints/first_step_test_499.pt'",
     "output_type": "error",
     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[2], line 7\u001b[0m\n\u001b[1;32m      4\u001b[0m model_0\u001b[38;5;241m.\u001b[39mload_state_dict(torch\u001b[38;5;241m.\u001b[39mload(checkpoint_path,weights_only\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m))\n\u001b[1;32m      6\u001b[0m model_0\u001b[38;5;241m.\u001b[39mcuda()\n\u001b[0;32m----> 7\u001b[0m grids \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241m.\u001b[39msample(\u001b[38;5;241m4\u001b[39m)\n\u001b[1;32m      9\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m4\u001b[39m):\n\u001b[1;32m     10\u001b[0m     plot \u001b[38;5;241m=\u001b[39m k3d\u001b[38;5;241m.\u001b[39mplot()\n",
-      "\u001b[0;31mNameError\u001b[0m: name 'model' is not defined"
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[2], line 4\u001b[0m\n\u001b[0;32m      1\u001b[0m checkpoint_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./hierarchicalTransformer/checkpoints/first_step_test_499.pt\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m      3\u001b[0m model_0 \u001b[38;5;241m=\u001b[39m firstStepNetwork(\u001b[38;5;241m8\u001b[39m)\n\u001b[1;32m----> 4\u001b[0m model_0\u001b[38;5;241m.\u001b[39mload_state_dict(\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheckpoint_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43mweights_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m)\n\u001b[0;32m      6\u001b[0m model_0\u001b[38;5;241m.\u001b[39mcuda()\n\u001b[0;32m      7\u001b[0m grids \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39msample(\u001b[38;5;241m4\u001b[39m)\n",
+      "File \u001b[1;32md:\\VoxelTransformer\\.venv\\Lib\\site-packages\\torch\\serialization.py:791\u001b[0m, in \u001b[0;36mload\u001b[1;34m(f, map_location, pickle_module, weights_only, **pickle_load_args)\u001b[0m\n\u001b[0;32m    788\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mencoding\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m pickle_load_args\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[0;32m    789\u001b[0m     pickle_load_args[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mencoding\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m--> 791\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43m_open_file_like\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m opened_file:\n\u001b[0;32m    792\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m _is_zipfile(opened_file):\n\u001b[0;32m    793\u001b[0m         \u001b[38;5;66;03m# The zipfile reader is going to advance the current file position.\u001b[39;00m\n\u001b[0;32m    794\u001b[0m         \u001b[38;5;66;03m# If we want to actually tail call to torch.jit.load, we need to\u001b[39;00m\n\u001b[0;32m    795\u001b[0m         \u001b[38;5;66;03m# reset back to the original position.\u001b[39;00m\n\u001b[0;32m    796\u001b[0m         orig_position \u001b[38;5;241m=\u001b[39m opened_file\u001b[38;5;241m.\u001b[39mtell()\n",
+      "File \u001b[1;32md:\\VoxelTransformer\\.venv\\Lib\\site-packages\\torch\\serialization.py:271\u001b[0m, in \u001b[0;36m_open_file_like\u001b[1;34m(name_or_buffer, mode)\u001b[0m\n\u001b[0;32m    269\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_open_file_like\u001b[39m(name_or_buffer, mode):\n\u001b[0;32m    270\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m _is_path(name_or_buffer):\n\u001b[1;32m--> 271\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_open_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    272\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    273\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01min\u001b[39;00m mode:\n",
+      "File \u001b[1;32md:\\VoxelTransformer\\.venv\\Lib\\site-packages\\torch\\serialization.py:252\u001b[0m, in \u001b[0;36m_open_file.__init__\u001b[1;34m(self, name, mode)\u001b[0m\n\u001b[0;32m    251\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, name, mode):\n\u001b[1;32m--> 252\u001b[0m     \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m)\n",
+      "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: './hierarchicalTransformer/checkpoints/first_step_test_499.pt'"
     ]
    }
   ],
@@ -75,22 +78,101 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
-     "ename": "FileNotFoundError",
-     "evalue": "[Errno 2] No such file or directory: './featureVector/checkpoints/new_loss_2_final.pt'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[4], line 10\u001b[0m\n\u001b[1;32m      7\u001b[0m grid_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m8\u001b[39m\n\u001b[1;32m      9\u001b[0m featureVector \u001b[38;5;241m=\u001b[39m MasterFeatureVector(\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m---> 10\u001b[0m featureVector\u001b[38;5;241m.\u001b[39mload_state_dict(\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfeature_vector_ckpt_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43mweights_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m)\n\u001b[1;32m     12\u001b[0m \u001b[38;5;66;03m#Reminder: upsamplingVoxelTransformer resolution is somewhat independent of \"grid_size\", due to the chunking.\u001b[39;00m\n\u001b[1;32m     13\u001b[0m upsampling_network \u001b[38;5;241m=\u001b[39m UpsamplingVoxelTransformer(\u001b[38;5;241m8\u001b[39m,use_bce_loss\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
-      "File \u001b[0;32m~/Documents/VoxelTransformer/.venv/lib/python3.11/site-packages/torch/serialization.py:1319\u001b[0m, in \u001b[0;36mload\u001b[0;34m(f, map_location, pickle_module, weights_only, mmap, **pickle_load_args)\u001b[0m\n\u001b[1;32m   1316\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencoding\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m pickle_load_args\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m   1317\u001b[0m     pickle_load_args[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mencoding\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1319\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43m_open_file_like\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m opened_file:\n\u001b[1;32m   1320\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m _is_zipfile(opened_file):\n\u001b[1;32m   1321\u001b[0m         \u001b[38;5;66;03m# The zipfile reader is going to advance the current file position.\u001b[39;00m\n\u001b[1;32m   1322\u001b[0m         \u001b[38;5;66;03m# If we want to actually tail call to torch.jit.load, we need to\u001b[39;00m\n\u001b[1;32m   1323\u001b[0m         \u001b[38;5;66;03m# reset back to the original position.\u001b[39;00m\n\u001b[1;32m   1324\u001b[0m         orig_position \u001b[38;5;241m=\u001b[39m opened_file\u001b[38;5;241m.\u001b[39mtell()\n",
-      "File \u001b[0;32m~/Documents/VoxelTransformer/.venv/lib/python3.11/site-packages/torch/serialization.py:659\u001b[0m, in \u001b[0;36m_open_file_like\u001b[0;34m(name_or_buffer, mode)\u001b[0m\n\u001b[1;32m    657\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_open_file_like\u001b[39m(name_or_buffer, mode):\n\u001b[1;32m    658\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m _is_path(name_or_buffer):\n\u001b[0;32m--> 659\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_open_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    660\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    661\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m mode:\n",
-      "File \u001b[0;32m~/Documents/VoxelTransformer/.venv/lib/python3.11/site-packages/torch/serialization.py:640\u001b[0m, in \u001b[0;36m_open_file.__init__\u001b[0;34m(self, name, mode)\u001b[0m\n\u001b[1;32m    639\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, name, mode):\n\u001b[0;32m--> 640\u001b[0m     \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m)\n",
-      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: './featureVector/checkpoints/new_loss_2_final.pt'"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2000\n",
+      "torch.Size([16, 16, 16])\n",
+      "torch.Size([32, 32, 32])\n",
+      "torch.Size([32, 32, 32])\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c2cca96f515c4b4bb48cb9573a589333",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([16, 16, 16])\n",
+      "torch.Size([32, 32, 32])\n",
+      "torch.Size([32, 32, 32])\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "13d12c13f7024d5a96cd6c3bf5230743",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([16, 16, 16])\n",
+      "torch.Size([32, 32, 32])\n",
+      "torch.Size([32, 32, 32])\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "84782414439e4175884839b7aee85d06",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([16, 16, 16])\n",
+      "torch.Size([32, 32, 32])\n",
+      "torch.Size([32, 32, 32])\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7e6bdeffa5644b4bb01410dfe946224b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
    }
   ],
   "source": [
@@ -100,8 +182,8 @@
    "#debug voxel upscaling:\n",
    "\n",
    "\n",
-    "checkpoint_path = \"./hierarchicalTransformer/checkpoints/new_loss_part_2_final.pt\"\n",
-    "feature_vector_ckpt_path = \"./featureVector/checkpoints/new_loss_part_2_final.pt\"\n",
+    "checkpoint_path = \"./hierarchicalTransformer/checkpoints/new_loss_part_4_final.pt\"\n",
+    "feature_vector_ckpt_path = \"./featureVector/checkpoints/new_loss_part_4_final.pt\"\n",
    "\n",
    "grid_size = 8\n",
    "\n",
@@ -328,7 +410,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.11.2"
+   "version": "3.11.0rc2"
  }
 },
 "nbformat": 4,

 %% Cell type:markdown id: tags:

 A general script to visually check various aspects of the voxel generation pipeline, such as:
 1. Correct mapping & inverse mapping between binary voxel grids and token/codeword grids.
 2. Plausible upscaling of shapes.
 3. Classifier of voxel shapes.

 %% Cell type:code id: tags:

 ``` python
 from codewords import *
 from dataLoading import dataset
 import k3d
 import numpy
 from dotenv import load_dotenv

 from codewords import *
 from dataLoading import dataset
 import k3d
 import numpy
 from upsampling_transformer import *
 from constants import dim_embedding
 from feature_vector import MasterFeatureVector
 from first_step_transformer import firstStepNetwork
 ```

 %% Cell type:code id: tags:

 ``` python
 # Debug the first step of the chain: load a first-step checkpoint, draw four
 # samples from it and show each one in a k3d plot.
 checkpoint_path = "./hierarchicalTransformer/checkpoints/first_step_test_499.pt"

 model_0 = firstStepNetwork(8)
 model_0.load_state_dict(torch.load(checkpoint_path,weights_only=True))

 model_0.cuda()
 # Sample from the network built in this cell. The previous code called
 # `model.sample(4)`; `model` is not defined here (NameError on a fresh kernel)
 # and, once the upscaling cell has run, it names a different model entirely.
 grids = model_0.sample(4)

 for i in range(4):
    plot = k3d.plot()
    # Nearly transparent 8x8x8 block of ones drawn as a reference volume.
    plot += k3d.voxels(numpy.ones([8,8,8]),opacity=0.01)
    plot += k3d.voxels(grids[i].cpu())
    plot.display()
 #Debug first step of the chain.
 ```

 %% Output

    ---------------------------------------------------------------------------
-    NameError                                 Traceback (most recent call last)
-Cell     In[2], line 7
-          4 model_0.load_state_dict(torch.load(checkpoint_path,weights_only=True))
+    FileNotFoundError                         Traceback (most recent call last)
+Cell     In[2], line 4
+          1 checkpoint_path = "./hierarchicalTransformer/checkpoints/first_step_test_499.pt"
+          3 model_0 = firstStepNetwork(8)
+    ----> 4 model_0.load_state_dict(torch.load(checkpoint_path,weights_only=True))
          6 model_0.cuda()
-    ----> 7 grids = model.sample(4)
-          9 for i in range(4):
-         10     plot = k3d.plot()
-    NameError: name 'model' is not defined
+          7 grids = model.sample(4)
+File     d:\VoxelTransformer\.venv\Lib\site-packages\torch\serialization.py:791, in load(f, map_location, pickle_module, weights_only, **pickle_load_args)
+        788 if 'encoding' not in pickle_load_args.keys():
+        789     pickle_load_args['encoding'] = 'utf-8'
+    --> 791 with _open_file_like(f, 'rb') as opened_file:
+        792     if _is_zipfile(opened_file):
+        793         # The zipfile reader is going to advance the current file position.
+        794         # If we want to actually tail call to torch.jit.load, we need to
+        795         # reset back to the original position.
+        796         orig_position = opened_file.tell()
+File     d:\VoxelTransformer\.venv\Lib\site-packages\torch\serialization.py:271, in _open_file_like(name_or_buffer, mode)
+        269 def _open_file_like(name_or_buffer, mode):
+        270     if _is_path(name_or_buffer):
+    --> 271         return _open_file(name_or_buffer, mode)
+        272     else:
+        273         if 'w' in mode:
+File     d:\VoxelTransformer\.venv\Lib\site-packages\torch\serialization.py:252, in _open_file.__init__(self, name, mode)
+        251 def __init__(self, name, mode):
+    --> 252     super().__init__(open(name, mode))
+    FileNotFoundError: [Errno 2] No such file or directory: './hierarchicalTransformer/checkpoints/first_step_test_499.pt'

 %% Cell type:markdown id: tags:

 Fazit: sieht gut aus.

 %% Cell type:code id: tags:

 ``` python



 #debug voxel upscaling:


-checkpoint_path = "./hierarchicalTransformer/checkpoints/new_loss_part_2_final.pt"
-feature_vector_ckpt_path = "./featureVector/checkpoints/new_loss_part_2_final.pt"
+checkpoint_path = "./hierarchicalTransformer/checkpoints/new_loss_part_4_final.pt"
+feature_vector_ckpt_path = "./featureVector/checkpoints/new_loss_part_4_final.pt"

 grid_size = 8

 featureVector = MasterFeatureVector(1)
 featureVector.load_state_dict(torch.load(feature_vector_ckpt_path,weights_only=True))

 #Reminder: upsamplingVoxelTransformer resolution is somewhat independent of "grid_size", due to the chunking.
 upsampling_network = UpsamplingVoxelTransformer(8,use_bce_loss=True)
 upsampling_network.load_state_dict(torch.load(checkpoint_path,weights_only=True))

 model = UpsampleShape(
    upsampling_network,
    featureVector,
    1
    )

 n_samples = 4

 print(len(dataset))
 random_indices = torch.randint(0,50,[n_samples])


 batch = dataset[list(random_indices)]



 model.cuda()



 low_res = DownScaling(2)

 low_res_grids = low_res.forward(batch,flatten=False)

 result = model.sample(low_res_grids.float().cuda())



 numpy_voxels = result[0].detach()

 grid_size_low = 16
 high_res = 2*grid_size_low


 upsampler = nn.Upsample(scale_factor = 2)


 for i in range(n_samples):
    stackable_low_res = upsampler.forward(low_res_grids[i,:,:,:].cpu().float().unsqueeze(0).unsqueeze(0)).squeeze(0).squeeze(0)
    print(low_res_grids[i,:,:,:].shape)
    print(result[i,:,:,:].shape)
    print(stackable_low_res.shape)

    combined = torch.cat((stackable_low_res,result[i,:,:,:].cpu(),batch[i].cpu()),dim=2)

    plot = k3d.plot()
    plot += k3d.voxels(numpy.ones([grid_size_low,grid_size_low,grid_size_low]),opacity=0.01)
    plot += k3d.voxels(combined.cpu())

    plot.display()

    """
    #low-res plot.
    plot = k3d.plot()
    plot += k3d.voxels(numpy.ones([grid_size_low,grid_size_low,grid_size_low]),opacity=0.01)
    plot += k3d.voxels(low_res_grids[i,:,:,:].cpu())

    plot.display()


    plot = k3d.plot()
    plot += k3d.voxels(numpy.ones([high_res,high_res,high_res]),opacity=0.01)
    plot += k3d.voxels(result[i,:,:,:].cpu())
    #plot += grid_filler
    plot.display()

    #ground truth:

    plot = k3d.plot()
    plot += k3d.voxels(numpy.ones([high_res,high_res,high_res]),opacity=0.01)
    plot += k3d.voxels(batch[i].cpu())
    plot.display()
    """
 ```

 %% Output

-    ---------------------------------------------------------------------------
-    FileNotFoundError                         Traceback (most recent call last)
-Cell     In[4], line 10
-          7 grid_size = 8
-          9 featureVector = MasterFeatureVector(1)
-    ---> 10 featureVector.load_state_dict(torch.load(feature_vector_ckpt_path,weights_only=True))
-         12 #Reminder: upsamplingVoxelTransformer resolution is somewhat independent of "grid_size", due to the chunking.
-         13 upsampling_network = UpsamplingVoxelTransformer(8,use_bce_loss=True)
-File     ~/Documents/VoxelTransformer/.venv/lib/python3.11/site-packages/torch/serialization.py:1319, in load(f, map_location, pickle_module, weights_only, mmap, **pickle_load_args)
-       1316 if "encoding" not in pickle_load_args.keys():
-       1317     pickle_load_args["encoding"] = "utf-8"
-    -> 1319 with _open_file_like(f, "rb") as opened_file:
-       1320     if _is_zipfile(opened_file):
-       1321         # The zipfile reader is going to advance the current file position.
-       1322         # If we want to actually tail call to torch.jit.load, we need to
-       1323         # reset back to the original position.
-       1324         orig_position = opened_file.tell()
-File     ~/Documents/VoxelTransformer/.venv/lib/python3.11/site-packages/torch/serialization.py:659, in _open_file_like(name_or_buffer, mode)
-        657 def _open_file_like(name_or_buffer, mode):
-        658     if _is_path(name_or_buffer):
-    --> 659         return _open_file(name_or_buffer, mode)
-        660     else:
-        661         if "w" in mode:
-File     ~/Documents/VoxelTransformer/.venv/lib/python3.11/site-packages/torch/serialization.py:640, in _open_file.__init__(self, name, mode)
-        639 def __init__(self, name, mode):
-    --> 640     super().__init__(open(name, mode))
-    FileNotFoundError: [Errno 2] No such file or directory: './featureVector/checkpoints/new_loss_2_final.pt'
+    2000
+    torch.Size([16, 16, 16])
+    torch.Size([32, 32, 32])
+    torch.Size([32, 32, 32])
+
+
+    torch.Size([16, 16, 16])
+    torch.Size([32, 32, 32])
+    torch.Size([32, 32, 32])
+
+
+    torch.Size([16, 16, 16])
+    torch.Size([32, 32, 32])
+    torch.Size([32, 32, 32])
+
+
+    torch.Size([16, 16, 16])
+    torch.Size([32, 32, 32])
+    torch.Size([32, 32, 32])
+

 %% Cell type:code id: tags:

 ``` python
 ```

 %% Cell type:markdown id: tags:

 Here we compose multiple upsampling steps and check the result.

 %% Cell type:code id: tags:

 ``` python
 checkpoint_path_0 = "./hierarchicalTransformer/checkpoints/first_step_test_499.pt"

 checkpoint_path_1 = "./hierarchicalTransformer/checkpoints/lab_5_final.pt"
 feature_vector_ckpt_path_1 = "./featureVector/checkpoints/lab_5_final.pt"

 checkpoint_path_2 = "./hierarchicalTransformer/checkpoints/long_training_test_final.pt"
 feature_vector_ckpt_path_2 = "./featureVector/checkpoints/long_training_test_final.pt"

 grid_size_start = 8
 #we upscale from 8³ to 16³, and then from 16³ to 32³




 model_0 = firstStepNetwork(8)
 model_0.load_state_dict(torch.load(checkpoint_path_0,weights_only=True))

 model_0.cuda()


 featureVector_1 = MasterFeatureVector(1)
 featureVector_1.load_state_dict(torch.load(feature_vector_ckpt_path_1,weights_only=True))

 #Reminder: upsamplingVoxelTransformer resolution is somewhat independent of "grid_size", due to the chunking.
 upsampling_network_1 = UpsamplingVoxelTransformer(8)
 upsampling_network_1.load_state_dict(torch.load(checkpoint_path_1,weights_only=True))



 featureVector_2 = MasterFeatureVector(2)
 featureVector_2.load_state_dict(torch.load(feature_vector_ckpt_path_2,weights_only=True))

 #Reminder: upsamplingVoxelTransformer resolution is somewhat independent of "grid_size", due to the chunking.
 upsampling_network_2 = UpsamplingVoxelTransformer(8)
 upsampling_network_2.load_state_dict(torch.load(checkpoint_path_2,weights_only=True))

 model_1 = UpsampleShape(
    upsampling_network_1,
    featureVector_1,
    2
    )

 model_2 = UpsampleShape(
    upsampling_network_2,
    featureVector_2,
    1
    )

 n_samples = 4

 print(len(dataset))
 random_indices = torch.randint(0,50,[n_samples])


 batch = dataset[list(random_indices)]


 # Stage 0: draw n_samples shapes from the first-step network.
 result_0 = model_0.sample(n_samples)


 model_1.cuda()
 # Stage 1: upsample the stage-0 samples. The previous code passed
 # `low_res_grids`, a variable that only exists if the earlier upscaling-debug
 # cell has been run and that is unrelated to `result_0`, so the chain was not
 # actually composed.
 # NOTE(review): assumes model_0.sample returns a tensor that model_1.sample
 # accepts after .float().cuda() - confirm against firstStepNetwork.sample.
 result_1 = model_1.sample(result_0.float().cuda())




 model_2.cuda()
 # Stage 2: upsample the stage-1 result once more.
 result_2 = model_2.sample(result_1.float().cuda())



 numpy_voxels = result_2[0].detach()

 grid_size_low = 8
 final_res = 4*grid_size_low

 #plot low, intermediate, and high/final resolution



 upsampler_0 = nn.Upsample(scale_factor = 4)
 upsampler_1 = nn.Upsample(scale_factor = 2)

 # For every sample, bring the three stages to a common resolution, lay them
 # out next to each other along the last axis and show them in one k3d plot.
 for i in range(n_samples):
    # nn.Upsample is applied to a tensor with two extra leading dimensions,
    # hence the unsqueeze/squeeze pairs around each call.
    level_0 = upsampler_0.forward(result_0[i,:,:,:].cpu().float().unsqueeze(0).unsqueeze(0)).squeeze(0).squeeze(0)
    level_1 = upsampler_1.forward(result_1[i,:,:,:].cpu().float().unsqueeze(0).unsqueeze(0)).squeeze(0).squeeze(0)
    level_2 = result_2[i,:,:,:].cpu()


    # The trailing all-zero block is appended after the three stages
    # (presumably as empty padding - TODO confirm intent).
    combined = torch.cat((level_0,level_1,level_2,torch.zeros((final_res,final_res,final_res))),dim=2)


    plot = k3d.plot()
    # Nearly transparent block of ones drawn as a reference volume.
    plot += k3d.voxels(numpy.ones([grid_size_low,grid_size_low,grid_size_low]),opacity=0.01)

    plot += k3d.voxels(combined.cpu())
    # Stray bare literals (60, 0, 1.5) that used to sit here were no-op
    # expression statements and have been removed.


    plot.display()
 ```

--- a/codewords.py
+++ b/codewords.py
@@ -6,21 +6,19 @@ from dataLoading import dataset


 def chunk_index_tensor():
-    return torch.unflatten(2**torch.arange(0,8),0,(2,2,2))
+    return (2**torch.arange(0,8))
+

 """
-Non-learnable, non-differentiable Model to generate upscaling target grids. 
-In the target voxel grid, each voxel is assigned a token 0,...,255.
-This will downscale the input grid by a factor of 2.
+If we want to use the BCE loss, 
+we will have to arrange the grid in this shape (*, grid_size,grid_size,grid_size,8)
+Instead of (*, grid_size,grid_size,grid_size)
 """
-class GenerateTokenGrid(nn.Module):
+class GenerateBinaryTokenGrid(nn.Module):
+
    def __init__(self):
        super().__init__()

-        index_tensor = chunk_index_tensor()
-        self.chunk_index_tensor = index_tensor.unsqueeze(0).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
-        #now it has shape (1,2,2,2,1,1,1)
-        
        
    def forward(self,x):
        grid_size = x.shape[-1]
@@ -32,54 +30,56 @@ class GenerateTokenGrid(nn.Module):
        chunks = torch.cat(torch.chunk(chunks,  factor,-6),-3)
        chunks = torch.cat(torch.chunk(chunks,  factor,-5),-2)
        chunks = torch.cat(torch.chunk(chunks,  factor,-4),-1)
-        #now the dimensions -6,-5,-4 are the dimensions inside each chunk.
-        weighted_chunks = chunks * self.chunk_index_tensor
-        #now we just have to sum over the dimensions -6,-5,-4.
-        res =  weighted_chunks.sum(dim=(-6,-5,-4))
-        
        
-        return res
-        #each coordinate inside a chunk must be assigned a fixed power of 2.
-        #we will broadcast a tensor over the chunks.
        
    
-        #the input can be mapped to the target, using strided convolutions.
-        #stride =2,  ->we have 256 filters, representing ways to fill a 2x2x2 grid.
-
+        res = chunks.flatten(-6,-4)
+        res = res.permute((0,2,3,4,1))
        
+        return res
+        #instead of summing over the chunks we will now just flatten the chunk dimensions.




 """
-If we want to use the BCE loss, 
-we will have to arrange the grid in this shape (*, grid_size,grid_size,grid_size,8)
-Instead of (*, grid_size,grid_size,grid_size)
+Non-learnable, non-differentiable Model to generate upscaling target grids. 
+In the target voxel grid, each voxel is assigned a token 0,...,255.
+This will downscale the input grid by a factor of 2.
 """
-class GenerateBinaryTokenGrid(GenerateTokenGrid):
-
+class GenerateTokenGrid(nn.Module):
    def __init__(self):
        super().__init__()

+        index_tensor = chunk_index_tensor()
+        self.chunk_index_tensor = index_tensor.unsqueeze(0).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
+        #now it has shape (1,2,2,2,1,1,1)
+        
+        self.model = GenerateBinaryTokenGrid()
        
    def forward(self,x):
-        grid_size = x.shape[-1]
+        flattened_chunks = self.model.forward(x)
+        #now the dimensions -6,-5,-4 are the dimensions inside each chunk.
+        weighted_chunks = flattened_chunks * (2**torch.arange(0,8)).unsqueeze(0).unsqueeze(0).unsqueeze(0).unsqueeze(0)
+        #now we just have to sum over the dimensions -6,-5,-4.
+        res =  weighted_chunks.sum(dim=(-6,-5,-4))
+        
+        
+        return res
+        #each coordinate inside a chunk must be assigned a fixed power of 2.
+        #we will broadcast a tensor over the chunks.
+        
+        
+        #the input can be mapped to the target, using strided convolutions.
+        #stride =2,  ->we have 256 filters, representing ways to fill a 2x2x2 grid.
+
+

-        #grid size 
-        factor = grid_size//2

-        chunks = x.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
-        chunks = torch.cat(torch.chunk(chunks,  factor,-6),-3)
-        chunks = torch.cat(torch.chunk(chunks,  factor,-5),-2)
-        chunks = torch.cat(torch.chunk(chunks,  factor,-4),-1)



-        res = chunks.flatten(-6,-4)
-        res = res.permute((0,2,3,4,1))

-        return res
-        #instead of summing over the chunks we will now just flatten the chunk dimensions.

 """
 Wrapper model, which downscales the "raw" voxel by the specified factor,
@@ -104,13 +104,14 @@ class TokensToBinary(nn.Module):
    def forward(self,x:torch.LongTensor):

        index_tensor = chunk_index_tensor().unsqueeze(0).unsqueeze(0).unsqueeze(0).unsqueeze(0).cuda()
+        #index tensor shape: (1,1,1,1,8)
        #index tensor shape:
        #(1,1,1,1,2,2,2)
        chunks = x.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1).cuda()
        #chunks shape:
-        #(batch_dim,2,2,2,1,1,1)
+        #(batch_dim,g,g,g)
    
-        #we need to broadcast the chunks modulo chunk_tensor over the dimensions: -6,-5,-4
+        #we need to broadcast the chunks modulo chunk_tensor over the dimensions: -1
        remainder_tensor = torch.divide(chunks,index_tensor,rounding_mode="floor")
        remainder_tensor = torch.remainder(remainder_tensor,2)
        #now we know for each position the 1.0 or 0.0
@@ -130,4 +131,22 @@ class TokensToBinary(nn.Module):
        
        return remainder_tensor

+"""
+This is used during inference, to map the output distribution ({0,1,...,7}->{0,1})
+to the token, so that this diagram commutes:
+
+binary grid ----- GenerateTargetGrid -------------------
+        |                                               |
+        |                                               |
+    GenerateBinaryTokenGrid                             |
+        |                                               |
+        |                                               |
+        V                                               V 
+flattened chunk grid  -----distribution_to_tokens---->  token grid
+"""
+def distribution_to_tokens(x:torch.Tensor):
+    """
+    Collapse the last axis of x (8 entries, one per position of a flattened
+    chunk) into a single token value by weighting entry k with 2**k and summing.
+
+    x: tensor whose last dimension has size 8.
+    Returns a tensor with the same leading dimensions and a last dimension of 1.
+    """
+    #chunk_index_tensor() now returns the flat weights (1,2,4,...,128) of shape (8,),
+    #so it is applied directly to the flat last axis. Unflattening x to (2,2,2) first
+    #can no longer broadcast against it (2 vs 8).
+    weights = chunk_index_tensor().to(x.device)
+    x = torch.sum(x * weights,dim=-1,keepdim=True)
    
+    return x
--- a/upsampling_training.py
+++ b/upsampling_training.py
@@ -39,6 +39,7 @@ def train_layer(
    lr:float,
    num_epochs:int,
    use_bce_loss:bool,
+    clear_plots:bool,
    previous_ckpt = None,
    previous_feature_vector = None,
    batch_size = 16
@@ -283,15 +284,19 @@ def train_layer(
    plt.yscale("log")
    plt.savefig("hierarchicalTransformer/training_loss_curve_"+save_title+".png")
    plt.show()
+    if clear_plots:
+        plt.close()

    plt.plot(train_accuracy_values,label="argmax")
-    plt.plot(train_soft_accuracy_values,label="softmax correct predictions")
+    #plt.plot(train_soft_accuracy_values,label="softmax correct predictions")
    plt.plot(train_probability_values,label="softmax probability measure")
    plt.xlabel("epoch")
    plt.ylabel("accuracy (on training set)")
    plt.legend()
    plt.savefig("hierarchicalTransformer/training_accuracy_"+save_title+".png")
    plt.show()
+    if clear_plots:
+        plt.close()

    plt.plot(validation_losses)
    plt.xlabel("epoch")
@@ -299,12 +304,16 @@ def train_layer(
    plt.yscale("log")
    plt.savefig("hierarchicalTransformer/validation_loss_curve_"+save_title+".png")
    plt.show()
+    if clear_plots:
+        plt.close()

    plt.plot(validation_accuracy_values,label="argmax")
-    plt.plot(validation_soft_accuracy_values,label="softmax correct predictions")
+    #plt.plot(validation_soft_accuracy_values,label="softmax correct predictions")
    plt.plot(validation_probability_values,label="softmax probability measure")
    plt.xlabel("epoch")
    plt.ylabel("accuracy (on validation set)")
    plt.legend()
    plt.savefig("hierarchicalTransformer/validation_accuracy_"+save_title+".png")
    plt.show()
+    if clear_plots:
+        plt.close()
\ No newline at end of file
--- a/upsampling_transformer.py
+++ b/upsampling_transformer.py
@@ -285,6 +285,7 @@ class UpsamplingVoxelTransformer(nn.Module):

                
                next_tokens = next_tokens.unflatten(-1,(2,2,2)) * chunk_index_tensor().unsqueeze(0).cuda()
+                
                next_tokens = torch.sum(next_tokens,dim=(-3,-2,-1)).unsqueeze(-1)