Commit 128ef01

Merge branch 'main' into video_training

2 parents: 28dbe57 + 80771b1

54 files changed

Lines changed: 10875 additions & 21 deletions


.github/workflows/UnitTests.yml

Lines changed: 6 additions & 2 deletions
@@ -35,7 +35,11 @@ jobs:
     name: "TPU test (${{ matrix.tpu-type }})"
     runs-on: ["self-hosted", "tpu", "${{ matrix.tpu-type }}"]
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
       - name: Install dependencies
         run: |
           pip install -e .
@@ -50,7 +54,7 @@ jobs:
           ruff check .
       - name: PyTest
         run: |
-          HF_HUB_CACHE=/mnt/disks/github-runner-disk/ HF_HOME=/mnt/disks/github-runner-disk/ python3 -m pytest -x
+          HF_HUB_CACHE=/mnt/disks/github-runner-disk/ HF_HOME=/mnt/disks/github-runner-disk/ python3 -m pytest -x --deselect=src/maxdiffusion/tests/ltx_transformer_step_test.py
 # add_pull_ready:
 #   if: github.ref != 'refs/heads/main'
 #   permissions:

docs/getting_started/first_run.md

Lines changed: 9 additions & 5 deletions
@@ -8,15 +8,19 @@ We recommend starting with a single host first and then moving to multihost.
 Local development is a convenient way to run MaxDiffusion on a single host. It doesn't scale to
 multiple hosts.
 
-1. [Create and SSH to a single-host TPU (v4-8).](https://cloud.google.com/tpu/docs/users-guide-tpu-vm#creating_a_cloud_tpu_vm_with_gcloud)
+1. [Create and SSH to a single-host TPU (v6-8).](https://cloud.google.com/tpu/docs/users-guide-tpu-vm#creating_a_cloud_tpu_vm_with_gcloud)
+   * You can find [here](https://cloud.google.com/tpu/docs/regions-zones) the list of zones that support v6 (Trillium) TPUs.
+   * We recommend the base VM image "v2-alpha-tpuv6e", which meets the version requirements: Ubuntu 22.04, Python 3.10, and TensorFlow >= 2.12.0.
+
 1. Clone MaxDiffusion in your TPU VM.
+   ```bash
+   git clone https://github.com/AI-Hypercomputer/maxdiffusion.git
+   cd maxdiffusion
+   ```
 1. Within the root directory of the MaxDiffusion `git` repo, install dependencies by running:
    ```bash
-   If you are running on TPU:
    bash setup.sh MODE=stable DEVICE=tpu
-
-   If you are running on GPU:
-   bash setup.sh MODE=stable DEVICE=gpu
    ```
 
 ## Getting Starting: Multihost development

maxdiffusion_dependencies.Dockerfile

Lines changed: 3 additions & 3 deletions
@@ -1,12 +1,12 @@
-# Use Python 3.10-slim-bullseye as the base image
-FROM python:3.10-slim-bullseye
+# Use Python 3.12-slim-bullseye as the base image
+FROM python:3.12-slim-bullseye
 
 # Environment variable for no-cache-dir and pip root user warning
 ENV PIP_NO_CACHE_DIR=1
 ENV PIP_ROOT_USER_ACTION=ignore
 
 # Set environment variables for Google Cloud SDK and Python 3.10
-ENV PYTHON_VERSION=3.10
+ENV PYTHON_VERSION=3.12
 ENV CLOUD_SDK_VERSION=latest
 
 # Set DEBIAN_FRONTEND to noninteractive to avoid frontend errors

requirements.txt

Lines changed: 3 additions & 1 deletion
@@ -23,8 +23,10 @@ pytest==8.2.2
 tensorflow>=2.17.0
 tensorflow-datasets>=4.9.6
 ruff>=0.1.5,<=0.2
+git+https://github.com/Lightricks/LTX-Video
+git+https://github.com/zmelumian972/xla@torchax/jittable_module_callable#subdirectory=torchax
 opencv-python-headless==4.10.0.84
-orbax-checkpoint==0.10.3
+orbax-checkpoint
 tokenizers==0.21.0
 huggingface_hub>=0.30.2
 transformers==4.48.1

requirements_with_jax_ai_image.txt

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ tensorflow>=2.17.0
 tensorflow-datasets>=4.9.6
 ruff>=0.1.5,<=0.2
 opencv-python-headless==4.10.0.84
-orbax-checkpoint==0.10.3
+orbax-checkpoint
 tokenizers==0.21.0
 huggingface_hub>=0.30.2
 transformers==4.48.1

setup.sh

Lines changed: 1 addition & 1 deletion
@@ -112,4 +112,4 @@ else
 fi
 
 # Install maxdiffusion
-pip3 install -U . || echo "Failed to install maxdiffusion" >&2
+pip3 install -U . || echo "Failed to install maxdiffusion" >&2

src/maxdiffusion/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -374,6 +374,7 @@
   _import_structure["models.unet_2d_condition_flax"] = ["FlaxUNet2DConditionModel"]
   _import_structure["models.flux.transformers.transformer_flux_flax"] = ["FluxTransformer2DModel"]
   _import_structure["models.vae_flax"] = ["FlaxAutoencoderKL"]
+  _import_structure["models.ltx_video.transformers.transformer3d"] = ["Transformer3DModel"]
   _import_structure["pipelines"].extend(["FlaxDiffusionPipeline"])
   _import_structure["schedulers"].extend(
       [
@@ -453,6 +454,7 @@
   from .models.modeling_flax_utils import FlaxModelMixin
   from .models.unet_2d_condition_flax import FlaxUNet2DConditionModel
   from .models.flux.transformers.transformer_flux_flax import FluxTransformer2DModel
+  from .models.ltx_video.transformers.transformer3d import Transformer3DModel
   from .models.vae_flax import FlaxAutoencoderKL
   from .pipelines import FlaxDiffusionPipeline
   from .schedulers import (

src/maxdiffusion/checkpointing/checkpointing_utils.py

Lines changed: 7 additions & 4 deletions
@@ -28,7 +28,7 @@
 from flax.training import train_state
 import orbax
 import orbax.checkpoint as ocp
-from orbax.checkpoint.logging import abstract_logger
+from orbax.checkpoint.logging import AbstractLogger
 from orbax.checkpoint.checkpoint_manager import CheckpointManager, CheckpointManagerOptions
 
 STABLE_DIFFUSION_CHECKPOINT = "STABLE_DIFFUSION_CHECKPOINT"
@@ -43,7 +43,7 @@ def create_orbax_checkpoint_manager(
     checkpoint_type: str,
     dataset_type: str = "tf",
     use_async: bool = True,
-    orbax_logger: Optional[abstract_logger.AbstractLogger] = None,
+    orbax_logger: Optional[AbstractLogger] = None,
 ):
   """
   Returns specified Orbax (async or not) CheckpointManager or None if checkpointing is disabled.
@@ -213,8 +213,11 @@ def load_state_if_possible(
     max_logging.log(f"restoring from this run's directory latest step {latest_step}")
     try:
       if not enable_single_replica_ckpt_restoring:
-        item = {checkpoint_item: orbax.checkpoint.args.PyTreeRestore(item=abstract_unboxed_pre_state)}
-        return checkpoint_manager.restore(latest_step, args=orbax.checkpoint.args.Composite(**item))
+        if checkpoint_item == "ltxvid_transformer":
+          return checkpoint_manager.restore(latest_step, args=ocp.args.StandardRestore(abstract_unboxed_pre_state))
+        else:
+          item = {checkpoint_item: orbax.checkpoint.args.PyTreeRestore(item=abstract_unboxed_pre_state)}
+          return checkpoint_manager.restore(latest_step, args=orbax.checkpoint.args.Composite(**item))
 
       def map_to_pspec(data):
         pspec = data.sharding.spec
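
The restore path now branches on `checkpoint_item`: `"ltxvid_transformer"` checkpoints go through `ocp.args.StandardRestore`, everything else keeps the named `Composite`-of-`PyTreeRestore` layout. A small sketch of that dispatch with stand-in dataclasses (actually running Orbax is out of scope here; the layout difference is an inference from the branch, not confirmed by the diff):

```python
from dataclasses import dataclass

# Stand-ins for orbax.checkpoint args; the real code uses ocp.args.*.
@dataclass
class StandardRestore:
    item: object

@dataclass
class PyTreeRestore:
    item: object

def build_restore_args(checkpoint_item: str, abstract_state: object):
    """Mirror the branch in load_state_if_possible: the LTX-Video transformer
    checkpoint is restored directly against the abstract state, while other
    items are wrapped under their name for a Composite restore."""
    if checkpoint_item == "ltxvid_transformer":
        return StandardRestore(abstract_state)
    return {checkpoint_item: PyTreeRestore(item=abstract_state)}
```

In the real code the dict form is splatted into `ocp.args.Composite(**item)` before being passed to `checkpoint_manager.restore`.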
Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
+#hardware
+hardware: 'tpu'
+skip_jax_distributed_system: False
+
+jax_cache_dir: ''
+weights_dtype: 'bfloat16'
+activations_dtype: 'bfloat16'
+
+
+run_name: ''
+output_dir: ''
+config_path: ''
+save_config_to_gcs: False
+
+#Checkpoints
+text_encoder_model_name_or_path: "ariG23498/t5-v1-1-xxl-flax"
+prompt_enhancer_image_caption_model_name_or_path: "MiaoshouAI/Florence-2-large-PromptGen-v2.0"
+prompt_enhancer_llm_model_name_or_path: "unsloth/Llama-3.2-3B-Instruct"
+frame_rate: 30
+max_sequence_length: 512
+sampler: "from_checkpoint"
+
+# Generation parameters
+pipeline_type: multi-scale
+prompt: "A man in a dimly lit room talks on a vintage telephone, hangs up, and looks down with a sad expression. He holds the black rotary phone to his right ear with his right hand, his left hand holding a rocks glass with amber liquid. He wears a brown suit jacket over a white shirt, and a gold ring on his left ring finger. His short hair is neatly combed, and he has light skin with visible wrinkles around his eyes. The camera remains stationary, focused on his face and upper body. The room is dark, lit only by a warm light source off-screen to the left, casting shadows on the wall behind him. The scene appears to be from a movie. "
+#negative_prompt: "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"
+height: 512
+width: 512
+num_frames: 88
+flow_shift: 5.0
+downscale_factor: 0.6666666
+spatial_upscaler_model_path: "ltxv-spatial-upscaler-0.9.7.safetensors"
+prompt_enhancement_words_threshold: 120
+stg_mode: "attention_values"
+decode_timestep: 0.05
+decode_noise_scale: 0.025
+seed: 10
+
+
+first_pass:
+  guidance_scale: [1, 1, 6, 8, 6, 1, 1]
+  stg_scale: [0, 0, 4, 4, 4, 2, 1]
+  rescaling_scale: [1, 1, 0.5, 0.5, 1, 1, 1]
+  guidance_timesteps: [1.0, 0.996, 0.9933, 0.9850, 0.9767, 0.9008, 0.6180]
+  skip_block_list: [[], [11, 25, 35, 39], [22, 35, 39], [28], [28], [28], [28]]
+  num_inference_steps: 30
+  skip_final_inference_steps: 3
+  skip_initial_inference_steps: 0
+  cfg_star_rescale: True
+
+second_pass:
+  guidance_scale: [1]
+  stg_scale: [1]
+  rescaling_scale: [1]
+  guidance_timesteps: [1.0]
+  skip_block_list: [27]
+  num_inference_steps: 30
+  skip_initial_inference_steps: 17
+  skip_final_inference_steps: 0
+  cfg_star_rescale: True
+
+#parallelism
+mesh_axes: ['data', 'fsdp', 'tensor']
+logical_axis_rules: [
+  ['batch', 'data'],
+  ['activation_heads', 'fsdp'],
+  ['activation_batch', 'data'],
+  ['activation_kv', 'tensor'],
+  ['mlp', 'tensor'],
+  ['embed', 'fsdp'],
+  ['heads', 'tensor'],
+  ['norm', 'fsdp'],
+  ['conv_batch', ['data', 'fsdp']],
+  ['out_channels', 'tensor'],
+  ['conv_out', 'fsdp'],
+  ['conv_in', 'fsdp']
+]
+data_sharding: [['data', 'fsdp', 'tensor']]
+dcn_data_parallelism: 1 # recommended DCN axis to be auto-sharded
+dcn_fsdp_parallelism: -1
+dcn_tensor_parallelism: 1
+ici_data_parallelism: 1
+ici_fsdp_parallelism: -1 # recommended ICI axis to be auto-sharded
+ici_tensor_parallelism: 1
+
+allow_split_physical_axes: False
+learning_rate_schedule_steps: -1
+max_train_steps: 500
+pretrained_model_name_or_path: ''
+unet_checkpoint: ''
+dataset_name: 'diffusers/pokemon-gpt4-captions'
+train_split: 'train'
+dataset_type: 'tf'
+cache_latents_text_encoder_outputs: True
+per_device_batch_size: 1
+compile_topology_num_slices: -1
+quantization_local_shard_count: -1
+jit_initializers: True
+enable_single_replica_ckpt_restoring: False
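
The `first_pass` section of the new config defines parallel per-stage schedules: entry `i` of `guidance_scale`, `stg_scale`, `rescaling_scale`, and `skip_block_list` appears to apply to the guidance stage bounded by `guidance_timesteps[i]` (an assumption from the structure; the pipeline code is what actually consumes them). A quick sanity-check sketch over those values:

```python
# Sanity checks for the first_pass schedule lists above. Assumption: the
# multi-scale pipeline indexes all lists together per guidance stage, so
# they must all have the same length.
first_pass = {
    "guidance_scale": [1, 1, 6, 8, 6, 1, 1],
    "stg_scale": [0, 0, 4, 4, 4, 2, 1],
    "rescaling_scale": [1, 1, 0.5, 0.5, 1, 1, 1],
    "guidance_timesteps": [1.0, 0.996, 0.9933, 0.9850, 0.9767, 0.9008, 0.6180],
    "skip_block_list": [[], [11, 25, 35, 39], [22, 35, 39], [28], [28], [28], [28]],
}

lengths = {k: len(v) for k, v in first_pass.items()}
assert len(set(lengths.values())) == 1, f"mismatched schedule lengths: {lengths}"

# Guidance timesteps run from 1.0 toward 0 as denoising proceeds, so the
# stage boundaries should be strictly decreasing.
ts = first_pass["guidance_timesteps"]
assert all(a > b for a, b in zip(ts, ts[1:])), "timesteps not decreasing"
```

The same shape constraint would hold for `second_pass`, where every list has length 1.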
