Add Sth-Sth-v2 Preprocessing & XLA Pretraining Script (#10)
* Update xpretrain documentation

* Update README

* Add Sth-Sth-v2 Preprocessing Pipeline

* Add model initialization stub

* Add full XLA pretraining pipeline

* Add v1.0.0 with full preprocessing/XLA pretraining pipeline
siddk authored Mar 7, 2023
1 parent d35c744 commit ed19236
Showing 13 changed files with 2,632 additions and 8 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -1,5 +1,5 @@
<div align="center">
-<img src="https://github.com/siddk/voltron-robotics/blob/main/docs/assets/voltron-banner.png" alt="Voltron Logo"/>
+<img src="https://raw.githubusercontent.com/siddk/voltron-robotics/main/docs/assets/voltron-banner.png" alt="Voltron Logo"/>
</div>

<div align="center">
@@ -83,7 +83,7 @@ our paper.

## API

-![Voltron Framework](https://github.com/siddk/voltron-robotics/blob/main/docs/assets/voltron-framework.png)
+![Voltron Framework](https://raw.githubusercontent.com/siddk/voltron-robotics/main/docs/assets/voltron-framework.png)

The package `voltron` provides the following functionality for using and adapting existing representations:

2 changes: 1 addition & 1 deletion examples/xla-reference/README.md
@@ -9,4 +9,4 @@ To get things to work, we had to add some non-intuitive code to facilitate PyTorch
data parallel training pipeline). As a result, `xpretrain.py` is here mostly for documentation purposes, with a fully
refactored version `pretrain.py` forthcoming.

-We also include the original cloud preprocesssing script `xpreprocess.py` for completeness.
+We also include the original cloud preprocessing script `xpreprocess.py` for completeness (this is more general).
833 changes: 831 additions & 2 deletions examples/xla-reference/xpretrain.py

Large diffs are not rendered by default.
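Since the `xpretrain.py` diff is collapsed here, the sketch below illustrates the general PyTorch XLA data-parallel launch pattern that scripts of this kind are built around. It is a minimal stand-in, not the actual file: the linear model and random tensors are placeholders for the real Voltron model and the preprocessed Sth-Sth-v2 data.

```python
# Minimal sketch of the torch_xla data-parallel pattern; NOT the actual
# xpretrain.py. The toy model and random tensors are placeholders.
import torch
import torch.nn as nn
import torch_xla.core.xla_model as xm
import torch_xla.distributed.parallel_loader as pl
import torch_xla.distributed.xla_multiprocessing as xmp
from torch.utils.data import DataLoader, TensorDataset


def _mp_fn(index: int) -> None:
    # Each spawned process drives one TPU core.
    device = xm.xla_device()
    model = nn.Linear(32, 1).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

    # MpDeviceLoader pipelines host-to-device transfer; a real script would
    # also shard data across cores (e.g., with a DistributedSampler).
    dataset = TensorDataset(torch.randn(256, 32), torch.randn(256, 1))
    loader = pl.MpDeviceLoader(DataLoader(dataset, batch_size=16), device)

    for x, y in loader:
        optimizer.zero_grad()
        loss = nn.functional.mse_loss(model(x), y)
        loss.backward()
        # optimizer_step() all-reduces gradients across cores before stepping.
        xm.optimizer_step(optimizer)


if __name__ == "__main__":
    xmp.spawn(_mp_fn, args=(), nprocs=8)  # e.g., eight cores on a TPU v3-8
```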

6 changes: 5 additions & 1 deletion pyproject.toml
@@ -8,7 +8,7 @@ authors = [
{name = "Siddharth Karamcheti", email="skaramcheti@cs.stanford.edu"}
]
description = "Voltron: Language-Driven Representation Learning for Robotics."
version = "0.0.1"
version = "1.0.0"
readme = "README.md"
requires-python = ">=3.8"
keywords = ["robotics", "representation learning", "natural language processing", "machine learning"]
@@ -30,15 +30,19 @@ classifiers = [
dependencies = [
"av",
"gdown",
"google-cloud-storage",
"einops",
"hurry.filesize",
"hydra-core==1.1.1", # Lock Hydra =>> future versions break...
"jsonlines",
"omegaconf==2.1.2", # Lock OmegaConf =>> future versions break...
"opencv-python==4.2.0.32", # Lock OpenCV =>> just in case...
"rich",
"torch",
"torchvision",
"torchaudio",
"transformers",
"wandb",
]

[project.optional-dependencies]
3 changes: 3 additions & 0 deletions voltron/conf/__init__.py
@@ -1 +1,4 @@
from .accelerators import AcceleratorConfig
from .datasets import DatasetConfig
from .models import ModelConfig
from .tracking import TrackingConfig
52 changes: 52 additions & 0 deletions voltron/conf/accelerators.py
@@ -0,0 +1,52 @@
"""
accelerator.py
Base Hydra Structured Configs for defining various accelerator schemes. Uses a simple single inheritance structure.
"""
from dataclasses import dataclass

from hydra.core.config_store import ConfigStore
from omegaconf import MISSING


@dataclass
class AcceleratorConfig:
accelerator: str = MISSING
num_accelerators: int = MISSING
num_workers: int = MISSING


@dataclass
class TPUv2OneConfig(AcceleratorConfig):
accelerator = "tpu"
num_accelerators = 1
num_workers = 4


@dataclass
class TPUv2EightConfig(AcceleratorConfig):
accelerator = "tpu"
num_accelerators = 8
num_workers = 4


@dataclass
class TPUv3OneConfig(AcceleratorConfig):
accelerator = "tpu"
num_accelerators = 1
num_workers = 8


@dataclass
class TPUv3EightConfig(AcceleratorConfig):
accelerator = "tpu"
num_accelerators = 8
num_workers = 8


# Create a configuration group `accelerator` and populate with the above...
cs = ConfigStore.instance()
cs.store(group="accelerator", name="tpu-v2-1", node=TPUv2OneConfig)
cs.store(group="accelerator", name="tpu-v2-8", node=TPUv2EightConfig)
cs.store(group="accelerator", name="tpu-v3-1", node=TPUv3OneConfig)
cs.store(group="accelerator", name="tpu-v3-8", node=TPUv3EightConfig)
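The type annotations on the subclass overrides are load-bearing: a bare `accelerator = "tpu"` in a `@dataclass` subclass would not register a new field default, so the value would still resolve as `MISSING`. As a quick sanity check (a sketch, not code from this commit), any stored node can be materialized directly with OmegaConf:

```python
# Sketch only (not part of the commit): materialize a stored accelerator node.
from omegaconf import OmegaConf

from voltron.conf.accelerators import TPUv3EightConfig

cfg = OmegaConf.structured(TPUv3EightConfig)
assert cfg.accelerator == "tpu"
assert cfg.num_accelerators == 8 and cfg.num_workers == 8
```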
6 changes: 5 additions & 1 deletion voltron/conf/datasets.py
@@ -16,7 +16,11 @@
class DatasetConfig:
    name: str = MISSING
    path: str = MISSING
-    artifact_path: str = to_absolute_path("/mnt/home")
+    artifact_path: str = to_absolute_path("data/processed/sth-sth-v2")
+
+    # Streaming Parameters (assumes fully preprocessed dataset lives at `stream_prefix/...`)
+    stream: bool = True
+    stream_prefix: str = "data/processed"

    # Dataset-Specific Parameters
    resolution: int = 224
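How the new fields are meant to be consumed (a hypothetical sketch, not code from this commit): when `stream` is set, a dataset implementation would read fully preprocessed examples from under `stream_prefix`, falling back to `artifact_path` otherwise. The class name and the on-disk `*.pt` layout below are assumptions for illustration.

```python
# Hypothetical sketch, not repo code: branching on DatasetConfig.stream.
# The `*.pt` file layout under `<root>/<name>/` is an assumption.
from pathlib import Path

import torch
from torch.utils.data import Dataset

from voltron.conf import DatasetConfig


class PreprocessedClips(Dataset):
    def __init__(self, cfg: DatasetConfig) -> None:
        # Streaming reads fully preprocessed tensors; otherwise use artifacts.
        root = Path(cfg.stream_prefix) if cfg.stream else Path(cfg.artifact_path)
        self.files = sorted((root / cfg.name).rglob("*.pt"))

    def __len__(self) -> int:
        return len(self.files)

    def __getitem__(self, idx: int):
        return torch.load(self.files[idx])
```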