Skip to content

feat: Set up comprehensive Python testing infrastructure with Poetry #341

New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,48 @@ AUTHORS
ChangeLog
.DS_Store
.mypy_cache

# Testing artifacts
.pytest_cache/
.coverage
htmlcov/
coverage.xml
*.cover
*.py,cover
.hypothesis/
pytest_cache/

# Claude settings
.claude/*

# Poetry
# Note: Do not ignore poetry.lock - it should be committed

# Virtual environments
venv/
ENV/
env/
.venv/
.env

# IDE specific files
.vscode/
*.sublime-project
*.sublime-workspace

# Build artifacts
__pycache__/
*.so
*.dylib
*.dll

# OS specific
Thumbs.db
.DS_Store
Desktop.ini

# Temporary files
*.tmp
*.temp
*.log
.cache/
124 changes: 124 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Package metadata consumed by Poetry when building and publishing.
[tool.poetry]
name = "tensorflow-transform"
version = "1.17.1"
description = "A library for data preprocessing with TensorFlow"
authors = ["Google Inc. <tensorflow-extended-dev@googlegroups.com>"]
license = "Apache-2.0"
readme = "README.md"
homepage = "https://www.tensorflow.org/tfx/transform/get_started"
repository = "https://github.com/tensorflow/transform"
keywords = ["tensorflow", "transform", "tfx"]
# Trove classifiers; keep the per-version entries in step with the
# `python` constraint in [tool.poetry.dependencies].
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Education",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3 :: Only",
    "Topic :: Scientific/Engineering",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Mathematics",
    "Topic :: Software Development",
    "Topic :: Software Development :: Libraries",
    "Topic :: Software Development :: Libraries :: Python Modules",
]
# Only the importable package directory is shipped.
packages = [
    { include = "tensorflow_transform" },
]

# Runtime requirements of the published package.
[tool.poetry.dependencies]
python = ">=3.9,<4"
absl-py = ">=0.9,<2.0.0"
# Multiple-constraints form so Beam is required on every supported
# interpreter. A single entry restricted to python ">=3.11" would leave
# Python 3.9/3.10 installs without apache-beam at all.
# NOTE(review): the ">=2.47" floor for older interpreters is assumed to
# mirror the project's setup.py; confirm before merging.
apache-beam = [
    { version = ">=2.53,<3", extras = ["gcp"], python = ">=3.11" },
    { version = ">=2.47,<3", extras = ["gcp"], python = "<3.11" },
]
numpy = ">=1.22.0"
# Same per-interpreter split as apache-beam: the markers must not overlap.
protobuf = [
    { version = ">=4.25.2,<6.0.0", python = ">=3.11" },
    { version = ">=4.21.6,<6.0.0", python = "<3.11" },
]
pyarrow = ">=10,<11"
pydot = ">=1.2,<2"
tensorflow = ">=2.17,<2.18"
tensorflow-metadata = ">=1.15.0"
tf-keras = ">=2"
tfx-bsl = ">=1.15.0"

# Test tooling only. Declared as a Poetry dependency group, so it is not
# part of the published package's runtime requirements.
[tool.poetry.group.dev.dependencies]
pytest = "^8.0.0"
# Provides the --cov* options used in [tool.pytest.ini_options].addopts.
pytest-cov = "^4.1.0"
pytest-mock = "^3.12.0"

# Convenience aliases so `poetry run test` / `poetry run tests` invoke pytest.
# NOTE(review): entries in this table are packaged as console scripts, so an
# installed wheel would expose generic `test`/`tests` commands that import
# pytest, which is only a dev-group dependency here. Consider dropping these
# in favour of `poetry run pytest` before publishing.
[tool.poetry.scripts]
test = "pytest:main"
tests = "pytest:main"

# pytest configuration (equivalent of pytest.ini).
[tool.pytest.ini_options]
minversion = "8.0"
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
# Applied to every run. Coverage is always on and the run fails below 80%,
# so pass --no-cov when running a subset of tests.
addopts = [
    "--strict-markers",
    "--tb=short",
    "--cov=tensorflow_transform",
    "--cov-report=term-missing:skip-covered",
    "--cov-report=html",
    "--cov-report=xml",
    "--cov-fail-under=80",
    "--doctest-modules",
    "-v",
]
# --strict-markers rejects any marker not registered here.
markers = [
    "unit: marks tests as unit tests (fast, isolated)",
    "integration: marks tests as integration tests (may require external resources)",
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
]
filterwarnings = [
    "ignore::DeprecationWarning",
    "ignore::PendingDeprecationWarning",
]

# coverage.py measurement settings.
[tool.coverage.run]
branch = true
parallel = true
source = ["tensorflow_transform"]
# Glob patterns for files left out of measurement (tests, packaging, caches).
omit = [
    "*/tests/*",
    "*/test_*.py",
    "*/*_test.py",
    "*/setup.py",
    "*/version.py",
    "*/__pycache__/*",
    "*/site-packages/*",
]

# coverage.py reporting settings.
[tool.coverage.report]
precision = 2
show_missing = true
skip_covered = true
# Regular expressions; a matching line is excluded from the report.
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "def __str__",
    "raise AssertionError",
    "raise NotImplementedError",
    # The dots stand in for either quote character around __main__.
    "if __name__ == .__main__.:",
    "if TYPE_CHECKING:",
    "if typing.TYPE_CHECKING:",
    "@abstractmethod",
    "@abc.abstractmethod",
]

# Output location for the HTML report requested by --cov-report=html.
[tool.coverage.html]
directory = "htmlcov"

# Output file for the XML report requested by --cov-report=xml.
[tool.coverage.xml]
output = "coverage.xml"

# PEP 517 build backend. This file uses dependency groups
# ([tool.poetry.group.*]), which poetry-core 1.0.x does not recognise, so
# require the 1.1 series or newer.
[build-system]
requires = ["poetry-core>=1.1.0"]
build-backend = "poetry.core.masonry.api"
94 changes: 94 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# TensorFlow Transform Testing Infrastructure

This directory contains the testing infrastructure for TensorFlow Transform.

## Structure

```
tests/
├── README.md # This file
├── __init__.py # Package marker
├── conftest.py # Shared pytest fixtures and configuration
├── test_setup_validation.py # Full validation tests (requires all dependencies)
├── test_minimal_setup.py # Minimal tests that work without all dependencies
├── unit/ # Unit tests directory
│   └── __init__.py
└── integration/ # Integration tests directory
    └── __init__.py
```

## Running Tests

### Using Poetry Scripts

```bash
# Run all tests
poetry run test

# Alternative command (both work)
poetry run tests

# Run specific test file
poetry run test tests/test_minimal_setup.py

# Run with specific markers
poetry run test -m unit
poetry run test -m "not slow"
```

### Using pytest directly

```bash
# Run all tests
python -m pytest

# Run with coverage (already the default via addopts in pyproject.toml; shown here for explicitness)
python -m pytest --cov=tensorflow_transform

# Run without coverage (useful for debugging)
python -m pytest --no-cov
```

## Test Markers

- `@pytest.mark.unit` - Fast, isolated unit tests
- `@pytest.mark.integration` - Integration tests that may require external resources
- `@pytest.mark.slow` - Tests that take a long time to run

## Available Fixtures

See `conftest.py` for all available fixtures. Key fixtures include:

- `temp_dir` - Temporary directory that's cleaned up after test
- `temp_file` - Temporary file that's cleaned up after test
- `mock_config` - Sample configuration dictionary
- `sample_data` - Sample data for testing transformations
- `tf_example_data` - Temporary TFRecord file with example data
- `mock_preprocessing_fn` - Simple preprocessing function for testing
- `mock_schema` - Simple schema for testing

## Coverage Configuration

Coverage is configured to:
- Require 80% minimum coverage
- Generate HTML reports in `htmlcov/`
- Generate XML report as `coverage.xml`
- Exclude test files and common patterns from coverage

## Known Issues

### ARM64 Architecture Support

Some dependencies like `tfx-bsl` may not have pre-built wheels for ARM64 architecture (e.g., Apple Silicon Macs, ARM Linux). If you encounter installation issues:

1. Try running the minimal test suite: `poetry run test tests/test_minimal_setup.py --no-cov`
2. Consider using x86_64 emulation or a compatible environment
3. Build dependencies from source if needed

## Writing New Tests

1. Place unit tests in `tests/unit/`
2. Place integration tests in `tests/integration/`
3. Use appropriate markers (`@pytest.mark.unit`, etc.)
4. Use fixtures from `conftest.py` by naming them as test function arguments (pytest discovers them automatically; do not import them)
5. Follow existing test patterns and naming conventions
Empty file added tests/__init__.py
Empty file.
Loading