diff --git a/.github/scripts/install-torch-tensorrt.sh b/.github/scripts/install-torch-tensorrt.sh
index fd88207eb4..59f9fa697b 100644
--- a/.github/scripts/install-torch-tensorrt.sh
+++ b/.github/scripts/install-torch-tensorrt.sh
@@ -1,6 +1,15 @@
 #set -exou pipefail
 set -x
 
+# Install the SQLite development package on dnf-based images. Images without
+# dnf skip this step instead of logging a command-not-found error.
+if command -v dnf >/dev/null 2>&1; then
+    dnf install -y sqlite-devel
+fi
+
+# Diagnostic only: list the sqlite libraries known to the dynamic linker.
+ldconfig -p | grep sqlite
+
 TORCH_TORCHVISION=$(grep "^torch" ${PWD}/py/requirements.txt)
 INDEX_URL=https://download.pytorch.org/whl/${CHANNEL}/${CU_VERSION}
 PLATFORM=$(python -c "import sys; print(sys.platform)")
diff --git a/.github/workflows/build-test-linux-aarch64-jetpack.yml b/.github/workflows/build-test-linux-aarch64-jetpack.yml
index 744a802bfa..29b6fea881 100644
--- a/.github/workflows/build-test-linux-aarch64-jetpack.yml
+++ b/.github/workflows/build-test-linux-aarch64-jetpack.yml
@@ -1,7 +1,11 @@
 name: Build and test Linux aarch64 wheels for Jetpack
 
 on:
-  pull_request:
+  #pull_request:
+  workflow_run:
+    workflows: ["Build and test Linux x86_64 wheels"]
+    types:
+      - completed
   push:
     branches:
       - main
@@ -15,6 +19,9 @@ on:
 
 jobs:
   generate-matrix:
+    # Only workflow_run events are gated on the upstream x86_64 result; push
+    # and other triggers have no workflow_run payload and must still run.
+    if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}
     uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
     with:
       package-type: wheel
diff --git a/.github/workflows/build-test-linux-aarch64.yml b/.github/workflows/build-test-linux-aarch64.yml
index 5358cf4b26..4ffbe31b66 100644
--- a/.github/workflows/build-test-linux-aarch64.yml
+++ b/.github/workflows/build-test-linux-aarch64.yml
@@ -1,7 +1,11 @@
 name: Build and test Linux aarch64 wheels
 
 on:
-  pull_request:
+  #pull_request:
+  workflow_run:
+    workflows: ["Build and test Linux x86_64 wheels"]
+    types:
+      - completed
   push:
     branches:
       - main
@@ -15,6 +19,9 @@ on:
 
 jobs:
   generate-matrix:
+    # Only workflow_run events are gated on the upstream x86_64 result; push
+    # and other triggers have no workflow_run payload and must still run.
+    if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}
     uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
     with:
       package-type: wheel
diff --git a/.github/workflows/build-test-linux-x86_64.yml b/.github/workflows/build-test-linux-x86_64.yml
index 09dde7a2af..b04dce73a2 100644
--- a/.github/workflows/build-test-linux-x86_64.yml
+++ b/.github/workflows/build-test-linux-x86_64.yml
@@ -75,9 +75,62 @@ jobs:
       smoke-test-script: ${{ matrix.smoke-test-script }}
       trigger-event: ${{ github.event_name }}
 
+  tests-py-fail-fast:
+    name: Test fail fast [Python]
+    needs: [filter-matrix, build]
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - repository: pytorch/tensorrt
+            package-name: torch_tensorrt
+            pre-script: packaging/pre_build_script.sh
+            post-script: packaging/post_build_script.sh
+            smoke-test-script: packaging/smoke_test_script.sh
+    uses: ./.github/workflows/linux-test.yml
+    with:
+      job-name: tests-py-critical-fail-fast
+      repository: "pytorch/tensorrt"
+      ref: ""
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+      build-matrix: ${{ needs.filter-matrix.outputs.matrix }}
+      pre-script: ${{ matrix.pre-script }}
+      script: |
+        export USE_HOST_DEPS=1
+        export CI_BUILD=1
+        export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH
+        pushd .
+        python -m pip install -r tests/py/requirements.txt
+        cov_param="--cov=torch_tensorrt --cov-report= --cov-config=pyproject.toml"
+
+        # test dynamo backend
+        python -m pytest -m critical ${cov_param} -n 4 tests/py/dynamo/backend/
+
+        # test dynamo models
+        python -m pytest -m critical ${cov_param} -ra --cov-append --ir dynamo tests/py/dynamo/models/
+
+        # test dynamo automatic plugin
+        python -m pytest -m critical ${cov_param} --cov-append tests/py/dynamo/automatic_plugin/
+
+        # test dynamo partitioning
+        python -m pytest -m critical ${cov_param} --cov-append tests/py/dynamo/partitioning/
+
+        # test dynamo lowering
+        python -m pytest -m critical ${cov_param} --cov-append tests/py/dynamo/lowering/
+
+        # test dynamo runtime: all of test_000_*, then critical tests from the other files (quoted glob so pytest, not the shell, expands it)
+        python -m pytest ${cov_param} --cov-append tests/py/dynamo/runtime/test_000_*
+        python -m pytest -m critical ${cov_param} --cov-append --ignore-glob='tests/py/dynamo/runtime/test_000_*' tests/py/dynamo/runtime/
+
+        # test core
+        python -m pytest -m critical ${cov_param} --cov-append tests/py/core/
+
+        coverage report --fail-under=20
+
   tests-py-torchscript-fe:
     name: Test torchscript frontend [Python]
-    needs: [filter-matrix, build]
+    needs: [filter-matrix, build, tests-py-fail-fast]
     strategy:
       fail-fast: false
       matrix:
@@ -113,7 +166,7 @@ jobs:
 
   tests-py-dynamo-converters:
     name: Test dynamo converters [Python]
-    needs: [filter-matrix, build]
+    needs: [filter-matrix, build, tests-py-fail-fast]
     strategy:
       fail-fast: false
       matrix:
@@ -147,7 +200,7 @@ jobs:
 
   tests-py-dynamo-fe:
     name: Test dynamo frontend [Python]
-    needs: [filter-matrix, build]
+    needs: [filter-matrix, build, tests-py-fail-fast]
     strategy:
       fail-fast: false
       matrix:
@@ -178,7 +231,7 @@ jobs:
 
   tests-py-dynamo-serde:
     name: Test dynamo export serde [Python]
-    needs: [filter-matrix, build]
+    needs: [filter-matrix, build, tests-py-fail-fast]
     strategy:
       fail-fast: false
       matrix:
@@ -210,7 +263,7 @@ jobs:
 
   tests-py-torch-compile-be:
     name: Test torch compile backend [Python]
-    needs: [filter-matrix, build]
+    needs: [filter-matrix, build, tests-py-fail-fast]
     strategy:
       fail-fast: false
       matrix:
@@ -243,7 +296,7 @@ jobs:
 
   tests-py-dynamo-core:
     name: Test dynamo core [Python]
-    needs: [filter-matrix, build]
+    needs: [filter-matrix, build, tests-py-fail-fast]
     strategy:
       fail-fast: false
       matrix:
@@ -276,7 +329,7 @@ jobs:
 
   tests-py-dynamo-cudagraphs:
     name: Test dynamo cudagraphs [Python]
-    needs: [filter-matrix, build]
+    needs: [filter-matrix, build, tests-py-fail-fast]
     strategy:
       fail-fast: false
       matrix:
@@ -309,7 +362,7 @@ jobs:
 
   tests-py-core:
     name: Test core [Python]
-    needs: [filter-matrix, build]
+    needs: [filter-matrix, build, tests-py-fail-fast]
     strategy:
       fail-fast: false
       matrix:
diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml
index 7a974411c8..b7cb143a50 100644
--- a/.github/workflows/build-test-windows.yml
+++ b/.github/workflows/build-test-windows.yml
@@ -1,7 +1,11 @@
 name: Build and test Windows wheels
 
 on:
-  pull_request:
+  #pull_request:
+  workflow_run:
+    workflows: ["Build and test Linux x86_64 wheels"]
+    types:
+      - completed
   push:
     branches:
       - main
@@ -15,6 +19,9 @@ on:
 
 jobs:
   generate-matrix:
+    # Only workflow_run events are gated on the upstream x86_64 result; push
+    # and other triggers have no workflow_run payload and must still run.
+    if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }}
     uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
     with:
       package-type: wheel
diff --git a/pyproject.toml b/pyproject.toml
index a28eb1b046..a9b07ced0b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -88,6 +88,7 @@ dev = [
     "isort",
     "ruff",
     "pytest",
+    "pytest-cov",
     "pytest-xdist",
     "parameterized>=0.2.0",
     "expecttest==0.1.6",
@@ -113,6 +114,44 @@ Changelog = "https://github.com/pytorch/tensorrt/releases"
 package-dir = { "" = "py" }
 include-package-data = false
 
+[tool.coverage.paths]
+source = [
+    "py/torch_tensorrt",
+    "*/site-packages/torch_tensorrt"
+]
+
+[tool.coverage.run]
+source = ["torch_tensorrt"]
+omit = [
+    "tests/*"
+]
+data_file=".coverage"
+relative_files=true
+branch = true
+
+[tool.coverage.report]
+skip_covered = true
+ignore_errors = true
+exclude_lines = [
+    "pragma: no cover",
+    # Don't complain about missing debug or verbose code
+    "def __repr__",
+    "if verbose",
+    # Don't complain if tests don't hit defensive exception handling code
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "raise RuntimeError",
+    "raise ValueError",
+    "raise KeyError",
+    "raise AttributeError",
+    "except ImportError",
+    # Don't complain if non-runnable code isn't run
+    "if __name__ == \"__main__\":",
+    "if TYPE_CHECKING:",
+    # Don't complain about abstract methods, they aren't run
+    "@(abc\\.)?abstractmethod",
+]
+
 [tool.uv]
 package = true
 environments = ["sys_platform == 'linux'", "sys_platform == 'windows'"]
diff --git a/tests/py/core/test_classes.py b/tests/py/core/test_classes.py
index 62abeb6b1a..3bf7fc8ae8 100644
--- a/tests/py/core/test_classes.py
+++ b/tests/py/core/test_classes.py
@@ -2,16 +2,17 @@
 import unittest
 from typing import Dict
 
+import pytest
+import tensorrt as trt
 import torch
 import torch_tensorrt
 import torch_tensorrt as torchtrt
 import torchvision.models as models
 from torch_tensorrt.dynamo.runtime._TorchTensorRTModule import TorchTensorRTModule
 
-import tensorrt as trt
-
 
 class TestDevice(unittest.TestCase):
+    @pytest.mark.critical
     def test_from_string_constructor(self):
         device = torchtrt.Device("cuda:0")
         self.assertEqual(device.device_type, torchtrt.DeviceType.GPU)
diff --git a/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py b/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py
index 8ab47def08..024e28d3fd 100644
--- a/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py
+++ b/tests/py/dynamo/automatic_plugin/test_automatic_plugin.py
@@ -1,5 +1,6 @@
 from typing import Tuple
 
+import pytest
 import torch
 import torch.nn as nn
 import torch_tensorrt
@@ -66,6 +67,7 @@ class TestAutomaticPlugin(DispatchTestCase):
             ((256, 256), torch.int),
         ]
     )
+    @pytest.mark.critical
     def test_mul_plugin_float(self, input_shape, dtype):
         class elementwise_mul(nn.Module):
             def forward(self, lhs, rhs):
diff --git a/tests/py/dynamo/backend/test_backend_compiler.py b/tests/py/dynamo/backend/test_backend_compiler.py
index 6369d3805c..39bd3a9c17 100644
--- a/tests/py/dynamo/backend/test_backend_compiler.py
+++ b/tests/py/dynamo/backend/test_backend_compiler.py
@@ -1,6 +1,7 @@
 # type: ignore
 from copy import deepcopy
 
+import pytest
 import torch
 import torch_tensorrt
 from torch.testing._internal.common_utils import TestCase, run_tests
@@ -10,6 +11,7 @@
 
 
 class TestTRTModuleNextCompilation(TestCase):
+    @pytest.mark.critical
     def test_trt_module_next_full_support(self):
         class FullySupportedMultiOp(torch.nn.Module):
             def forward(self, x, y):
diff --git a/tests/py/dynamo/distributed/test_nccl_ops.py b/tests/py/dynamo/distributed/test_nccl_ops.py
index 89c94300b7..67bd09596b 100644
--- a/tests/py/dynamo/distributed/test_nccl_ops.py
+++ b/tests/py/dynamo/distributed/test_nccl_ops.py
@@ -1,5 +1,6 @@
 import os
 
+import pytest
 import torch
 import torch.distributed as dist
 import torch.nn as nn
@@ -17,6 +18,7 @@
 
 
 class TestGatherNcclOpsConverter(DispatchTestCase):
     @parameterized.expand([8])
+    @pytest.mark.critical
     def test_nccl_ops(self, linear_layer_dim):
         class DistributedGatherModel(nn.Module):
diff --git a/tests/py/dynamo/lowering/test_aten_lowering_passes.py b/tests/py/dynamo/lowering/test_aten_lowering_passes.py
index 69c91db475..2571ce5443 100644
--- a/tests/py/dynamo/lowering/test_aten_lowering_passes.py
+++ b/tests/py/dynamo/lowering/test_aten_lowering_passes.py
@@ -1,3 +1,4 @@
+import pytest
 import torch
 import torch_tensorrt
 from torch.testing._internal.common_utils import TestCase, run_tests
@@ -6,6 +7,7 @@
 
 
 class TestInputAsOutput(TestCase):
+    @pytest.mark.critical
     def test_input_as_output(self):
         class InputAsOutput(torch.nn.Module):
             def forward(self, x, y):
@@ -56,6 +58,7 @@ def forward(self, x, y):
 
 
 class TestLoweringPassMembership(TestCase):
+    @pytest.mark.critical
     def insert_at_end(self):
         from torch_tensorrt.dynamo.lowering.passes import (
             ATEN_LOWERING_PASSES,
diff --git a/tests/py/dynamo/lowering/test_decompositions.py b/tests/py/dynamo/lowering/test_decompositions.py
index b63e0f3bf7..1eae282a48 100644
--- a/tests/py/dynamo/lowering/test_decompositions.py
+++ b/tests/py/dynamo/lowering/test_decompositions.py
@@ -9,11 +9,12 @@
 )
 from torch.testing._internal.common_utils import TestCase, run_tests
 from torch_tensorrt.dynamo.utils import ATOL, RTOL
-
+import pytest
 from ..testing_utilities import DECIMALS_OF_AGREEMENT, lower_graph_testing
 
 
 class TestLowering(TestCase):
+    @pytest.mark.critical
     def test_lowering_inplace_op(self):
         class InPlace(torch.nn.Module):
             def __init__(self, *args, **kwargs) -> None:
diff --git a/tests/py/dynamo/models/test_dyn_models.py b/tests/py/dynamo/models/test_dyn_models.py
index d5627499f5..f8b9b59aea 100644
--- a/tests/py/dynamo/models/test_dyn_models.py
+++ b/tests/py/dynamo/models/test_dyn_models.py
@@ -11,6 +11,7 @@
 assertions = unittest.TestCase()
 
 
+@pytest.mark.critical
 @pytest.mark.unit
 def test_base_dynamic(ir):
     """
@@ -175,6 +176,7 @@ def forward(self, x):
     )
 
 
+@pytest.mark.critical
 @pytest.mark.unit
 def test_resnet_dynamic(ir):
     """
diff --git a/tests/py/dynamo/models/test_engine_cache.py b/tests/py/dynamo/models/test_engine_cache.py
index 0bc7c665b3..158b9edb43 100644
--- a/tests/py/dynamo/models/test_engine_cache.py
+++ b/tests/py/dynamo/models/test_engine_cache.py
@@ -57,6 +57,7 @@ def load(self, hash: str, prefix: str = "blob") -> Optional[bytes]:
 
 
 class TestHashFunction(TestCase):
+    @pytest.mark.critical
     def test_reexport_is_equal(self):
         pyt_model = models.resnet18(pretrained=True).eval().to("cuda")
         example_inputs = (torch.randn((100, 3, 224, 224)).to("cuda"),)
diff --git a/tests/py/dynamo/models/test_export_kwargs_serde.py b/tests/py/dynamo/models/test_export_kwargs_serde.py
index 928d62e7ba..242dbb54fd 100644
--- a/tests/py/dynamo/models/test_export_kwargs_serde.py
+++ b/tests/py/dynamo/models/test_export_kwargs_serde.py
@@ -23,6 +23,7 @@
 
 
 @pytest.mark.unit
+@pytest.mark.critical
 def test_custom_model():
     class net(nn.Module):
         def __init__(self):
@@ -83,6 +84,7 @@ def forward(self, x, b=5, c=None, d=None):
 
 
 @pytest.mark.unit
+@pytest.mark.critical
 def test_custom_model_with_dynamo_trace():
     class net(nn.Module):
         def __init__(self):
diff --git a/tests/py/dynamo/models/test_export_serde.py b/tests/py/dynamo/models/test_export_serde.py
index 52e5eefb63..017214b25f 100644
--- a/tests/py/dynamo/models/test_export_serde.py
+++ b/tests/py/dynamo/models/test_export_serde.py
@@ -17,6 +17,7 @@
 trt_ep_path = os.path.join(tempfile.gettempdir(), "trt.ep")
 
 
+@pytest.mark.critical
 @pytest.mark.unit
 def test_base_full_compile(ir):
     """
diff --git a/tests/py/dynamo/models/test_models.py b/tests/py/dynamo/models/test_models.py
index b0ebbf5fa4..86251a38c9 100644
--- a/tests/py/dynamo/models/test_models.py
+++ b/tests/py/dynamo/models/test_models.py
@@ -85,6 +85,7 @@ def test_resnet18_cpu_offload(ir):
 
 
 @pytest.mark.unit
+@pytest.mark.critical
 def test_mobilenet_v2(ir):
     model = models.mobilenet_v2(pretrained=True).eval().to("cuda")
     input = torch.randn((1, 3, 224, 224)).to("cuda")
diff --git a/tests/py/dynamo/models/test_reexport.py b/tests/py/dynamo/models/test_reexport.py
index 297410ae55..4a341dc9ac 100644
--- a/tests/py/dynamo/models/test_reexport.py
+++ b/tests/py/dynamo/models/test_reexport.py
@@ -71,6 +71,7 @@ def forward(self, x):
 
 
 @pytest.mark.unit
+@pytest.mark.critical
 def test_base_full_compile_multiple_outputs(ir):
     """
     This tests export serde functionality on a base model
diff --git a/tests/py/dynamo/partitioning/test_fast_partitioning.py b/tests/py/dynamo/partitioning/test_fast_partitioning.py
index 30f691d9e9..728b3b9f84 100644
--- a/tests/py/dynamo/partitioning/test_fast_partitioning.py
+++ b/tests/py/dynamo/partitioning/test_fast_partitioning.py
@@ -1,6 +1,7 @@
 from copy import deepcopy
 
 import numpy as np
+import pytest
 import torch
 from torch.testing._internal.common_utils import TestCase, run_tests
 from torch_tensorrt.dynamo import partitioning
@@ -55,6 +56,7 @@ def forward(self, x, y):
             "Single operators can be segmented if full compilation is required",
         )
 
+    @pytest.mark.critical
     def test_partition_fully_supported_multi_op(self):
         class FullySupportedMultiOp(torch.nn.Module):
             def __init__(self, *args, **kwargs) -> None:
@@ -83,6 +85,7 @@ def forward(self, x, y):
             "All operators are supported, there should be one segment",
         )
 
+    @pytest.mark.critical
     def test_partition_partially_supported_multi_op(self):
         class PartiallySupportedMultiOp(torch.nn.Module):
             def __init__(self, *args, **kwargs) -> None:
@@ -112,6 +115,7 @@ def forward(self, x, y):
             "Unsupported operators interleave supported ones, expected 2 segments",
         )
 
+    @pytest.mark.critical
     def test_partition_partially_supported_with_torch_executed_ops(self):
         class PartiallySupportedMultiOp(torch.nn.Module):
             def __init__(self, *args, **kwargs) -> None:
diff --git a/tests/py/dynamo/partitioning/test_global_partitioning.py b/tests/py/dynamo/partitioning/test_global_partitioning.py
index 887fa35659..aaf5add515 100644
--- a/tests/py/dynamo/partitioning/test_global_partitioning.py
+++ b/tests/py/dynamo/partitioning/test_global_partitioning.py
@@ -95,6 +95,7 @@ def forward(self, x, y):
             "Single operators can be segmented if full compilation is required",
         )
 
+    @pytest.mark.critical
     def test_partition_fully_supported_multi_op(self):
         class FullySupportedMultiOp(torch.nn.Module):
             def __init__(self, *args, **kwargs) -> None:
diff --git a/tests/py/dynamo/runtime/test_output_allocator.py b/tests/py/dynamo/runtime/test_output_allocator.py
index c915f42173..2ec2e229ed 100644
--- a/tests/py/dynamo/runtime/test_output_allocator.py
+++ b/tests/py/dynamo/runtime/test_output_allocator.py
@@ -48,6 +48,7 @@ class TestOutputAllocatorStaticModel(TestCase):
             ("cpp_runtime", False),
         ]
     )
+    @pytest.mark.critical
     def test_cudagraphs_and_output_allocator(self, _, use_python_runtime):
         model = StaticModel().eval().cuda()
         inputs = [torch.randn((2, 3), dtype=torch.float).cuda()]
@@ -157,6 +158,7 @@ class TestOutputAllocatorDDSModel(TestCase):
             ("cpp_runtime", False),
         ]
     )
+    @pytest.mark.critical
     def test_cudagraphs_and_output_allocator(self, _, use_python_runtime):
         model = DDSModel().eval().cuda()
         inputs = (torch.randint(low=0, high=3, size=(10,), dtype=torch.int).to("cuda"),)
diff --git a/tests/py/dynamo/runtime/test_pre_allocated_outputs.py b/tests/py/dynamo/runtime/test_pre_allocated_outputs.py
index b8c7b61fb3..2bffc1b35f 100644
--- a/tests/py/dynamo/runtime/test_pre_allocated_outputs.py
+++ b/tests/py/dynamo/runtime/test_pre_allocated_outputs.py
@@ -1,3 +1,4 @@
+import pytest
 import torch
 import torch_tensorrt as torchtrt
 from parameterized import parameterized
@@ -14,6 +15,7 @@ class TestPreAllocatedOutputs(TestCase):
             ("cpp_runtime", False),
         ]
     )
+    @pytest.mark.critical
     def test_pre_allocated_outputs_default(self, _, use_python_runtime):
         class SampleModel(torch.nn.Module):
             def forward(self, x):
diff --git a/tests/py/requirements.txt b/tests/py/requirements.txt
index 13aa40fe44..4a372f75c6 100644
--- a/tests/py/requirements.txt
+++ b/tests/py/requirements.txt
@@ -6,6 +6,7 @@ numpy
 parameterized>=0.2.0
 pytest>=8.2.1
 pytest-xdist>=3.6.1
+pytest-cov
 pyyaml
 timm>=1.0.3
 flashinfer-python;  python_version < "3.13"