diff --git a/.flake8 b/.flake8
index 200b715f..ac09a120 100644
--- a/.flake8
+++ b/.flake8
@@ -1,4 +1,10 @@
 [flake8]
+exclude = 
+    .git,
+    __pycache__,
+    build,
+    dist,
+    my_notebooks
 extend-ignore =
 	# Allow whitespace before ':' because in some cases this whitespace
 	# avoids confusing the operator precedence,
diff --git a/.github/ISSUE_TEMPLATE/release.md b/.github/ISSUE_TEMPLATE/release.md
index 37795611..76aea684 100644
--- a/.github/ISSUE_TEMPLATE/release.md
+++ b/.github/ISSUE_TEMPLATE/release.md
@@ -20,10 +20,11 @@ _to be written during release process_
 - [ ] Create PR to merge from current develop into release branch
 - [ ] Write Changelog in PR and request review
 - [ ] Review the PR (if OK - merge, but DO NOT delete the branch)
-- [ ] Minimize packages in requirements.txt and conda-forge submission. Update packages in setup.py
+- [ ] On `Release` ,Minimize packages in requirements.txt and conda-forge submission. Update packages in pyproject.toml
 - [ ] Check unit tests -> Check all tests pass on CPU and [GPU (e.g. on colab)](https://colab.research.google.com/drive/1lFpdtY5zV7VpW88aazedA3n4khedHDQP?usp=sharing#scrollTo=IbU2vypPQ-Ej) and that there are tests for all important features
 - [ ] Check documentation -> Check presence of documentation for all features by locally building the docs on the release
-- [ ] Change version number in setup.py and docs (under conf.py)
+- [ ] Change version number in pyproject.toml and docs (under conf.py) and in `__init__.py`
+- [ ] In `__init__.py`, set `TORCHQUAD_DISABLE_LOGGING` to `True`
 - [ ] Trigger the Upload Python Package to testpypi GitHub Action (https://github.com/esa/torchquad/actions/workflows/deploy_to_test_pypi.yml) on the release branch (need to be logged in)
 - [ ] Test the build on testpypi (with `pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple torchquad`)
 - [ ] Finalize release on the release branch
diff --git a/.github/workflows/autoblack.yml b/.github/workflows/autoblack.yml
index 12d48bc0..2a63ecea 100644
--- a/.github/workflows/autoblack.yml
+++ b/.github/workflows/autoblack.yml
@@ -1,9 +1,4 @@
-# GitHub Action that uses Black to reformat the Python code in an incoming pull request.
-# If all Python code in the pull request is compliant with Black then this Action does nothing.
-# Othewrwise, Black is run and its changes are committed back to the incoming pull request.
-# https://github.com/cclauss/autoblack
-
-name: autoblack
+name: check_formatting
 on: [pull_request]
 jobs:
   build:
@@ -15,6 +10,6 @@ jobs:
         with:
           python-version: 3.11
       - name: Install Black
-        run: pip install black==24.4.2
-      - name: Run black --check .
-        run: black --check .
+        run: pip install black==25.1.0
+      - name: Run black --check --line-length 100 .
+        run: black --check --line-length 100 .
diff --git a/.github/workflows/deploy_to_pypi.yml b/.github/workflows/deploy_to_pypi.yml
index 5b0e0910..1a326c26 100644
--- a/.github/workflows/deploy_to_pypi.yml
+++ b/.github/workflows/deploy_to_pypi.yml
@@ -14,12 +14,13 @@ jobs:
           python-version: "3.10"
       - name: Install dependencies
         run: |
-          pip install setuptools wheel twine
+          pip install build twine
           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
       - name: Build and publish to PyPI
         env:
           TWINE_USERNAME: "__token__"
           TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+          TORCHQUAD_RELEASE_BUILD: "True"
         run: |
-          python setup.py sdist bdist_wheel
+          python -m build
           twine upload dist/*
diff --git a/.github/workflows/deploy_to_test_pypi.yml b/.github/workflows/deploy_to_test_pypi.yml
index 82ed3bf9..3e1046e7 100644
--- a/.github/workflows/deploy_to_test_pypi.yml
+++ b/.github/workflows/deploy_to_test_pypi.yml
@@ -17,12 +17,13 @@ jobs:
           python-version: "3.10"
       - name: Install dependencies
         run: |
-          pip install setuptools wheel twine
+          pip install build twine
           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
       - name: Build and publish to Test PyPI
         env:
           TWINE_USERNAME: "__token__"
           TWINE_PASSWORD: ${{ secrets.TEST_PYPI_TOKEN }}
+          TORCHQUAD_RELEASE_BUILD: "True"
         run: |
-          python setup.py sdist bdist_wheel
+          python -m build
           twine upload -r testpypi dist/*
\ No newline at end of file
diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
index 00fecde7..fe84dcd7 100644
--- a/.github/workflows/run_tests.yml
+++ b/.github/workflows/run_tests.yml
@@ -54,17 +54,18 @@ jobs:
         shell: bash -l {0}
         run: |
           micromamba activate torchquad
-          cd torchquad/tests/
+          pip install -e .
+          cd tests/
           pip install pytest
           pip install pytest-error-for-skips
           pip install pytest-cov
-          pytest -ra --error-for-skips --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=../../torchquad . | tee pytest-coverage.txt
+          pytest -ra --error-for-skips --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=../torchquad . | tee pytest-coverage.txt
       - name: pytest coverage comment
         uses: MishaKav/pytest-coverage-comment@main
         if: github.event_name == 'pull_request'
         continue-on-error: true
         with:
-          pytest-coverage-path: ./torchquad/tests/pytest-coverage.txt
+          pytest-coverage-path: ./tests/pytest-coverage.txt
           title: Coverage Report
           badge-title: Overall Coverage
           hide-badge: false
@@ -72,4 +73,4 @@ jobs:
           create-new-comment: false
           hide-comment: false
           report-only-changed-files: false
-          junitxml-path: ./torchquad/tests/pytest.xml
+          junitxml-path: ./tests/pytest.xml
diff --git a/.gitignore b/.gitignore
index 035839d8..a3c0f45b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -132,3 +132,5 @@ my_notebooks
 .vscode
 pytest-coverage.txt
 pytest.xml
+CLAUDE.md
+.claude/
diff --git a/README.md b/README.md
index dc1d62c2..d3ab7680 100644
--- a/README.md
+++ b/README.md
@@ -148,9 +148,10 @@ import torchquad
 torchquad._deployment_test()
 ```
 
-After cloning the repository, developers can check the functionality of `torchquad` by running the following command in the `torchquad/tests` directory:
+After cloning the repository, developers can check the functionality of `torchquad` by running
 
 ```sh
+pip install -e .
 pytest
 ```
 
@@ -192,8 +193,48 @@ integral_value = mc.integrate(
     backend="torch",
 )
 ```
-To change the logger verbosity, set the `TORCHQUAD_LOG_LEVEL` environment
-variable; for example `export TORCHQUAD_LOG_LEVEL=DEBUG`.
+## Logging Configuration
+
+By default, torchquad disables its internal logging when installed from PyPI to avoid interfering with other loggers in your application. To enable logging change `TORCHQUAD_DISABLE_LOGGING` in `__init__.py`:
+
+1. **Set the log level**: Use the `TORCHQUAD_LOG_LEVEL` environment variable:
+   ```bash
+   export TORCHQUAD_LOG_LEVEL=DEBUG   # For detailed debugging
+   export TORCHQUAD_LOG_LEVEL=INFO    # For general information  
+   export TORCHQUAD_LOG_LEVEL=WARNING # For warnings only (default when enabled)
+   ```
+
+2. **Enable logging programmatically**:
+   ```python
+   import torchquad
+   torchquad.set_log_level("DEBUG")  # This will enable and configure logging
+   ```
+
+## Multi-GPU Usage
+
+torchquad supports multi-GPU systems through standard PyTorch practices. The recommended approach is to use the `CUDA_VISIBLE_DEVICES` environment variable to control GPU selection:
+
+```bash
+# Use specific GPU
+export CUDA_VISIBLE_DEVICES=0  # Use GPU 0
+python your_script.py
+
+export CUDA_VISIBLE_DEVICES=1  # Use GPU 1  
+python your_script.py
+
+# Use multiple GPUs with separate processes
+export CUDA_VISIBLE_DEVICES=0 && python integration_script.py &
+export CUDA_VISIBLE_DEVICES=1 && python integration_script.py &
+```
+
+For parallel processing across multiple GPUs, we recommend spawning separate processes rather than trying to coordinate multiple GPUs within a single process. This approach:
+
+- Provides clean separation between GPU processes
+- Avoids complex device management
+- Follows PyTorch best practices
+- Enables easy load balancing and error handling
+
+For detailed examples and advanced multi-GPU patterns, see the [Multi-GPU Usage section](https://torchquad.readthedocs.io/en/main/tutorial.html#multi-gpu-usage) in our documentation.
 
 You can find all available integrators [here](https://torchquad.readthedocs.io/en/main/integration_methods.html).
 
@@ -206,13 +247,60 @@ See the [open issues](https://github.com/esa/torchquad/issues) for a list of pro
 <!-- PERFORMANCE -->
 ## Performance
 
-Using GPUs torchquad scales particularly well with integration methods that offer easy parallelization. For example, below you see error and runtime results for integrating the function `f(x,y,z) = sin(x * (y+1)²) * (z+1)` on a consumer-grade desktop PC.
+Using GPUs, torchquad scales particularly well with integration methods that offer easy parallelization. The benchmarks below demonstrate performance across challenging functions from 1D to 15D, comparing torchquad's GPU-accelerated methods against scipy's CPU implementations.
+
+<!-- TODO Update plot links -->
+### Convergence Analysis
+![](https://github.com/esa/torchquad/blob/benchmark-0.4.1/resources/torchquad_convergence.png?raw=true)
+*Convergence comparison across challenging test functions from 1D to 15D. GPU-accelerated torchquad methods demonstrate great performance, particularly for high-dimensional integration where scipy's nquad becomes computationally infeasible. Beyond 1D, torchquad significantly outperforms scipy in efficiency.*
+
+### Runtime vs Error Efficiency  
+![](https://github.com/esa/torchquad/blob/benchmark-0.4.1/resources/torchquad_runtime_vs_error.png?raw=true)
+*Runtime-error trade-offs across dimensions. Lower-left positions indicate better performance. While scipy's traditional methods are competitive for simple 1D problems, torchquad's GPU acceleration provides orders of magnitude better performance for multi-dimensional integration, achieving both faster computation and lower errors.*
+
+### Scaling Performance
+![](https://github.com/esa/torchquad/blob/benchmark-0.4.1/resources/torchquad_scaling_analysis.png?raw=true)
+*Scaling investigation across problem sizes and dimensions of the different methods in torchquad.*
+
+### Vectorized Integration Speedup
+![](https://github.com/esa/torchquad/blob/benchmark-0.4.1/resources/torchquad_vectorized_speedup.png?raw=true)
+*Strong performance gains when evaluating multiple integrands simultaneously. The vectorized approach shows exponential speedup (up to 200x) compared to sequential evaluation, making torchquad ideal for parameter sweeps, uncertainty quantification, and machine learning applications requiring batch integration.*
+
+### Framework Comparison  
+![](https://github.com/esa/torchquad/blob/benchmark-0.4.1/resources/torchquad_framework_comparison.png?raw=true)
+*Cross-framework performance comparison for 1D integration using Monte Carlo and Simpson methods. Demonstrates torchquad's consistent API across PyTorch, TensorFlow, JAX, and NumPy backends, with GPU acceleration providing significant performance advantages for large number of function evaluations. All frameworks achieve similar accuracy while showcasing the computational benefits of GPU acceleration for parallel integration methods.*
+
+### Running Benchmarks
+
+To reproduce these benchmarks or test performance on your hardware:
+
+```bash
+# Run all benchmarks (convergence, framework comparison, scaling, vectorized)
+python benchmarking/modular_benchmark.py --dimensions 1,3,7,15
+
+# Run specific benchmark types
+python benchmarking/modular_benchmark.py --convergence-only --dimensions 1,3,7,15
+python benchmarking/modular_benchmark.py --scaling-only
+python benchmarking/modular_benchmark.py --framework-only
+
+# Generate all plots from results
+python benchmarking/plot_results.py
+
+# Configure benchmark parameters
+# Edit benchmarking/benchmarking_cfg.toml to adjust:
+# - Evaluation point ranges
+# - Framework backends to test
+# - Timeout limits  
+# - Method selection
+# - scipy integration tolerances
+```
 
-![](https://github.com/esa/torchquad/blob/main/resources/torchquad_runtime.png?raw=true)
-*Runtime results of the integration. Note the far superior scaling on the GPU (solid line) in comparison to the CPU (dashed and dotted) for both methods.*
+**New Features:**
+- **Analytic Reference Values**: Uses SymPy for exact analytic solutions where possible, providing highly accurate reference values for error calculations
+- **Enhanced Test Functions**: Analytically tractable but numerically challenging functions that better demonstrate convergence behavior
+- **Framework Comparison**: Cross-backend performance benchmarking across PyTorch, TensorFlow, JAX, and NumPy with GPU/CPU device comparisons
 
-![](https://github.com/esa/torchquad/blob/main/resources/torchquad_convergence.png?raw=true)
-*Convergence results of the integration. Note that Simpson quickly reaches floating point precision. Monte Carlo is not competitive here given the low dimensionality of the problem.*
+**Hardware:** RTX 4060 Ti 16GB, i5-13400F, Precision: float32
 
 <!-- CONTRIBUTING -->
 ## Contributing
@@ -245,12 +333,12 @@ Please note that PRs should be created from and into the `develop` branch. For e
 3. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
 4. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
 5. Push to the Branch (`git push origin feature/AmazingFeature`)
-6. Open a Pull Request on the `develop` branch, *not* `main` (NB: We autoformat every PR with black. Our GitHub actions may create additional commits on your PR for that reason.)
+6. Open a Pull Request on the `develop` branch, *not* `main`
 
 and we will have a look at your contribution as soon as we can.
 
-Furthermore, please make sure that your PR passes all automated tests. Review will only happen after that.
-Only PRs created on the `develop` branch with all tests passing will be considered. The only exception to this rule is if you want to update the documentation in relation to the current release on conda / pip. In that case you may ask to merge directly into `main`.
+Furthermore, please make sure that your PR passes all automated tests, you can ping `@gomezzz` to run the CI. Review will only happen after that.
+Only PRs created on the `develop` branch with all tests passing will be considered. The only exception to this rule is if you want to update the documentation in relation to the current release on conda / pip. In that case you open a PR directly into `main`.
 
 <!-- LICENSE -->
 ## License
diff --git a/benchmarking/benchmarking_cfg.toml b/benchmarking/benchmarking_cfg.toml
new file mode 100644
index 00000000..f8cddee6
--- /dev/null
+++ b/benchmarking/benchmarking_cfg.toml
@@ -0,0 +1,147 @@
+# Benchmarking configuration for torchquad
+# Adjust these parameters to control benchmark execution time and detail
+
+[general]
+device_info = "RTX 4060 Ti 16GB, i5-13400F"
+precision = "float32"
+save_path = "resources"
+log_level = "INFO"
+
+[convergence]
+# Enable/disable specific dimensions
+enable_1d = true
+enable_3d = true
+enable_7d = true
+enable_15d = true
+
+# Reference calculation points (for MC reference)
+reference_points_1d = 256_000_000
+reference_points_3d = 256_000_000
+reference_points_7d = 500_000_000
+reference_points_15d = 333_000_000
+
+# Evaluation points for each dimension
+# Adjust these to control execution time
+[convergence.points_1d]
+simpson = [10_000, 100_000, 1_000_000, 10_000_000, 100_000_000]
+gauss_legendre = [100, 500, 1000, 5000]
+monte_carlo = [10_000, 100_000, 1_000_000, 10_000_000, 100_000_000, 500_000_000]
+vegas = [100_000, 1_000_000, 10_000_000, 100_000_000]
+scipy_grids = [10001, 100_001, 1_000_001]
+
+[convergence.points_3d]
+simpson = [10_000, 100_000, 1_000_000, 10_000_000, 100_000_000]
+gauss_legendre = [100, 500, 1000, 5000]
+monte_carlo = [10_000, 100_000, 1_000_000, 10_000_000, 100_000_000, 250_000_000]
+vegas = [100_000, 1_000_000, 10_000_000, 100_000_000, 250_000_000]
+
+[convergence.points_7d]
+simpson = [10_000, 100_000, 1_000_000, 10_000_000, 100_000_000]
+gauss_legendre = [1000, 5000, 100_00, 50_000, 100_000, 500_000]
+monte_carlo = [
+    10_000,
+    100_000,
+    1_000_000,
+    10_000_000,
+    50_000_000,
+    100_000_000,
+    150_000_000,
+]
+vegas = [100_000, 1_000_000, 10_000_000, 100_000_000, 250_000_000]
+
+[convergence.points_15d]
+simpson = [10_000, 100_000, 1_000_000, 10_000_000, 100_000_000, 250_000_000]
+gauss_legendre = [50000, 100000, 500_000, 1_000_000]
+monte_carlo = [10_000, 100_000, 1_000_000, 10_000_000, 100_000_000, 150_000_000]
+vegas = [100_000, 1_000_000, 10_000_000, 100_000_000, 250_000_000, 300_000_000]
+
+[scaling]
+# Scaling analysis configuration for runtime/feval from 10K to 100M
+# Function evaluations to test
+feval_counts = [
+    50_000,
+    100_000,
+    500_000,
+    1_000_000,
+    5_000_000,
+    10_000_000,
+    50_000_000,
+    100_000_000,
+]
+
+# Gauss-Legendre needs different iteration numbers due to different scaling
+gauss_legendre_fevals_1d = [100, 250, 500, 1000]
+gauss_legendre_fevals_7d = [5000, 10_000, 50_000, 100_000]
+
+# Maximum fevals for different methods
+max_fevals_grid_1d = 100_000_000
+max_fevals_grid_7d = 100_000_000
+max_fevals_mc = 100_000_000
+
+# Number of runs for error estimation
+num_runs = 4
+warmup_runs = 1
+
+[vectorized]
+grid_sizes = [1, 5, 20, 50, 100, 200]
+num_runs = 3
+integration_points = 10001
+
+[scipy]
+# scipy integration tolerances for high dimensions
+nquad_limit_1d = 200
+nquad_limit_3d = 5
+nquad_limit_7d = 2
+nquad_limit_15d = 1
+nquad_epsabs = 1e-2
+nquad_epsrel = 1e-2
+nquad_epsabs_15d = 1e-3
+nquad_epsrel_15d = 1e-3
+
+[framework_comparison]
+# Framework comparison benchmark configuration for 1D MC and Simpson
+enable = true
+dimension = 1
+methods = ["monte_carlo", "simpson"]
+
+# Backends to test (format: "backend_device")
+backends = [
+    "torch_gpu",
+    "torch_cpu",
+    "tensorflow_gpu",
+    "tensorflow_cpu",
+    "numpy_cpu",
+    "jax_cpu",
+]
+
+# Evaluation points for framework comparison (1D only)
+points_monte_carlo = [
+    10000,
+    50000,
+    100000,
+    500000,
+    1000000,
+    1_000_000,
+    10_000_000,
+    50_000_000,
+    100_000_000,
+]
+points_simpson = [
+    10000,
+    50000,
+    100000,
+    500000,
+    1_000_000,
+    10_000_000,
+    50_000_000,
+    100_000_000,
+]
+
+# Number of runs for timing stability
+num_runs = 3
+warmup_runs = 1
+
+[timeouts]
+# Maximum time in seconds for each benchmark component
+max_time_per_method = 300 # 5 minutes per method
+max_time_total = 1800     # 30 minutes total
diff --git a/benchmarking/framework_worker.py b/benchmarking/framework_worker.py
new file mode 100644
index 00000000..dd4eefb4
--- /dev/null
+++ b/benchmarking/framework_worker.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python3
+"""
+Worker script for individual backend framework testing.
+This runs in isolation to avoid TensorFlow device configuration conflicts.
+"""
+
+import sys
+import json
+import time
+import gc
+import warnings
+
+warnings.filterwarnings("ignore")
+
+
+def setup_backend(backend_name: str, device: str):
+    """Setup backend with appropriate device configuration."""
+    if backend_name == "torch":
+        import os
+
+        if device == "cpu":
+            # Force CPU by hiding GPU from PyTorch BEFORE importing
+            os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+
+        import torch
+        from torchquad import set_up_backend, enable_cuda
+
+        if device == "gpu" and torch.cuda.is_available():
+            set_up_backend("torch", data_type="float32")
+            enable_cuda(data_type="float32")
+        else:
+            set_up_backend("torch", data_type="float32")
+            torch.set_default_dtype(torch.float32)
+            # Double-check CPU usage for PyTorch
+            if device == "cpu":
+                torch.set_default_device("cpu")
+
+    elif backend_name == "tensorflow":
+        import os
+
+        if device == "cpu":
+            # Force CPU by hiding GPU from TensorFlow BEFORE importing
+            os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+            os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "false"
+
+        import tensorflow as tf
+        from torchquad import set_up_backend
+
+        # Set device policy before torchquad setup
+        if device == "cpu":
+            # Ensure CPU-only execution
+            tf.config.experimental.set_visible_devices([], "GPU")
+
+        set_up_backend("tensorflow", data_type="float32")
+
+    elif backend_name == "jax":
+        import os
+
+        if device == "cpu":
+            os.environ["JAX_PLATFORM_NAME"] = "cpu"
+
+        from torchquad import set_up_backend
+
+        set_up_backend("jax", data_type="float32")
+
+    elif backend_name == "numpy":
+        from torchquad import set_up_backend
+
+        set_up_backend("numpy", data_type="float32")
+    else:
+        raise ValueError(f"Unknown backend: {backend_name}")
+
+
+def get_test_function(backend_name: str):
+    """Get backend-specific test function."""
+    if backend_name == "torch":
+        import torch
+        import numpy as np
+
+        def torch_func(x):
+            step_func = torch.where(x[:, 0] > 0.7, 1.0, 0.0)
+            oscillatory = torch.sin(30 * torch.pi * x[:, 0]) * torch.exp(-10 * (x[:, 0] - 0.3) ** 2)
+            rapid_osc = 0.5 * torch.cos(50 * torch.pi * x[:, 0])
+            return oscillatory + rapid_osc + step_func + 0.1
+
+        return torch_func
+
+    elif backend_name == "tensorflow":
+        import tensorflow as tf
+        import numpy as np
+
+        def tf_func(x):
+            step_func = tf.where(x[:, 0] > 0.7, 1.0, 0.0)
+            oscillatory = tf.sin(30 * tf.constant(np.pi) * x[:, 0]) * tf.exp(
+                -10 * (x[:, 0] - 0.3) ** 2
+            )
+            rapid_osc = 0.5 * tf.cos(50 * tf.constant(np.pi) * x[:, 0])
+            return oscillatory + rapid_osc + step_func + 0.1
+
+        return tf_func
+
+    elif backend_name == "jax":
+        import jax.numpy as jnp
+
+        def jax_func(x):
+            step_func = jnp.where(x[:, 0] > 0.7, 1.0, 0.0)
+            oscillatory = jnp.sin(30 * jnp.pi * x[:, 0]) * jnp.exp(-10 * (x[:, 0] - 0.3) ** 2)
+            rapid_osc = 0.5 * jnp.cos(50 * jnp.pi * x[:, 0])
+            return oscillatory + rapid_osc + step_func + 0.1
+
+        return jax_func
+
+    elif backend_name == "numpy":
+        import numpy as np
+
+        def numpy_func(x):
+            step_func = np.where(x[:, 0] > 0.7, 1.0, 0.0)
+            oscillatory = np.sin(30 * np.pi * x[:, 0]) * np.exp(-10 * (x[:, 0] - 0.3) ** 2)
+            rapid_osc = 0.5 * np.cos(50 * np.pi * x[:, 0])
+            return oscillatory + rapid_osc + step_func + 0.1
+
+        return numpy_func
+    else:
+        raise ValueError(f"Unknown backend: {backend_name}")
+
+
+def create_integrator(method_name: str):
+    """Create integrator for the method."""
+    if method_name == "monte_carlo":
+        from torchquad import MonteCarlo
+
+        return MonteCarlo()
+    elif method_name == "simpson":
+        from torchquad import Simpson
+
+        return Simpson()
+    else:
+        raise ValueError(f"Unknown method: {method_name}")
+
+
+def run_backend_benchmark(
+    backend_name: str,
+    device: str,
+    method_name: str,
+    eval_points: list,
+    reference: float,
+    num_runs: int,
+    warmup_runs: int,
+):
+    """Run benchmark for a specific backend-method combination."""
+    try:
+        # Setup backend
+        setup_backend(backend_name, device)
+
+        # Get test function and integrator
+        test_func = get_test_function(backend_name)
+        integrator = create_integrator(method_name)
+
+        domain = [[0, 1]]
+        results = {"n_points": [], "errors": [], "times": [], "times_mean": [], "times_std": []}
+
+        for n_points in eval_points:
+            run_times = []
+            run_errors = []
+
+            # Perform multiple runs
+            for run in range(warmup_runs + num_runs):
+                is_warmup = run < warmup_runs
+
+                # Clear caches
+                gc.collect()
+
+                start_time = time.perf_counter()
+
+                # Run integration
+                if method_name == "monte_carlo":
+                    result = integrator.integrate(
+                        test_func, dim=1, N=n_points, integration_domain=domain, seed=42 + run
+                    )
+                else:
+                    result = integrator.integrate(
+                        test_func, dim=1, N=n_points, integration_domain=domain
+                    )
+
+                end_time = time.perf_counter()
+                elapsed = end_time - start_time
+
+                # Extract result value (backend-agnostic)
+                if hasattr(result, "item"):
+                    result_value = result.item()
+                elif hasattr(result, "numpy"):
+                    result_value = float(result.numpy())
+                else:
+                    result_value = float(result)
+
+                error = abs(result_value - reference)
+                error = max(error, 1e-16)
+
+                # Only record non-warmup runs
+                if not is_warmup:
+                    run_times.append(elapsed)
+                    run_errors.append(error)
+
+            if run_times:
+                import statistics
+
+                mean_time = statistics.mean(run_times)
+                std_time = statistics.stdev(run_times) if len(run_times) > 1 else 0
+                mean_error = statistics.mean(run_errors)
+
+                results["n_points"].append(n_points)
+                results["errors"].append(mean_error)
+                results["times"].append(mean_time)
+                results["times_mean"].append(mean_time)
+                results["times_std"].append(std_time)
+
+        return {"success": True, "results": results}
+
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+def main():
+    """Main worker function."""
+    if len(sys.argv) != 2:
+        print("Usage: framework_worker.py <json_config>")
+        sys.exit(1)
+
+    config = json.loads(sys.argv[1])
+
+    result = run_backend_benchmark(
+        config["backend_name"],
+        config["device"],
+        config["method_name"],
+        config["eval_points"],
+        config["reference"],
+        config["num_runs"],
+        config["warmup_runs"],
+    )
+
+    print(json.dumps(result))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarking/modular_benchmark.py b/benchmarking/modular_benchmark.py
new file mode 100644
index 00000000..0edfb0b3
--- /dev/null
+++ b/benchmarking/modular_benchmark.py
@@ -0,0 +1,1091 @@
+#!/usr/bin/env python3
+"""
+Modular torchquad benchmark with configuration support and incremental execution.
+
+This benchmark is designed to:
+1. Load configuration from benchmarking_cfg.toml
+2. Execute benchmarks incrementally by dimension
+3. Provide detailed logging and timing information
+4. Allow interruption and resumption of benchmarks
+5. Support adjustable parameters for different hardware
+
+Usage:
+    python benchmarking/modular_benchmark.py [--config benchmarking_cfg.toml] [--dim 1,3,7,15]
+"""
+
+import numpy as np
+import torch
+import time
+import warnings
+import logging
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+from scipy.integrate import quad, nquad
+
+try:
+    from scipy.integrate import trapezoid as trapz, simpson as simps
+except ImportError:
+    from scipy.integrate import trapz, simps
+from typing import Dict, List, Optional
+import gc
+import json
+
+try:
+    import toml
+except ImportError:
+    # Fallback to basic TOML parsing if toml module not available
+    toml = None
+
+# torchquad imports
+from torchquad import Simpson, GaussLegendre, MonteCarlo, VEGAS, enable_cuda, Boole, Trapezoid
+from torchquad.utils.set_precision import set_precision
+
+
+class ModularBenchmark:
+    """Modular benchmarking suite with configuration support."""
+
+    def __init__(self, config_path: str = "benchmarking/benchmarking_cfg.toml"):
+        self.config = self.load_config(config_path)
+        self.save_path = Path(self.config["general"]["save_path"])
+        self.save_path.mkdir(exist_ok=True)
+        self.setup_logging()
+        self.setup_backend()
+
+        # Results storage
+        self.results = {}
+        self.timing_info = {}
+
+    def load_config(self, config_path: str) -> dict:
+        """Load configuration from TOML file."""
+        if toml is None:
+            print("TOML module not available. Using default configuration.")
+            return self.get_default_config()
+
+        try:
+            with open(config_path, "r") as f:
+                config = toml.load(f)
+            print(f"Loaded configuration from {config_path}")
+            return config
+        except FileNotFoundError:
+            print(f"Configuration file {config_path} not found. Using defaults.")
+            return self.get_default_config()
+        except Exception as e:
+            print(f"Error loading config: {e}. Using defaults.")
+            return self.get_default_config()
+
+    def get_default_config(self) -> dict:
+        """Return default configuration if TOML file not found."""
+        return {
+            "general": {
+                "device_info": "Unknown GPU",
+                "precision": "float32",
+                "save_path": "resources",
+                "log_level": "INFO",
+            },
+            "convergence": {
+                "enable_1d": True,
+                "enable_3d": True,
+                "enable_7d": True,
+                "enable_15d": True,
+                "reference_points_1d": 1000000,
+                "reference_points_3d": 2000000,
+                "reference_points_7d": 1000000,
+                "reference_points_15d": 500000,
+                "points_1d": {
+                    "simpson": [10, 50, 100, 500, 1000, 5000],
+                    "gauss_legendre": [10, 50, 100, 500, 1000, 5000],
+                    "monte_carlo": [100, 1000, 10000, 50000, 100000],
+                    "vegas": [100, 1000, 10000, 50000],
+                    "scipy_grids": [51, 251, 1001],
+                },
+                "points_3d": {
+                    "simpson": [27, 125, 512, 1000, 4096],
+                    "gauss_legendre": [27, 125, 512, 1000, 4096],
+                    "monte_carlo": [1000, 10000, 50000, 100000, 500000],
+                    "vegas": [1000, 10000, 50000, 100000],
+                },
+                "points_7d": {
+                    "simpson": [128, 512, 2187],  # Limited for grid methods
+                    "gauss_legendre": [128, 512, 2187],  # Limited for grid methods
+                    "monte_carlo": [1000, 10000, 50000, 100000, 500000],
+                    "vegas": [1000, 10000, 50000, 100000],
+                },
+                "points_15d": {
+                    "simpson": [],  # Skip for 15D - too expensive
+                    "gauss_legendre": [],  # Skip for 15D - too expensive
+                    "monte_carlo": [10000, 50000, 100000, 500000],
+                    "vegas": [10000, 50000, 100000],
+                },
+            },
+            "scipy": {
+                "nquad_limit_1d": 200,
+                "nquad_limit_3d": 10,  # Reduced from 50 - was too slow
+                "nquad_limit_7d": 2,  # Very limited for 7D
+                "nquad_limit_15d": 2,  # Very limited for 15D
+                "nquad_epsabs_15d": 1e-2,  # Looser tolerance
+                "nquad_epsrel_15d": 1e-2,  # Looser tolerance
+            },
+            "timeouts": {
+                "max_time_per_method": 120,  # 2 minutes per method
+                "max_time_total": 900,  # 15 minutes total
+            },
+        }
+
+    def setup_logging(self):
+        """Configure logging based on config."""
+        log_level = getattr(logging, self.config["general"]["log_level"])
+
+        # Configure logging
+        logging.basicConfig(
+            level=log_level,
+            format="%(asctime)s - %(levelname)s - %(message)s",
+            handlers=[
+                logging.StreamHandler(),
+                logging.FileHandler(self.save_path / "benchmark.log"),
+            ],
+        )
+        self.logger = logging.getLogger(__name__)
+
+    def setup_backend(self):
+        """Configure backend with GPU acceleration."""
+        precision = self.config["general"]["precision"]
+        try:
+            set_precision(precision, backend="torch")
+            enable_cuda(data_type=precision)
+            self.device = "CUDA" if torch.cuda.is_available() else "CPU"
+
+            if torch.cuda.is_available():
+                gpu_name = torch.cuda.get_device_name()
+                gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
+                self.logger.info(f"GPU: {gpu_name}, Memory: {gpu_memory:.1f} GB")
+
+            self.logger.info(f"Using device: {self.device}, Precision: {precision}")
+        except Exception as e:
+            self.logger.warning(f"GPU setup failed, using CPU: {e}")
+            set_precision(precision, backend="torch")
+            self.device = "CPU"
+
+    # Test functions - challenging functions that show convergence differences
+    @staticmethod
+    def challenging_1d(x):
+        """1D: sin(30πx)exp(-10(x-0.3)²) + 0.5cos(50πx) + discontinuous step"""
+        step_func = torch.where(x[:, 0] > 0.7, 1.0, 0.0)
+        oscillatory = torch.sin(30 * torch.pi * x[:, 0]) * torch.exp(-10 * (x[:, 0] - 0.3) ** 2)
+        rapid_osc = 0.5 * torch.cos(50 * torch.pi * x[:, 0])
+        return oscillatory + rapid_osc + step_func + 0.1
+
+    @staticmethod
+    def challenging_1d_np(x):
+        """NumPy version for scipy."""
+        step_func = np.where(x > 0.7, 1.0, 0.0)
+        oscillatory = np.sin(30 * np.pi * x) * np.exp(-10 * (x - 0.3) ** 2)
+        rapid_osc = 0.5 * np.cos(50 * np.pi * x)
+        return oscillatory + rapid_osc + step_func + 0.1
+
+    @staticmethod
+    def challenging_3d(x):
+        """3D: Multiple sharp peaks + oscillatory + sharp ridge"""
+        peak1 = torch.exp(-20 * torch.sum((x - 0.2) ** 2, dim=1))
+        peak2 = torch.exp(-20 * torch.sum((x - 0.8) ** 2, dim=1))
+        peak3 = torch.exp(-20 * torch.sum((x - 0.5) ** 2, dim=1))
+        oscillatory = 0.2 * torch.prod(torch.sin(15 * torch.pi * x), dim=1)
+        ridge = torch.exp(-100 * (x[:, 0] - x[:, 1]) ** 2) * torch.exp(
+            -100 * (x[:, 1] - x[:, 2]) ** 2
+        )
+        return peak1 + peak2 + peak3 + oscillatory + 0.5 * ridge + 0.1
+
+    @staticmethod
+    def challenging_3d_np(*x):
+        """NumPy version for scipy."""
+        x_arr = np.array(x)
+        peak1 = np.exp(-20 * np.sum((x_arr - 0.2) ** 2))
+        peak2 = np.exp(-20 * np.sum((x_arr - 0.8) ** 2))
+        peak3 = np.exp(-20 * np.sum((x_arr - 0.5) ** 2))
+        oscillatory = 0.2 * np.prod(np.sin(15 * np.pi * x_arr))
+        ridge = np.exp(-100 * (x_arr[0] - x_arr[1]) ** 2) * np.exp(
+            -100 * (x_arr[1] - x_arr[2]) ** 2
+        )
+        return peak1 + peak2 + peak3 + oscillatory + 0.5 * ridge + 0.1
+
+    @staticmethod
+    def challenging_7d(x):
+        """7D: Rastrigin-like with Gaussian envelope + sharp peak"""
+        rastrigin = torch.sum(x**2 - 0.3 * torch.cos(8 * torch.pi * x), dim=1)
+        gaussian = 2.0 * torch.exp(-3 * torch.sum((x - 0.5) ** 2, dim=1))
+        sharp_peak = 5.0 * torch.exp(-50 * torch.sum((x - 0.3) ** 2, dim=1))
+        return 0.1 * rastrigin + gaussian + sharp_peak + 0.2
+
+    @staticmethod
+    def challenging_7d_np(*x):
+        """NumPy version for scipy."""
+        x_arr = np.array(x)
+        rastrigin = np.sum(x_arr**2 - 0.3 * np.cos(8 * np.pi * x_arr))
+        gaussian = 2.0 * np.exp(-3 * np.sum((x_arr - 0.5) ** 2))
+        sharp_peak = 5.0 * np.exp(-50 * np.sum((x_arr - 0.3) ** 2))
+        return 0.1 * rastrigin + gaussian + sharp_peak + 0.2
+
+    @staticmethod
+    def challenging_15d(x):
+        """15D: High-dimensional with multiple scales"""
+        linear_sum = torch.sum(torch.sin(torch.pi * x) * x, dim=1)
+        gaussian = torch.exp(-torch.sum((x - 0.5) ** 2, dim=1))
+        quadratic = 0.1 * torch.sum(x**2, dim=1)
+        return linear_sum + 2.0 * gaussian + quadratic + 0.5
+
+    @staticmethod
+    def challenging_15d_np(*x):
+        """NumPy version for scipy."""
+        x_arr = np.array(x)
+        linear_sum = np.sum(np.sin(np.pi * x_arr) * x_arr)
+        gaussian = np.exp(-np.sum((x_arr - 0.5) ** 2))
+        quadratic = 0.1 * np.sum(x_arr**2)
+        return linear_sum + 2.0 * gaussian + quadratic + 0.5
+
+    def get_reference_value(self, func, dim: int, domain: List[List[float]]) -> float:
+        """Get analytical reference value computed using SymPy."""
+        func_name = f"{dim}D"
+        self.logger.info(f"Using analytical reference value for {func_name}...")
+
+        # Analytical reference values computed using SymPy
+        # These have been validated against high-precision VEGAS/Boole computations
+        analytical_references = {
+            1: 4.0422850545e-01,  # Computed analytically using SymPy
+            3: 2.6605308056e-01,  # Validated against Boole's rule
+            7: 8.4401047230e-01,  # Validated against VEGAS
+            15: 6.3714799881e00,  # Validated against VEGAS
+        }
+
+        if dim in analytical_references:
+            reference = analytical_references[dim]
+            self.logger.info(f"Analytical reference for {dim}D: {reference:.8e}")
+            return reference
+        else:
+            # Fallback to numerical computation for unsupported dimensions
+            self.logger.warning(
+                f"No analytical reference for {dim}D, using numerical computation..."
+            )
+            try:
+                if dim <= 3:
+                    ref = Boole()
+                    ref_points = self.config["convergence"].get(f"reference_points_{dim}d", 1000000)
+                    ref_result = ref.integrate(
+                        func, dim=dim, N=ref_points, integration_domain=domain
+                    )
+                    self.logger.info(
+                        f"Boole's rule reference ({ref_points} pts): {ref_result.item():.8e}"
+                    )
+                else:
+                    vegas_ref = VEGAS()
+                    ref_points_key = f"reference_points_{dim}d"
+                    ref_points = self.config["convergence"].get(ref_points_key, 1000000)
+
+                    ref_result = vegas_ref.integrate(
+                        func, dim=dim, N=ref_points, integration_domain=domain, seed=12345
+                    )
+                    self.logger.info(f"VEGAS reference ({ref_points} pts): {ref_result.item():.8e}")
+                return ref_result.item()
+
+            except Exception as e:
+                self.logger.error(f"Reference calculation failed: {e}")
+                return 1.0  # Fallback value
+
+    def benchmark_method(
+        self,
+        method_name: str,
+        integrator,
+        func,
+        dim: int,
+        domain: List[List[float]],
+        n_points: List[int],
+        reference: float,
+        timeout: float = 300,
+    ) -> Dict:
+        """Benchmark a single integration method."""
+        self.logger.info(f"Benchmarking {method_name} for {dim}D...")
+
+        method_start = time.perf_counter()
+        errors = []
+        times = []
+        actual_n = []
+
+        for i, n in enumerate(n_points):
+            if time.perf_counter() - method_start > timeout:
+                self.logger.warning(f"{method_name} timeout reached after {timeout}s")
+                break
+
+            try:
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+                    gc.collect()
+
+                start_time = time.perf_counter()
+
+                # Method-specific integration calls
+                if method_name == "vegas":
+                    result = integrator.integrate(
+                        func,
+                        dim=dim,
+                        N=n,
+                        integration_domain=domain,
+                        max_iterations=5,
+                        use_warmup=True,
+                        seed=42,
+                    )
+                elif method_name == "monte_carlo":
+                    result = integrator.integrate(
+                        func, dim=dim, N=n, integration_domain=domain, seed=42
+                    )
+                else:
+                    result = integrator.integrate(func, dim=dim, N=n, integration_domain=domain)
+
+                end_time = time.perf_counter()
+
+                error = abs(result.item() - reference)
+                error = max(error, 1e-16)  # Minimum plottable error
+
+                errors.append(error)
+                times.append(end_time - start_time)
+                actual_n.append(n)
+                torch.cuda.empty_cache()
+
+                # Log progress
+                if i % 2 == 0 or n >= 100000:
+                    self.logger.info(
+                        f"  N={n:>8}: error={error:.2e}, time={end_time - start_time:.4f}s"
+                    )
+
+            except Exception as e:
+                self.logger.warning(f"  N={n}: Failed - {str(e)[:50]}...")
+                if len(actual_n) > 3:  # Continue if we have some results
+                    break
+
+        total_time = time.perf_counter() - method_start
+        self.logger.info(f"{method_name} completed in {total_time:.2f}s")
+        return {"n_points": actual_n, "errors": errors, "times": times, "total_time": total_time}
+
+    def benchmark_scipy_methods(self, func_np, dim: int, reference: float) -> Dict:
+        """Benchmark scipy integration methods."""
+        self.logger.info(f"Benchmarking scipy methods for {dim}D...")
+
+        scipy_results = {}
+
+        # scipy.integrate.nquad
+        try:
+            start_time = time.perf_counter()
+
+            if dim == 1:
+                limit = self.config["scipy"]["nquad_limit_1d"]
+                scipy_result, _ = quad(func_np, 0, 1, limit=limit)
+            elif dim <= 7:
+                limit_key = f"nquad_limit_{dim}d"
+                limit = self.config["scipy"].get(limit_key, 20)
+                epsabs = self.config["scipy"]["nquad_epsabs"]
+                epsrel = self.config["scipy"]["nquad_epsrel"]
+                scipy_result, _ = nquad(
+                    func_np,
+                    [(0, 1)] * dim,
+                    opts={"limit": limit, "epsabs": epsabs, "epsrel": epsrel},
+                )
+            elif dim == 15:
+                # Attempt 15D with loose tolerances
+                epsabs = self.config["scipy"]["nquad_epsabs_15d"]
+                epsrel = self.config["scipy"]["nquad_epsrel_15d"]
+                limit = self.config["scipy"]["nquad_limit_15d"]
+
+                scipy_result, _ = nquad(
+                    func_np,
+                    [(0, 1)] * dim,
+                    opts={"limit": limit, "epsabs": epsabs, "epsrel": epsrel},
+                )
+                self.logger.info("15D nquad succeeded with loose tolerances")
+
+            end_time = time.perf_counter()
+            scipy_error = max(abs(scipy_result - reference), 1e-16)
+            scipy_results["nquad"] = {
+                "result": scipy_result,
+                "error": scipy_error,
+                "time": end_time - start_time,
+            }
+            self.logger.info(
+                f"SciPy nquad: error={scipy_error:.2e}, time={end_time - start_time:.4f}s"
+            )
+
+        except Exception as e:
+            self.logger.warning(f"SciPy nquad failed: {e}")
+
+        # scipy trapz and simps for 1D only
+        if dim == 1:
+            grid_sizes = self.config["convergence"]["points_1d"].get("scipy_grids", [51, 251, 1001])
+
+            for grid_n in grid_sizes:
+                x_grid = np.linspace(0, 1, grid_n)
+                y_values = func_np(x_grid)
+
+                # Trapezoid
+                try:
+                    start_time = time.perf_counter()
+                    result = trapz(y_values, x_grid)
+                    end_time = time.perf_counter()
+
+                    error = max(abs(result - reference), 1e-16)
+                    scipy_results[f"trapz_{grid_n}"] = {
+                        "result": result,
+                        "error": error,
+                        "time": end_time - start_time,
+                        "n_points": grid_n,
+                    }
+
+                except Exception as e:
+                    self.logger.warning(f"SciPy trapz (N={grid_n}): Failed - {e}")
+
+                # Simpson
+                if simps is not None:
+                    try:
+                        start_time = time.perf_counter()
+                        result = simps(y_values, x=x_grid)
+                        end_time = time.perf_counter()
+
+                        error = max(abs(result - reference), 1e-16)
+                        scipy_results[f"simps_{grid_n}"] = {
+                            "result": result,
+                            "error": error,
+                            "time": end_time - start_time,
+                            "n_points": grid_n,
+                        }
+
+                    except Exception as e:
+                        self.logger.warning(f"SciPy simps (N={grid_n}): Failed - {e}")
+
+        return scipy_results
+
+    def benchmark_convergence_dimension(self, dim: int) -> Optional[Dict]:
+        """Benchmark convergence for a specific dimension."""
+        if not self.config["convergence"].get(f"enable_{dim}d", False):
+            self.logger.info(f"Skipping {dim}D convergence (disabled in config)")
+            return None
+
+        self.logger.info("=" * 60)
+        self.logger.info(f"BENCHMARKING {dim}D CONVERGENCE")
+        self.logger.info("=" * 60)
+
+        # Get function and configuration
+        func_map = {
+            1: (
+                self.challenging_1d,
+                self.challenging_1d_np,
+                "Discontinuous oscillatory",
+                "sin(30πx)exp(-10(x-0.3)²) + 0.5cos(50πx) + step",
+            ),
+            3: (
+                self.challenging_3d,
+                self.challenging_3d_np,
+                "Multi-peak with ridge",
+                "Multiple peaks + sin(15π∏xi) + sharp ridge",
+            ),
+            7: (
+                self.challenging_7d,
+                self.challenging_7d_np,
+                "Rastrigin-Gaussian hybrid",
+                "Rastrigin oscillatory + Gaussian envelope + sharp peak",
+            ),
+            15: (
+                self.challenging_15d,
+                self.challenging_15d_np,
+                "High-dimensional mixed",
+                "sin(πx)x + Gaussian + quadratic",
+            ),
+        }
+
+        if dim not in func_map:
+            self.logger.error(f"No function defined for {dim}D")
+            return None
+
+        func, func_np, name, description = func_map[dim]
+        domain = [[0, 1]] * dim
+
+        # Calculate reference value
+        reference = self.get_reference_value(func, dim, domain)
+
+        # Get evaluation points from config
+        points_config = self.config["convergence"].get(f"points_{dim}d", {})
+
+        # Benchmark torchquad methods
+        integrators = {
+            "simpson": Simpson(),
+            "gauss_legendre": GaussLegendre(),
+            "monte_carlo": MonteCarlo(),
+            "vegas": VEGAS(),
+        }
+
+        results = {"dim": dim, "function": name, "description": description, "reference": reference}
+
+        timeout = self.config.get("timeouts", {}).get("max_time_per_method", 300)
+
+        for method_name, integrator in integrators.items():
+            n_points = points_config.get(method_name, [])
+            if not n_points:
+                self.logger.info(f"No points configured for {method_name}, skipping")
+                continue
+
+            results[method_name] = self.benchmark_method(
+                method_name, integrator, func, dim, domain, n_points, reference, timeout
+            )
+
+        # Benchmark scipy methods
+        if func_np and dim < 7:
+            results["scipy"] = self.benchmark_scipy_methods(func_np, dim, reference)
+
+        # Store timing information
+        self.timing_info[f"{dim}d"] = {
+            method: results[method].get("total_time", 0)
+            for method in ["simpson", "gauss_legendre", "monte_carlo", "vegas"]
+            if method in results
+        }
+
+        return results
+
+    def run_convergence_benchmarks(self, dimensions: List[int] = None) -> Dict:
+        """Run convergence benchmarks for specified dimensions."""
+        if dimensions is None:
+            dimensions = [1, 3, 7, 15]
+
+        self.logger.info(f"Starting convergence benchmarks for dimensions: {dimensions}")
+        total_start = time.perf_counter()
+
+        convergence_results = {}
+
+        for dim in dimensions:
+            dim_start = time.perf_counter()
+
+            result = self.benchmark_convergence_dimension(dim)
+            if result is not None:
+                convergence_results[f"{dim}d"] = result
+
+            dim_elapsed = time.perf_counter() - dim_start
+            self.logger.info(f"{dim}D benchmark completed in {dim_elapsed:.2f}s")
+
+            # Check total timeout
+            total_elapsed = time.perf_counter() - total_start
+            max_total = self.config.get("timeouts", {}).get("max_time_total", 1800)
+            if total_elapsed > max_total:
+                self.logger.warning(f"Total timeout ({max_total}s) reached")
+                break
+
+        total_elapsed = time.perf_counter() - total_start
+        self.logger.info(f"All convergence benchmarks completed in {total_elapsed:.2f}s")
+
+        # Save intermediate results
+        self.save_results(convergence_results, "convergence_results.json")
+
+        return convergence_results
+
+    def save_results(self, results: Dict, filename: str):
+        """Save results to JSON file."""
+        try:
+            # Convert numpy/torch values to native Python types for JSON serialization
+            def convert_for_json(obj):
+                if isinstance(obj, (np.ndarray, torch.Tensor)):
+                    return obj.tolist()
+                elif isinstance(obj, (np.integer, np.floating)):
+                    return obj.item()
+                elif isinstance(obj, dict):
+                    return {k: convert_for_json(v) for k, v in obj.items()}
+                elif isinstance(obj, list):
+                    return [convert_for_json(item) for item in obj]
+                else:
+                    return obj
+
+            results_json = convert_for_json(results)
+
+            with open(self.save_path / filename, "w") as f:
+                json.dump(results_json, f, indent=2)
+
+            self.logger.info(f"Results saved to {self.save_path / filename}")
+
+        except Exception as e:
+            self.logger.error(f"Failed to save results: {e}")
+
+    def load_results(self, filename: str) -> Dict:
+        """Load results from JSON file."""
+        try:
+            with open(self.save_path / filename, "r") as f:
+                results = json.load(f)
+            self.logger.info(f"Results loaded from {self.save_path / filename}")
+            return results
+        except FileNotFoundError:
+            self.logger.info(f"No existing results file found: {filename}")
+            return {}
+        except Exception as e:
+            self.logger.error(f"Failed to load results: {e}")
+            return {}
+
+    def benchmark_framework_comparison(self) -> Dict:
+        """Framework comparison benchmark for 1D Monte Carlo and Simpson methods."""
+        framework_config = self.config.get("framework_comparison", {})
+
+        if not framework_config.get("enable", False):
+            self.logger.info("Framework comparison disabled in config")
+            return {}
+
+        self.logger.info("=" * 60)
+        self.logger.info("FRAMEWORK COMPARISON BENCHMARK")
+        self.logger.info("=" * 60)
+
+        # Configuration
+        methods = framework_config.get("methods", ["monte_carlo", "simpson"])
+        backends = framework_config.get("backends", ["torch_gpu", "torch_cpu"])
+        num_runs = framework_config.get("num_runs", 3)
+        warmup_runs = framework_config.get("warmup_runs", 1)
+
+        # Use the 1D test function
+        # func = self.challenging_1d
+        # domain = [[0, 1]]
+        reference = 4.0422850545e-01  # 1D analytical reference
+
+        results = {
+            "methods": methods,
+            "backends": backends,
+            "reference": reference,
+            "function": "Discontinuous oscillatory 1D",
+            "results": {},
+        }
+
+        for method_name in methods:
+            if method_name not in results["results"]:
+                results["results"][method_name] = {}
+
+            # Get evaluation points for this method
+            points_key = f"points_{method_name}"
+            eval_points = framework_config.get(points_key, [1000, 10000, 100000])
+
+            self.logger.info(f"Benchmarking {method_name} across frameworks...")
+
+            for backend_spec in backends:
+                self.logger.info(f"  Testing {method_name} with {backend_spec}...")
+
+                try:
+                    # Parse backend specification
+                    if "_" in backend_spec:
+                        backend_name, device = backend_spec.split("_", 1)
+                    else:
+                        backend_name, device = backend_spec, "cpu"
+
+                    # Skip unavailable backends gracefully
+                    if not self._is_backend_available(backend_name):
+                        self.logger.warning(f"  Backend {backend_name} not available, skipping...")
+                        continue
+
+                    # Benchmark this method-backend combination using subprocess isolation
+                    backend_results = self._benchmark_method_backend_subprocess(
+                        backend_name,
+                        device,
+                        method_name,
+                        eval_points,
+                        reference,
+                        num_runs,
+                        warmup_runs,
+                    )
+
+                    if backend_results:
+                        results["results"][method_name][backend_spec] = backend_results
+
+                except Exception as e:
+                    self.logger.error(
+                        f"  Failed to benchmark {method_name} with {backend_spec}: {e}"
+                    )
+                    continue
+
+        return results
+
+    def _benchmark_method_backend_subprocess(
+        self,
+        backend_name: str,
+        device: str,
+        method_name: str,
+        eval_points: list,
+        reference: float,
+        num_runs: int,
+        warmup_runs: int,
+    ):
+        """Benchmark method-backend combination using subprocess isolation."""
+        self.logger.info(f"    Using subprocess isolation for {backend_name}_{device}")
+
+        # Prepare configuration for worker process
+        config = {
+            "backend_name": backend_name,
+            "device": device,
+            "method_name": method_name,
+            "eval_points": eval_points,
+            "reference": reference,
+            "num_runs": num_runs,
+            "warmup_runs": warmup_runs,
+        }
+
+        # Path to worker script
+        worker_script = Path(__file__).parent / "framework_worker.py"
+
+        try:
+            # Run worker in subprocess
+            result = subprocess.run(
+                [sys.executable, str(worker_script), json.dumps(config)],
+                capture_output=True,
+                text=True,
+                timeout=300,  # 5 minute timeout per backend
+                cwd=Path(__file__).parent.parent,  # Run from torchquad root
+            )
+
+            if result.returncode != 0:
+                self.logger.error(f"    Worker process failed: {result.stderr}")
+                return None
+
+            # Parse result
+            worker_result = json.loads(result.stdout.strip())
+
+            if worker_result.get("success"):
+                return worker_result["results"]
+            else:
+                self.logger.error(
+                    f"    Worker failed: {worker_result.get('error', 'Unknown error')}"
+                )
+                return None
+
+        except subprocess.TimeoutExpired:
+            self.logger.error(f"    Worker process timed out for {backend_name}_{device}")
+            return None
+        except Exception as e:
+            self.logger.error(f"    Subprocess error: {e}")
+            return None
+
+    def _is_backend_available(self, backend_name: str) -> bool:
+        """Check if a backend is available."""
+        try:
+            if backend_name == "torch":
+                torch  # noqa: F401
+                return True
+            elif backend_name == "tensorflow":
+                import tensorflow as tf  # noqa: F401
+
+                return True
+            elif backend_name == "jax":
+                import jax  # noqa: F401
+
+                return True
+            elif backend_name == "numpy":
+                import numpy  # noqa: F401
+
+                return True
+            else:
+                return False
+        except ImportError:
+            return False
+
+    def benchmark_scaling_analysis(self) -> Dict:
+        """Runtime/feval scaling analysis from 10K to 100M function evaluations."""
+        self.logger.info("Runtime/feval scaling analysis...")
+
+        def test_integrand(x):
+            """Simple quadratic function for scaling tests."""
+            return torch.sum(x**2, dim=1)
+
+        # Load configuration
+        scaling_config = self.config.get("scaling", {})
+        feval_counts = scaling_config.get(
+            "feval_counts",
+            [10000, 50000, 100000, 500000, 1000000, 5000000, 10000000, 50000000, 100000000],
+        )
+
+        # Gauss-Legendre specific fevals
+        gauss_legendre_fevals_1d = scaling_config.get("gauss_legendre_fevals_1d", feval_counts)
+        gauss_legendre_fevals_7d = scaling_config.get("gauss_legendre_fevals_7d", feval_counts)
+
+        # Max fevals from config
+        max_fevals_grid_1d = scaling_config.get("max_fevals_grid_1d", 10000000)
+        max_fevals_grid_7d = scaling_config.get("max_fevals_grid_7d", 100000)
+        max_fevals_mc = scaling_config.get("max_fevals_mc", 100000000)
+
+        # Methods to test
+        methods = {
+            "trapezoid": Trapezoid(),
+            "simpson": Simpson(),
+            "boole": Boole(),
+            "gauss_legendre": GaussLegendre(),
+            "monte_carlo": MonteCarlo(),
+            "vegas": VEGAS(),
+        }
+
+        # Test in 1D and 7D
+        dimensions = [1, 7]
+        num_runs = scaling_config.get("num_runs", 3)
+        warmup_runs = scaling_config.get("warmup_runs", 1)
+
+        results = {}
+
+        for dim in dimensions:
+            self.logger.info(f"\nScaling analysis for {dim}D:")
+            results[f"{dim}d"] = {}
+            domain = [[0, 1]] * dim
+
+            for method_name, integrator in methods.items():
+                self.logger.info(f"  Method: {method_name}")
+                method_results = {
+                    "fevals": [],
+                    "times_mean": [],
+                    "times_std": [],
+                    "times_per_eval_mean": [],
+                    "times_per_eval_std": [],
+                }
+
+                # Determine which feval counts to use for this method
+                if method_name == "gauss_legendre":
+                    # Use special Gauss-Legendre fevals
+                    if dim == 1:
+                        method_feval_counts = gauss_legendre_fevals_1d
+                        max_fevals = max_fevals_grid_1d
+                    else:
+                        method_feval_counts = gauss_legendre_fevals_7d
+                        max_fevals = max_fevals_grid_7d
+                elif method_name in ["trapezoid", "simpson", "boole"]:
+                    # Other grid methods use standard fevals with limits
+                    method_feval_counts = feval_counts
+                    if dim == 1:
+                        max_fevals = max_fevals_grid_1d
+                    else:
+                        max_fevals = max_fevals_grid_7d
+                else:
+                    # Monte Carlo methods
+                    method_feval_counts = feval_counts
+                    max_fevals = max_fevals_mc
+
+                for fevals in method_feval_counts:
+                    if fevals > max_fevals:
+                        continue
+
+                    self.logger.info(f"    N={fevals}: ")
+
+                    run_times = []
+
+                    # Run warmup + actual runs
+                    total_runs = warmup_runs + num_runs
+                    for run in range(total_runs):
+                        is_warmup = run < warmup_runs
+                        run_type = "warmup" if is_warmup else f"run {run - warmup_runs + 1}"
+
+                        try:
+                            # Clear GPU cache if available
+                            if torch.cuda.is_available():
+                                torch.cuda.empty_cache()
+                                gc.collect()
+
+                            start_time = time.perf_counter()
+
+                            if method_name == "vegas":
+                                integrator.integrate(
+                                    test_integrand,
+                                    dim=dim,
+                                    N=fevals,
+                                    integration_domain=domain,
+                                    max_iterations=5,
+                                    use_warmup=True,
+                                    seed=42 + run,
+                                )
+                            elif method_name == "monte_carlo":
+                                integrator.integrate(
+                                    test_integrand,
+                                    dim=dim,
+                                    N=fevals,
+                                    integration_domain=domain,
+                                    seed=42 + run,
+                                )
+                            else:
+                                integrator.integrate(
+                                    test_integrand, dim=dim, N=fevals, integration_domain=domain
+                                )
+
+                            elapsed = time.perf_counter() - start_time
+
+                            # Only record times after warmup runs
+                            if run >= warmup_runs:
+                                run_times.append(elapsed)
+                                self.logger.debug(f"      {run_type}: {elapsed:.4f}s")
+                            else:
+                                self.logger.debug(f"      {run_type}: {elapsed:.4f}s (discarded)")
+
+                        except Exception as e:
+                            self.logger.warning(f"      {run_type} failed: {e}")
+                            continue
+
+                    if run_times:
+                        import statistics
+
+                        mean_time = statistics.mean(run_times)
+                        std_time = statistics.stdev(run_times) if len(run_times) > 1 else 0
+                        mean_time_per_eval = mean_time / fevals
+                        std_time_per_eval = std_time / fevals
+
+                        method_results["fevals"].append(fevals)
+                        method_results["times_mean"].append(mean_time)
+                        method_results["times_std"].append(std_time)
+                        method_results["times_per_eval_mean"].append(mean_time_per_eval)
+                        method_results["times_per_eval_std"].append(std_time_per_eval)
+
+                        self.logger.info(
+                            f"time={mean_time:.4f}±{std_time:.4f}s, "
+                            f"time/eval={mean_time_per_eval:.2e}±{std_time_per_eval:.2e}s"
+                        )
+                    else:
+                        self.logger.warning("All runs failed")
+                        break
+
+                results[f"{dim}d"][method_name] = method_results
+
+        return results
+
+    def benchmark_vectorized_analysis(self) -> Dict:
+        """Vectorized integrand test with configurable scaling."""
+        self.logger.info("Vectorized integrands analysis...")
+
+        integrator = Simpson()
+        domain = [[0, 1]]
+        N = self.config.get("vectorized", {}).get("integration_points", 1001)
+
+        grid_sizes = self.config.get("vectorized", {}).get("grid_sizes", [5, 20, 50, 100, 200])
+        num_runs = self.config.get("vectorized", {}).get("num_runs", 2)
+        results = {"grid_sizes": [], "loop_times": [], "vectorized_times": [], "speedups": []}
+
+        for grid_size in grid_sizes:
+            self.logger.info(f"  Grid size {grid_size}:")
+
+            params = torch.linspace(1, 5, grid_size)
+
+            try:
+                # Method 1: Loop-based (multiple runs for stability)
+                loop_times = []
+                for run in range(num_runs):
+                    start_time = time.perf_counter()
+                    loop_results = []
+                    for param in params:
+
+                        def single_integrand(x):
+                            return torch.sqrt(torch.cos(torch.sin(param * x[:, 0])))
+
+                        result = integrator.integrate(
+                            single_integrand, dim=1, N=N, integration_domain=domain
+                        )
+                        loop_results.append(result.item())
+                    loop_times.append(time.perf_counter() - start_time)
+
+                loop_time = sum(loop_times) / len(loop_times)
+
+                # Method 2: Vectorized (multiple runs for stability)
+                vectorized_times = []
+                for run in range(num_runs):
+                    start_time = time.perf_counter()
+
+                    def vectorized_integrand(x):
+                        x_vals = x[:, 0]
+                        return torch.sqrt(torch.cos(torch.sin(torch.outer(x_vals, params))))
+
+                    integrator.integrate(
+                        vectorized_integrand, dim=1, N=N, integration_domain=domain
+                    )
+                    vectorized_times.append(time.perf_counter() - start_time)
+
+                vectorized_time = sum(vectorized_times) / len(vectorized_times)
+                speedup = loop_time / vectorized_time
+
+                results["grid_sizes"].append(grid_size)
+                results["loop_times"].append(loop_time)
+                results["vectorized_times"].append(vectorized_time)
+                results["speedups"].append(speedup)
+
+                self.logger.info(
+                    f"    Loop: {loop_time:.4f}s, Vectorized: {vectorized_time:.4f}s, Speedup: {speedup:.2f}x"
+                )
+
+            except Exception as e:
+                self.logger.warning(f"    Failed: {e}")
+                break
+
+        return results
+
+
+def main():
+    """Main execution function."""
+    parser = argparse.ArgumentParser(description="Run modular torchquad benchmarks")
+    parser.add_argument(
+        "--config", default="benchmarking/benchmarking_cfg.toml", help="Path to configuration file"
+    )
+    parser.add_argument(
+        "--dimensions", default="1,3,7,15", help="Comma-separated list of dimensions to benchmark"
+    )
+    parser.add_argument(
+        "--convergence-only", action="store_true", help="Run only convergence benchmarks"
+    )
+    parser.add_argument("--scaling-only", action="store_true", help="Run only scaling benchmarks")
+    parser.add_argument(
+        "--framework-only", action="store_true", help="Run only framework comparison"
+    )
+
+    args = parser.parse_args()
+
+    # Parse dimensions
+    try:
+        dimensions = [int(d.strip()) for d in args.dimensions.split(",")]
+    except ValueError:
+        print("Invalid dimensions format. Use comma-separated integers like '1,3,7'")
+        return
+
+    # Initialize benchmark
+    warnings.filterwarnings("ignore")
+    benchmark = ModularBenchmark(args.config)
+
+    scaling_results = None
+    vectorized_results = None
+    framework_results = None
+
+    # Handle mutually exclusive flags
+    exclusive_flags = [args.convergence_only, args.scaling_only, args.framework_only]
+    if sum(exclusive_flags) > 1:
+        print("Error: Cannot use multiple exclusive flags together")
+        return
+
+    if args.framework_only:
+        # Run only framework comparison
+        benchmark.logger.info("Running framework comparison only...")
+        framework_results = benchmark.benchmark_framework_comparison()
+        benchmark.save_results(framework_results, "framework_results.json")
+    elif args.scaling_only:
+        # Run only scaling benchmarks
+        benchmark.logger.info("Running scaling analysis only...")
+        scaling_results = benchmark.benchmark_scaling_analysis()
+        benchmark.save_results(scaling_results, "scaling_results.json")
+    elif args.convergence_only:
+        # Run only convergence benchmarks
+        benchmark.run_convergence_benchmarks(dimensions)
+    else:
+        # Run all benchmarks
+        # Run convergence benchmarks
+        benchmark.run_convergence_benchmarks(dimensions)
+
+        # Run scaling benchmarks
+        benchmark.logger.info("Running scaling analysis...")
+        scaling_results = benchmark.benchmark_scaling_analysis()
+        benchmark.save_results(scaling_results, "scaling_results.json")
+
+        # Run vectorized benchmarks
+        benchmark.logger.info("Running vectorized analysis...")
+        vectorized_results = benchmark.benchmark_vectorized_analysis()
+        benchmark.save_results(vectorized_results, "vectorized_results.json")
+
+        # Run framework comparison
+        benchmark.logger.info("Running framework comparison...")
+        framework_results = benchmark.benchmark_framework_comparison()
+        benchmark.save_results(framework_results, "framework_results.json")
+
+    benchmark.logger.info("Benchmark session completed successfully!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmarking/plot_results.py b/benchmarking/plot_results.py
new file mode 100644
index 00000000..3068ce78
--- /dev/null
+++ b/benchmarking/plot_results.py
@@ -0,0 +1,803 @@
+#!/usr/bin/env python3
+"""
+Plotting module for torchquad benchmark results.
+
+This module creates the enhanced plots addressing all identified issues:
+1. Convergence plots with challenging functions and complete scipy coverage
+2. Runtime vs error plots with all methods visible
+3. Scaling analysis with error bars (when scaling data available)
+4. Vectorized speedup plots with log-scale axes
+"""
+
+import matplotlib.pyplot as plt
+from pathlib import Path
+import json
+from typing import Dict
+
+
+class ResultsPlotter:
+    """Create comprehensive plots from benchmark results."""
+
+    def __init__(self, results_dir: str = "resources"):
+        self.results_dir = Path(results_dir)
+
+        # Enhanced color palette and markers
+        self.colors = {
+            "simpson": "#0066CC",
+            "gauss_legendre": "#00AA00",
+            "monte_carlo": "#FF3333",
+            "vegas": "#FF8C00",
+            "scipy_nquad": "#000000",
+            "scipy_trapz": "#808080",
+            "scipy_simps": "#FF1493",
+        }
+
+        self.markers = {
+            "simpson": "o",
+            "gauss_legendre": "s",
+            "monte_carlo": "D",
+            "vegas": "^",
+            "scipy_nquad": "X",
+            "scipy_trapz": "+",
+            "scipy_simps": "*",
+        }
+
+    def load_results(self, filename: str) -> Dict:
+        """Load results from JSON file."""
+        try:
+            with open(self.results_dir / filename, "r") as f:
+                results = json.load(f)
+            print(f"Loaded results from {self.results_dir / filename}")
+            return results
+        except FileNotFoundError:
+            print(f"Results file not found: {filename}")
+            return {}
+        except Exception as e:
+            print(f"Error loading results: {e}")
+            return {}
+
+    def create_convergence_plots(self, convergence_results: Dict, device_info: str = "Unknown GPU"):
+        """Create enhanced convergence analysis plots."""
+        print("Creating convergence plots...")
+
+        # Determine subplot layout based on available dimensions
+        available_dims = [key for key in convergence_results.keys() if key.endswith("d")]
+        n_plots = len(available_dims)
+
+        if n_plots == 0:
+            print("No convergence results to plot")
+            return
+
+        # Create subplot layout
+        if n_plots == 1:
+            fig, axes = plt.subplots(1, 1, figsize=(12, 8))
+            axes = [axes]
+        elif n_plots == 2:
+            fig, axes = plt.subplots(1, 2, figsize=(18, 8))
+        elif n_plots <= 4:
+            fig, axes = plt.subplots(2, 2, figsize=(18, 14))
+            axes = axes.flatten()
+        else:
+            # More than 4 dimensions - use larger grid
+            n_rows = (n_plots + 2) // 3
+            fig, axes = plt.subplots(n_rows, 3, figsize=(24, 6 * n_rows))
+            axes = axes.flatten()
+
+        plot_idx = 0
+        for case_key in sorted(available_dims, key=lambda x: int(x[:-1])):  # Sort by dimension
+            case_data = convergence_results[case_key]
+
+            if plot_idx >= len(axes):
+                break
+
+            ax = axes[plot_idx]
+            dim = case_data["dim"]
+            func_name = case_data["function"]
+            func_desc = case_data["description"]
+
+            # Plot torchquad methods
+            for method in ["simpson", "gauss_legendre", "monte_carlo", "vegas"]:
+                if method in case_data and case_data[method]["errors"]:
+                    n_pts = case_data[method]["n_points"]
+                    errors = case_data[method]["errors"]
+
+                    valid_data = [(n, e) for n, e in zip(n_pts, errors) if e > 0]
+
+                    if valid_data:
+                        valid_n, valid_errors = zip(*valid_data)
+
+                        label = f"{method.replace('_', ' ').title()} (CUDA)"
+                        ax.loglog(
+                            valid_n,
+                            valid_errors,
+                            color=self.colors[method],
+                            marker=self.markers[method],
+                            linewidth=2.5,
+                            markersize=8,
+                            label=label,
+                            alpha=0.85,
+                        )
+
+            # Plot scipy methods
+            if "scipy" in case_data:
+                scipy_data = case_data["scipy"]
+
+                # Plot nquad
+                if "nquad" in scipy_data and scipy_data["nquad"]["error"] > 0:
+                    representative_n = 10**6 if dim <= 3 else 10**5 if dim <= 7 else 10**4
+                    ax.loglog(
+                        [representative_n],
+                        [scipy_data["nquad"]["error"]],
+                        color=self.colors["scipy_nquad"],
+                        marker=self.markers["scipy_nquad"],
+                        markersize=14,
+                        label="SciPy nquad (CPU)",
+                        linestyle="none",
+                        alpha=0.9,
+                    )
+
+                # Plot trapz and simps (1D only)
+                if dim == 1:
+                    # Trapz
+                    trapz_points = []
+                    for key, data in scipy_data.items():
+                        if key.startswith("trapz_") and data["error"] > 0:
+                            trapz_points.append((data["n_points"], data["error"]))
+                    if trapz_points:
+                        trapz_n, trapz_errors = zip(*sorted(trapz_points))
+                        ax.loglog(
+                            trapz_n,
+                            trapz_errors,
+                            color=self.colors["scipy_trapz"],
+                            marker=self.markers["scipy_trapz"],
+                            linewidth=2.5,
+                            markersize=10,
+                            label="SciPy trapz (CPU)",
+                            alpha=0.8,
+                        )
+
+                    # Simps
+                    simps_points = []
+                    for key, data in scipy_data.items():
+                        if key.startswith("simps_") and data["error"] > 0:
+                            simps_points.append((data["n_points"], data["error"]))
+                    if simps_points:
+                        simps_n, simps_errors = zip(*sorted(simps_points))
+                        ax.loglog(
+                            simps_n,
+                            simps_errors,
+                            color=self.colors["scipy_simps"],
+                            marker=self.markers["scipy_simps"],
+                            linewidth=2.5,
+                            markersize=10,
+                            label="SciPy simps (CPU)",
+                            alpha=0.8,
+                        )
+
+            ax.set_xlabel("Number of Function Evaluations", fontsize=13)
+            ax.set_ylabel("Absolute Error", fontsize=13)
+            ax.set_title(f"{dim}D: {func_name} \n Function: {func_desc}", fontsize=12)
+            ax.grid(True, alpha=0.3)
+            ax.legend(fontsize=10, loc="best")
+
+            # Enhanced x-axis ticks for better low-count visibility
+            if dim == 1:
+                ax.set_xticks([10, 100, 1000, 10000, 100000, 1000000])
+            elif dim == 3:
+                ax.set_xticks([10, 100, 1000, 10000, 100000, 1000000])
+            else:
+                ax.set_xticks([1000, 10000, 100000, 1000000, 10000000])
+
+            plot_idx += 1
+
+        # Remove unused subplots
+        for i in range(plot_idx, len(axes)):
+            fig.delaxes(axes[i])
+
+        plt.suptitle(
+            f"Enhanced Convergence Analysis - Challenging Functions \n "
+            f"Hardware: {device_info}, Precision: float32",
+            fontsize=15,
+        )
+        plt.tight_layout()
+        plt.savefig(self.results_dir / "torchquad_convergence.png", dpi=300, bbox_inches="tight")
+        plt.close()
+        print(f"Convergence plot saved to {self.results_dir / 'torchquad_convergence.png'}")
+
+    def create_runtime_vs_error_plots(
+        self, convergence_results: Dict, device_info: str = "Unknown GPU"
+    ):
+        """Create enhanced runtime vs error plots."""
+        print("Creating runtime vs error plots...")
+
+        # Determine subplot layout based on available dimensions
+        available_dims = [key for key in convergence_results.keys() if key.endswith("d")]
+        n_plots = len(available_dims)
+
+        if n_plots == 0:
+            print("No convergence results to plot")
+            return
+
+        # Create subplot layout
+        if n_plots == 1:
+            fig, axes = plt.subplots(1, 1, figsize=(12, 8))
+            axes = [axes]
+        elif n_plots == 2:
+            fig, axes = plt.subplots(1, 2, figsize=(18, 8))
+        elif n_plots <= 4:
+            fig, axes = plt.subplots(2, 2, figsize=(18, 14))
+            axes = axes.flatten()
+        else:
+            # More than 4 dimensions - use larger grid
+            n_rows = (n_plots + 2) // 3
+            fig, axes = plt.subplots(n_rows, 3, figsize=(24, 6 * n_rows))
+            axes = axes.flatten()
+
+        plot_idx = 0
+        for case_key in sorted(available_dims, key=lambda x: int(x[:-1])):
+            case_data = convergence_results[case_key]
+
+            if plot_idx >= len(axes):
+                break
+
+            ax = axes[plot_idx]
+            dim = case_data["dim"]
+            func_name = case_data["function"]
+            func_desc = case_data["description"]
+
+            # Plot torchquad methods
+            for method in ["simpson", "gauss_legendre", "monte_carlo", "vegas"]:
+                if method in case_data and case_data[method]["errors"]:
+                    times = case_data[method]["times"]
+                    errors = case_data[method]["errors"]
+
+                    valid_data = [(t, e) for t, e in zip(times, errors) if e > 0 and t > 0]
+
+                    if valid_data:
+                        valid_times, valid_errors = zip(*valid_data)
+
+                        label = f"{method.replace('_', ' ').title()} (CUDA)"
+                        ax.loglog(
+                            valid_times,
+                            valid_errors,
+                            color=self.colors[method],
+                            marker=self.markers[method],
+                            linewidth=2.5,
+                            markersize=8,
+                            label=label,
+                            alpha=0.85,
+                        )
+
+            # Plot scipy methods
+            if "scipy" in case_data:
+                scipy_data = case_data["scipy"]
+
+                # nquad
+                if "nquad" in scipy_data:
+                    data = scipy_data["nquad"]
+                    if data["error"] > 0 and data["time"] > 0:
+                        ax.loglog(
+                            [data["time"]],
+                            [data["error"]],
+                            color=self.colors["scipy_nquad"],
+                            marker=self.markers["scipy_nquad"],
+                            markersize=14,
+                            label="SciPy nquad (CPU)",
+                            linestyle="none",
+                            alpha=0.9,
+                        )
+
+                # 1D methods
+                if dim == 1:
+                    # Trapz
+                    trapz_points = []
+                    for key, data in scipy_data.items():
+                        if key.startswith("trapz_") and data["error"] > 0 and data["time"] > 0:
+                            trapz_points.append((data["time"], data["error"]))
+                    if trapz_points:
+                        trapz_t, trapz_e = zip(*trapz_points)
+                        ax.loglog(
+                            trapz_t,
+                            trapz_e,
+                            color=self.colors["scipy_trapz"],
+                            marker=self.markers["scipy_trapz"],
+                            markersize=10,
+                            linestyle="none",
+                            label="SciPy trapz (CPU)",
+                            alpha=0.8,
+                        )
+
+                    # Simps
+                    simps_points = []
+                    for key, data in scipy_data.items():
+                        if key.startswith("simps_") and data["error"] > 0 and data["time"] > 0:
+                            simps_points.append((data["time"], data["error"]))
+                    if simps_points:
+                        simps_t, simps_e = zip(*simps_points)
+                        ax.loglog(
+                            simps_t,
+                            simps_e,
+                            color=self.colors["scipy_simps"],
+                            marker=self.markers["scipy_simps"],
+                            markersize=10,
+                            linestyle="none",
+                            label="SciPy simps (CPU)",
+                            alpha=0.8,
+                        )
+
+            ax.set_xlabel("Runtime (seconds)", fontsize=13)
+            ax.set_ylabel("Absolute Error", fontsize=13)
+            ax.set_title(
+                f"{dim}D: {func_name} \n Function: {func_desc} \n (Lower-left is better)",
+                fontsize=12,
+            )
+            ax.grid(True, alpha=0.3)
+            ax.legend(fontsize=10, loc="best")
+
+            plot_idx += 1
+
+        # Remove unused subplots
+        for i in range(plot_idx, len(axes)):
+            fig.delaxes(axes[i])
+
+        plt.suptitle(
+            f"Runtime vs Error Analysis - Challenging Functions \n " f"Hardware: {device_info}",
+            fontsize=15,
+        )
+        plt.tight_layout()
+        plt.savefig(
+            self.results_dir / "torchquad_runtime_vs_error.png", dpi=300, bbox_inches="tight"
+        )
+        plt.close()
+        print(
+            f"Runtime vs error plot saved to {self.results_dir / 'torchquad_runtime_vs_error.png'}"
+        )
+
+    def create_scaling_plots(self, scaling_results: Dict, device_info: str = "Unknown GPU"):
+        """Create runtime/feval scaling analysis plots for 1D and 7D."""
+        print("Creating scaling plots...")
+
+        import numpy as np
+
+        if not scaling_results or ("1d" not in scaling_results and "7d" not in scaling_results):
+            print("No scaling results to plot")
+            return
+
+        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8))
+
+        # Colors for different methods
+        method_colors = {
+            "trapezoid": "#8B4513",
+            "simpson": "#0066CC",
+            "boole": "#4B0082",
+            "gauss_legendre": "#00AA00",
+            "monte_carlo": "#FF3333",
+            "vegas": "#FF8C00",
+        }
+
+        # Markers for different methods
+        method_markers = {
+            "trapezoid": "v",
+            "simpson": "o",
+            "boole": "p",
+            "gauss_legendre": "s",
+            "monte_carlo": "D",
+            "vegas": "^",
+        }
+
+        # Plot 1D results
+        if "1d" in scaling_results:
+            ax = ax1
+            dim_data = scaling_results["1d"]
+
+            for method_name, method_data in dim_data.items():
+                if method_data.get("fevals") and method_data.get("times_per_eval_mean"):
+                    # Convert to fevals/second and scale to millions
+                    fevals_per_sec = [
+                        1.0 / time_per_eval for time_per_eval in method_data["times_per_eval_mean"]
+                    ]
+                    fevals_per_sec_std = [
+                        std / (time_per_eval**2)
+                        for time_per_eval, std in zip(
+                            method_data["times_per_eval_mean"], method_data["times_per_eval_std"]
+                        )
+                    ]
+
+                    # Scale to millions for better readability
+                    fevals_per_sec_millions = [fps / 1e6 for fps in fevals_per_sec]
+                    fevals_per_sec_std_millions = [std / 1e6 for std in fevals_per_sec_std]
+
+                    # Plot with error bars
+                    ax.errorbar(
+                        method_data["fevals"],
+                        fevals_per_sec_millions,
+                        yerr=fevals_per_sec_std_millions,
+                        color=method_colors.get(method_name, "black"),
+                        marker=method_markers.get(method_name, "o"),
+                        linewidth=2.5,
+                        markersize=8,
+                        capsize=5,
+                        capthick=2,
+                        label=method_name.replace("_", " ").title(),
+                        alpha=0.85,
+                    )
+
+            # Add linear scaling reference line
+            fevals_range = np.logspace(4, 8, 100)
+            reference_throughput = 100  # 100M fevals/sec reference
+            ax.plot(
+                fevals_range,
+                [reference_throughput] * len(fevals_range),
+                "k--",
+                alpha=0.5,
+                label="Linear scaling",
+            )
+
+            ax.set_xlabel("Number of Function Evaluations", fontsize=13)
+            ax.set_ylabel("Function Evaluations per Second (Millions)", fontsize=13)
+            ax.set_title("1D Scaling Analysis \n Function: f(x) = Σx²", fontsize=14)
+            ax.set_xscale("log")
+            ax.set_yscale("log")
+            ax.grid(True, alpha=0.3)
+            ax.legend(fontsize=10, loc="best")
+
+        # Plot 7D results
+        if "7d" in scaling_results:
+            ax = ax2
+            dim_data = scaling_results["7d"]
+
+            for method_name, method_data in dim_data.items():
+                if method_data.get("fevals") and method_data.get("times_per_eval_mean"):
+                    # Convert to fevals/second and scale to millions
+                    fevals_per_sec = [
+                        1.0 / time_per_eval for time_per_eval in method_data["times_per_eval_mean"]
+                    ]
+                    fevals_per_sec_std = [
+                        std / (time_per_eval**2)
+                        for time_per_eval, std in zip(
+                            method_data["times_per_eval_mean"], method_data["times_per_eval_std"]
+                        )
+                    ]
+
+                    # Scale to millions for better readability
+                    fevals_per_sec_millions = [fps / 1e6 for fps in fevals_per_sec]
+                    fevals_per_sec_std_millions = [std / 1e6 for std in fevals_per_sec_std]
+
+                    # Plot with error bars
+                    ax.errorbar(
+                        method_data["fevals"],
+                        fevals_per_sec_millions,
+                        yerr=fevals_per_sec_std_millions,
+                        color=method_colors.get(method_name, "black"),
+                        marker=method_markers.get(method_name, "o"),
+                        linewidth=2.5,
+                        markersize=8,
+                        capsize=5,
+                        capthick=2,
+                        label=method_name.replace("_", " ").title(),
+                        alpha=0.85,
+                    )
+
+            # Add linear scaling reference line
+            fevals_range = np.logspace(4, 8, 100)
+            reference_throughput = 10  # 10M fevals/sec reference (lower for 7D)
+            ax.plot(
+                fevals_range,
+                [reference_throughput] * len(fevals_range),
+                "k--",
+                alpha=0.5,
+                label="Linear scaling",
+            )
+
+            ax.set_xlabel("Number of Function Evaluations", fontsize=13)
+            ax.set_ylabel("Function Evaluations per Second (Millions)", fontsize=13)
+            ax.set_title("7D Scaling Analysis \n Function: f(x) = Σx²", fontsize=14)
+            ax.set_xscale("log")
+            ax.set_yscale("log")
+            ax.grid(True, alpha=0.3)
+            ax.legend(fontsize=10, loc="best")
+
+        plt.suptitle(f"Runtime/Evaluation Scaling Analysis\nHardware: {device_info}", fontsize=16)
+        plt.tight_layout()
+        plt.savefig(
+            self.results_dir / "torchquad_scaling_analysis.png", dpi=300, bbox_inches="tight"
+        )
+        plt.close()
+        print(f"Scaling plot saved to {self.results_dir / 'torchquad_scaling_analysis.png'}")
+
+    def create_vectorized_plots(
+        self,
+        vectorized_results: Dict,
+        device_info: str = "Unknown GPU",
+        x_log_scale: bool = True,
+        y_log_scale: bool = True,
+    ):
+        """Create vectorized speedup plots with configurable log-scale axes."""
+        print("Creating vectorized plots...")
+
+        if not vectorized_results.get("grid_sizes"):
+            print("No vectorized results to plot")
+            return
+
+        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 7))
+
+        # Execution time comparison
+        import numpy as np
+
+        x_pos = np.arange(len(vectorized_results["grid_sizes"]))
+        width = 0.35
+
+        _ = ax1.bar(
+            x_pos - width / 2,
+            vectorized_results["loop_times"],
+            width,
+            label="Loop-based",
+            alpha=0.8,
+            color="lightcoral",
+        )
+        _ = ax1.bar(
+            x_pos + width / 2,
+            vectorized_results["vectorized_times"],
+            width,
+            label="Vectorized",
+            alpha=0.8,
+            color="lightblue",
+        )
+
+        ax1.set_xlabel("Parameter Grid Size", fontsize=13)
+        ax1.set_ylabel("Execution Time (seconds)", fontsize=13)
+        ax1.set_title(
+            "Vectorized vs Loop-based Integration \n Function: sqrt(cos(sin(a*x)))", fontsize=13
+        )
+        ax1.set_xticks(x_pos)
+        ax1.set_xticklabels(vectorized_results["grid_sizes"])
+
+        if y_log_scale:
+            ax1.set_yscale("log")
+
+        ax1.legend(fontsize=11)
+        ax1.grid(True, alpha=0.3)
+
+        # Speedup factors
+        if x_log_scale and y_log_scale:
+            ax2.loglog(
+                vectorized_results["grid_sizes"],
+                vectorized_results["speedups"],
+                marker="o",
+                linewidth=3,
+                markersize=10,
+                color="green",
+                label="Speedup",
+            )
+        elif x_log_scale:
+            ax2.semilogx(
+                vectorized_results["grid_sizes"],
+                vectorized_results["speedups"],
+                marker="o",
+                linewidth=3,
+                markersize=10,
+                color="green",
+                label="Speedup",
+            )
+        elif y_log_scale:
+            ax2.semilogy(
+                vectorized_results["grid_sizes"],
+                vectorized_results["speedups"],
+                marker="o",
+                linewidth=3,
+                markersize=10,
+                color="green",
+                label="Speedup",
+            )
+        else:
+            ax2.plot(
+                vectorized_results["grid_sizes"],
+                vectorized_results["speedups"],
+                marker="o",
+                linewidth=3,
+                markersize=10,
+                color="green",
+                label="Speedup",
+            )
+
+        ax2.axhline(y=1.0, color="red", linestyle="--", alpha=0.7, label="No speedup")
+
+        ax2.set_xlabel("Parameter Grid Size", fontsize=13)
+        ax2.set_ylabel("Speedup Factor", fontsize=13)
+
+        scale_info = (
+            f"({'Log' if x_log_scale else 'Linear'}-{'Log' if y_log_scale else 'Linear'} Scale)"
+        )
+        ax2.set_title(f"Vectorized Integration Speedup \n {scale_info}", fontsize=13)
+        ax2.grid(True, alpha=0.3)
+        ax2.legend(fontsize=11)
+
+        plt.suptitle(
+            f"Enhanced Vectorized Integrand Performance Analysis \n " f"Hardware: {device_info}",
+            fontsize=15,
+        )
+        plt.tight_layout()
+        plt.savefig(
+            self.results_dir / "torchquad_vectorized_speedup.png", dpi=300, bbox_inches="tight"
+        )
+        plt.close()
+        print(f"Vectorized plot saved to {self.results_dir / 'torchquad_vectorized_speedup.png'}")
+
+    def create_framework_comparison_plots(
+        self, framework_results: Dict, device_info: str = "Unknown GPU"
+    ):
+        """Create framework comparison plots for 1D Monte Carlo and Simpson methods."""
+        print("Creating framework comparison plots...")
+
+        if not framework_results or "results" not in framework_results:
+            print("No framework results to plot")
+            return
+
+        # Colors and markers for different backends
+        backend_colors = {
+            "torch_gpu": "#FF3333",
+            "torch_cpu": "#FF9999",
+            "tensorflow_gpu": "#FF8C00",
+            "tensorflow_cpu": "#FFB84D",
+            "numpy_cpu": "#0066CC",
+            "jax_cpu": "#00AA00",
+        }
+
+        backend_markers = {
+            "torch_gpu": "o",
+            "torch_cpu": "s",
+            "tensorflow_gpu": "D",
+            "tensorflow_cpu": "^",
+            "numpy_cpu": "v",
+            "jax_cpu": "p",
+        }
+
+        backend_labels = {
+            "torch_gpu": "PyTorch GPU",
+            "torch_cpu": "PyTorch CPU",
+            "tensorflow_gpu": "TensorFlow GPU",
+            "tensorflow_cpu": "TensorFlow CPU",
+            "numpy_cpu": "NumPy CPU",
+            "jax_cpu": "JAX CPU",
+        }
+
+        methods = framework_results.get("methods", ["monte_carlo", "simpson"])
+        n_methods = len(methods)
+
+        if n_methods == 0:
+            print("No methods to plot")
+            return
+
+        # Create subplots - 2 plots per method (convergence + runtime vs error)
+        fig, axes = plt.subplots(2, n_methods, figsize=(8 * n_methods, 12))
+
+        if n_methods == 1:
+            axes = axes.reshape(-1, 1)
+
+        for method_idx, method_name in enumerate(methods):
+            method_results = framework_results["results"].get(method_name, {})
+
+            if not method_results:
+                continue
+
+            # Convergence plot (top row)
+            ax_conv = axes[0, method_idx]
+
+            # Runtime vs Error plot (bottom row)
+            ax_runtime = axes[1, method_idx]
+
+            for backend_spec, backend_data in method_results.items():
+                if not backend_data.get("n_points") or not backend_data.get("errors"):
+                    continue
+
+                n_points = backend_data["n_points"]
+                errors = backend_data["errors"]
+                times = backend_data.get("times", [])
+
+                # Skip invalid data
+                valid_conv_data = [(n, e) for n, e in zip(n_points, errors) if e > 0]
+                valid_runtime_data = [(t, e) for t, e in zip(times, errors) if e > 0 and t > 0]
+
+                if not valid_conv_data:
+                    continue
+
+                label = backend_labels.get(backend_spec, backend_spec)
+                color = backend_colors.get(backend_spec, "black")
+                marker = backend_markers.get(backend_spec, "o")
+
+                # Convergence plot
+                conv_n, conv_errors = zip(*valid_conv_data)
+                ax_conv.loglog(
+                    conv_n,
+                    conv_errors,
+                    color=color,
+                    marker=marker,
+                    linewidth=2.5,
+                    markersize=8,
+                    label=label,
+                    alpha=0.85,
+                )
+
+                # Runtime vs Error plot
+                if valid_runtime_data:
+                    runtime_times, runtime_errors = zip(*valid_runtime_data)
+                    ax_runtime.loglog(
+                        runtime_times,
+                        runtime_errors,
+                        color=color,
+                        marker=marker,
+                        linewidth=2.5,
+                        markersize=8,
+                        label=label,
+                        alpha=0.85,
+                    )
+
+            # Format convergence plot
+            ax_conv.set_xlabel("Number of Function Evaluations", fontsize=12)
+            ax_conv.set_ylabel("Absolute Error", fontsize=12)
+            ax_conv.set_title(f"{method_name.replace('_', ' ').title()} - Convergence", fontsize=13)
+            ax_conv.grid(True, alpha=0.3)
+            ax_conv.legend(fontsize=10, loc="best")
+
+            # Format runtime vs error plot
+            ax_runtime.set_xlabel("Runtime (seconds)", fontsize=12)
+            ax_runtime.set_ylabel("Absolute Error", fontsize=12)
+            ax_runtime.set_title(
+                f"{method_name.replace('_', ' ').title()} - Runtime vs Error", fontsize=13
+            )
+            ax_runtime.grid(True, alpha=0.3)
+            ax_runtime.legend(fontsize=10, loc="best")
+
+        plt.suptitle(
+            f"Framework Comparison - 1D Integration \n"
+            f"Function: {framework_results.get('function', 'Unknown')} \n"
+            f"Hardware: {device_info}",
+            fontsize=16,
+        )
+        plt.tight_layout()
+        plt.savefig(
+            self.results_dir / "torchquad_framework_comparison.png", dpi=300, bbox_inches="tight"
+        )
+        plt.close()
+        print(
+            f"Framework comparison plot saved to {self.results_dir / 'torchquad_framework_comparison.png'}"
+        )
+
+
+def main():
+    """Create plots from existing results."""
+    plotter = ResultsPlotter()
+    device_info = "RTX 4060 Ti 16GB, i5-13400F"  # Could be loaded from config
+
+    # Load convergence results
+    convergence_results = plotter.load_results("convergence_results.json")
+    if convergence_results:
+        plotter.create_convergence_plots(convergence_results, device_info)
+        plotter.create_runtime_vs_error_plots(convergence_results, device_info)
+
+    # Load scaling results
+    scaling_results = plotter.load_results("scaling_results.json")
+    if scaling_results:
+        plotter.create_scaling_plots(scaling_results, device_info)
+
+    # Load vectorized results
+    vectorized_results = plotter.load_results("vectorized_results.json")
+    if vectorized_results:
+        # Default to log-log scale, but could be made configurable
+        plotter.create_vectorized_plots(
+            vectorized_results, device_info, x_log_scale=True, y_log_scale=True
+        )
+
+    # Load framework comparison results
+    framework_results = plotter.load_results("framework_results.json")
+    if framework_results:
+        plotter.create_framework_comparison_plots(framework_results, device_info)
+
+    if convergence_results or scaling_results or vectorized_results or framework_results:
+        print("All available plots created successfully!")
+    else:
+        print("No results available to plot. Run benchmark first.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/source/ci_cd.rst b/docs/source/ci_cd.rst
new file mode 100644
index 00000000..56b0bb3c
--- /dev/null
+++ b/docs/source/ci_cd.rst
@@ -0,0 +1,339 @@
+Continuous Integration and Deployment
+=====================================
+
+This document describes the continuous integration (CI) and continuous deployment (CD) setup for torchquad, which ensures code quality, testing, and automated releases.
+
+Overview
+--------
+
+Torchquad uses GitHub Actions for CI/CD with the following key objectives:
+
+* **Automated Testing**: Run comprehensive test suites on every code change
+* **Code Quality**: Enforce consistent formatting and linting standards  
+* **Multi-Backend Support**: Test across PyTorch, JAX, TensorFlow, and NumPy
+* **Automated Deployment**: Streamlined releases to PyPI and Test PyPI
+* **Documentation**: Automated paper builds for JOSS submissions
+
+GitHub Actions Workflows
+-------------------------
+
+The CI/CD pipeline consists of five main workflows:
+
+1. **Test Suite** (``run_tests.yml``)
+   
+   **Triggers**: Push to main/develop branches, pull requests, manual dispatch
+   
+   This is the core testing workflow that runs on every code change:
+   
+   * **Linting Stage**: Uses flake8 to check code quality and style
+   * **Testing Stage**: 
+     - Sets up Python 3.9 environment
+     - Installs all backend dependencies via micromamba
+     - Runs full pytest suite with coverage reporting
+     - Posts coverage reports as PR comments
+   
+   **Key Features**:
+   
+   * Multi-backend testing (all numerical backends)
+   * Coverage tracking with pytest-cov
+   * JUnit XML output for CI integration
+   * Automated PR comments with test results
+
+2. **Code Formatting** (``autoblack.yml``)
+   
+   **Triggers**: Pull requests only
+   
+   Ensures consistent code formatting across the project:
+   
+   * Uses Black formatter with 100-character line length
+   * Python 3.11 environment
+   * Checks formatting without modifying files
+   * Fails if reformatting is needed
+
+3. **PyPI Deployment** (``deploy_to_pypi.yml``)
+   
+   **Triggers**: Manual workflow dispatch only
+   
+   Production deployment to PyPI:
+   
+   * Python 3.10 environment
+   * Builds source distribution and wheel packages
+   * Uploads to PyPI using stored authentication token
+   * Manual trigger ensures controlled releases
+
+4. **Test PyPI Deployment** (``deploy_to_test_pypi.yml``)
+   
+   **Triggers**: Manual workflow dispatch, GitHub releases
+   
+   Test deployment for validation:
+   
+   * Same process as PyPI deployment
+   * Targets Test PyPI for safe testing
+   * Used to validate packages before production release
+
+5. **Documentation** (``draft-pdf.yml``)
+   
+   **Triggers**: Changes to paper directory
+   
+   Builds academic paper PDF:
+   
+   * Uses OpenJournals GitHub Action
+   * Compiles Markdown to PDF for JOSS submissions
+   * Stores generated PDF as workflow artifact
+
+Environment Setup
+-----------------
+
+The CI system uses conda/micromamba for dependency management:
+
+.. code-block:: yaml
+
+   # From run_tests.yml
+   - name: provision-with-micromamba
+     uses: mamba-org/setup-micromamba@v1
+     with:
+       environment-file: environment_all_backends.yml
+       environment-name: torchquad
+       cache-downloads: true
+
+Environment Files
+~~~~~~~~~~~~~~~~~
+
+* ``environment.yml`` - Basic PyTorch setup for development
+* ``environment_all_backends.yml`` - Complete backend support for CI
+* ``rtd_environment.yml`` - ReadTheDocs documentation builds
+
+Test Execution
+--------------
+
+The test suite runs with comprehensive coverage:
+
+.. code-block:: bash
+
+   cd tests/
+   pytest -ra --error-for-skips \\
+          --junitxml=pytest.xml \\
+          --cov-report=term-missing:skip-covered \\
+          --cov=../torchquad . | tee pytest-coverage.txt
+
+**Test Parameters**:
+
+* ``-ra`` - Show summary for all test outcomes
+* ``--error-for-skips`` - Treat skipped tests as errors (fail CI)
+* ``--junitxml`` - Generate XML report for CI integration
+* ``--cov`` - Generate coverage report for the torchquad package
+
+Code Quality Standards
+----------------------
+
+Linting with Flake8
+~~~~~~~~~~~~~~~~~~~
+
+Two-stage linting process:
+
+1. **Critical Errors**: Check for syntax errors and undefined names
+   
+   .. code-block:: bash
+   
+      flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+
+2. **Full Analysis**: Complete code quality check using project ``.flake8`` configuration
+   
+   .. code-block:: bash
+   
+      flake8 . --count --show-source --statistics
+
+Formatting with Black
+~~~~~~~~~~~~~~~~~~~~~
+
+Consistent code style enforcement:
+
+.. code-block:: bash
+
+   black --check --line-length 100 .
+
+**Configuration**:
+
+* Line length: 100 characters
+* Target: Python 3.11+
+* Complies with project style guide
+
+Coverage Reporting
+------------------
+
+The CI system provides detailed coverage analysis:
+
+* **PR Comments**: Automated coverage reports on pull requests
+* **Trend Tracking**: Coverage change detection
+* **Missing Lines**: Identification of untested code
+* **Badge Integration**: Coverage badges for README
+
+**Coverage Requirements**:
+
+* New features must include comprehensive tests
+* Significant coverage decreases block PR merges
+* Target: >90% coverage for new code
+
+Local Development
+-----------------
+
+Before pushing changes, run these checks locally:
+
+.. code-block:: bash
+
+   # Format code
+   black . --line-length 100
+   
+   # Check linting
+   flake8 . --count --show-source --statistics
+   
+   # Run tests
+   cd tests/
+   pytest
+   
+   # Run with coverage
+   pytest --cov=../torchquad
+
+Environment Setup
+~~~~~~~~~~~~~~~~~
+
+For local development:
+
+.. code-block:: bash
+
+   # Create environment
+   conda env create -f environment_all_backends.yml
+   conda activate torchquad
+   
+   # Install in development mode
+   pip install -e .
+
+Backend Testing
+---------------
+
+Multi-Backend Strategy
+~~~~~~~~~~~~~~~~~~~~~~
+
+Tests run across all supported numerical backends:
+
+* **NumPy**: Reference implementation and baseline testing
+* **PyTorch**: GPU acceleration and automatic differentiation
+* **JAX**: JIT compilation and XLA optimization
+* **TensorFlow**: Graph execution and TPU support
+
+**Backend-Specific Considerations**:
+
+* Some tests are backend-specific and use appropriate skip decorators
+* GPU tests run automatically when CUDA is available
+* Complex number support varies by backend
+* Performance characteristics differ between backends
+
+Release Process
+---------------
+
+PyPI Deployment
+~~~~~~~~~~~~~~~
+
+Production releases follow this process:
+
+1. **Code Review**: All changes go through PR review
+2. **Testing**: Full test suite must pass
+3. **Version Update**: Update version in ``pyproject.toml``
+4. **Test Deployment**: Deploy to Test PyPI first
+5. **Validation**: Test installation from Test PyPI
+6. **Production**: Manual trigger of PyPI deployment workflow
+
+**Required Secrets**:
+
+* ``PYPI_TOKEN`` - PyPI API token for package uploads
+* ``TEST_PYPI_TOKEN`` - Test PyPI API token
+
+Security Considerations
+-----------------------
+
+* **Token Management**: API tokens stored as GitHub secrets
+* **Manual Triggers**: Production deployments require manual approval
+* **Branch Protection**: Main branch protected with required status checks
+* **Dependency Scanning**: Automated security updates via Dependabot
+
+Troubleshooting
+---------------
+
+Common CI Failures
+~~~~~~~~~~~~~~~~~~
+
+1. **Formatting Issues**:
+   
+   .. code-block:: bash
+   
+      # Fix locally
+      black . --line-length 100
+      git add . && git commit -m "Fix formatting"
+
+2. **Import Errors**:
+   
+   * Check dependency versions in environment files
+   * Verify relative imports after package structure changes
+   * Ensure test files are properly isolated
+
+3. **Backend-Specific Failures**:
+   
+   * Check if backend is properly installed in CI environment
+   * Verify skip decorators for unavailable backends
+   * Review backend-specific test logic
+
+4. **Coverage Decreases**:
+   
+   * Add tests for new functionality
+   * Check test discovery (files must match ``*_test.py`` or ``test_*.py``)
+   * Verify coverage configuration in ``pyproject.toml``
+
+5. **Environment Issues**:
+   
+   * Update ``environment_all_backends.yml`` for new dependencies
+   * Check for version conflicts between backends
+   * Verify micromamba cache invalidation
+
+Building Documentation Locally
+------------------------------
+
+To build the Sphinx documentation locally:
+
+.. code-block:: bash
+
+   # Navigate to docs directory
+   cd docs
+   
+   # Build HTML documentation
+   make html
+   
+   # On Windows, you can also use:
+   make.bat html
+   
+   # Clean build directory
+   make clean
+   
+   # View all available targets
+   make help
+
+The built documentation will be available in ``docs/_build/html/``. Open ``docs/_build/html/index.html`` in your browser to view the documentation.
+
+**Note**: Make sure you have Sphinx and all documentation dependencies installed:
+
+.. code-block:: bash
+
+   pip install sphinx sphinx-rtd-theme
+
+Getting Help
+------------
+
+For CI/CD issues:
+
+1. Check the `GitHub Actions <https://github.com/esa/torchquad/actions>`_ page for detailed logs
+2. Review similar successful runs for comparison
+3. Check environment file consistency
+4. Verify all required secrets are configured
+5. Open an issue with CI logs if problems persist
+
+The CI/CD system is designed to catch issues early and ensure high code quality. 
+When in doubt, run the same commands locally that CI runs to debug issues quickly.
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 34549f41..b225f722 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -22,7 +22,7 @@
 author = "ESA Advanced Concepts Team"
 
 # The full version, including alpha/beta/rc tags
-release = "0.4.1"
+release = "0.5.0"
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/docs/source/index.rst b/docs/source/index.rst
index d7a2ded0..89f314a4 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -83,6 +83,12 @@ You can see the latest code at https://github.com/esa/torchquad.
    autodoc
 
 
+.. toctree::
+   :maxdepth: 1
+   :caption: Development:
+
+   ci_cd
+
 .. toctree::
    :maxdepth: 1
    :caption: Contact:
diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
index 9d1c8ff5..6b350b7b 100644
--- a/docs/source/tutorial.rst
+++ b/docs/source/tutorial.rst
@@ -47,6 +47,8 @@ environment variable; for example ``export TORCHQUAD_LOG_LEVEL=DEBUG``.
 A :ref:`later section <tutorial_backend_selection>` in this tutorial shows how
 to choose a different numerical backend.
 
+For information on using multiple GPUs, see the :ref:`Multi-GPU Usage <tutorial_multi_gpu>` section.
+
 
 Detailed Introduction
 ---------------------
@@ -106,7 +108,9 @@ the following way:
 4.  Information on how to select a numerical backend
 5.  Example showing how gradients can be obtained w.r.t. the integration domain with PyTorch
 6.  Methods to speed up the integration
-7.  Custom Integrators
+7.  Multidimensional/Vectorized Integrands
+8.  Parametric Integration with Variable Domains
+9.  Custom Integrators
 
 Feel free to test the code on your own computer as we go along.
 
@@ -769,6 +773,428 @@ Now let's see how to do this a bit more simply, and in a way that provides signf
 .. note::
     VEGAS does not support multi-dimensional integrands.  If you would like this, please consider opening an issue or PR.
 
+Parametric Integration with Variable Domains
+--------------------------------------------
+
+Sometimes you need to perform multiple integrations where both the integrand and the integration domain depend on parameters. This is particularly useful in applications where you need to compute integrals for many different parameter values simultaneously.
+
+For example, you might want to compute:
+
+.. math::
+
+    I(a, b) = \\int_{a}^{b} f(x, a, b) dx
+
+for multiple values of :math:`a` and :math:`b` simultaneously.
+
+Currently, torchquad doesn't have built-in support for parametric domains, but you can extend the existing integrators to handle this case. Below is an example of how to create a custom integrator that supports batch 1D integration with variable domains:
+
+.. code:: ipython3
+
+    import torch
+    from loguru import logger
+    from autoray import numpy as anp
+    from autoray import infer_backend
+    from torchquad import Gaussian
+
+
+    class Batch1DIntegrator(Gaussian):
+        """Custom integrator for batch 1D integration with variable domains.
+        
+        This integrator can compute multiple integrals with different domains
+        in a single call, providing significant speedup over sequential computation.
+        """
+        
+        def __init__(self, *args, **kwargs):
+            super().__init__(*args, **kwargs)
+            self.disable_integration_domain_check = True
+            
+        def _resize_roots(self, integration_domain, roots):
+            """Resize roots for batched integration domains.
+            
+            Args:
+                integration_domain: Shape [batch_size, 2] for multiple domains
+                roots: Shape [N] - the Gaussian quadrature nodes
+                
+            Returns:
+                Resized roots with shape [batch_size, N]
+            """
+            if integration_domain.ndim == 1:
+                # Single domain case - use parent implementation
+                return super()._resize_roots(integration_domain, roots)
+                
+            # Batch case
+            assert roots.ndim == 1
+            assert integration_domain.ndim == 2
+            assert integration_domain.shape[-1] == 2
+            
+            roots = roots.to(integration_domain.device)
+            
+            # Extract bounds for all domains
+            a = integration_domain[:, 0:1]  # Shape [batch_size, 1]
+            b = integration_domain[:, 1:2]  # Shape [batch_size, 1]
+            
+            # Broadcast and transform roots for each domain
+            roots_expanded = roots.unsqueeze(0)  # [1, N]
+            
+            # Transform from [-1, 1] to [a, b] for each domain
+            out = ((b - a) / 2) * roots_expanded + ((a + b) / 2)  # [batch_size, N]
+            
+            return out
+            
+        def integrate(self, fn, dim, N, integration_domain=None, backend="torch"):
+            """Integrate function over multiple domains in a single call.
+            
+            Args:
+                fn: Function to integrate
+                dim: Must be 1 for this implementation
+                N: Number of quadrature points
+                integration_domain: Shape [batch_size, 2] for batch integration
+                backend: Must be "torch"
+                
+            Returns:
+                Tensor of shape [batch_size] with integral results
+            """
+            assert dim == 1
+            assert backend == "torch"
+            
+            if integration_domain.ndim == 1:
+                integration_domain = integration_domain.reshape(1, 2)
+                
+            batch_size = integration_domain.shape[0]
+            
+            # Get Gaussian quadrature points and weights
+            N = self._adjust_N(dim=1, N=N)
+            roots = self._roots(N, backend, integration_domain.requires_grad)
+            weights = self._weights(N, dim, backend)
+            
+            # Resize roots for all domains at once
+            grid_points = self._resize_roots(integration_domain, roots)  # [batch_size, N]
+            
+            # Evaluate integrand at all points
+            # Flatten for function evaluation: [batch_size * N, 1]
+            points_flat = grid_points.reshape(-1, 1)
+            function_values = fn(points_flat)  # [batch_size * N]
+            
+            # Reshape back to [batch_size, N]
+            function_values = function_values.reshape(batch_size, N)
+            
+            # Apply weights and sum for each domain
+            weighted_values = function_values * weights.unsqueeze(0)
+            
+            # Scale by domain width and sum
+            domain_widths = (integration_domain[:, 1] - integration_domain[:, 0]) / 2
+            results = domain_widths * weighted_values.sum(dim=1)
+            
+            return results
+
+Now let's see a concrete example of using this for parametric integration:
+
+.. code:: ipython3
+
+    # Example 1: Compute multiple integrals in ONE call
+    # I(a) = integral from 0 to a of x^2 dx = a^3/3
+    # for a = 1, 2, 3, 4, 5
+    
+    def integrand(x):
+        # x has shape [batch_size * N, 1] where N is the number of quadrature points
+        return x[:, 0] ** 2
+    
+    # Create multiple integration domains
+    upper_bounds = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
+    domains = torch.stack([torch.zeros_like(upper_bounds), upper_bounds], dim=1)
+    print(f"Integration domains shape: {domains.shape}")
+    print(f"Domains:\n{domains}")
+    
+    # Initialize the batch integrator
+    batch_integrator = Batch1DIntegrator()
+    
+    # Compute ALL integrals in ONE call - this is the key difference!
+    results = batch_integrator.integrate(integrand, dim=1, N=50, integration_domain=domains)
+    
+    # Analytical solution: a^3/3
+    analytical = upper_bounds ** 3 / 3
+    
+    print(f"\nResults shape: {results.shape}")
+    print(f"Numerical results: {results}")
+    print(f"Analytical results: {analytical}")
+    print(f"Absolute errors: {torch.abs(results - analytical)}")
+
+Output:
+
+.. parsed-literal::
+
+    Integration domains shape: torch.Size([5, 2])
+    Domains:
+    tensor([[0., 1.],
+            [0., 2.],
+            [0., 3.],
+            [0., 4.],
+            [0., 5.]])
+    
+    Results shape: torch.Size([5])
+    Numerical results: tensor([ 0.3333,  2.6667,  9.0000, 21.3333, 41.6667])
+    Analytical results: tensor([ 0.3333,  2.6667,  9.0000, 21.3333, 41.6667])
+    Absolute errors: tensor([9.9341e-09, 7.9473e-08, 1.7764e-14, 6.3578e-07, 1.2716e-06])
+
+The key advantage of this approach is that all integrals are computed in a single vectorized operation, which can provide significant speedups:
+
+.. code:: ipython3
+
+    # Performance comparison - batch vs sequential
+    import time
+    from torchquad import GaussLegendre
+    
+    # Many domains
+    n_domains = 500
+    many_upper_bounds = torch.linspace(0.1, 5.0, n_domains)
+    many_domains = torch.stack([torch.zeros(n_domains), many_upper_bounds], dim=1)
+    
+    # Batch computation
+    start = time.time()
+    batch_results = batch_integrator.integrate(integrand, dim=1, N=50, integration_domain=many_domains)
+    batch_time = time.time() - start
+    
+    # Sequential computation for comparison
+    standard_integrator = GaussLegendre()
+    start = time.time()
+    sequential_results = []
+    for i in range(n_domains):
+        result = standard_integrator.integrate(
+            integrand, dim=1, N=50, 
+            integration_domain=[[0.0, many_upper_bounds[i].item()]]
+        )
+        sequential_results.append(result)
+    sequential_time = time.time() - start
+    
+    print(f"Computed {n_domains} integrals:")
+    print(f"Batch time: {batch_time:.4f} seconds")
+    print(f"Sequential time: {sequential_time:.4f} seconds")
+    print(f"Speedup: {sequential_time/batch_time:.2f}x")
+
+Output:
+
+.. parsed-literal::
+
+    Computed 500 integrals:
+    Batch time: 0.0010 seconds
+    Sequential time: 0.2289 seconds
+    Speedup: 228.90x
+
+This approach can be extended to more complex scenarios where both the integrand and the domain depend on parameters. The key insight is that by properly vectorizing the computation, we can achieve significant performance improvements over sequential integration.
+
+.. note::
+    This implementation is specifically for 1D integrals. Extending it to higher dimensions would require more careful handling of the grid generation and result calculation.
+
+.. _tutorial_multi_gpu:
+
+Multi-GPU Usage
+---------------
+
+While torchquad doesn't have a built-in device parameter for selecting specific GPUs, you can effectively use multiple GPUs using standard PyTorch practices and environment variables.
+
+Using CUDA_VISIBLE_DEVICES
+```````````````````````````
+
+The recommended way to control which GPU torchquad uses is through the ``CUDA_VISIBLE_DEVICES`` environment variable:
+
+.. code:: bash
+
+    # Use only GPU 0
+    export CUDA_VISIBLE_DEVICES=0
+    python your_integration_script.py
+
+    # Use only GPU 1
+    export CUDA_VISIBLE_DEVICES=1
+    python your_integration_script.py
+
+    # Use GPUs 0 and 2
+    export CUDA_VISIBLE_DEVICES=0,2
+    python your_integration_script.py
+
+This approach has several advantages:
+
+- **Clean separation**: Each process sees only the specified GPU(s)
+- **No code changes**: Your torchquad code remains unchanged
+- **Standard practice**: This is the recommended approach in the PyTorch community
+- **Process isolation**: Different processes can use different GPUs without interference
+
+Parallel Processing with Multiple GPUs
+```````````````````````````````````````
+
+For compute-intensive workloads that can be parallelized, you can spawn multiple processes, each using a different GPU:
+
+.. code:: ipython3
+
+    import multiprocessing as mp
+    import os
+    import torch
+    from torchquad import MonteCarlo, set_up_backend
+
+    def run_integration_on_gpu(gpu_id, integration_params, result_queue):
+        """Run integration on a specific GPU"""
+        # Set the GPU for this process
+        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
+        
+        # Initialize torchquad
+        set_up_backend("torch", data_type="float32")
+        
+        # Your integration code here
+        mc = MonteCarlo()
+        result = mc.integrate(
+            integration_params['fn'],
+            dim=integration_params['dim'],
+            N=integration_params['N'],
+            integration_domain=integration_params['domain'],
+            backend="torch"
+        )
+        
+        result_queue.put((gpu_id, result.item()))
+
+    def parallel_integration_example():
+        """Example of parallel integration across multiple GPUs"""
+        # Define your integration parameters
+        def integrand(x):
+            return torch.sin(x[:, 0]) + torch.exp(x[:, 1])
+            
+        integration_params = {
+            'fn': integrand,
+            'dim': 2,
+            'N': 100000,
+            'domain': [[0, 1], [-1, 1]]
+        }
+        
+        # Check available GPUs
+        available_gpus = list(range(torch.cuda.device_count()))
+        if not available_gpus:
+            print("No CUDA GPUs available")
+            return
+            
+        print(f"Using GPUs: {available_gpus}")
+        
+        # Create processes for each GPU
+        processes = []
+        result_queue = mp.Queue()
+        
+        for gpu_id in available_gpus:
+            p = mp.Process(
+                target=run_integration_on_gpu,
+                args=(gpu_id, integration_params, result_queue)
+            )
+            p.start()
+            processes.append(p)
+        
+        # Collect results
+        results = {}
+        for _ in available_gpus:
+            gpu_id, result = result_queue.get()
+            results[gpu_id] = result
+            
+        # Wait for all processes to complete
+        for p in processes:
+            p.join()
+            
+        print("Results from each GPU:")
+        for gpu_id, result in sorted(results.items()):
+            print(f"  GPU {gpu_id}: {result:.6f}")
+            
+        return results
+
+Use Cases for Multi-GPU Integration
+````````````````````````````````````
+
+1. **Parameter Sweeps**: Run the same integration with different parameters on different GPUs
+2. **Different Integration Methods**: Compare multiple integration methods simultaneously
+3. **Monte Carlo with Different Seeds**: Run multiple Monte Carlo integrations with different random seeds for error estimation
+4. **Batch Processing**: Process multiple independent integration problems in parallel
+
+Example: Monte Carlo Error Estimation
+``````````````````````````````````````
+
+.. code:: ipython3
+
+    import subprocess
+    import numpy as np
+    
+    def monte_carlo_error_estimation():
+        """Estimate integration error using multiple independent Monte Carlo runs"""
+        
+        # Script content for each GPU process
+        script_template = '''
+import os
+import torch
+from torchquad import MonteCarlo, set_up_backend
+
+os.environ['CUDA_VISIBLE_DEVICES'] = '{gpu_id}'
+set_up_backend("torch", data_type="float32")
+
+def integrand(x):
+    return torch.sin(x[:, 0]) + torch.exp(x[:, 1])
+
+mc = MonteCarlo()
+result = mc.integrate(
+    integrand,
+    dim=2,
+    N=50000,
+    integration_domain=[[0, 1], [-1, 1]],
+    seed={seed},
+    backend="torch"
+)
+
+print(result.item())
+'''
+        
+        num_gpus = torch.cuda.device_count()
+        runs_per_gpu = 5
+        
+        results = []
+        processes = []
+        
+        for gpu_id in range(num_gpus):
+            for run in range(runs_per_gpu):
+                seed = gpu_id * runs_per_gpu + run + 1000
+                script = script_template.format(gpu_id=gpu_id, seed=seed)
+                
+                # Launch subprocess
+                process = subprocess.Popen(
+                    ['python', '-c', script],
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.PIPE,
+                    text=True
+                )
+                processes.append(process)
+        
+        # Collect results
+        for process in processes:
+            stdout, stderr = process.communicate()
+            if process.returncode == 0:
+                results.append(float(stdout.strip()))
+            else:
+                print(f"Error in subprocess: {stderr}")
+        
+        # Calculate statistics
+        results = np.array(results)
+        mean_result = np.mean(results)
+        std_error = np.std(results) / np.sqrt(len(results))
+        
+        print(f"Monte Carlo Results from {len(results)} runs:")
+        print(f"  Mean: {mean_result:.6f}")
+        print(f"  Standard Error: {std_error:.6f}")
+        print(f"  95% Confidence Interval: [{mean_result - 1.96*std_error:.6f}, {mean_result + 1.96*std_error:.6f}]")
+        
+        return mean_result, std_error
+
+Best Practices for Multi-GPU Usage
+```````````````````````````````````
+
+1. **Use CUDA_VISIBLE_DEVICES**: This is the cleanest way to control GPU selection
+2. **Process-based parallelism**: Use ``multiprocessing`` rather than threading for true parallelism
+3. **Memory management**: Each GPU process will have its own memory space
+4. **Load balancing**: Distribute work evenly across available GPUs
+5. **Error handling**: Handle cases where specific GPUs might be unavailable or busy
+
+.. warning::
+    Avoid using ``torch.cuda.set_device()`` within torchquad applications, as this can interfere with torchquad's internal device management. Always use ``CUDA_VISIBLE_DEVICES`` instead.
+
 Custom Integrators
 ------------------
 
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..9acc646c
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,48 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "torchquad"
+version = "0.5.0"
+description = "Package providing numerical integration methods for PyTorch, jax, TensorFlow and numpy."
+readme = { file = "README.md", content-type = "text/markdown" }
+requires-python = ">=3.8, <4"
+license = { text = "GPL-3.0" }
+authors = [
+    { name = "ESA Advanced Concepts Team", email = "pablo.gomez@esa.int" },
+]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "Topic :: Scientific/Engineering :: Mathematics",
+    "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
+    "Programming Language :: Python :: 3.8",
+]
+dependencies = [
+    "loguru>=0.5.3",
+    "matplotlib>=3.3.3",
+    "scipy>=1.6.0",
+    "tqdm>=4.56.1",
+    "autoray>=0.5.0",
+]
+
+[project.urls]
+Homepage = "https://github.com/esa/torchquad"
+Source = "https://github.com/esa/torchquad/"
+
+[tool.setuptools]
+packages = ["torchquad", "torchquad.integration", "torchquad.utils"]
+
+[tool.pytest.ini_options]
+filterwarnings = [
+    "ignore::UserWarning:torchquad.integration.utils:256",                                       # Our own deprecation warning
+    "ignore:torch.meshgrid:UserWarning",                                                         # PyTorch meshgrid warning 
+    "ignore:Explicitly requested dtype.*truncated:UserWarning",                                  # JAX dtype warnings
+    "ignore:In the next release:UserWarning",                                                    # TensorFlow warnings
+    "ignore:DEPRECATION WARNING.*In future versions of torchquad:UserWarning",                   # Our deprecation warnings in TF autograph
+    "ignore:integration_domain should be a list:RuntimeWarning",                                 # Intentional warning for conflicting backend arguments
+    "ignore:To copy construct from a tensor.*recommended to use sourceTensor.clone:UserWarning", # PyTorch tensor copy warnings
+    "ignore:Cannot update the VEGASMap.*integrand which evaluates to zero:RuntimeWarning",       # VEGAS warning for zero integrands
+]
diff --git a/resources/torchquad_convergence.png b/resources/torchquad_convergence.png
index 7812c2c5..a36e0f90 100644
Binary files a/resources/torchquad_convergence.png and b/resources/torchquad_convergence.png differ
diff --git a/resources/torchquad_framework_comparison.png b/resources/torchquad_framework_comparison.png
new file mode 100644
index 00000000..67bc5a7a
Binary files /dev/null and b/resources/torchquad_framework_comparison.png differ
diff --git a/resources/torchquad_runtime.png b/resources/torchquad_runtime.png
deleted file mode 100644
index 8cf0c8a7..00000000
Binary files a/resources/torchquad_runtime.png and /dev/null differ
diff --git a/resources/torchquad_runtime_vs_error.png b/resources/torchquad_runtime_vs_error.png
new file mode 100644
index 00000000..c7ed061c
Binary files /dev/null and b/resources/torchquad_runtime_vs_error.png differ
diff --git a/resources/torchquad_scaling_analysis.png b/resources/torchquad_scaling_analysis.png
new file mode 100644
index 00000000..4e53bf8b
Binary files /dev/null and b/resources/torchquad_scaling_analysis.png differ
diff --git a/resources/torchquad_vectorized_speedup.png b/resources/torchquad_vectorized_speedup.png
new file mode 100644
index 00000000..7a87d0e1
Binary files /dev/null and b/resources/torchquad_vectorized_speedup.png differ
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 8e9c5468..00000000
--- a/setup.py
+++ /dev/null
@@ -1,44 +0,0 @@
-"""A setuptools based setup module.
-See:
-https://packaging.python.org/guides/distributing-packages-using-setuptools/
-https://github.com/pypa/sampleproject
-"""
-
-# Always prefer setuptools over distutils
-from setuptools import setup
-
-setup(
-    name="torchquad",
-    version="0.4.1",
-    description="Package providing torch-based numerical integration methods.",
-    long_description=open("README.md").read(),
-    long_description_content_type="text/markdown",
-    url="https://github.com/esa/torchquad",
-    author="ESA Advanced Concepts Team",
-    author_email="pablo.gomez@esa.int",
-    install_requires=[
-        "loguru>=0.5.3",
-        "matplotlib>=3.3.3",
-        "scipy>=1.6.0",
-        "tqdm>=4.56.1",
-        "autoray>=0.5.0",
-    ],
-    classifiers=[
-        "Development Status :: 3 - Alpha",
-        "Intended Audience :: Developers",
-        "Intended Audience :: Science/Research",
-        "Topic :: Scientific/Engineering :: Mathematics",
-        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
-        "Programming Language :: Python :: 3.8",
-    ],
-    packages=[
-        "torchquad",
-        "torchquad.integration",
-        "torchquad.plots",
-        "torchquad.utils",
-    ],
-    python_requires=">=3.8, <4",
-    project_urls={
-        "Source": "https://github.com/esa/torchquad/",
-    },
-)
diff --git a/torchquad/tests/boole_test.py b/tests/boole_test.py
similarity index 60%
rename from torchquad/tests/boole_test.py
rename to tests/boole_test.py
index aaea3f1a..e4662d6a 100644
--- a/torchquad/tests/boole_test.py
+++ b/tests/boole_test.py
@@ -1,10 +1,8 @@
-import sys
-
-sys.path.append("../")
-
 import warnings
+import torch
+import pytest
 
-from integration.boole import Boole
+from torchquad.integration.boole import Boole
 from helper_functions import (
     compute_integration_test_errors,
     setup_test_for_backend,
@@ -71,9 +69,7 @@ def integrate(*args, **kwargs):
             # which is then re-used on all other integrations (as is the point of JIT).
             nonlocal jit_integrate
             if jit_integrate is None:
-                jit_integrate = bl.get_jit_compiled_integrate(
-                    dim=1, N=N, backend=backend
-                )
+                jit_integrate = bl.get_jit_compiled_integrate(dim=1, N=N, backend=backend)
             return jit_integrate(*args, **kwargs)
 
         errors, funcs = compute_integration_test_errors(
@@ -93,9 +89,7 @@ def integrate(*args, **kwargs):
         for error in errors:
             assert error < 6.33e-11
 
-        jit_integrate = (
-            None  # set to None again so can be re-used with new integrand shape
-        )
+        jit_integrate = None  # set to None again so can be re-used with new integrand shape
 
         errors, funcs = compute_integration_test_errors(
             integrate,
@@ -115,12 +109,70 @@ def integrate(*args, **kwargs):
             assert error < 6.33e-11
 
 
+def test_boole_calculate_result_kwargs():
+    """Test that Boole().calculate_result() works correctly with keyword arguments."""
+
+    def integrand1(x):
+        return torch.rand(x.shape, device=x.device)
+
+    integration_domain = torch.tensor([[0.0, 1.0]])
+    dim = 1
+    N = 125  # Must be 5^n for Boole's rule
+
+    integrator = Boole()
+    grid_points, hs, n_per_dim = integrator.calculate_grid(N, integration_domain)
+    function_values, _ = integrator.evaluate_integrand(integrand1, grid_points)
+
+    # Test with positional arguments (should work as before)
+    integral1 = integrator.calculate_result(function_values, dim, n_per_dim, hs, integration_domain)
+
+    # Test with keyword arguments (this was failing before the fix)
+    integral2 = integrator.calculate_result(
+        function_values=function_values,
+        dim=dim,
+        n_per_dim=n_per_dim,
+        hs=hs,
+        integration_domain=integration_domain,
+    )
+
+    # Test with mixed positional and keyword arguments
+    integral3 = integrator.calculate_result(
+        function_values, dim=dim, n_per_dim=n_per_dim, hs=hs, integration_domain=integration_domain
+    )
+
+    # All results should be approximately equal
+    assert torch.allclose(integral1, integral2, rtol=1e-10)
+    assert torch.allclose(integral1, integral3, rtol=1e-10)
+
+
+def test_boole_calculate_result_error_handling():
+    """Test that Boole().calculate_result() gives meaningful error messages for invalid inputs."""
+
+    integrator = Boole()
+
+    # Test missing function_values argument
+    with pytest.raises(ValueError) as exc_info:
+        integrator.calculate_result(
+            dim=1,
+            n_per_dim=5,
+            hs=torch.tensor([0.25]),
+            integration_domain=torch.tensor([[0.0, 1.0]]),
+        )
+
+    assert "function_values argument not found" in str(exc_info.value)
+    assert "Please provide function_values" in str(exc_info.value)
+
+    # Test with only self argument (no function_values)
+    with pytest.raises(ValueError) as exc_info:
+        integrator.calculate_result()
+
+    assert "function_values argument not found" in str(exc_info.value)
+
+
 test_integrate_numpy = setup_test_for_backend(_run_boole_tests, "numpy", "float64")
 test_integrate_torch = setup_test_for_backend(_run_boole_tests, "torch", "float64")
 test_integrate_jax = setup_test_for_backend(_run_boole_tests, "jax", "float64")
-test_integrate_tensorflow = setup_test_for_backend(
-    _run_boole_tests, "tensorflow", "float64"
-)
+test_integrate_tensorflow = setup_test_for_backend(_run_boole_tests, "tensorflow", "float64")
 
 
 if __name__ == "__main__":
@@ -129,3 +181,8 @@ def integrate(*args, **kwargs):
     test_integrate_torch()
     test_integrate_jax()
     test_integrate_tensorflow()
+
+    # Test the new keyword argument functionality
+    test_boole_calculate_result_kwargs()
+    test_boole_calculate_result_error_handling()
+    print("All Boole keyword argument tests passed!")
diff --git a/torchquad/tests/gauss_test.py b/tests/gauss_test.py
similarity index 87%
rename from torchquad/tests/gauss_test.py
rename to tests/gauss_test.py
index 7cff604a..67a42039 100644
--- a/torchquad/tests/gauss_test.py
+++ b/tests/gauss_test.py
@@ -1,8 +1,4 @@
-import sys
-
-sys.path.append("../")
-
-from integration.gaussian import GaussLegendre
+from torchquad.integration.gaussian import GaussLegendre
 from helper_functions import (
     compute_integration_test_errors,
     setup_test_for_backend,
@@ -27,9 +23,9 @@ def _run_gaussian_tests(backend, _precision):
     print(f"1D {gauss} Test passed. N: {N}, backend: {backend}, Errors: {errors}")
     # Polynomials up to degree 2N-1 can be integrated almost exactly with gaussian.
     for err, test_function in zip(errors, funcs):
-        assert test_function.get_order() > (2 * N - 1) or err < 5.5e-11
+        assert test_function.get_order() > (2 * N - 1) or err < 7e-11
     for error in errors:
-        assert error < 6.33e-11
+        assert error < 7e-11
 
     N = 2  # integration points, here 2 for order check (2 points should lead to almost 0 err for low order polynomials)
 
@@ -84,9 +80,7 @@ def _run_gaussian_tests(backend, _precision):
     )
     print(f"10D {gauss} Test passed. N: {N}, backend: {backend}, Errors: {errors}")
     for err, test_function in zip(errors, funcs):
-        assert (
-            test_function.get_order() > 60 or err < 4e-09
-        )  # poly order should be relatively high
+        assert test_function.get_order() > 60 or err < 4e-09  # poly order should be relatively high
     for error in errors:
         assert error < 1e-5
 
@@ -100,9 +94,7 @@ def integrate(*args, **kwargs):
             # which is then re-used on all other integrations (as is the point of JIT).
             nonlocal jit_integrate
             if jit_integrate is None:
-                jit_integrate = gauss.get_jit_compiled_integrate(
-                    dim=1, N=N, backend=backend
-                )
+                jit_integrate = gauss.get_jit_compiled_integrate(dim=1, N=N, backend=backend)
             return jit_integrate(*args, **kwargs)
 
         errors, funcs = compute_integration_test_errors(
@@ -114,18 +106,14 @@ def integrate(*args, **kwargs):
             filter_test_functions=lambda x: x.is_integrand_1d,
         )
 
-        print(
-            f"1D Gaussian JIT Test passed. N: {N}, backend: {backend}, Errors: {errors}"
-        )
+        print(f"1D Gaussian JIT Test passed. N: {N}, backend: {backend}, Errors: {errors}")
         # Polynomials up to degree 2N-1 can be integrated almost exactly with gaussian.
         for err, test_function in zip(errors, funcs):
             assert test_function.get_order() > (2 * N - 1) or err < 2e-10
         for error in errors:
-            assert error < 6.33e-11
+            assert error < 7e-11
 
-        jit_integrate = (
-            None  # set to None again so can be re-used with new integrand shape
-        )
+        jit_integrate = None  # set to None again so can be re-used with new integrand shape
 
         errors, funcs = compute_integration_test_errors(
             integrate,
@@ -147,9 +135,7 @@ def integrate(*args, **kwargs):
 test_integrate_numpy = setup_test_for_backend(_run_gaussian_tests, "numpy", "float64")
 test_integrate_torch = setup_test_for_backend(_run_gaussian_tests, "torch", "float64")
 test_integrate_jax = setup_test_for_backend(_run_gaussian_tests, "jax", "float64")
-test_integrate_tensorflow = setup_test_for_backend(
-    _run_gaussian_tests, "tensorflow", "float64"
-)
+test_integrate_tensorflow = setup_test_for_backend(_run_gaussian_tests, "tensorflow", "float64")
 
 
 if __name__ == "__main__":
diff --git a/torchquad/tests/gradient_test.py b/tests/gradient_test.py
similarity index 93%
rename from torchquad/tests/gradient_test.py
rename to tests/gradient_test.py
index 4487d414..14a3a8da 100644
--- a/torchquad/tests/gradient_test.py
+++ b/tests/gradient_test.py
@@ -1,17 +1,13 @@
-import sys
-
-sys.path.append("../")
-
 from autoray import numpy as anp
 from autoray import to_numpy, to_backend_dtype, get_dtype_name
 import numpy as np
 
-from integration.vegas import VEGAS
-from integration.monte_carlo import MonteCarlo
-from integration.trapezoid import Trapezoid
-from integration.simpson import Simpson
-from integration.boole import Boole
-from integration.gaussian import GaussLegendre
+from torchquad.integration.vegas import VEGAS
+from torchquad.integration.monte_carlo import MonteCarlo
+from torchquad.integration.trapezoid import Trapezoid
+from torchquad.integration.simpson import Simpson
+from torchquad.integration.boole import Boole
+from torchquad.integration.gaussian import GaussLegendre
 
 from helper_functions import setup_test_for_backend
 
@@ -158,9 +154,7 @@ def _calculate_gradient_over_param(
     return _calculate_gradient(
         backend,
         param,
-        lambda par: integrate(
-            lambda x: integrand_with_param(x, par), **integrate_kwargs
-        ),
+        lambda par: integrate(lambda x: integrand_with_param(x, par), **integrate_kwargs),
         dtype_name,
     )
 
@@ -188,9 +182,7 @@ def _run_gradient_tests(backend, dtype_name):
             # Currently VEGAS supports only Torch.
             continue
 
-        print(
-            f"Calculating gradients; backend: {backend}, integrator: {integrator_name}"
-        )
+        print(f"Calculating gradients; backend: {backend}, integrator: {integrator_name}")
 
         print("Calculating gradients of the one-dimensional V-shaped function")
         integrate_kwargs = {"fn": _v_function, "dim": 1, "N": N_1d}
@@ -269,9 +261,7 @@ def _run_gradient_tests(backend, dtype_name):
 
 test_gradients_torch = setup_test_for_backend(_run_gradient_tests, "torch", "float64")
 test_gradients_jax = setup_test_for_backend(_run_gradient_tests, "jax", "float64")
-test_gradients_tensorflow = setup_test_for_backend(
-    _run_gradient_tests, "tensorflow", "float64"
-)
+test_gradients_tensorflow = setup_test_for_backend(_run_gradient_tests, "tensorflow", "float64")
 
 if __name__ == "__main__":
     # used to run this test individually
diff --git a/torchquad/tests/helper_functions.py b/tests/helper_functions.py
similarity index 87%
rename from torchquad/tests/helper_functions.py
rename to tests/helper_functions.py
index 88a8ee84..a284cc5d 100644
--- a/torchquad/tests/helper_functions.py
+++ b/tests/helper_functions.py
@@ -3,9 +3,9 @@
 from autoray import numpy as anp
 import autoray as ar
 
-from integration_test_functions import Polynomial, Exponential, Sinusoid
-from utils.set_up_backend import set_up_backend
-from utils.set_log_level import set_log_level
+from integration_test_functions import Polynomial, Exponential, Sinusoid, ProductFunction
+from torchquad.utils.set_up_backend import set_up_backend
+from torchquad.utils.set_log_level import set_log_level
 
 
 def get_test_functions(integration_dim, backend, use_multi_dim_integrand):
@@ -19,12 +19,8 @@ def get_test_functions(integration_dim, backend, use_multi_dim_integrand):
     if integration_dim == 1:
         res = [
             # Real numbers
-            Polynomial(
-                4.0, [2.0], is_complex=False, backend=backend, integrand_dims=1
-            ),  # y = 2
-            Polynomial(
-                0, [0, 1], is_complex=False, backend=backend, integrand_dims=1
-            ),  # y = x
+            Polynomial(4.0, [2.0], is_complex=False, backend=backend, integrand_dims=1),  # y = 2
+            Polynomial(0, [0, 1], is_complex=False, backend=backend, integrand_dims=1),  # y = x
             Polynomial(
                 2 / 3,
                 [0, 0, 2],
@@ -81,14 +77,26 @@ def get_test_functions(integration_dim, backend, use_multi_dim_integrand):
                 backend=backend,
                 integrand_dims=1,
             ),
+            # Product function: cos(x) from 0 to pi/2
+            ProductFunction(
+                1.0,  # integral of cos(x) from 0 to pi/2
+                domain=[[0, np.pi / 2]],
+                is_complex=False,
+                backend=backend,
+                integrand_dims=1,
+            ),
+            # More sinusoidal functions for better transcendental coverage
+            Sinusoid(
+                2.0,  # integral of sin(x) from 0 to pi = [-cos(x)]_0^pi = -(-1) - (-1) = 2
+                domain=[[0, np.pi]],
+                is_complex=False,
+                backend=backend,
+                integrand_dims=1,
+            ),
             #
             # Complex numbers
-            Polynomial(
-                4.0j, [2.0j], is_complex=True, backend=backend, integrand_dims=1
-            ),  # y = 2j
-            Polynomial(
-                0, [0, 1j], is_complex=True, backend=backend, integrand_dims=1
-            ),  # y = xj
+            Polynomial(4.0j, [2.0j], is_complex=True, backend=backend, integrand_dims=1),  # y = 2j
+            Polynomial(0, [0, 1j], is_complex=True, backend=backend, integrand_dims=1),  # y = xj
             # y=7x^4-3jx^3+2x^2-jx+3
             Polynomial(
                 44648.0 / 15.0,
@@ -202,6 +210,15 @@ def get_test_functions(integration_dim, backend, use_multi_dim_integrand):
                 backend=backend,
                 integrand_dims=1,
             ),
+            # 3D Product function: cos(x)*cos(y)*cos(z) over [0, pi/2]^3
+            ProductFunction(
+                1.0,  # (sin(pi/2) - sin(0))^3 = 1
+                integration_dim=3,
+                domain=[[0, np.pi / 2], [0, np.pi / 2], [0, np.pi / 2]],
+                is_complex=False,
+                backend=backend,
+                integrand_dims=1,
+            ),
             #
             # Complex numbers
             Polynomial(
@@ -264,9 +281,7 @@ def get_test_functions(integration_dim, backend, use_multi_dim_integrand):
                 ),  # f(x,y,z) = x + y + z
                 # Over 3 integrand dims
                 Polynomial(  # MC tests fail here with default float32 precision, so need float64
-                    np.array(
-                        [[[0.0, 48.0], [96.0, 144.0]], [[192.0, 240.0], [288.0, 336.0]]]
-                    ),
+                    np.array([[[0.0, 48.0], [96.0, 144.0]], [[192.0, 240.0], [288.0, 336.0]]]),
                     integration_dim=3,
                     domain=anp.array(
                         [[-1.0, 1.0], [-1.0, 1.0], [-1.0, 1.0]],
@@ -312,8 +327,7 @@ def get_test_functions(integration_dim, backend, use_multi_dim_integrand):
         ]
     else:
         raise ValueError(
-            "Not testing functions implemented for integration_dim "
-            + str(integration_dim)
+            "Not testing functions implemented for integration_dim " + str(integration_dim)
         )
 
 
@@ -353,16 +367,12 @@ def compute_integration_test_errors(
             continue
         if backend == "torch":
             diff = np.abs(
-                test_function.evaluate(integrator, integrator_args)
-                .cpu()
-                .detach()
-                .numpy()
+                test_function.evaluate(integrator, integrator_args).cpu().detach().numpy()
                 - test_function.expected_result
             )
         else:
             diff = np.abs(
-                test_function.evaluate(integrator, integrator_args)
-                - test_function.expected_result
+                test_function.evaluate(integrator, integrator_args) - test_function.expected_result
             )
         if test_function.is_integrand_1d:
             errors.append(diff)
diff --git a/torchquad/tests/integration_grid_test.py b/tests/integration_grid_test.py
similarity index 88%
rename from torchquad/tests/integration_grid_test.py
rename to tests/integration_grid_test.py
index 6f77fc1a..3a460a31 100644
--- a/torchquad/tests/integration_grid_test.py
+++ b/tests/integration_grid_test.py
@@ -7,9 +7,9 @@
 from autoray import numpy as anp
 from autoray import to_backend_dtype
 import autoray as ar
-from integration.integration_grid import IntegrationGrid
-from integration.grid_integrator import GridIntegrator
-from integration.utils import _linspace_with_grads
+from torchquad.integration.integration_grid import IntegrationGrid
+from torchquad.integration.grid_integrator import GridIntegrator
+from torchquad.integration.utils import _linspace_with_grads
 from helper_functions import setup_test_for_backend
 
 
@@ -54,17 +54,11 @@ def _check_grid_validity(grid, integration_domain, N, eps):
         int(N),
         integration_domain.shape[0],
     ), "Incorrect number of calculated points"
-    assert (
-        grid.points.dtype == integration_domain.dtype
-    ), "Grid points have an incorrect dtype"
-    assert (
-        grid.h.dtype == integration_domain.dtype
-    ), "Mesh widths have an incorrect dtype"
+    assert grid.points.dtype == integration_domain.dtype, "Grid points have an incorrect dtype"
+    assert grid.h.dtype == integration_domain.dtype, "Mesh widths have an incorrect dtype"
     for dim in range(len(integration_domain)):
         domain_width = integration_domain[dim][1] - integration_domain[dim][0]
-        assert (
-            anp.abs(grid.h[dim] - domain_width / (grid._N - 1)) < eps
-        ), "Incorrect mesh width"
+        assert anp.abs(grid.h[dim] - domain_width / (grid._N - 1)) < eps, "Incorrect mesh width"
         assert (
             anp.min(grid.points[:, dim]) >= integration_domain[dim][0]
         ), "Points are outside of the integration domain"
@@ -112,9 +106,7 @@ def _run_integration_grid_tests(backend, dtype_name):
 
     def grid_check(x):
         has_right_shape = x.shape == (N, 3)
-        has_right_vals = anp.all(ar.to_numpy(x[0, :]) == 0) and anp.all(
-            ar.to_numpy(x[-1, :]) == 1
-        )
+        has_right_vals = anp.all(ar.to_numpy(x[0, :]) == 0) and anp.all(ar.to_numpy(x[-1, :]) == 1)
         return has_right_shape and has_right_vals
 
     mock_integrator_no_check.integrate(
@@ -144,9 +136,7 @@ def grid_check(x):
 test_integration_grid_torch = setup_test_for_backend(
     _run_integration_grid_tests, "torch", "float64"
 )
-test_integration_grid_jax = setup_test_for_backend(
-    _run_integration_grid_tests, "jax", "float64"
-)
+test_integration_grid_jax = setup_test_for_backend(_run_integration_grid_tests, "jax", "float64")
 test_integration_grid_tensorflow = setup_test_for_backend(
     _run_integration_grid_tests, "tensorflow", "float64"
 )
diff --git a/torchquad/tests/integration_test_functions.py b/tests/integration_test_functions.py
similarity index 86%
rename from torchquad/tests/integration_test_functions.py
rename to tests/integration_test_functions.py
index ec4d79f4..97b9df53 100644
--- a/torchquad/tests/integration_test_functions.py
+++ b/tests/integration_test_functions.py
@@ -1,13 +1,9 @@
-import sys
-
-sys.path.append("../")
-
 from autoray import numpy as anp
 from autoray import infer_backend
 from numpy import inf
 from loguru import logger
 
-from integration.utils import _setup_integration_domain
+from torchquad.integration.utils import _setup_integration_domain
 
 
 class IntegrationTestFunction:
@@ -87,9 +83,7 @@ def integrand(x):
             )
             return self.integrand_scaling(self.f(x))
 
-        return integrator(
-            fn=integrand, integration_domain=self.domain, **integration_args
-        )
+        return integrator(fn=integrand, integration_domain=self.domain, **integration_args)
 
     def get_order(self):
         """Get the order (polynomial degree) of the function
@@ -113,9 +107,7 @@ def integrand_scaling(self, integrand):
         if self.is_integrand_1d:
             return integrand_scaling * integrand
         if self._is_integrand_tensor:
-            scaling_einsum = "".join(
-                [chr(i + 65) for i in range(len(self.integrand_dims))]
-            )
+            scaling_einsum = "".join([chr(i + 65) for i in range(len(self.integrand_dims))])
             return anp.einsum(
                 f"i,{scaling_einsum}->i{scaling_einsum}", integrand, integrand_scaling
             )
@@ -293,3 +285,41 @@ def __init__(
 
     def _sinusoid(self, x):
         return anp.sum(anp.sin(x), axis=1)
+
+
+class ProductFunction(IntegrationTestFunction):
+    def __init__(
+        self,
+        expected_result=None,
+        integration_dim=1,
+        domain=None,
+        is_complex=False,
+        backend=None,
+        integrand_dims=1,
+    ):
+        """Creates an n-dimensional product test function.
+
+        f(x) = prod(cos(x_i)) - product of cosines across dimensions
+
+        Args:
+            expected_result (backend tensor): Expected result. Required to compute errors.
+            integration_dim (int, optional): Input dimension. Defaults to 1.
+            domain (list or backend tensor, optional): Integration domain passed to _setup_integration_domain.
+            is_complex (Boolean): If the test function contains complex numbers. Defaults to False.
+            backend (string, optional): Numerical backend passed to _setup_integration_domain.
+            integrand_dims (Union[int, tuple], optional): Defaults to 1.  Should either be 1 or a tuple.  Determines how the integrand will be evaluated,
+            whether once or over a matrix/vector of scaling factors.
+        """
+        super().__init__(
+            expected_result,
+            integration_dim,
+            domain,
+            is_complex,
+            backend,
+            integrand_dims,
+        )
+        self.f = self._product
+
+    def _product(self, x):
+        # Compute product of cos(x_i) across dimensions
+        return anp.prod(anp.cos(x), axis=1)
diff --git a/torchquad/tests/integrator_types_test.py b/tests/integrator_types_test.py
old mode 100755
new mode 100644
similarity index 91%
rename from torchquad/tests/integrator_types_test.py
rename to tests/integrator_types_test.py
index de48283a..49671ae2
--- a/torchquad/tests/integrator_types_test.py
+++ b/tests/integrator_types_test.py
@@ -3,20 +3,16 @@
 Additional integration tests to check if dtypes, shapes and similar
 backend-specific properties
 """
-import sys
-
-sys.path.append("../")
-
 from autoray import numpy as anp
 from autoray import infer_backend, get_dtype_name, to_backend_dtype
 from itertools import product
 
-from integration.trapezoid import Trapezoid
-from integration.simpson import Simpson
-from integration.boole import Boole
-from integration.monte_carlo import MonteCarlo
-from integration.vegas import VEGAS
-from utils.set_precision import set_precision
+from torchquad.integration.trapezoid import Trapezoid
+from torchquad.integration.simpson import Simpson
+from torchquad.integration.boole import Boole
+from torchquad.integration.monte_carlo import MonteCarlo
+from torchquad.integration.vegas import VEGAS
+from torchquad.utils.set_precision import set_precision
 from helper_functions import setup_test_for_backend
 
 
@@ -120,9 +116,7 @@ def fn_const(x):
 test_integrate_numpy = setup_test_for_backend(_run_simple_integrations, "numpy", None)
 test_integrate_torch = setup_test_for_backend(_run_simple_integrations, "torch", None)
 test_integrate_jax = setup_test_for_backend(_run_simple_integrations, "jax", None)
-test_integrate_tensorflow = setup_test_for_backend(
-    _run_simple_integrations, "tensorflow", None
-)
+test_integrate_tensorflow = setup_test_for_backend(_run_simple_integrations, "tensorflow", None)
 
 
 if __name__ == "__main__":
diff --git a/torchquad/tests/monte_carlo_test.py b/tests/monte_carlo_test.py
similarity index 57%
rename from torchquad/tests/monte_carlo_test.py
rename to tests/monte_carlo_test.py
index 48cc3c62..4a617f46 100644
--- a/torchquad/tests/monte_carlo_test.py
+++ b/tests/monte_carlo_test.py
@@ -1,8 +1,7 @@
-import sys
+import torch
+import pytest
 
-sys.path.append("../")
-
-from integration.monte_carlo import MonteCarlo
+from torchquad.integration.monte_carlo import MonteCarlo
 from helper_functions import (
     compute_integration_test_errors,
     setup_test_for_backend,
@@ -24,9 +23,7 @@ def _run_monte_carlo_tests(backend, _precision):
         use_complex=True,
         backend=backend,
     )
-    print(
-        f"1D Monte Carlo Test passed. N: {N}, backend: {backend}, Errors: {str(errors)}"
-    )
+    print(f"1D Monte Carlo Test passed. N: {N}, backend: {backend}, Errors: {str(errors)}")
     # Constant functions can be integrated exactly with MonteCarlo.
     # (at least our example functions)
     for err, test_function in zip(errors, funcs):
@@ -54,9 +51,7 @@ def _run_monte_carlo_tests(backend, _precision):
         use_complex=True,
         backend=backend,
     )
-    print(
-        f"3D Monte Carlo Test passed. N: {N}, backend: {backend}, Errors: {str(errors)}"
-    )
+    print(f"3D Monte Carlo Test passed. N: {N}, backend: {backend}, Errors: {str(errors)}")
     for err, test_function in zip(errors, funcs):
         assert test_function.get_order() > 0 or err == 0.0
     for error, test_function in zip(errors, funcs):
@@ -73,10 +68,7 @@ def _run_monte_carlo_tests(backend, _precision):
         use_complex=True,
         backend=backend,
     )
-    print(
-        f"10D Monte Carlo Test passed. N: {N}, backend: {backend}, Errors:"
-        f" {str(errors)}"
-    )
+    print(f"10D Monte Carlo Test passed. N: {N}, backend: {backend}, Errors:" f" {str(errors)}")
     for err, test_function in zip(errors, funcs):
         assert test_function.get_order() > 0 or err == 0.0
     for error in errors:
@@ -92,9 +84,7 @@ def integrate(*args, **kwargs):
             # which is then re-used on all other integrations (as is the point of JIT).
             nonlocal jit_integrate
             if jit_integrate is None:
-                jit_integrate = mc.get_jit_compiled_integrate(
-                    dim=1, N=N, backend=backend
-                )
+                jit_integrate = mc.get_jit_compiled_integrate(dim=1, N=N, backend=backend)
             return jit_integrate(*args, **kwargs)
 
         errors, funcs = compute_integration_test_errors(
@@ -125,9 +115,7 @@ def integrate(*args, **kwargs):
         for error in errors[10:]:
             assert error < 35.0
 
-        jit_integrate = (
-            None  # set to None again so can be re-used with new integrand shape
-        )
+        jit_integrate = None  # set to None again so can be re-used with new integrand shape
 
         errors, funcs = compute_integration_test_errors(
             integrate,
@@ -145,16 +133,59 @@ def integrate(*args, **kwargs):
             assert test_function.get_order() > 0 or err < 1e-14
 
 
-test_integrate_numpy = setup_test_for_backend(
-    _run_monte_carlo_tests, "numpy", "float32"
-)
-test_integrate_torch = setup_test_for_backend(
-    _run_monte_carlo_tests, "torch", "float32"
-)
+def test_monte_carlo_calculate_result_kwargs():
+    """Test that MonteCarlo().calculate_result() works correctly with keyword arguments."""
+
+    def integrand1(x):
+        return torch.rand(x.shape, device=x.device)
+
+    # Use the same device as the current default to avoid device mismatch
+    integration_domain = torch.tensor([[0.0, 1.0]])
+    N = 1000
+
+    integrator = MonteCarlo()
+    sample_points = integrator.calculate_sample_points(N, integration_domain, seed=42)
+    function_values, _ = integrator.evaluate_integrand(integrand1, sample_points)
+
+    # Test with positional arguments (should work as before)
+    integral1 = integrator.calculate_result(function_values, integration_domain)
+
+    # Test with keyword arguments (this was failing before the fix)
+    integral2 = integrator.calculate_result(
+        function_values=function_values, integration_domain=integration_domain
+    )
+
+    # Test with mixed positional and keyword arguments
+    integral3 = integrator.calculate_result(function_values, integration_domain=integration_domain)
+
+    # All results should be approximately equal
+    assert torch.allclose(integral1, integral2, rtol=1e-10)
+    assert torch.allclose(integral1, integral3, rtol=1e-10)
+
+
+def test_monte_carlo_calculate_result_error_handling():
+    """Test that MonteCarlo().calculate_result() gives meaningful error messages for invalid inputs."""
+
+    integrator = MonteCarlo()
+
+    # Test missing function_values argument
+    with pytest.raises(ValueError) as exc_info:
+        integrator.calculate_result(integration_domain=torch.tensor([[0.0, 1.0]]))
+
+    assert "function_values argument not found" in str(exc_info.value)
+    assert "Please provide function_values" in str(exc_info.value)
+
+    # Test with only self argument (no function_values)
+    with pytest.raises(ValueError) as exc_info:
+        integrator.calculate_result()
+
+    assert "function_values argument not found" in str(exc_info.value)
+
+
+test_integrate_numpy = setup_test_for_backend(_run_monte_carlo_tests, "numpy", "float32")
+test_integrate_torch = setup_test_for_backend(_run_monte_carlo_tests, "torch", "float32")
 test_integrate_jax = setup_test_for_backend(_run_monte_carlo_tests, "jax", "float32")
-test_integrate_tensorflow = setup_test_for_backend(
-    _run_monte_carlo_tests, "tensorflow", "float32"
-)
+test_integrate_tensorflow = setup_test_for_backend(_run_monte_carlo_tests, "tensorflow", "float32")
 
 
 if __name__ == "__main__":
@@ -163,3 +194,8 @@ def integrate(*args, **kwargs):
     test_integrate_torch()
     test_integrate_jax()
     test_integrate_tensorflow()
+
+    # Test the new keyword argument functionality
+    test_monte_carlo_calculate_result_kwargs()
+    test_monte_carlo_calculate_result_error_handling()
+    print("All MonteCarlo keyword argument tests passed!")
diff --git a/tests/rng_test.py b/tests/rng_test.py
new file mode 100644
index 00000000..691af565
--- /dev/null
+++ b/tests/rng_test.py
@@ -0,0 +1,252 @@
+from autoray import numpy as anp
+from autoray import infer_backend, get_dtype_name, to_backend_dtype, to_numpy
+import pytest
+
+from torchquad.integration.rng import RNG
+
+from helper_functions import setup_test_for_backend
+
+
+def _run_RNG_tests(backend, dtype_name):
+    """
+    Test the random number generator with the given numerical backend
+    * With the same seed, the same numbers should be generated
+    * With different seeds, different numbers should be generated
+    * If seed is None / omitted, the RNG should be randomly seeded
+    """
+    backend_dtype = to_backend_dtype(dtype_name, like=backend)
+    size = [3, 9]
+    generateds = [
+        RNG(backend, 547).uniform(size=size, dtype=backend_dtype),
+        RNG(backend, None).uniform(size=size, dtype=backend_dtype),
+        RNG(backend, 547).uniform(size=size, dtype=backend_dtype),
+        RNG(backend).uniform(size=size, dtype=backend_dtype),
+        RNG(backend, 42).uniform(size=size, dtype=backend_dtype),
+    ]
+    numpy_arrs = list(map(to_numpy, generateds))
+
+    # Validity of the backend, dtype, shape and values range
+    assert all(infer_backend(arr) == backend for arr in generateds)
+    assert all(get_dtype_name(arr) == dtype_name for arr in generateds)
+    assert all(arr.shape == (3, 9) for arr in generateds)
+    assert all(0.0 <= x <= 1.0 for arr in numpy_arrs for x in arr.ravel())
+
+    # Test if the seed argument leads to consistent results and
+    # if omitting a seed leads to random numbers
+    assert anp.array_equal(numpy_arrs[0], numpy_arrs[2])
+    for i1 in range(len(generateds)):
+        for i2 in range(i1 + 1, len(generateds)):
+            if i1 == 0 and i2 == 2:
+                continue
+            # With a very low probability this may fail
+            assert not anp.array_equal(numpy_arrs[i1], numpy_arrs[i2])
+
+
+def _run_torch_save_state_tests(backend, dtype_name):
+    """
+    Test torch_save_state functionality for PyTorch backend
+    """
+    if backend != "torch":
+        pytest.skip("torch_save_state tests only applicable for PyTorch backend")
+
+    backend_dtype = to_backend_dtype(dtype_name, like=backend)
+    size = [2, 3]
+
+    # Test that torch_save_state=True works and produces valid arrays
+    rng_save_true = RNG(backend, seed=123, torch_save_state=True)
+    arr_save_true = rng_save_true.uniform(size=size, dtype=backend_dtype)
+
+    # Check basic properties
+    assert arr_save_true.shape == (2, 3)
+    assert all(0.0 <= x <= 1.0 for x in to_numpy(arr_save_true).ravel())
+
+    # Test that torch_save_state=False works and produces valid arrays
+    rng_save_false = RNG(backend, seed=123, torch_save_state=False)
+    arr_save_false = rng_save_false.uniform(size=size, dtype=backend_dtype)
+
+    # Check basic properties
+    assert arr_save_false.shape == (2, 3)
+    assert all(0.0 <= x <= 1.0 for x in to_numpy(arr_save_false).ravel())
+
+    # Test that both modes produce some randomness
+    arr2_save_true = rng_save_true.uniform(size=size, dtype=backend_dtype)
+    arr2_save_false = rng_save_false.uniform(size=size, dtype=backend_dtype)
+
+    # Second calls should be different from first calls
+    assert not anp.array_equal(to_numpy(arr_save_true), to_numpy(arr2_save_true))
+    assert not anp.array_equal(to_numpy(arr_save_false), to_numpy(arr2_save_false))
+
+
+def _run_jax_key_tests(backend, dtype_name):
+    """
+    Test JAX key get/set functionality
+    """
+    if backend != "jax":
+        pytest.skip("JAX key tests only applicable for JAX backend")
+
+    backend_dtype = to_backend_dtype(dtype_name, like=backend)
+    size = [2, 3]
+
+    rng = RNG(backend, seed=42)
+
+    # Test getting the key
+    key1 = rng.jax_get_key()
+    assert key1 is not None
+
+    # Generate some numbers
+    arr1 = rng.uniform(size=size, dtype=backend_dtype)
+
+    # Key should have changed after generation
+    key2 = rng.jax_get_key()
+    assert not anp.array_equal(to_numpy(key1), to_numpy(key2))
+
+    # Test setting the key back
+    rng.jax_set_key(key1)
+    key3 = rng.jax_get_key()
+    assert anp.array_equal(to_numpy(key1), to_numpy(key3))
+
+    # Should generate the same numbers again
+    arr2 = rng.uniform(size=size, dtype=backend_dtype)
+    assert anp.array_equal(to_numpy(arr1), to_numpy(arr2))
+
+
+def _run_edge_case_tests(backend, dtype_name):
+    """
+    Test edge cases and error conditions
+    """
+    backend_dtype = to_backend_dtype(dtype_name, like=backend)
+
+    # Test with zero size
+    rng = RNG(backend, seed=42)
+    arr = rng.uniform(size=[0], dtype=backend_dtype)
+    assert arr.shape == (0,)
+
+    # Test with large size
+    large_size = [10, 10]
+    arr_large = rng.uniform(size=large_size, dtype=backend_dtype)
+    assert arr_large.shape == (10, 10)
+    assert all(0.0 <= x <= 1.0 for x in to_numpy(arr_large).ravel())
+
+    # Test multiple calls produce different results (unless seeded identically)
+    arr1 = rng.uniform(size=[5], dtype=backend_dtype)
+    arr2 = rng.uniform(size=[5], dtype=backend_dtype)
+    assert not anp.array_equal(to_numpy(arr1), to_numpy(arr2))
+
+
+def _run_backend_consistency_tests(backend, dtype_name):
+    """
+    Test that the uniform method generates consistent results with same seed
+    """
+    backend_dtype = to_backend_dtype(dtype_name, like=backend)
+    size = [3, 4]
+
+    if backend == "torch":
+        # For PyTorch, test torch_save_state functionality for consistency
+        rng1 = RNG(backend, seed=12345, torch_save_state=True)
+        rng2 = RNG(backend, seed=12345, torch_save_state=True)
+
+        # Generate arrays - first calls should be equal
+        arr1_first = rng1.uniform(size=size, dtype=backend_dtype)
+        arr2_first = rng2.uniform(size=size, dtype=backend_dtype)
+        assert anp.array_equal(to_numpy(arr1_first), to_numpy(arr2_first))
+
+        # Test that consecutive calls from same RNG are different
+        arr1_second = rng1.uniform(size=size, dtype=backend_dtype)
+        assert not anp.array_equal(to_numpy(arr1_first), to_numpy(arr1_second))
+    else:
+        # For other backends, test normal seeding behavior
+        rng1 = RNG(backend, seed=12345)
+        rng2 = RNG(backend, seed=12345)
+
+        # Generate first arrays - should be equal
+        arr1 = rng1.uniform(size=size, dtype=backend_dtype)
+        arr2 = rng2.uniform(size=size, dtype=backend_dtype)
+        assert anp.array_equal(to_numpy(arr1), to_numpy(arr2))
+
+        # Test with different seed
+        rng3 = RNG(backend, seed=54321)
+        arr3 = rng3.uniform(size=size, dtype=backend_dtype)
+        assert not anp.array_equal(to_numpy(arr1), to_numpy(arr3))
+
+
+# Original tests
+test_rng_jax_f32 = setup_test_for_backend(_run_RNG_tests, "jax", "float32")
+test_rng_jax_f64 = setup_test_for_backend(_run_RNG_tests, "jax", "float64")
+test_rng_numpy_f32 = setup_test_for_backend(_run_RNG_tests, "numpy", "float32")
+test_rng_numpy_f64 = setup_test_for_backend(_run_RNG_tests, "numpy", "float64")
+test_rng_torch_f32 = setup_test_for_backend(_run_RNG_tests, "torch", "float32")
+test_rng_torch_f64 = setup_test_for_backend(_run_RNG_tests, "torch", "float64")
+test_rng_tensorflow_f32 = setup_test_for_backend(_run_RNG_tests, "tensorflow", "float32")
+test_rng_tensorflow_f64 = setup_test_for_backend(_run_RNG_tests, "tensorflow", "float64")
+
+# Additional comprehensive tests
+test_torch_save_state_f32 = setup_test_for_backend(_run_torch_save_state_tests, "torch", "float32")
+test_torch_save_state_f64 = setup_test_for_backend(_run_torch_save_state_tests, "torch", "float64")
+
+test_jax_key_f32 = setup_test_for_backend(_run_jax_key_tests, "jax", "float32")
+test_jax_key_f64 = setup_test_for_backend(_run_jax_key_tests, "jax", "float64")
+
+test_edge_cases_numpy_f32 = setup_test_for_backend(_run_edge_case_tests, "numpy", "float32")
+test_edge_cases_numpy_f64 = setup_test_for_backend(_run_edge_case_tests, "numpy", "float64")
+test_edge_cases_torch_f32 = setup_test_for_backend(_run_edge_case_tests, "torch", "float32")
+test_edge_cases_torch_f64 = setup_test_for_backend(_run_edge_case_tests, "torch", "float64")
+test_edge_cases_jax_f32 = setup_test_for_backend(_run_edge_case_tests, "jax", "float32")
+test_edge_cases_jax_f64 = setup_test_for_backend(_run_edge_case_tests, "jax", "float64")
+test_edge_cases_tensorflow_f32 = setup_test_for_backend(
+    _run_edge_case_tests, "tensorflow", "float32"
+)
+test_edge_cases_tensorflow_f64 = setup_test_for_backend(
+    _run_edge_case_tests, "tensorflow", "float64"
+)
+
+test_consistency_numpy_f32 = setup_test_for_backend(
+    _run_backend_consistency_tests, "numpy", "float32"
+)
+test_consistency_numpy_f64 = setup_test_for_backend(
+    _run_backend_consistency_tests, "numpy", "float64"
+)
+test_consistency_torch_f32 = setup_test_for_backend(
+    _run_backend_consistency_tests, "torch", "float32"
+)
+test_consistency_torch_f64 = setup_test_for_backend(
+    _run_backend_consistency_tests, "torch", "float64"
+)
+test_consistency_jax_f32 = setup_test_for_backend(_run_backend_consistency_tests, "jax", "float32")
+test_consistency_jax_f64 = setup_test_for_backend(_run_backend_consistency_tests, "jax", "float64")
+test_consistency_tensorflow_f32 = setup_test_for_backend(
+    _run_backend_consistency_tests, "tensorflow", "float32"
+)
+test_consistency_tensorflow_f64 = setup_test_for_backend(
+    _run_backend_consistency_tests, "tensorflow", "float64"
+)
+
+
+if __name__ == "__main__":
+    # used to run this test individually
+    # Original tests
+    test_rng_numpy_f32()
+    test_rng_numpy_f64()
+    test_rng_torch_f32()
+    test_rng_torch_f64()
+    test_rng_jax_f32()
+    test_rng_jax_f64()
+    test_rng_tensorflow_f32()
+    test_rng_tensorflow_f64()
+
+    # Additional comprehensive tests
+    test_torch_save_state_f32()
+    test_torch_save_state_f64()
+    test_jax_key_f32()
+    test_jax_key_f64()
+
+    # Edge case tests
+    test_edge_cases_numpy_f32()
+    test_edge_cases_torch_f32()
+    test_edge_cases_jax_f32()
+    test_edge_cases_tensorflow_f32()
+
+    # Consistency tests
+    test_consistency_numpy_f32()
+    test_consistency_torch_f32()
+    test_consistency_jax_f32()
+    test_consistency_tensorflow_f32()
diff --git a/torchquad/tests/simpson_test.py b/tests/simpson_test.py
similarity index 66%
rename from torchquad/tests/simpson_test.py
rename to tests/simpson_test.py
index b9865ac8..ba62ad80 100644
--- a/torchquad/tests/simpson_test.py
+++ b/tests/simpson_test.py
@@ -1,10 +1,8 @@
-import sys
-
-sys.path.append("../")
-
 import warnings
+import torch
+import pytest
 
-from integration.simpson import Simpson
+from torchquad.integration.simpson import Simpson
 from helper_functions import (
     compute_integration_test_errors,
     setup_test_for_backend,
@@ -99,9 +97,7 @@ def integrate(*args, **kwargs):
             # which is then re-used on all other integrations (as is the point of JIT).
             nonlocal jit_integrate
             if jit_integrate is None:
-                jit_integrate = simp.get_jit_compiled_integrate(
-                    dim=1, N=N, backend=backend
-                )
+                jit_integrate = simp.get_jit_compiled_integrate(dim=1, N=N, backend=backend)
             return jit_integrate(*args, **kwargs)
 
         errors, funcs = compute_integration_test_errors(
@@ -113,9 +109,7 @@ def integrate(*args, **kwargs):
             filter_test_functions=lambda x: x.is_integrand_1d,
         )
 
-        print(
-            f"1D Simpson JIT Test passed. N: {N}, backend: {backend}, Errors: {errors}"
-        )
+        print(f"1D Simpson JIT Test passed. N: {N}, backend: {backend}, Errors: {errors}")
         for err, test_function in zip(errors, funcs):
             assert test_function.get_order() > 3 or (
                 err < 3e-11 if test_function.is_integrand_1d else err < 6e-10
@@ -123,9 +117,7 @@ def integrate(*args, **kwargs):
         for error in errors:
             assert error < 1e-7
 
-        jit_integrate = (
-            None  # set to None again so can be re-used with new integrand shape
-        )
+        jit_integrate = None  # set to None again so can be re-used with new integrand shape
 
         errors, funcs = compute_integration_test_errors(
             integrate,
@@ -146,12 +138,70 @@ def integrate(*args, **kwargs):
             assert error < 1e-7
 
 
+def test_simpson_calculate_result_kwargs():
+    """Test that Simpson().calculate_result() works correctly with keyword arguments."""
+
+    def integrand1(x):
+        return torch.rand(x.shape, device=x.device)
+
+    integration_domain = torch.tensor([[0.0, 1.0]])
+    dim = 1
+    N = 101
+
+    integrator = Simpson()
+    grid_points, hs, n_per_dim = integrator.calculate_grid(N, integration_domain)
+    function_values, _ = integrator.evaluate_integrand(integrand1, grid_points)
+
+    # Test with positional arguments (should work as before)
+    integral1 = integrator.calculate_result(function_values, dim, n_per_dim, hs, integration_domain)
+
+    # Test with keyword arguments (this was failing before the fix)
+    integral2 = integrator.calculate_result(
+        function_values=function_values,
+        dim=dim,
+        n_per_dim=n_per_dim,
+        hs=hs,
+        integration_domain=integration_domain,
+    )
+
+    # Test with mixed positional and keyword arguments
+    integral3 = integrator.calculate_result(
+        function_values, dim=dim, n_per_dim=n_per_dim, hs=hs, integration_domain=integration_domain
+    )
+
+    # All results should be approximately equal
+    assert torch.allclose(integral1, integral2, rtol=1e-10)
+    assert torch.allclose(integral1, integral3, rtol=1e-10)
+
+
+def test_simpson_calculate_result_error_handling():
+    """Test that Simpson().calculate_result() gives meaningful error messages for invalid inputs."""
+
+    integrator = Simpson()
+
+    # Test missing function_values argument
+    with pytest.raises(ValueError) as exc_info:
+        integrator.calculate_result(
+            dim=1,
+            n_per_dim=101,
+            hs=torch.tensor([0.01]),
+            integration_domain=torch.tensor([[0.0, 1.0]]),
+        )
+
+    assert "function_values argument not found" in str(exc_info.value)
+    assert "Please provide function_values" in str(exc_info.value)
+
+    # Test with only self argument (no function_values)
+    with pytest.raises(ValueError) as exc_info:
+        integrator.calculate_result()
+
+    assert "function_values argument not found" in str(exc_info.value)
+
+
 test_integrate_numpy = setup_test_for_backend(_run_simpson_tests, "numpy", "float64")
 test_integrate_torch = setup_test_for_backend(_run_simpson_tests, "torch", "float64")
 test_integrate_jax = setup_test_for_backend(_run_simpson_tests, "jax", "float64")
-test_integrate_tensorflow = setup_test_for_backend(
-    _run_simpson_tests, "tensorflow", "float64"
-)
+test_integrate_tensorflow = setup_test_for_backend(_run_simpson_tests, "tensorflow", "float64")
 
 
 if __name__ == "__main__":
@@ -160,3 +210,8 @@ def integrate(*args, **kwargs):
     test_integrate_torch()
     test_integrate_jax()
     test_integrate_tensorflow()
+
+    # Test the new keyword argument functionality
+    test_simpson_calculate_result_kwargs()
+    test_simpson_calculate_result_error_handling()
+    print("All Simpson keyword argument tests passed!")
diff --git a/tests/test_deployment.py b/tests/test_deployment.py
new file mode 100644
index 00000000..2f79d9d9
--- /dev/null
+++ b/tests/test_deployment.py
@@ -0,0 +1,72 @@
+"""
+Test module for deployment verification functionality.
+This test ensures that the _deployment_test function works correctly
+and is included in pytest coverage.
+"""
+
+from torchquad.utils.deployment_test import _deployment_test
+
+
+def test_deployment_functionality():
+    """
+    Test that the deployment test runs successfully.
+
+    This test verifies that the _deployment_test function executes
+    without critical errors and returns True, indicating successful
+    deployment verification.
+    """
+    # Run the deployment test
+    result = _deployment_test()
+
+    # The deployment test should return True on success
+    assert result is True, "Deployment test failed - check logs for details"
+
+
+def test_deployment_test_imports():
+    """
+    Test that all required imports for deployment test are available.
+    """
+    # Test that we can import the deployment test function
+    from torchquad.utils.deployment_test import _deployment_test
+
+    assert callable(_deployment_test)
+
+    # Test that deployment test helper functions exist
+    from torchquad.utils.deployment_test import _get_exp_func, _get_sin_func
+    from torchquad.utils.deployment_test import _infer_backend_from_tensor, _is_finite_result
+
+    assert callable(_get_exp_func)
+    assert callable(_get_sin_func)
+    assert callable(_infer_backend_from_tensor)
+    assert callable(_is_finite_result)
+
+
+def test_deployment_helper_functions():
+    """
+    Test the helper functions used by deployment test.
+    """
+    from torchquad.utils.deployment_test import _is_finite_result
+
+    # Test finite result detection
+    assert _is_finite_result(1.0)
+    assert _is_finite_result(-1.0)
+    assert _is_finite_result(0.0)
+
+    # Test with different types
+    assert _is_finite_result(42)
+
+    # These tests might not work on all systems, so we'll be lenient
+    try:
+        assert not _is_finite_result(float("inf"))
+        assert not _is_finite_result(float("nan"))
+    except (ImportError, AssertionError):
+        # Skip if math operations not available or behave differently
+        pass
+
+
+if __name__ == "__main__":
+    # Run tests individually for debugging
+    test_deployment_functionality()
+    test_deployment_test_imports()
+    test_deployment_helper_functions()
+    print("All deployment tests passed!")
diff --git a/torchquad/tests/trapezoid_test.py b/tests/trapezoid_test.py
similarity index 66%
rename from torchquad/tests/trapezoid_test.py
rename to tests/trapezoid_test.py
index 086cbdb7..79519fdf 100644
--- a/torchquad/tests/trapezoid_test.py
+++ b/tests/trapezoid_test.py
@@ -1,8 +1,7 @@
-import sys
+import torch
+import pytest
 
-sys.path.append("../")
-
-from integration.trapezoid import Trapezoid
+from torchquad.integration.trapezoid import Trapezoid
 from helper_functions import (
     compute_integration_test_errors,
     setup_test_for_backend,
@@ -96,9 +95,7 @@ def integrate(*args, **kwargs):
             # which is then re-used on all other integrations (as is the point of JIT).
             nonlocal jit_integrate
             if jit_integrate is None:
-                jit_integrate = tp.get_jit_compiled_integrate(
-                    dim=1, N=N, backend=backend
-                )
+                jit_integrate = tp.get_jit_compiled_integrate(dim=1, N=N, backend=backend)
             return jit_integrate(*args, **kwargs)
 
         errors, funcs = compute_integration_test_errors(
@@ -120,9 +117,7 @@ def integrate(*args, **kwargs):
         for error in errors:
             assert error < 1e-5
 
-        jit_integrate = (
-            None  # set to None again so can be re-used with new integrand shape
-        )
+        jit_integrate = None  # set to None again so can be re-used with new integrand shape
 
         errors, funcs = compute_integration_test_errors(
             integrate,
@@ -143,12 +138,70 @@ def integrate(*args, **kwargs):
             assert error < 1e-5
 
 
+def test_trapezoid_calculate_result_kwargs():
+    """Test that Trapezoid().calculate_result() works correctly with keyword arguments."""
+
+    def integrand1(x):
+        return torch.rand(x.shape, device=x.device)
+
+    integration_domain = torch.tensor([[0.0, 1.0]])
+    dim = 1
+    N = 101
+
+    integrator = Trapezoid()
+    grid_points, hs, n_per_dim = integrator.calculate_grid(N, integration_domain)
+    function_values, _ = integrator.evaluate_integrand(integrand1, grid_points)
+
+    # Test with positional arguments (should work as before)
+    integral1 = integrator.calculate_result(function_values, dim, n_per_dim, hs, integration_domain)
+
+    # Test with keyword arguments (this was failing before the fix)
+    integral2 = integrator.calculate_result(
+        function_values=function_values,
+        dim=dim,
+        n_per_dim=n_per_dim,
+        hs=hs,
+        integration_domain=integration_domain,
+    )
+
+    # Test with mixed positional and keyword arguments
+    integral3 = integrator.calculate_result(
+        function_values, dim=dim, n_per_dim=n_per_dim, hs=hs, integration_domain=integration_domain
+    )
+
+    # All results should be approximately equal
+    assert torch.allclose(integral1, integral2, rtol=1e-10)
+    assert torch.allclose(integral1, integral3, rtol=1e-10)
+
+
+def test_trapezoid_calculate_result_error_handling():
+    """Test that Trapezoid().calculate_result() gives meaningful error messages for invalid inputs."""
+
+    integrator = Trapezoid()
+
+    # Test missing function_values argument
+    with pytest.raises(ValueError) as exc_info:
+        integrator.calculate_result(
+            dim=1,
+            n_per_dim=101,
+            hs=torch.tensor([0.01]),
+            integration_domain=torch.tensor([[0.0, 1.0]]),
+        )
+
+    assert "function_values argument not found" in str(exc_info.value)
+    assert "Please provide function_values" in str(exc_info.value)
+
+    # Test with only self argument (no function_values)
+    with pytest.raises(ValueError) as exc_info:
+        integrator.calculate_result()
+
+    assert "function_values argument not found" in str(exc_info.value)
+
+
 test_integrate_numpy = setup_test_for_backend(_run_trapezoid_tests, "numpy", "float64")
 test_integrate_torch = setup_test_for_backend(_run_trapezoid_tests, "torch", "float64")
 test_integrate_jax = setup_test_for_backend(_run_trapezoid_tests, "jax", "float64")
-test_integrate_tensorflow = setup_test_for_backend(
-    _run_trapezoid_tests, "tensorflow", "float64"
-)
+test_integrate_tensorflow = setup_test_for_backend(_run_trapezoid_tests, "tensorflow", "float64")
 
 
 if __name__ == "__main__":
@@ -157,3 +210,8 @@ def integrate(*args, **kwargs):
     test_integrate_torch()
     test_integrate_jax()
     test_integrate_tensorflow()
+
+    # Test the new keyword argument functionality
+    test_trapezoid_calculate_result_kwargs()
+    test_trapezoid_calculate_result_error_handling()
+    print("All Trapezoid keyword argument tests passed!")
diff --git a/torchquad/tests/utils_integration_test.py b/tests/utils_integration_test.py
similarity index 91%
rename from torchquad/tests/utils_integration_test.py
rename to tests/utils_integration_test.py
index ad35de41..77c89e0b 100644
--- a/torchquad/tests/utils_integration_test.py
+++ b/tests/utils_integration_test.py
@@ -1,21 +1,17 @@
-import sys
-
-sys.path.append("../")
-
 from autoray import numpy as anp
 from autoray import infer_backend, get_dtype_name, to_backend_dtype
 import importlib
 import pytest
 import warnings
 
-from integration.utils import (
+from torchquad.integration.utils import (
     _linspace_with_grads,
     _add_at_indices,
     _setup_integration_domain,
     _is_compiling,
 )
-from utils.set_precision import set_precision
-from utils.enable_cuda import enable_cuda
+from torchquad.utils.set_precision import set_precision
+from torchquad.utils.enable_cuda import enable_cuda
 
 
 def _run_tests_with_all_backends(func, func_extra_args=[{}]):
@@ -63,9 +59,7 @@ def _run_linspace_with_grads_tests(dtype_name, backend, requires_grad):
     dtype_backend = to_backend_dtype(dtype_name, like=backend)
     start = anp.array(-2.0, like=backend, dtype=dtype_backend)
     stop = anp.array(-1.0, like=backend, dtype=dtype_backend)
-    assert (
-        get_dtype_name(start) == dtype_name
-    ), "Unexpected dtype for the configured precision"
+    assert get_dtype_name(start) == dtype_name, "Unexpected dtype for the configured precision"
     grid1d = _linspace_with_grads(start, stop, 10, requires_grad)
     # Test if the backend, dtype and shape match
     assert infer_backend(grid1d) == backend
@@ -130,9 +124,7 @@ def _run_setup_integration_domain_tests(dtype_name, backend):
     """
     Test _setup_integration_domain with the given dtype and numerical backend
     """
-    print(
-        f"Testing _setup_integration_domain; backend: {backend}, precision: {dtype_name}"
-    )
+    print(f"Testing _setup_integration_domain; backend: {backend}, precision: {dtype_name}")
 
     # Domain given as List with Python floats
     domain = _setup_integration_domain(2, [[0.0, 1.0], [1.0, 2.0]], backend)
@@ -157,17 +149,13 @@ def _run_setup_integration_domain_tests(dtype_name, backend):
 
     # User-specified domain
     dtype_backend = to_backend_dtype(dtype_name, like=backend)
-    custom_domain = anp.array(
-        [[0.0, 1.0], [1.0, 2.0]], like=backend, dtype=dtype_backend
-    )
+    custom_domain = anp.array([[0.0, 1.0], [1.0, 2.0]], like=backend, dtype=dtype_backend)
     domain = _setup_integration_domain(2, custom_domain, None)
     assert domain.shape == custom_domain.shape
     assert domain.dtype == custom_domain.dtype
 
     # Backend specified with both backend and integration_domain parameters
-    custom_np_domain = anp.array(
-        [[0.0, 1.0], [1.0, 2.0]], like="numpy", dtype="float16"
-    )
+    custom_np_domain = anp.array([[0.0, 1.0], [1.0, 2.0]], like="numpy", dtype="float16")
     domain = _setup_integration_domain(2, custom_np_domain, backend)
     assert (
         infer_backend(domain) == backend
@@ -203,14 +191,10 @@ def _run_is_compiling_tests(dtype_name, backend):
     """
     dtype = to_backend_dtype(dtype_name, like=backend)
     x = anp.array([[0.0, 1.0], [1.0, 2.0]], dtype=dtype, like=backend)
-    assert not _is_compiling(
-        x
-    ), f"_is_compiling has a false positive with backend {backend}"
+    assert not _is_compiling(x), f"_is_compiling has a false positive with backend {backend}"
 
     def check_compiling(x):
-        assert _is_compiling(
-            x
-        ), f"_is_compiling has a false negative with backend {backend}"
+        assert _is_compiling(x), f"_is_compiling has a false negative with backend {backend}"
         return x
 
     if backend == "jax":
diff --git a/torchquad/tests/vegas_map_test.py b/tests/vegas_map_test.py
similarity index 87%
rename from torchquad/tests/vegas_map_test.py
rename to tests/vegas_map_test.py
index 7448c746..7b8f5551 100644
--- a/torchquad/tests/vegas_map_test.py
+++ b/tests/vegas_map_test.py
@@ -1,11 +1,7 @@
-import sys
-
-sys.path.append("../")
-
 from autoray import numpy as anp
 from autoray import to_backend_dtype
 
-from integration.vegas_map import VEGASMap
+from torchquad.integration.vegas_map import VEGASMap
 
 from helper_functions import setup_test_for_backend
 
@@ -58,7 +54,7 @@ def _run_vegas_map_checks(backend, dtype_name):
     # Get example point and function values
     N_per_dim = 100
     y = anp.linspace(0.0, 0.99999, N_per_dim, dtype=dtype_float, like=backend)
-    y = anp.meshgrid(*([y] * dim))
+    y = anp.meshgrid(*([y] * dim), indexing="ij")
     y = anp.stack([mg.ravel() for mg in y], axis=1, like=backend)
     # Use exp to get a peak in a corner
     f_eval = anp.prod(anp.exp(y), axis=1)
@@ -101,18 +97,14 @@ def _run_vegas_map_checks(backend, dtype_name):
         like=backend,
     )
     smoothed_weights = VEGASMap._smooth_map(weights, counts, alpha)
-    _check_tensor_similarity(
-        smoothed_weights, smoothed_weights_expected, 3e-7, dtype_float
-    )
+    _check_tensor_similarity(smoothed_weights, smoothed_weights_expected, 3e-7, dtype_float)
 
     # Test if vegasmap.update_map changes the edge locations and distances
     # correctly
     vegasmap.update_map()
     # The outermost edge locations must match the domain [0,1]^dim
     unit_domain = anp.array([[0.0, 1.0]] * dim, dtype=dtype_float, like=backend)
-    _check_tensor_similarity(
-        vegasmap.x_edges[:, [0, -1]], unit_domain, 0.0, dtype_float
-    )
+    _check_tensor_similarity(vegasmap.x_edges[:, [0, -1]], unit_domain, 0.0, dtype_float)
     assert vegasmap.x_edges.shape == (dim, N_intervals + 1), "Invalid number of edges"
     assert vegasmap.dx_edges.shape == (
         dim,
@@ -136,18 +128,10 @@ def _run_vegas_map_checks(backend, dtype_name):
     assert anp.max(anp.abs(x[0])) == 0.0, "Boundary point was remapped"
 
 
-test_vegas_map_numpy_f32 = setup_test_for_backend(
-    _run_vegas_map_checks, "numpy", "float32"
-)
-test_vegas_map_numpy_f64 = setup_test_for_backend(
-    _run_vegas_map_checks, "numpy", "float64"
-)
-test_vegas_map_torch_f32 = setup_test_for_backend(
-    _run_vegas_map_checks, "torch", "float32"
-)
-test_vegas_map_torch_f64 = setup_test_for_backend(
-    _run_vegas_map_checks, "torch", "float64"
-)
+test_vegas_map_numpy_f32 = setup_test_for_backend(_run_vegas_map_checks, "numpy", "float32")
+test_vegas_map_numpy_f64 = setup_test_for_backend(_run_vegas_map_checks, "numpy", "float64")
+test_vegas_map_torch_f32 = setup_test_for_backend(_run_vegas_map_checks, "torch", "float32")
+test_vegas_map_torch_f64 = setup_test_for_backend(_run_vegas_map_checks, "torch", "float64")
 
 
 if __name__ == "__main__":
diff --git a/torchquad/tests/vegas_stratification_test.py b/tests/vegas_stratification_test.py
similarity index 93%
rename from torchquad/tests/vegas_stratification_test.py
rename to tests/vegas_stratification_test.py
index 1c721358..d82622f9 100644
--- a/torchquad/tests/vegas_stratification_test.py
+++ b/tests/vegas_stratification_test.py
@@ -1,12 +1,8 @@
-import sys
-
-sys.path.append("../")
-
 from autoray import numpy as anp
 from autoray import to_backend_dtype
 
-from integration.rng import RNG
-from integration.vegas_stratification import VEGASStratification
+from torchquad.integration.rng import RNG
+from torchquad.integration.vegas_stratification import VEGASStratification
 
 from helper_functions import setup_test_for_backend
 
@@ -47,9 +43,7 @@ def _run_vegas_stratification_checks(backend, dtype_name):
     assert jf.dtype == jf2.dtype == dtype_float
     assert jf.shape == jf2.shape == (strat.N_cubes,)
     assert anp.min(jf2) >= 0.0, "Sums of squared values should be non-negative"
-    assert (
-        anp.min(jf**2 - jf2) >= 0.0
-    ), "Squared sums should be bigger than summed squares"
+    assert anp.min(jf**2 - jf2) >= 0.0, "Squared sums should be bigger than summed squares"
 
     # Test the dampened sample counts update
     strat.update_DH()
diff --git a/torchquad/tests/vegas_test.py b/tests/vegas_test.py
similarity index 93%
rename from torchquad/tests/vegas_test.py
rename to tests/vegas_test.py
index 9ed8fc37..06d9af59 100644
--- a/torchquad/tests/vegas_test.py
+++ b/tests/vegas_test.py
@@ -1,7 +1,3 @@
-import sys
-
-sys.path.append("../")
-
 from autoray import numpy as anp
 from autoray import to_backend_dtype, astype
 import timeit
@@ -9,8 +5,8 @@
 import pstats
 from unittest.mock import patch
 
-from integration.vegas import VEGAS
-from integration.rng import RNG
+from torchquad.integration.vegas import VEGAS
+from torchquad.integration.rng import RNG
 
 from helper_functions import (
     compute_integration_test_errors,
@@ -79,9 +75,7 @@ def _run_vegas_accuracy_checks(backend, dtype_name):
     integrator = VEGAS()
 
     print("Integrating a function with a single peak")
-    integration_domain = anp.array(
-        [[1.0, 5.0], [-4.0, 4.0], [2.0, 6.0]], dtype=dtype, like=backend
-    )
+    integration_domain = anp.array([[1.0, 5.0], [-4.0, 4.0], [2.0, 6.0]], dtype=dtype, like=backend)
     dim = integration_domain.shape[0]
 
     def integrand_hypercube_peak(x):
@@ -92,9 +86,7 @@ def integrand_hypercube_peak(x):
         # to zero for all passed points
         return astype(in_cube, dtype_name) + 0.001
 
-    reference_integral = (
-        anp.prod(integration_domain[:, 1] - integration_domain[:, 0]) * 0.001 + 1.0
-    )
+    reference_integral = anp.prod(integration_domain[:, 1] - integration_domain[:, 0]) * 0.001 + 1.0
 
     # Use multiple seeds to reduce luck
     for seed in [0, 1, 2, 3, 41317]:
@@ -154,18 +146,14 @@ class ModifiedRNG(RNG):
     def __init__(self, *args, **kargs):
         super().__init__(*args, **kargs)
         rng_uniform = self.uniform
-        self.uniform = lambda *args, **kargs: self.modify_numbers(
-            rng_uniform(*args, **kargs)
-        )
+        self.uniform = lambda *args, **kargs: self.modify_numbers(rng_uniform(*args, **kargs))
 
     def modify_numbers(self, numbers):
         """Change the randomly generated numbers"""
         zeros = anp.zeros(numbers.shape, dtype=numbers.dtype, like=numbers)
         ones = anp.ones(numbers.shape, dtype=numbers.dtype, like=numbers)
         # Replace half of the random values randomly with 0.0 or 1.0
-        return anp.where(
-            numbers < 0.5, numbers * 2.0, anp.where(numbers < 0.75, zeros, ones)
-        )
+        return anp.where(numbers < 0.5, numbers * 2.0, anp.where(numbers < 0.75, zeros, ones))
 
 
 def _run_vegas_special_case_checks(backend, dtype_name):
@@ -198,7 +186,7 @@ def _run_vegas_special_case_checks(backend, dtype_name):
     print("Testing VEGAS with random numbers which are 0.0 and 1.0")
     # This test may be helpful to detect rounding and indexing errors which
     # would happen with a low probability with the usual RNG
-    with patch("integration.vegas.RNG", ModifiedRNG):
+    with patch("torchquad.integration.vegas.RNG", ModifiedRNG):
         integral = integrator.integrate(
             lambda x: anp.sum(x, axis=1),
             2,
diff --git a/torchquad/__init__.py b/torchquad/__init__.py
index fa5aa003..99e1e0d7 100644
--- a/torchquad/__init__.py
+++ b/torchquad/__init__.py
@@ -1,5 +1,9 @@
 import os
-from loguru import logger
+
+__version__ = "0.5.0"
+
+# Set for release builds
+TORCHQUAD_DISABLE_LOGGING = True
 
 # TODO: Currently this is the way to expose to the docs
 # hopefully changes with setup.py
@@ -16,8 +20,6 @@
 
 from .integration.rng import RNG
 
-from .plots.plot_convergence import plot_convergence
-from .plots.plot_runtime import plot_runtime
 
 from .utils.set_log_level import set_log_level
 from .utils.enable_cuda import enable_cuda
@@ -26,6 +28,7 @@
 from .utils.deployment_test import _deployment_test
 
 __all__ = [
+    "__version__",
     "GridIntegrator",
     "BaseIntegrator",
     "IntegrationGrid",
@@ -37,8 +40,6 @@
     "GaussLegendre",
     "Gaussian",
     "RNG",
-    "plot_convergence",
-    "plot_runtime",
     "enable_cuda",
     "set_precision",
     "set_log_level",
@@ -46,5 +47,8 @@
     "_deployment_test",
 ]
 
-set_log_level(os.environ.get("TORCHQUAD_LOG_LEVEL", "WARNING"))
-logger.info("Initializing torchquad.")
+if not TORCHQUAD_DISABLE_LOGGING:
+    from loguru import logger
+
+    set_log_level(os.environ.get("TORCHQUAD_LOG_LEVEL", "WARNING"))
+    logger.info("Initializing torchquad.")
diff --git a/torchquad/integration/__init__.py b/torchquad/integration/__init__.py
new file mode 100644
index 00000000..5d679687
--- /dev/null
+++ b/torchquad/integration/__init__.py
@@ -0,0 +1 @@
+# Integration methods package
diff --git a/torchquad/integration/base_integrator.py b/torchquad/integration/base_integrator.py
index 03514ac8..0d10a265 100644
--- a/torchquad/integration/base_integrator.py
+++ b/torchquad/integration/base_integrator.py
@@ -25,9 +25,7 @@ def __init__(self):
         self._nr_of_fevals = 0
 
     def integrate(self):
-        raise (
-            NotImplementedError("This is an abstract base class. Should not be called.")
-        )
+        raise (NotImplementedError("This is an abstract base class. Should not be called."))
 
     def _eval(self, points, weights=None, args=None):
         """Call evaluate_integrand to evaluate self._fn function at the passed points and update self._nr_of_evals
@@ -37,9 +35,7 @@ def _eval(self, points, weights=None, args=None):
             weights (backend tensor, optional): Integration weights. Defaults to None.
             args (list or tuple, optional): Any arguments required by the function. Defaults to None.
         """
-        result, num_points = self.evaluate_integrand(
-            self._fn, points, weights=weights, args=args
-        )
+        result, num_points = self.evaluate_integrand(self._fn, points, weights=weights, args=args)
         self._nr_of_fevals += num_points
         return result
 
@@ -83,10 +79,7 @@ def evaluate_integrand(fn, points, weights=None, args=None):
                 len(result.shape) > 1
             ):  # if the the integrand is multi-dimensional, we need to reshape/repeat weights so they can be broadcast in the *=
                 integrand_shape = anp.array(
-                    [
-                        dim if isinstance(dim, int) else dim.as_list()
-                        for dim in result.shape[1:]
-                    ],
+                    [dim if isinstance(dim, int) else dim.as_list() for dim in result.shape[1:]],
                     like=infer_backend(points),
                 )
 
diff --git a/torchquad/integration/boole.py b/torchquad/integration/boole.py
index a064dff3..f538e582 100644
--- a/torchquad/integration/boole.py
+++ b/torchquad/integration/boole.py
@@ -64,16 +64,13 @@ def _adjust_N(dim, N):
             int: An N satisfying N^(1/dim) - 1 % 4 == 0.
         """
         n_per_dim = int(N ** (1.0 / dim) + 1e-8)
-        logger.debug(
-            "Checking if N per dim is >=5 and N = 1 + 4n, where n is a positive integer."
-        )
+        logger.debug("Checking if N per dim is >=5 and N = 1 + 4n, where n is a positive integer.")
 
         # Boole's rule requires N per dim >=5 and N = 1 + 4n,
         # where n is a positive integer, for correctness.
         if n_per_dim < 5:
             warnings.warn(
-                "N per dimension cannot be lower than 5. "
-                "N per dim will now be changed to 5."
+                "N per dimension cannot be lower than 5. " "N per dim will now be changed to 5."
             )
             N = 5**dim
         elif (n_per_dim - 1) % 4 != 0:
diff --git a/torchquad/integration/gaussian.py b/torchquad/integration/gaussian.py
index 17051681..2bc0a706 100644
--- a/torchquad/integration/gaussian.py
+++ b/torchquad/integration/gaussian.py
@@ -58,7 +58,7 @@ def _weights(self, N, dim, backend, requires_grad=False):
             return anp.prod(
                 anp.array(
                     anp.stack(
-                        list(anp.meshgrid(*([weights] * dim))), like=backend, dim=0
+                        list(anp.meshgrid(*([weights] * dim), indexing="ij")), like=backend, dim=0
                     )
                 ),
                 axis=0,
@@ -90,9 +90,7 @@ def _grid_func(self):
         """
 
         def f(integration_domain, N, requires_grad, backend=None):
-            return self._resize_roots(
-                integration_domain, self._roots(N, backend, requires_grad)
-            )
+            return self._resize_roots(integration_domain, self._roots(N, backend, requires_grad))
 
         return f
 
@@ -124,9 +122,7 @@ def _cached_points_and_weights(self, N):
             if hasattr(N, "item"):
                 _root_args = (N.item(), *self._root_args)
             else:
-                raise NotImplementedError(
-                    f"N {N} is not an int and lacks an `item` method"
-                )
+                raise NotImplementedError(f"N {N} is not an int and lacks an `item` method")
         if _root_args in self._cache:
             return self._cache[_root_args]
         self._cache[_root_args] = self._root_fn(*_root_args)
diff --git a/torchquad/integration/grid_integrator.py b/torchquad/integration/grid_integrator.py
index 6c574d81..5002a584 100644
--- a/torchquad/integration/grid_integrator.py
+++ b/torchquad/integration/grid_integrator.py
@@ -52,9 +52,7 @@ def integrate(self, fn, dim, N, integration_domain, backend):
         )
         self._nr_of_fevals = num_points
 
-        return self.calculate_result(
-            function_values, dim, n_per_dim, hs, integration_domain
-        )
+        return self.calculate_result(function_values, dim, n_per_dim, hs, integration_domain)
 
     @expand_func_values_and_squeeze_integral
     def calculate_result(self, function_values, dim, n_per_dim, hs, integration_domain):
@@ -80,22 +78,16 @@ def calculate_result(self, function_values, dim, n_per_dim, hs, integration_doma
         einsum = "".join(
             [chr(i + 65) for i in range(len(function_values.shape))]
         )  # chr(i + 65) generates an alphabetical character
-        reshaped_function_values = anp.einsum(
-            f"{einsum}->{einsum[1:]}{einsum[0]}", function_values
-        )
+        reshaped_function_values = anp.einsum(f"{einsum}->{einsum[1:]}{einsum[0]}", function_values)
         reshaped_function_values = reshaped_function_values.reshape(new_shape)
         assert new_shape == list(
             reshaped_function_values.shape
         ), f"reshaping produced shape {reshaped_function_values.shape}, expected shape was {new_shape}"
         logger.debug("Computing areas.")
 
-        result = self._apply_composite_rule(
-            reshaped_function_values, dim, hs, integration_domain
-        )
+        result = self._apply_composite_rule(reshaped_function_values, dim, hs, integration_domain)
 
-        logger.opt(lazy=True).info(
-            "Computed integral: {result}", result=lambda: str(result)
-        )
+        logger.opt(lazy=True).info("Computed integral: {result}", result=lambda: str(result))
         return result
 
     def calculate_grid(
@@ -139,9 +131,7 @@ def _adjust_N(dim, N):
         # Nothing to do by default
         return N
 
-    def get_jit_compiled_integrate(
-        self, dim, N=None, integration_domain=None, backend=None
-    ):
+    def get_jit_compiled_integrate(self, dim, N=None, integration_domain=None, backend=None):
         """Create an integrate function where the performance-relevant steps except the integrand evaluation are JIT compiled.
         Use this method only if the integrand cannot be compiled.
         The compilation happens when the function is executed the first time.
@@ -170,9 +160,7 @@ def get_jit_compiled_integrate(
                 if not hasattr(self, "_tf_jit_calculate_grid"):
                     import tensorflow as tf
 
-                    self._tf_jit_calculate_grid = tf.function(
-                        self.calculate_grid, jit_compile=True
-                    )
+                    self._tf_jit_calculate_grid = tf.function(self.calculate_grid, jit_compile=True)
                     self._tf_jit_calculate_result = tf.function(
                         self.calculate_result, jit_compile=True
                     )
@@ -211,9 +199,7 @@ def compiled_integrate(fn, integration_domain):
             def do_compile(example_integrand):
                 # Define traceable first and third steps
                 def step1(integration_domain):
-                    grid_points, hs, n_per_dim = self.calculate_grid(
-                        N, integration_domain
-                    )
+                    grid_points, hs, n_per_dim = self.calculate_grid(N, integration_domain)
                     return (
                         grid_points,
                         hs,
diff --git a/torchquad/integration/integration_grid.py b/torchquad/integration/integration_grid.py
index 1068e90d..ef0b0c0a 100644
--- a/torchquad/integration/integration_grid.py
+++ b/torchquad/integration/integration_grid.py
@@ -95,10 +95,8 @@ def __init__(
         logger.opt(lazy=True).debug("Grid mesh width is {h}", h=lambda: str(self.h))
 
         # Get grid points
-        points = anp.meshgrid(*grid_1d)
-        self.points = anp.stack(
-            [mg.ravel() for mg in points], axis=1, like=integration_domain
-        )
+        points = anp.meshgrid(*grid_1d, indexing="ij")
+        self.points = anp.stack([mg.ravel() for mg in points], axis=1, like=integration_domain)
 
         logger.info("Integration grid created.")
 
diff --git a/torchquad/integration/monte_carlo.py b/torchquad/integration/monte_carlo.py
index 432e6242..ae66f135 100644
--- a/torchquad/integration/monte_carlo.py
+++ b/torchquad/integration/monte_carlo.py
@@ -76,14 +76,9 @@ def calculate_result(self, function_values, integration_domain):
         N = function_values.shape[0]
         integral = volume * anp.sum(function_values, axis=0) / N
         # NumPy automatically casts to float64 when dividing by N
-        if (
-            infer_backend(integration_domain) == "numpy"
-            and function_values.dtype != integral.dtype
-        ):
+        if infer_backend(integration_domain) == "numpy" and function_values.dtype != integral.dtype:
             integral = integral.astype(function_values.dtype)
-        logger.opt(lazy=True).info(
-            "Computed integral: {result}", result=lambda: str(integral)
-        )
+        logger.opt(lazy=True).info("Computed integral: {result}", result=lambda: str(integral))
         return integral
 
     def calculate_sample_points(self, N, integration_domain, seed=None, rng=None):
@@ -108,10 +103,7 @@ def calculate_sample_points(self, N, integration_domain, seed=None, rng=None):
         domain_starts = integration_domain[:, 0]
         domain_sizes = integration_domain[:, 1] - domain_starts
         # Scale and translate random numbers via broadcasting
-        return (
-            rng.uniform(size=[N, dim], dtype=domain_sizes.dtype) * domain_sizes
-            + domain_starts
-        )
+        return rng.uniform(size=[N, dim], dtype=domain_sizes.dtype) * domain_sizes + domain_starts
 
     def get_jit_compiled_integrate(
         self, dim, N=1000, integration_domain=None, seed=None, backend=None
@@ -150,17 +142,13 @@ def get_jit_compiled_integrate(
                 self._tf_jit_calculate_sample_points = tf.function(
                     self.calculate_sample_points, jit_compile=True
                 )
-                self._tf_jit_calculate_result = tf.function(
-                    self.calculate_result, jit_compile=True
-                )
+                self._tf_jit_calculate_result = tf.function(self.calculate_result, jit_compile=True)
             jit_calculate_sample_points = self._tf_jit_calculate_sample_points
             jit_calculate_result = self._tf_jit_calculate_result
             rng = RNG(backend="tensorflow", seed=seed)
 
             def compiled_integrate(fn, integration_domain):
-                sample_points = jit_calculate_sample_points(
-                    N, integration_domain, rng=rng
-                )
+                sample_points = jit_calculate_sample_points(N, integration_domain, rng=rng)
                 function_values, _ = self.evaluate_integrand(fn, sample_points)
                 return jit_calculate_result(function_values, integration_domain)
 
@@ -188,9 +176,7 @@ def jit_calc_sample_points(integration_domain, rng_key):
 
             def compiled_integrate(fn, integration_domain):
                 nonlocal rng_key
-                sample_points, rng_key = jit_calc_sample_points(
-                    integration_domain, rng_key
-                )
+                sample_points, rng_key = jit_calc_sample_points(integration_domain, rng_key)
                 function_values, _ = self.evaluate_integrand(fn, sample_points)
                 return jit_calculate_result(function_values, integration_domain)
 
@@ -201,9 +187,7 @@ def compiled_integrate(fn, integration_domain):
             def do_compile(example_integrand):
                 # Define traceable first and third steps
                 def step1(integration_domain):
-                    return self.calculate_sample_points(
-                        N, integration_domain, seed=seed
-                    )
+                    return self.calculate_sample_points(N, integration_domain, seed=seed)
 
                 step3 = self.calculate_result
 
@@ -214,14 +198,10 @@ def step1(integration_domain):
 
                 # Get example input for the third step
                 sample_points = step1(integration_domain)
-                function_values, _ = self.evaluate_integrand(
-                    example_integrand, sample_points
-                )
+                function_values, _ = self.evaluate_integrand(example_integrand, sample_points)
 
                 # Trace the third step
-                step3 = _torch_trace_without_warnings(
-                    step3, (function_values, integration_domain)
-                )
+                step3 = _torch_trace_without_warnings(step3, (function_values, integration_domain))
 
                 # Define a compiled integrate function
                 def compiled_integrate(fn, integration_domain):
diff --git a/torchquad/integration/rng.py b/torchquad/integration/rng.py
index 401da2c9..8d5e7475 100644
--- a/torchquad/integration/rng.py
+++ b/torchquad/integration/rng.py
@@ -65,9 +65,7 @@ def uniform_func(size, dtype):
                 self._rng = tf.random.Generator.from_non_deterministic_state()
             else:
                 self._rng = tf.random.Generator.from_seed(seed)
-            self.uniform = lambda size, dtype: self._rng.uniform(
-                shape=size, dtype=dtype
-            )
+            self.uniform = lambda size, dtype: self._rng.uniform(shape=size, dtype=dtype)
         else:
             if seed is not None:
                 anp.random.seed(seed, like=backend)
diff --git a/torchquad/integration/simpson.py b/torchquad/integration/simpson.py
index e67ce47b..456446d0 100644
--- a/torchquad/integration/simpson.py
+++ b/torchquad/integration/simpson.py
@@ -68,8 +68,7 @@ def _adjust_N(dim, N):
         # complex rule that works for even N as well but it is not implemented here.
         if n_per_dim < 3:
             warnings.warn(
-                "N per dimension cannot be lower than 3. "
-                "N per dim will now be changed to 3."
+                "N per dimension cannot be lower than 3. " "N per dim will now be changed to 3."
             )
             N = 3**dim
         elif n_per_dim % 2 != 1:
diff --git a/torchquad/integration/trapezoid.py b/torchquad/integration/trapezoid.py
index 476743e2..b9387988 100644
--- a/torchquad/integration/trapezoid.py
+++ b/torchquad/integration/trapezoid.py
@@ -32,8 +32,6 @@ def _apply_composite_rule(cur_dim_areas, dim, hs, domain):
         """
         # We collapse dimension by dimension
         for cur_dim in range(dim):
-            cur_dim_areas = (
-                hs[cur_dim] / 2.0 * (cur_dim_areas[..., 0:-1] + cur_dim_areas[..., 1:])
-            )
+            cur_dim_areas = hs[cur_dim] / 2.0 * (cur_dim_areas[..., 0:-1] + cur_dim_areas[..., 1:])
             cur_dim_areas = anp.sum(cur_dim_areas, axis=len(cur_dim_areas.shape) - 1)
         return cur_dim_areas
diff --git a/torchquad/integration/utils.py b/torchquad/integration/utils.py
index 12838cb0..47b5c323 100644
--- a/torchquad/integration/utils.py
+++ b/torchquad/integration/utils.py
@@ -34,10 +34,16 @@ def _linspace_with_grads(start, stop, N, requires_grad):
     """
     # The requires_grad case is only needed for Torch.
     if requires_grad:
-        # Create 0 to 1 spaced grid
-        grid = anp.linspace(
-            anp.array(0.0, like=start), anp.array(1.0, like=start), N, dtype=start.dtype
-        )
+        # Create 0 to 1 spaced grid using the same device and dtype as start
+        backend = infer_backend(start)
+        if backend == "torch":
+            zero = start.new_zeros(())  # scalar tensor
+            one = start.new_ones(())  # scalar tensor
+        else:
+            zero = anp.array(0.0, like=start)
+            one = anp.array(1.0, like=start)
+
+        grid = anp.linspace(zero, one, N, dtype=start.dtype)
 
         # Scale to desired range, thus keeping gradients
         grid *= stop - start
@@ -123,18 +129,16 @@ def _setup_integration_domain(dim, integration_domain, backend):
     domain_arg_backend = infer_backend(integration_domain)
     convert_to_tensor = domain_arg_backend == "builtins"
     if not convert_to_tensor and backend is not None and domain_arg_backend != backend:
-        logger.warning(
-            "integration_domain should be a list when the backend argument is set."
-        )
+        warning_msg = "integration_domain should be a list when the backend argument is set."
+        logger.warning(warning_msg)
+        warnings.warn(warning_msg, RuntimeWarning)
         convert_to_tensor = True
 
     # Convert integration_domain to a tensor if needed
     if convert_to_tensor:
         # Cast all integration domain values to Python3 float because
         # some numerical backends create a tensor based on the Python3 types
-        integration_domain = [
-            [float(b) for b in bounds] for bounds in integration_domain
-        ]
+        integration_domain = [[float(b) for b in bounds] for bounds in integration_domain]
         if backend is None:
             # Get a globally default backend
             backend = _get_default_backend()
@@ -144,9 +148,7 @@ def _setup_integration_domain(dim, integration_domain, backend):
 
             dtype_arg = dtype_arg or tf.keras.backend.floatx()
 
-        integration_domain = anp.array(
-            integration_domain, like=backend, dtype=dtype_arg
-        )
+        integration_domain = anp.array(integration_domain, like=backend, dtype=dtype_arg)
 
     if integration_domain.shape != (dim, 2):
         raise ValueError(
@@ -240,17 +242,36 @@ def expand_func_values_and_squeeze_integral(f):
     """
 
     def wrap(*args, **kwargs):
+        # Extract function_values from either positional or keyword arguments
+        if len(args) > 1:
+            function_values = args[1]
+        elif "function_values" in kwargs:
+            function_values = kwargs["function_values"]
+        else:
+            raise ValueError(
+                "function_values argument not found in either positional or keyword arguments. "
+                "Please provide function_values as the second positional argument or as a keyword argument."
+            )
+
         # i.e we only have one dimension, or the second dimension (that of the integrand) is 1
-        is_1d = len(args[1].shape) == 1 or (
-            len(args[1].shape) == 2 and args[1].shape[1] == 1
+        is_1d = len(function_values.shape) == 1 or (
+            len(function_values.shape) == 2 and function_values.shape[1] == 1
         )
+
         if is_1d:
             warnings.warn(
                 "DEPRECATION WARNING: In future versions of torchquad, an array-like object will be returned."
             )
-            return anp.squeeze(
-                f(args[0], anp.expand_dims(args[1], axis=1), *args[2:], **kwargs)
-            )
+            if len(args) > 1:
+                # Modify positional arguments
+                args = (args[0], anp.expand_dims(function_values, axis=1), *args[2:])
+            else:
+                # Modify keyword arguments
+                kwargs["function_values"] = anp.expand_dims(function_values, axis=1)
+
+            result = f(*args, **kwargs)
+            return anp.squeeze(result)
+
         return f(*args, **kwargs)
 
     return wrap
diff --git a/torchquad/integration/vegas.py b/torchquad/integration/vegas.py
index b1e02f0a..bafdb549 100644
--- a/torchquad/integration/vegas.py
+++ b/torchquad/integration/vegas.py
@@ -175,9 +175,7 @@ def _check_abort_conditions(self):
         err = self._get_error()
         chi2 = self._get_chisq()
         logger.debug(f"Iteration {self.it},Chi2={chi2:.4e}")
-        if (
-            err <= self._eps_rel * res_abs or err <= self._eps_abs
-        ) and chi2 / 5.0 < 1.0:
+        if (err <= self._eps_rel * res_abs or err <= self._eps_abs) and chi2 / 5.0 < 1.0:
             return True
 
         # Adjust number of evals if Chi square indicates instability
@@ -217,9 +215,7 @@ def _warmup_grid(self, warmup_N_it=5, N_samples=1000):
             warmup_N_it (int, optional): Number of warmup iterations. Defaults to 5.
             N_samples (int, optional): Number of samples per warmup iteration. Defaults to 1000.
         """
-        logger.debug(
-            f"Running Map Warmup with warmup_N_it={warmup_N_it}, N_samples={N_samples}..."
-        )
+        logger.debug(f"Running Map Warmup with warmup_N_it={warmup_N_it}, N_samples={N_samples}...")
 
         alpha_start = 0.5  # initial alpha value
         # TODO in the original paper this is adjusted over time
@@ -237,10 +233,7 @@ def _warmup_grid(self, warmup_N_it=5, N_samples=1000):
 
             # Sample points yrnd and transformed sample points x
             # Multiplying by 0.99999999 as the edge case of y=1 leads to an error
-            yrnd = (
-                self.rng.uniform(size=[N_samples, self._dim], dtype=self.dtype)
-                * 0.999999
-            )
+            yrnd = self.rng.uniform(size=[N_samples, self._dim], dtype=self.dtype) * 0.999999
             x = self.map.get_X(yrnd)
             f_eval = self._eval(x).squeeze()
             jac = self.map.get_Jac(yrnd)
diff --git a/torchquad/integration/vegas_map.py b/torchquad/integration/vegas_map.py
index c81a6d92..997bd7a6 100644
--- a/torchquad/integration/vegas_map.py
+++ b/torchquad/integration/vegas_map.py
@@ -1,6 +1,7 @@
 from autoray import numpy as anp
 from autoray import astype, to_backend_dtype
 from loguru import logger
+import warnings
 
 from .utils import _add_at_indices
 
@@ -34,12 +35,8 @@ def __init__(self, N_intervals, dim, backend, dtype, alpha=0.5) -> None:
             anp.ones((self.dim, self.N_intervals), dtype=self.dtype, like=self.backend)
             / self.N_intervals
         )
-        x_edges_per_dim = anp.linspace(
-            0.0, 1.0, N_edges, dtype=self.dtype, like=self.backend
-        )
-        self.x_edges = anp.repeat(
-            anp.reshape(x_edges_per_dim, [1, N_edges]), self.dim, axis=0
-        )
+        x_edges_per_dim = anp.linspace(0.0, 1.0, N_edges, dtype=self.dtype, like=self.backend)
+        self.x_edges = anp.repeat(anp.reshape(x_edges_per_dim, [1, N_edges]), self.dim, axis=0)
 
         # Initialize self.weights and self.counts
         self._reset_weight()
@@ -132,14 +129,10 @@ def _smooth_map(weights, counts, alpha):
             # their nearest neighbouring intervals
             # (up to a distance of 10 indices).
             for _ in range(10):
-                weights[:, :-1] = anp.where(
-                    z_idx[:, :-1], weights[:, 1:], weights[:, :-1]
-                )
+                weights[:, :-1] = anp.where(z_idx[:, :-1], weights[:, 1:], weights[:, :-1])
                 # The asterisk corresponds to a logical And here
                 z_idx[:, :-1] = z_idx[:, :-1] * z_idx[:, 1:]
-                weights[:, 1:] = anp.where(
-                    z_idx[:, 1:], weights[:, :-1], weights[:, 1:]
-                )
+                weights[:, 1:] = anp.where(z_idx[:, 1:], weights[:, :-1], weights[:, 1:])
                 z_idx[:, 1:] = z_idx[:, 1:] * z_idx[:, :-1]
                 logger.opt(lazy=True).debug(
                     "  remaining intervals: {z_idx_sum}",
@@ -174,18 +167,14 @@ def _smooth_map(weights, counts, alpha):
 
         # Range compression
         # EQ 19
-        d_tmp[d_tmp != 0] = (
-            (d_tmp[d_tmp != 0] - 1.0) / anp.log(d_tmp[d_tmp != 0])
-        ) ** alpha
+        d_tmp[d_tmp != 0] = ((d_tmp[d_tmp != 0] - 1.0) / anp.log(d_tmp[d_tmp != 0])) ** alpha
 
         return d_tmp
 
     def _reset_weight(self):
         """Reset or initialize weights and counts."""
         # weights in each intervall
-        self.weights = anp.zeros(
-            (self.dim, self.N_intervals), dtype=self.dtype, like=self.backend
-        )
+        self.weights = anp.zeros((self.dim, self.N_intervals), dtype=self.dtype, like=self.backend)
         # numbers of random samples in specific interval
         self.counts = anp.zeros(
             (self.dim, self.N_intervals),
@@ -197,10 +186,12 @@ def update_map(self):
         """Update the adaptive map, Section II C."""
         smoothed_weights = self._smooth_map(self.weights, self.counts, self.alpha)
         if smoothed_weights is None:
-            logger.warning(
+            warning_msg = (
                 "Cannot update the VEGASMap. This can happen with an integrand "
                 "which evaluates to zero everywhere."
             )
+            logger.warning(warning_msg)
+            warnings.warn(warning_msg, RuntimeWarning)
             self._reset_weight()
             return
 
@@ -217,29 +208,22 @@ def update_map(self):
             # with float32 precision is too inaccurate which leads to wrong
             # indices, so cast to float64 here.
             delta_d_multiples = astype(
-                anp.cumsum(astype(smoothed_weights[i, :-1], "float64"), axis=0)
-                / delta_d,
+                anp.cumsum(astype(smoothed_weights[i, :-1], "float64"), axis=0) / delta_d,
                 "int64",
             )
             # For each number of delta_d multiples in {0, 1, …, N_intervals},
             # determine how many intervals belong to it (num_sw_per_dw)
             # and the sum of smoothed weights in these intervals (val_sw_per_dw)
             dtype_int = delta_d_multiples.dtype
-            num_sw_per_dw = anp.zeros(
-                [self.N_intervals + 1], dtype=dtype_int, like=delta_d
-            )
+            num_sw_per_dw = anp.zeros([self.N_intervals + 1], dtype=dtype_int, like=delta_d)
             _add_at_indices(
                 num_sw_per_dw,
                 delta_d_multiples,
                 anp.ones(delta_d_multiples.shape, dtype=dtype_int, like=delta_d),
                 is_sorted=True,
             )
-            val_sw_per_dw = anp.zeros(
-                [self.N_intervals + 1], dtype=self.dtype, like=delta_d
-            )
-            _add_at_indices(
-                val_sw_per_dw, delta_d_multiples, smoothed_weights[i], is_sorted=True
-            )
+            val_sw_per_dw = anp.zeros([self.N_intervals + 1], dtype=self.dtype, like=delta_d)
+            _add_at_indices(val_sw_per_dw, delta_d_multiples, smoothed_weights[i], is_sorted=True)
             # The cumulative sum of the number of smoothed weights per delta_d
             # multiple determines the old inner edges indices for the new inner
             # edges calculation
@@ -261,9 +245,9 @@ def update_map(self):
                 # smoothed_weights[i][indices] can be zero, which leads to
                 # invalid edges.
                 num_edges = self.x_edges.shape[1]
-                logger.warning(
-                    f"{num_edges - anp.sum(finite_edges)} out of {num_edges} calculated VEGASMap edges were infinite"
-                )
+                warning_msg = f"{num_edges - anp.sum(finite_edges)} out of {num_edges} calculated VEGASMap edges were infinite"
+                logger.warning(warning_msg)
+                warnings.warn(warning_msg, RuntimeWarning)
                 # Replace inf edges with the average of their two neighbours
                 middle_edges = 0.5 * (self.x_edges[i][:-2] + self.x_edges[i][2:])
                 self.x_edges[i][1:-1] = anp.where(
diff --git a/torchquad/integration/vegas_stratification.py b/torchquad/integration/vegas_stratification.py
index 9b3239e2..81a440ce 100644
--- a/torchquad/integration/vegas_stratification.py
+++ b/torchquad/integration/vegas_stratification.py
@@ -38,11 +38,7 @@ def __init__(self, N_increment, dim, rng, backend, dtype, beta=0.75):
         self.JF2 = anp.zeros([self.N_cubes], dtype=self.dtype, like=backend)
 
         # dampened counts
-        self.dh = (
-            anp.ones([self.N_cubes], dtype=self.dtype, like=backend)
-            * 1.0
-            / self.N_cubes
-        )
+        self.dh = anp.ones([self.N_cubes], dtype=self.dtype, like=backend) * 1.0 / self.N_cubes
 
         # current index counts as floating point numbers
         self.strat_counts = anp.zeros([self.N_cubes], dtype=self.dtype, like=backend)
@@ -81,8 +77,7 @@ def update_DH(self):
         # EQ 42
         V2 = self.V_cubes * self.V_cubes
         d_tmp = (
-            V2 * self.JF2 / self.strat_counts
-            - (self.V_cubes * self.JF / self.strat_counts) ** 2
+            V2 * self.JF2 / self.strat_counts - (self.V_cubes * self.JF / self.strat_counts) ** 2
         )
         # Sometimes rounding errors produce negative values very close to 0
         d_tmp[d_tmp < 0.0] = 0.0
@@ -162,9 +157,7 @@ def get_Y(self, nevals):
         # Convert the positions to float, add random offsets to them and scale
         # the result so that each point is in [0, 1)^dim
         positions = astype(positions, self.dtype)
-        random_uni = self.rng.uniform(
-            size=[positions.shape[0], self.dim], dtype=self.dtype
-        )
+        random_uni = self.rng.uniform(size=[positions.shape[0], self.dim], dtype=self.dtype)
         positions = (positions + random_uni) / self.N_strat
         # Due to rounding errors points are sometimes 1.0; replace them with
         # a value close to 1
diff --git a/torchquad/plots/plot_convergence.py b/torchquad/plots/plot_convergence.py
deleted file mode 100644
index 04c1a9c8..00000000
--- a/torchquad/plots/plot_convergence.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import matplotlib.pyplot as plt
-import numpy as np
-
-
-def plot_convergence(evals, fvals, ground_truth, labels, dpi=150):
-    """Plots errors vs. function evaluations (fevals) and shows the convergence rate.
-
-    Args:
-        evals (list of np.array): Number of evaluations, for each method a np.array of ints.
-        fvals (list of np.array): Function values for evals.
-        ground_truth (np.array): Ground truth values.
-        labels (list): Method names.
-        dpi (int, optional): Plot dpi. Defaults to 150.
-    """
-    plt.figure(dpi=dpi)
-    for evals_item, f_item, label in zip(evals, fvals, labels):
-        evals_item = np.array(evals_item)
-        abs_err = np.abs(np.asarray(f_item) - np.asarray(ground_truth))
-        abs_err_delta = np.mean(np.abs((abs_err[:-1]) / (abs_err[1:] + 1e-16)))
-        label = label + "\nConvergence Rate: " + str.format("{:.2e}", abs_err_delta)
-        plt.semilogy(evals_item, abs_err, label=label)
-
-    plt.legend(fontsize=6)
-    plt.xlabel("# of function evaluations")
-    plt.ylabel("Absolute error")
diff --git a/torchquad/plots/plot_runtime.py b/torchquad/plots/plot_runtime.py
deleted file mode 100644
index 141ce5ad..00000000
--- a/torchquad/plots/plot_runtime.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import matplotlib.pyplot as plt
-
-
-def plot_runtime(evals, runtime, labels, dpi=150, y_axis_name="Runtime [s]"):
-    """Plots the runtime vs. function evaluations (fevals).
-
-    Args:
-        evals (list of np.array): Number of evaluations, for each method a np.array of fevals.
-        runtime (list of np.array): Runtime for evals.
-        labels (list): Method names.
-        dpi (int, optional): Plot dpi. Defaults to 150.
-        y_axis_name (str, optional): Name for y axis. Deafults to "Runtime [s]".
-    """
-    plt.figure(dpi=dpi)
-    for evals_item, rt, label in zip(evals, runtime, labels):
-        plt.semilogy(evals_item, rt, label=label)
-    plt.legend(fontsize=6)
-    plt.xlabel("Number of evaluations")
-    plt.ylabel(y_axis_name)
diff --git a/torchquad/tests/rng_test.py b/torchquad/tests/rng_test.py
deleted file mode 100644
index df8dd286..00000000
--- a/torchquad/tests/rng_test.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import sys
-
-sys.path.append("../")
-
-from autoray import numpy as anp
-from autoray import infer_backend, get_dtype_name, to_backend_dtype, to_numpy
-
-from integration.rng import RNG
-
-from helper_functions import setup_test_for_backend
-
-
-def _run_RNG_tests(backend, dtype_name):
-    """
-    Test the random number generator with the given numerical backend
-    * With the same seed, the same numbers should be generated
-    * With different seeds, different numbers should be generated
-    * If seed is None / omitted, the RNG should be randomly seeded
-    """
-    backend_dtype = to_backend_dtype(dtype_name, like=backend)
-    size = [3, 9]
-    generateds = [
-        RNG(backend, 547).uniform(size=size, dtype=backend_dtype),
-        RNG(backend, None).uniform(size=size, dtype=backend_dtype),
-        RNG(backend, 547).uniform(size=size, dtype=backend_dtype),
-        RNG(backend).uniform(size=size, dtype=backend_dtype),
-        RNG(backend, 42).uniform(size=size, dtype=backend_dtype),
-    ]
-    numpy_arrs = list(map(to_numpy, generateds))
-
-    # Validity of the backend, dtype, shape and values range
-    assert all(infer_backend(arr) == backend for arr in generateds)
-    assert all(get_dtype_name(arr) == dtype_name for arr in generateds)
-    assert all(arr.shape == (3, 9) for arr in generateds)
-    assert all(0.0 <= x <= 1.0 for arr in numpy_arrs for x in arr.ravel())
-
-    # Test if the seed argument leads to consistent results and
-    # if omitting a seed leads to random numbers
-    assert anp.array_equal(numpy_arrs[0], numpy_arrs[2])
-    for i1 in range(len(generateds)):
-        for i2 in range(i1 + 1, len(generateds)):
-            if i1 == 0 and i2 == 2:
-                continue
-            # With a very low probability this may fail
-            assert not anp.array_equal(numpy_arrs[i1], numpy_arrs[i2])
-
-
-test_rng_jax_f32 = setup_test_for_backend(_run_RNG_tests, "jax", "float32")
-test_rng_jax_f64 = setup_test_for_backend(_run_RNG_tests, "jax", "float64")
-test_rng_numpy_f32 = setup_test_for_backend(_run_RNG_tests, "numpy", "float32")
-test_rng_numpy_f64 = setup_test_for_backend(_run_RNG_tests, "numpy", "float64")
-test_rng_torch_f32 = setup_test_for_backend(_run_RNG_tests, "torch", "float32")
-test_rng_torch_f64 = setup_test_for_backend(_run_RNG_tests, "torch", "float64")
-test_rng_tensorflow_f32 = setup_test_for_backend(
-    _run_RNG_tests, "tensorflow", "float32"
-)
-test_rng_tensorflow_f64 = setup_test_for_backend(
-    _run_RNG_tests, "tensorflow", "float64"
-)
-
-
-if __name__ == "__main__":
-    # used to run this test individually
-    test_rng_numpy_f32()
-    test_rng_numpy_f64()
-    test_rng_torch_f32()
-    test_rng_torch_f64()
-    test_rng_jax_f32()
-    test_rng_jax_f64()
-    test_rng_tensorflow_f32()
-    test_rng_tensorflow_f64()
diff --git a/torchquad/utils/__init__.py b/torchquad/utils/__init__.py
new file mode 100644
index 00000000..af25f783
--- /dev/null
+++ b/torchquad/utils/__init__.py
@@ -0,0 +1 @@
+# Utilities package
diff --git a/torchquad/utils/deployment_test.py b/torchquad/utils/deployment_test.py
index ef0bc885..fb5051c8 100644
--- a/torchquad/utils/deployment_test.py
+++ b/torchquad/utils/deployment_test.py
@@ -7,56 +7,312 @@
 from torchquad import enable_cuda
 from torchquad import set_precision
 from torchquad import set_log_level
+from torchquad import set_up_backend
 from loguru import logger
+import warnings
 
 
 def _deployment_test():
-    """This method is used to check successful deployment of torch.
-    It should not be used by users. We use it internally to check
-    successful deployment of torchquad.
-    """
-    """[summary]
-    """
-    import torch
+    """Comprehensive test to verify successful deployment of torchquad.
 
+    This method is used internally to check successful deployment after PyPI releases.
+    It verifies:
+    - Basic imports work
+    - All integrators can be initialized
+    - Integration methods work with different backends
+    - GPU functionality (if available)
+    - Precision settings
+    - Error handling
+    - Results are reasonable
+    """
     set_log_level("INFO")
+
+    # Suppress common warnings to reduce noise
+    warnings.filterwarnings("ignore", message="torch.meshgrid: in an upcoming release")
+    warnings.filterwarnings(
+        "ignore", message="DEPRECATION WARNING: In future versions of torchquad"
+    )
+
     logger.info("####################################")
     logger.info("######## TESTING DEPLOYMENT ########")
     logger.info("####################################")
     logger.info("")
 
-    logger.info("Testing CUDA init... ")
-    # Test inititialization on GPUs if available
-    enable_cuda()
-    set_precision("double")
-    logger.info("Done.")
+    # Test 1: Basic imports and version info
+    logger.info("Testing imports and version info...")
+    try:
+        import torchquad
 
-    logger.info("")
-    logger.info("####################################")
+        logger.info(f"torchquad version: {getattr(torchquad, '__version__', 'unknown')}")
+        logger.info("✓ Basic imports successful")
+    except Exception as e:
+        logger.error(f"✗ Import failed: {e}")
+        return False
 
-    logger.info("Initializing integrators... ")
-    tp = Trapezoid()
-    sp = Simpson()
-    boole = Boole()
-    mc = MonteCarlo()
-    vegas = VEGAS()
-    logger.info("Done.")
+    # Test 2: Backend availability
+    logger.info("\nTesting available backends...")
+    available_backends = []
+    for backend in ["torch", "numpy", "jax", "tensorflow"]:
+        try:
+            set_up_backend(backend, "float32")
+            available_backends.append(backend)
+            logger.info(f"✓ {backend} backend available")
+        except Exception:
+            logger.info(f"- {backend} backend not available (expected if not installed)")
 
-    def some_test_function(x):
-        return torch.exp(x) * torch.pow(x, 2)
+    if not available_backends:
+        logger.error("✗ No backends available!")
+        return False
 
-    logger.info("")
-    logger.info("####################################")
+    # Test 3: CUDA and precision settings
+    logger.info("\nTesting CUDA and precision settings...")
+    try:
+        enable_cuda()
+        set_precision("double")
+        logger.info("✓ CUDA and precision settings configured")
+    except Exception as e:
+        logger.info(f"- CUDA configuration warning: {e}")
 
-    logger.info("Testing integrate functions... ")
-    tp.integrate(some_test_function, dim=1, N=101)
-    sp.integrate(some_test_function, dim=1, N=101)
-    boole.integrate(some_test_function, dim=1, N=101)
-    mc.integrate(some_test_function, dim=1, N=101)
-    vegas.integrate(some_test_function, dim=1, N=300)
-    logger.info("Done.")
-    logger.info("")
+    # Test 4: Integrator initialization
+    logger.info("\nInitializing integrators...")
+    integrators = {}
+    try:
+        integrators["trapezoid"] = Trapezoid()
+        integrators["simpson"] = Simpson()
+        integrators["boole"] = Boole()
+        integrators["monte_carlo"] = MonteCarlo()
+        integrators["vegas"] = VEGAS()
+        logger.info("✓ All integrators initialized successfully")
+    except Exception as e:
+        logger.error(f"✗ Integrator initialization failed: {e}")
+        return False
 
+    # Test 5: Integration with multiple backends and functions
+    logger.info("\nTesting integration functions...")
+
+    test_domain = [[0, 2]]
+    success_count = 0
+    total_tests = 0
+
+    for backend in available_backends:
+        logger.info(f"\n  Testing {backend} backend...")
+        backend_success = 0
+        backend_total = 0
+
+        try:
+            set_up_backend(backend, "float32")
+
+            # Simple vectorized test functions
+            def simple_polynomial(x):
+                # Vectorized: x^2 + 1, expected ~4.33 for [0,2]
+                return x[:, 0] ** 2 + 1
+
+            def simple_constant(x):
+                # Vectorized: constant function = 2, expected 4.0 for [0,2]
+                backend_type = _infer_backend_from_tensor(x)
+                if backend_type == "torch":
+                    import torch
+
+                    return torch.ones(x.shape[0]) * 2
+                else:
+                    from autoray import numpy as anp
+
+                    return anp.ones(x.shape[0], like=x, dtype=x.dtype) * 2
+
+            test_functions = {
+                "polynomial": (simple_polynomial, 4.33),  # approximate expected result
+                "constant": (simple_constant, 4.0),  # exact expected result
+            }
+
+            # Test each integrator with different functions
+            for integrator_name, integrator in integrators.items():
+                for func_name, (func, expected) in test_functions.items():
+                    backend_total += 1
+                    total_tests += 1
+                    try:
+                        if integrator_name == "vegas":
+                            result = integrator.integrate(
+                                func, dim=1, N=1000, integration_domain=test_domain
+                            )
+                        else:
+                            result = integrator.integrate(
+                                func, dim=1, N=101, integration_domain=test_domain
+                            )
+
+                        # Convert result to float for comparison
+                        if hasattr(result, "item"):
+                            result_val = float(result.item())
+                        else:
+                            result_val = float(result)
+
+                        # Check if result is reasonable (within 50% of expected for simple test)
+                        if (
+                            _is_finite_result(result_val)
+                            and abs(result_val - expected) < expected * 0.5
+                        ):
+                            logger.debug(
+                                f"  ✓ {integrator_name} with {func_name}: {result_val:.3f} (expected ~{expected})"
+                            )
+                            backend_success += 1
+                            success_count += 1
+                        else:
+                            logger.warning(
+                                f"  - {integrator_name} with {func_name}: unreasonable result {result_val:.3f} (expected ~{expected})"
+                            )
+
+                    except Exception as e:
+                        logger.warning(
+                            f"  - {integrator_name} with {func_name} failed: {str(e)[:100]}..."
+                        )
+
+            logger.info(f"  ✓ {backend} backend: {backend_success}/{backend_total} tests passed")
+
+        except Exception as e:
+            logger.warning(f"  - {backend} backend setup failed: {e}")
+
+    # Check if enough tests passed
+    if total_tests == 0:
+        logger.error("✗ No integration tests could be run!")
+        return False
+    elif success_count < total_tests * 0.5:  # At least 50% should pass
+        logger.error(f"✗ Only {success_count}/{total_tests} integration tests passed")
+        return False
+    else:
+        logger.info(f"✓ Integration tests: {success_count}/{total_tests} passed")
+
+    # Test 6: Error handling
+    logger.info("\nTesting error handling...")
+    try:
+        tp = Trapezoid()
+        # This should handle the error gracefully
+        try:
+            tp.integrate(lambda x: x / 0, dim=1, N=101)  # Division by zero
+            logger.info("✓ Error handling works (or function avoided error)")
+        except Exception:
+            logger.info("✓ Error handling works (caught expected error)")
+    except Exception as e:
+        logger.warning(f"- Error handling test inconclusive: {e}")
+
+    # Test 7: Multi-dimensional integration
+    logger.info("\nTesting multi-dimensional integration...")
+    multi_dim_success = False
+    try:
+        mc = MonteCarlo()
+
+        def multi_dim_func(x):
+            # Vectorized 2D function: sum of squares, expected result ~ 2/3 for [0,1]x[0,1]
+            backend = _infer_backend_from_tensor(x)
+            if backend == "torch":
+                import torch
+
+                return torch.sum(x**2, dim=1)
+            else:
+                from autoray import numpy as anp
+
+                return anp.sum(x**2, axis=1)
+
+        result = mc.integrate(multi_dim_func, dim=2, N=1000, integration_domain=[[0, 1], [0, 1]])
+        if hasattr(result, "item"):
+            result_val = float(result.item())
+        else:
+            result_val = float(result)
+
+        # Expected result is 2/3 ≈ 0.667 for integral of x^2 + y^2 over [0,1]x[0,1]
+        if (
+            _is_finite_result(result_val) and 0.2 < result_val < 1.2
+        ):  # Reasonable range for Monte Carlo
+            logger.info(f"✓ Multi-dimensional integration: {result_val:.3f} (expected ~0.67)")
+            multi_dim_success = True
+        else:
+            logger.warning(f"- Multi-dimensional integration: unreasonable result {result_val:.3f}")
+    except Exception as e:
+        logger.warning(f"- Multi-dimensional integration failed: {str(e)[:100]}...")
+
+    if not multi_dim_success:
+        logger.warning("- Multi-dimensional integration test failed")
+
+    logger.info("\n####################################")
+    logger.info("############ ALL DONE #############")
     logger.info("####################################")
-    logger.info("############ ALL DONE. #############")
-    logger.info("####################################")
+
+    # Final assessment - only return True if critical tests passed
+    critical_failures = []
+
+    if not available_backends:
+        critical_failures.append("No backends available")
+
+    if success_count < total_tests * 0.5:
+        critical_failures.append(f"Integration tests: only {success_count}/{total_tests} passed")
+
+    if not multi_dim_success:
+        critical_failures.append("Multi-dimensional integration failed")
+
+    if critical_failures:
+        logger.error("✗ Deployment test FAILED with critical issues:")
+        for failure in critical_failures:
+            logger.error(f"  - {failure}")
+        return False
+    else:
+        logger.info("✓ Deployment test completed successfully!")
+        return True
+
+
+def _get_exp_func(x):
+    """Get exponential function for current backend"""
+    backend = _infer_backend_from_tensor(x)
+    if backend == "torch":
+        import torch
+
+        return torch.exp(x[0])
+    else:
+        from autoray import numpy as anp
+
+        return anp.exp(x[0])
+
+
+def _get_sin_func(x):
+    """Get sine function for current backend"""
+    backend = _infer_backend_from_tensor(x)
+    if backend == "torch":
+        import torch
+
+        return torch.sin(x[0])
+    else:
+        from autoray import numpy as anp
+
+        return anp.sin(x[0])
+
+
+def _infer_backend_from_tensor(x):
+    """Infer backend from tensor type"""
+    try:
+        from autoray import infer_backend
+
+        return infer_backend(x)
+    except Exception:
+        # Fallback method
+        if hasattr(x, "numpy"):  # PyTorch tensor
+            return "torch"
+        elif str(type(x)).find("jax") != -1:
+            return "jax"
+        elif str(type(x)).find("tensorflow") != -1:
+            return "tensorflow"
+        else:
+            return "numpy"
+
+
+def _is_finite_result(result):
+    """Check if result is finite"""
+    try:
+        from autoray import numpy as anp
+
+        result_np = anp.asarray(result)
+        return anp.isfinite(result_np).all()
+    except Exception:
+        # Fallback for basic types
+        try:
+            import math
+
+            return math.isfinite(float(result))
+        except Exception:
+            return False
diff --git a/torchquad/utils/enable_cuda.py b/torchquad/utils/enable_cuda.py
index 6174459f..ac12beca 100644
--- a/torchquad/utils/enable_cuda.py
+++ b/torchquad/utils/enable_cuda.py
@@ -1,4 +1,5 @@
 from loguru import logger
+import warnings
 
 from .set_precision import set_precision
 
@@ -20,6 +21,6 @@ def enable_cuda(data_type="float32"):
         if data_type is not None:
             set_precision(data_type)
     else:
-        logger.warning(
-            "Error enabling CUDA. cuda.is_available() returned False. CPU will be used."
-        )
+        warning_msg = "Error enabling CUDA. cuda.is_available() returned False. CPU will be used."
+        logger.warning(warning_msg)
+        warnings.warn(warning_msg, RuntimeWarning)
diff --git a/torchquad/utils/set_precision.py b/torchquad/utils/set_precision.py
index b551aee6..c3e98bc1 100644
--- a/torchquad/utils/set_precision.py
+++ b/torchquad/utils/set_precision.py
@@ -1,5 +1,6 @@
 from loguru import logger
 import os
+import sys
 
 
 def _get_precision(backend):
@@ -29,32 +30,44 @@ def set_precision(data_type="float32", backend="torch"):
     """
     # Backwards-compatibility: allow "float" and "double", optionally with
     # upper-case letters
-    data_type = {"float": "float32", "double": "float64"}.get(
-        data_type.lower(), data_type
-    )
+    data_type = {"float": "float32", "double": "float64"}.get(data_type.lower(), data_type)
     if data_type not in ["float32", "float64"]:
-        logger.error(
-            f'Invalid data type "{data_type}". Only float32 and float64 are supported. Setting the data type to float32.'
-        )
+        error_msg = f'Invalid data type "{data_type}". Only float32 and float64 are supported. Setting the data type to float32.'
+        logger.error(error_msg)
+        print(f"ERROR: {error_msg}", file=sys.stderr)
         data_type = "float32"
 
     if backend == "torch":
         import torch
 
-        cuda_enabled = torch.cuda.is_initialized()
-        tensor_dtype, tensor_dtype_name = {
-            ("float32", True): (torch.cuda.FloatTensor, "cuda.Float32"),
-            ("float64", True): (torch.cuda.DoubleTensor, "cuda.Float64"),
-            ("float32", False): (torch.FloatTensor, "Float32"),
-            ("float64", False): (torch.DoubleTensor, "Float64"),
-        }[(data_type, cuda_enabled)]
-        cuda_enabled_info = (
-            "CUDA is initialized" if cuda_enabled else "CUDA not initialized"
-        )
-        logger.info(
-            f"Setting Torch's default tensor type to {tensor_dtype_name} ({cuda_enabled_info})."
-        )
-        torch.set_default_tensor_type(tensor_dtype)
+        # Use new PyTorch 2.1+ API if available, fallback to legacy for older versions
+        if hasattr(torch, "set_default_dtype"):
+            # Modern PyTorch 2.1+ approach
+            dtype_map = {"float32": torch.float32, "float64": torch.float64}
+            torch.set_default_dtype(dtype_map[data_type])
+
+            # Only set default device to CUDA if CUDA was already initialized
+            # (matching the old behavior more closely)
+            cuda_enabled = torch.cuda.is_initialized()
+            if cuda_enabled:
+                torch.set_default_device("cuda")
+                logger.info(f"Setting Torch's default dtype to {data_type} and device to CUDA.")
+            else:
+                logger.info(f"Setting Torch's default dtype to {data_type} (CPU).")
+        else:
+            # Legacy approach for older PyTorch versions
+            cuda_enabled = torch.cuda.is_initialized()
+            tensor_dtype, tensor_dtype_name = {
+                ("float32", True): (torch.cuda.FloatTensor, "cuda.Float32"),
+                ("float64", True): (torch.cuda.DoubleTensor, "cuda.Float64"),
+                ("float32", False): (torch.FloatTensor, "Float32"),
+                ("float64", False): (torch.DoubleTensor, "Float64"),
+            }[(data_type, cuda_enabled)]
+            cuda_enabled_info = "CUDA is initialized" if cuda_enabled else "CUDA not initialized"
+            logger.info(
+                f"Setting Torch's default tensor type to {tensor_dtype_name} ({cuda_enabled_info})."
+            )
+            torch.set_default_tensor_type(tensor_dtype)
     elif backend == "jax":
         from jax import config
 
@@ -71,4 +84,6 @@ def set_precision(data_type="float32", backend="torch"):
         os.environ["TORCHQUAD_DTYPE_NUMPY"] = data_type
         logger.info(f"NumPy default dtype set to {_get_precision('numpy')}")
     else:
-        logger.error(f"Changing the data type is not supported for backend {backend}")
+        error_msg = f"Changing the data type is not supported for backend {backend}"
+        logger.error(error_msg)
+        print(f"ERROR: {error_msg}", file=sys.stderr)