weblyzard · AlbertWeichselbraun · Nov 18, 2025 · Nov 18, 2025 · Nov 18, 2025 · Nov 18, 2025
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -11,7 +11,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13' ]
+        python-version: [ '3.10', '3.11', '3.12', '3.13' ]
 
     steps:
     - uses: actions/checkout@v3
@@ -22,7 +22,8 @@ jobs:
     - name: Install build environment
       run: |
         python -m pip install --upgrade pip
-        python -m pip install tox setuptools pytest pytest-cov codecov
-    - name: Build and test with tox.
+        python -m pip install uv
+    - name: Build and test with uv.
       run: |
-        tox -vv -e flake8
+        uv run ruff check
+        uv build
diff --git a/benchmarking/run_benchmarking.py b/benchmarking/run_benchmarking.py
@@ -1,8 +1,5 @@
 #!/usr/bin/env python3
-"""
-Runs a benchmarking suite to compare speed
-and output of different implementations.
-"""
+"""Run a benchmarking suite to compare speed and output of different implementations."""
 
 import argparse
 import operator
@@ -53,23 +50,16 @@
 
 
 class AbstractHtmlConverter:
-    """
-    An abstract HTML convert class.
-    """
+    """An abstract HTML converter class."""
 
     def get_text(self, html):
-        """
-        Returns:
-            a text representation of the given HTML snippet.
-        """
+        """Return a text representation of the given HTML snippet."""
         raise NotImplementedError
 
     def benchmark(self, html):
-        """
-        Benchmarks the classes HTML to text converter.
+        """Benchmark the class's HTML to text converter.
 
-        Returns:
-            A tuple of the required time and the obtained text representation.
+        Return a tuple of the required time and the obtained text representation.
         """
         start_time = time()
         for _ in range(TRIES):
@@ -78,9 +68,7 @@ def benchmark(self, html):
 
 
 class BeautifulSoupHtmlConverter(AbstractHtmlConverter):
-    """
-    Converts HTML to text using BeautifulSoup.
-    """
+    """Converts HTML to text using BeautifulSoup."""
 
     name = "BeautifulSoup"
 
@@ -100,9 +88,7 @@ def get_text(self, html):
 
 
 class JustextConverter(AbstractHtmlConverter):
-    """
-    Converts HTML to text using Justtext.
-    """
+    """Converts HTML to text using Justtext."""
 
     name = "Justtext"
 
@@ -116,9 +102,7 @@ def get_text(self, html):
 
 
 class Html2TextConverter(AbstractHtmlConverter):
-    """
-    Converts HTML to text using Html2Text.
-    """
+    """Converts HTML to text using Html2Text."""
 
     name = "Html2Text"
 
@@ -133,9 +117,7 @@ def get_text(self, html):
 
 
 class LynxConverter(AbstractHtmlConverter):
-    """
-    Converts HTML to text using lynx.
-    """
+    """Converts HTML to text using lynx."""
 
     name = "Lynx"
 
@@ -166,9 +148,7 @@ def kill_lynx(pid):
 
 
 class LinksConverter(AbstractHtmlConverter):
-    """
-    Converts HTML to text using links.
-    """
+    """Converts HTML to text using links."""
 
     name = "Links"
 
@@ -199,9 +179,7 @@ def kill_links(pid):
 
 
 class InscriptisHtmlConverter(AbstractHtmlConverter):
-    """
-    Converts HTML to text using Inscriptis.
-    """
+    """Converts HTML to text using Inscriptis."""
 
     name = "Inscriptis"
 
@@ -217,18 +195,14 @@ def __init__(self):
 
 
 def save_to_file(algorithm, url, data, benchmarking_results_dir):
-    """
-    Saves a benchmarking result to the given file.
-    """
+    """Save the benchmarking result to the given file."""
     result_file = os.path.join(benchmarking_results_dir, f"{algorithm}_{url}.txt")
     with open(result_file, "w") as output_file:
         output_file.write(data)
 
 
 def get_speed_table(times):
-    """
-    Provides the table which compares the conversion speed.
-    """
+    """Provide the table which compares the conversion speed."""
     fastest = min((value for _, value in times.items()))
     longest_key = max(len(key) for key, _ in times.items())
     longest_value = max(len(str(value)) for _, value in times.items())
@@ -251,9 +225,7 @@ def get_speed_table(times):
 
 
 def get_fname(url) -> str:
-    """
-    Transforms a URL to a file name.
-    """
+    """Transform a URL to a file name."""
     trash = (("http://", ""), ("https://", ""), ("/", "-"), (":", "-"), ("%", ""))
 
     for key, value in trash:
@@ -272,9 +244,7 @@ def get_fname(url) -> str:
 
 
 def parse_args():
-    """
-    Parse optional benchmarking arguments.
-    """
+    """Parse optional benchmarking arguments."""
     parser = argparse.ArgumentParser(description="Inscriptis benchmarking suite")
     parser.add_argument(
         "converter",
@@ -306,11 +276,11 @@ def parse_args():
 
 
 def _setup_benchmarking_directories(args):
-    """
-    Setup the benchmarking result and caching directories.
+    """Set up the benchmarking result and caching directories.
 
     Args:
         args: command line arguments that provide the directory names.
+
     """
     if not os.path.exists(args.benchmarking_results):
         os.makedirs(args.benchmarking_results)
@@ -319,16 +289,17 @@ def _setup_benchmarking_directories(args):
 
 
 def _fetch_url(url, cache_dir):
-    """
-    Fetch the given URL either from the cache or from the Web.
+    """Fetch the given URL either from the cache or from the Web.
 
     URLs that are not yet cached are added to the cache.
 
     Args:
         url: the URL to fetch.
+        cache_dir: the cache directory.
 
     Returns:
         A tuple of the cache file name and the URLs content.
+
     """
     source_name = get_fname(url)
     source_cache_path = os.path.join(cache_dir, source_name)
@@ -349,14 +320,13 @@ def _fetch_url(url, cache_dir):
 
 
 def benchmark(args, source_list):
-    """
-    Run the benchmark.
+    """Run the benchmark.
 
     Args:
         args: command line arguments
         source_list: a list of URLs to benchmark.
-    """
 
+    """
     _setup_benchmarking_directories(args)
 
     output = []

diff --git a/examples/custom-html-handling.py b/examples/custom-html-handling.py
@@ -1,13 +1,13 @@
 #!/usr/bin/env python3
 
-"""
-Custom HTML tag handling example.
+"""Custom HTML tag handling example.
 
 Add a custom HTML handler for the bold <b> tag which encloses
 bold text with "**".
 
 Example:
     "Welcome to <b>Chur</b>" is rendered as "Welcome to **Chur**".
+
 """
 
 from lxml.html import fromstring

diff --git a/publish.sh b/publish.sh
@@ -19,8 +19,9 @@ case "$1" in
 		# cleanup dist
 		rm -rf ./dist
 
-		# build and publish packages
-		poetry publish --build
+		# build with hatchling and publish to PyPI
+		uv build
+		uv publish
 		;;
 	docker)
 		echo "Publishing ${IMAGE_NAME} in version ${VERSION}"

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ version = "2.7.0"
 description = "inscriptis - HTML to text converter."
 license = "Apache-2.0"
 readme = "README.rst"
-requires-python = ">=3.9,<3.14"
+requires-python = ">=3.10,<3.15"
 
 authors = [
         { name = "Albert Weichselbraun", email = "albert.weichselbraun@fhgr.ch" },
@@ -20,12 +20,13 @@ classifiers = [
         "Topic :: Text Processing :: Markup :: HTML",
         "Topic :: Utilities",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
         "Programming Language :: Python :: 3.12",
         "Programming Language :: Python :: 3.13",
+        "Programming Language :: Python :: 3.14",        
     ]
+
 
 dependencies = [
   "requests>=2.32.3,<3.0.0",
@@ -49,14 +50,15 @@ web-service = [
 
 [dependency-groups]
 dev = [
-    "pytest>=8.3.5",
+    "pytest>=9.0.1",
     "fastapi>=0.115.11,<1.0.0",
-    "ruff>=0.11.12",
+    "ruff>=0.14.5",
     "httpx>=0.28.1",
-    "uvicorn>=0.34.2",
-    "ty>=0.0.1a7",
-    "pytest-cov>=6.1.1",
-    "safety>=3.5.1",
+    "uvicorn>=0.38.0",
+    "ty>=0.0.1a26",
+    "pytest-cov>=7.0.0",
+    "safety>=3.7.0",
+    "tox>=4.23.0",
 ]
 [build-system]
 requires = ["hatchling"]
@@ -77,32 +79,56 @@ quote-style = "double"
 
 [tool.ruff.lint]
 select = [
-    # pycodestyle
-    "E",
-    # Pyflakes
-    "F",
-    # pyupgrade
-    "UP",
-    # flake8-builtins
-    "A",
-    # flake8-bugbear
-    "B",
-    # flake8-comprehensions
-    "C4",
-    # flake8-errmsg
-    "EM",
-    # flake8-quotes
-    "Q",
-    # flake8-pyi
-    "PYI",
-    # flake8-simplify
-    "SIM",
-    # isort
-    "I",
-    "RSE", "RET", "SLOT", "TID", "TC", "C90", "N", "PERF", "E", "W", 
-    "UP", "FURB", "RUF", "TRY", "YTT"
+    "A",      # flake8-builtins
+    "B",      # flake8-bugbear
+    "COM",    # flake8-commas - trailing commas
+    "BLE",    # flake8-blind-except - avoid bare except
+    "D",      # pydocstyle (docstring conventions)
+    "C4",     # flake8-comprehensions
+    "E",      # pycodestyle
+    "EM",     # flake8-errmsg
+    "F",      # Pyflakes
+    "FA",     # flake8-future-annotations - use modern annotations
+    "ICN",    # flake8-import-conventions - standard import aliases
+    "PIE",    # flake8-pie
+    "PLE",    # Pylint - errors
+    "PLW",    # Pylint - warnings
+    "PTH",    # flake8-use-pathlib - prefer pathlib over os.path
+    "PYI",    # flake8-pyi
+    "Q",      # flake8-quotes
+    "N",      # pep8-naming
+    "SIM",    # flake8-simplify
+    "I",      # isort
+    "RET",    # flake8-return
+    "RSE", "SLOT", "TID", "TC", "C90", "PERF", "E", "W",
+    "FURB", "RUF", "TRY", "YTT",
+    "TCH",    # flake8-type-checking - optimize type checking imports
+    "S",      # flake8-bandit (security) — replaces dlint/bandit
+    "UP",     # pyupgrade
 ]
 
+ignore = [
+    "D102",   # missing docstring in public method
+    "D105",   # missing docstring in magic method
+    "D107",   # missing docstring in __init__
+    "D203",   # incorrect-blank-line-before-class
+    "D213",   # multi-line-summary-second-line
+]
+
+[tool.ruff.lint.per-file-ignores]
+"tests/**/*.py" = [
+  "S101",   # allow asserts
+  "D",      # no docstring checks
+  "S310",   # allow URLs
+  "PTH",    # allow os.path (pathlib not enforced)
+]
+"benchmarking/*.py" = [
+  "S310",   # allow URLs
+  "S603",   # allow subprocess calls (rule: check for execution of untrusted input)
+  "PTH",    # allow os.path (pathlib not enforced)
+] 
+
+
 [tool.ty.src]
 root="./src"
 

diff --git a/src/inscriptis/__init__.py b/src/inscriptis/__init__.py
@@ -83,6 +83,7 @@ def _get_html_tree(html_content: str) -> HtmlElement | None:
 
     Returns:
         The corresponding HTML parse tree.
+
     """
     html_content = html_content.strip()
     if not html_content:
@@ -107,6 +108,7 @@ def get_text(html_content: str, config: ParserConfig | None = None) -> str:
 
     Returns:
       The text representation of the HTML content.
+
     """
     html_tree = _get_html_tree(html_content)
     return Inscriptis(html_tree, config).get_text() if html_tree is not None else ""
@@ -128,6 +130,7 @@ def get_annotated_text(html_content: str, config: ParserConfig | None = None) ->
 
     Returns:
         A dictionary of text (key: 'text') and annotations (key: 'label')
+
     """
     html_tree = _get_html_tree(html_content)
     if html_tree is None: