diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 0000000..cf1a529
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,74 @@
+name: docs  # Builds the Zensical site; PRs only build, other events build and deploy to GitHub Pages
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'docs/**'
+      - '.github/workflows/docs.yml'
+  pull_request:
+    paths:
+      - 'docs/**'
+      - '.github/workflows/docs.yml'
+  workflow_dispatch:
+
+permissions:
+  contents: read  # workflow-wide default; the deploy job declares its own wider set
+
+concurrency:
+  group: pages-${{ github.ref }}  # one group per ref, so PR runs and main runs never share a group
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}  # cancel superseded PR builds only; let in-flight deployments finish
+
+jobs:
+  pr-build-check:
+    name: Docs build check (PR)
+    if: github.event_name == 'pull_request'  # build-only validation, nothing is published
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+      - name: Install zensical and plugins
+        run: pip install -r requirements.txt
+        working-directory: ./docs
+      - name: Build site
+        run: zensical build --clean
+        working-directory: ./docs
+
+  deploy:
+    name: Deploy docs
+    if: github.event_name != 'pull_request'  # runs for push to main and for manual workflow_dispatch
+    permissions:
+      pages: write  # required by actions/deploy-pages
+      id-token: write  # required by actions/deploy-pages (OIDC token)
+      contents: read
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}  # output of the step with id "deployment" below
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+      - name: Configure Pages
+        uses: actions/configure-pages@v5
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+      - name: Install zensical and plugins
+        run: pip install -r requirements.txt
+        working-directory: ./docs
+      - name: Build site
+        run: zensical build --clean
+        working-directory: ./docs
+      - name: Upload Pages artifact
+        uses: actions/upload-pages-artifact@v4
+        with:
+          path: docs/site  # build output directory (also listed in .gitignore)
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.gitignore b/.gitignore
index fafaf4d..376e8c5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -44,4 +44,7 @@ benchmarking/target/
.DS_Store
*.iml
-/bin/
\ No newline at end of file
+/bin/
+
+# Zensical docs build output
+docs/site/
diff --git a/README.md b/README.md
index ba4cf19..667a3ad 100644
--- a/README.md
+++ b/README.md
@@ -15,10 +15,12 @@ its MVCC (MultiVersion Concurrency Control) capability.
   <groupId>com.phonepe</groupId>
   <artifactId>DLM</artifactId>
-  <version>1.0.0</version>
+  <version>${dlm.version}</version>
```
+> Replace `${dlm.version}` with the latest version from [Maven Central](https://central.sonatype.com/artifact/com.phonepe/DLM) or [GitHub Releases](https://github.com/PhonePe/DLM/releases).
+
### Usage
@@ -26,13 +28,13 @@ its MVCC (MultiVersion Concurrency Control) capability.
##### With Aerospike as lock base
-``` java
+```java
DistributedLockManager lockManager = DistributedLockManager.builder()
.clientId("CLIENT_ID")
.farmId("FA1")
- .lockBase(AerospikeLockBase.builder()
+ .lockBase(LockBase.builder()
.mode(LockMode.EXCLUSIVE)
- .store(AerospikeStore.builder()
+ .lockStore(AerospikeStore.builder()
.aerospikeClient(aerospikeClient)
.namespace("NAMESPACE")
.setSuffix("distributed_lock")
@@ -44,13 +46,13 @@ lockManager.initialize();
##### With HBase as lock base
-``` java
+```java
DistributedLockManager lockManager = DistributedLockManager.builder()
.clientId("CLIENT_ID")
.farmId("FA1")
- .lockBase(HBaseLockBase.builder()
+ .lockBase(LockBase.builder()
.mode(LockMode.EXCLUSIVE)
- .store(HBaseStore.builder()
+ .lockStore(HBaseStore.builder()
.connection(connection) // HBase connection reference
.tableName("table_name")
.build())
@@ -88,40 +90,47 @@ This library offers various methods for acquiring and releasing locks on critica
final Lock lock = lockManager.getLockInstance("LOCK_ID", LockLevel.DC);
try {
lockManager.tryAcquireLock(lock); // Attempts to acquire the lock for the default duration of 90 seconds
- // OR lockManager.tryAcquireLock(lock, 120); // Tries to acquire the lock for 120 seconds
+ // OR lockManager.tryAcquireLock(lock, Duration.ofSeconds(120)); // Tries to acquire the lock for 120 seconds
// Perform actions once the lock is successfully acquired.
-} catch (DLSException e) {
- if (ErrorCode.LOCK_UNAVAILABLE.equals(e.getErrorCode)) {
+} catch (DLMException e) {
+ if (ErrorCode.LOCK_UNAVAILABLE.equals(e.getErrorCode())) {
// Actions to take if the lock can't be acquired.
}
} finally {
// Verify if the lock was released successfully.
- boolean released = lockManager.release(lock);
+ boolean released = lockManager.releaseLock(lock);
}
```
```java
-// Vulnerable entity represented by LOCK_ID
// Representing a vulnerable entity by LOCK_ID
final Lock lock = lockManager.getLockInstance("LOCK_ID", LockLevel.DC);
try {
lockManager.acquireLock(lock); // Attempts to acquire the lock for the default duration of 90 seconds and waits for 90 seconds
- // OR lockManager.acquireLock(lock, 30); // Tries to acquire the lock for 30 seconds, waiting for 90 seconds
- // OR lockManager.acquireLock(lock, 30, 30); // Tries to acquire the lock for 30 seconds, waiting for 30 seconds
+ // OR lockManager.acquireLock(lock, Duration.ofSeconds(30)); // Tries to acquire the lock for 30 seconds, waiting for 90 seconds
+ // OR lockManager.acquireLock(lock, Duration.ofSeconds(30), Duration.ofSeconds(30)); // Tries to acquire the lock for 30 seconds, waiting for 30 seconds
// Perform actions once the lock is successfully acquired.
-} catch (DLSException e) {
- if (ErrorCode.LOCK_UNAVAILABLE.equals(e.getErrorCode)) {
+} catch (DLMException e) {
+ if (ErrorCode.LOCK_UNAVAILABLE.equals(e.getErrorCode())) {
// Actions to take if the lock can't be acquired.
}
} finally {
// Verify if the lock was released successfully.
- boolean released = lockManager.release(lock);
+ boolean released = lockManager.releaseLock(lock);
}
```
+#### Cleanup
+
+When the application shuts down, call `destroy()` to close the underlying store connection:
+
+```java
+lockManager.destroy();
+```
+
#### Lock Levels
* DC - Acquiring/releasing lock within a DC
* XDC - Acquiring/releasing lock across DCs.
@@ -175,3 +184,22 @@ lockManager.initialize();
> **Backward compatibility**: omitting `lockConfiguration(...)` from the builder is fully supported
> and produces identical behaviour to all previous library versions.
+
+## Documentation Site (Zensical)
+
+This repository now includes Zensical-based docs under `docs/`.
+
+- Config: `docs/zensical.toml`
+- Content: `docs/docs/`
+- Python dependencies: `docs/requirements.txt`
+- GitHub Pages workflow: `.github/workflows/docs.yml`
+
+Build docs locally:
+
+```bash
+cd docs
+pip install -r requirements.txt
+zensical build --clean
+```
+
+Generated site output is available at `docs/site`.
diff --git a/docs/docs/getting-started.md b/docs/docs/getting-started.md
new file mode 100644
index 0000000..e990f34
--- /dev/null
+++ b/docs/docs/getting-started.md
@@ -0,0 +1,82 @@
+# Getting Started
+
+## Requirements
+
+- **Java 17** or later.
+- One of the supported storage backends:
+ - An **Aerospike** cluster reachable from application nodes, or
+ - An **HBase** cluster reachable from application nodes.
+
+## Add dependency
+
+```xml
+<dependency>
+  <groupId>com.phonepe</groupId>
+  <artifactId>DLM</artifactId>
+  <version>${dlm.version}</version>
+</dependency>
+```
+
+Replace `${dlm.version}` with the latest version from [Maven Central](https://central.sonatype.com/artifact/com.phonepe/DLM) or [GitHub Releases](https://github.com/PhonePe/DLM/releases).
+
+## Build locally
+
+```bash
+git clone https://github.com/PhonePe/DLM.git
+cd DLM
+mvn clean install
+```
+
+To run the tests:
+
+```bash
+mvn clean test
+```
+
+!!! note
+ Some integration tests require Docker (via Testcontainers) for Aerospike.
+ Make sure the Docker daemon is running before executing the full test suite.
+
+## Minimal example
+
+```java
+// 1. Build the lock manager
+DistributedLockManager lockManager = DistributedLockManager.builder()
+ .clientId("order-service")
+ .farmId("dc1")
+ .lockBase(LockBase.builder()
+ .mode(LockMode.EXCLUSIVE)
+ .lockStore(AerospikeStore.builder()
+ .aerospikeClient(aerospikeClient)
+ .namespace("locks")
+ .setSuffix("distributed_lock")
+ .build())
+ .build())
+ .build();
+
+// 2. Initialize (creates tables / validates connectivity)
+lockManager.initialize();
+
+// 3. Acquire → work → release
+Lock lock = lockManager.getLockInstance("order-123", LockLevel.DC);
+try {
+ lockManager.acquireLock(lock);
+ // critical section
+} finally {
+ lockManager.releaseLock(lock);
+}
+
+// 4. Shutdown
+lockManager.destroy();
+```
+
+!!! tip
+ The example above uses all default timing values (90s TTL, 90s wait, 1s retry).
+ To customise these, pass a `LockConfiguration` to the `LockBase` builder.
+ See [Configuring lock timing](usage.md#configuring-lock-timing).
+
+## What's next
+
+- [Usage](usage.md) — initialization, lock timing configuration, API overloads, error handling.
+- [Locking Semantics](locking.md) — defaults, retry behavior, lock levels.
+- [Storage Backends](storages/aerospike.md) — Aerospike and HBase details.
diff --git a/docs/docs/index.md b/docs/docs/index.md
new file mode 100644
index 0000000..e45fd78
--- /dev/null
+++ b/docs/docs/index.md
@@ -0,0 +1,71 @@
+# Distributed Lock Manager
+
+Distributed Lock Manager (DLM) is a lightweight Java library for coordinating lock acquisition and release across multiple application instances in a distributed environment.
+
+## Why DLM?
+
+In service-oriented architectures, concurrent access to shared resources is inevitable. DLM provides a simple, pluggable distributed locking mechanism that protects critical entities for a specified duration — without requiring a dedicated lock server.
+
+## Key features
+
+- **Exclusive locking** — only one holder at a time per lock identity.
+- **Lock levels** — `DC` (single data center) and `XDC` (cross data center).
+- **Pluggable storage backends** — Aerospike and HBase out of the box.
+- **Blocking and non-blocking acquisition** — choose between immediate-fail (`tryAcquireLock`) or wait-with-timeout (`acquireLock`).
+- **Automatic TTL** — every lock has a time-to-live; the lock expires even if the holder crashes.
+- **Configurable lock timing** — tune TTL, wait timeout, and retry interval per `LockBase` via `LockConfiguration`.
+- **Built-in retry** — storage operations are retried automatically on transient backend failures.
+
+## How it works
+
+```mermaid
+sequenceDiagram
+ participant App as Application
+ participant DLM as DistributedLockManager
+ participant LB as LockBase
+ participant Store as ILockStore (Aerospike / HBase)
+
+ App->>DLM: getLockInstance("order-123", DC)
+ DLM-->>App: Lock
+
+ App->>DLM: acquireLock(lock, duration, timeout)
+ DLM->>LB: acquireLock(lock, duration, timeout)
+
+ loop Until acquired or timeout
+ LB->>Store: write(lockId, lockLevel, farmId, ttl)
+ alt Lock available
+ Store-->>LB: success
+ LB-->>DLM: acquired
+ else Lock held by another holder
+ Store-->>LB: LOCK_UNAVAILABLE
+ LB->>LB: sleep 1s, retry
+ end
+ end
+
+ DLM-->>App: void (acquired)
+
+ App->>DLM: releaseLock(lock)
+ DLM->>LB: releaseLock(lock)
+ LB->>Store: remove(lockId, lockLevel, farmId)
+ LB-->>DLM: true
+ DLM-->>App: true
+```
+
+### Lock identity scoping
+
+Each lock identity is scoped to a **client**. Internally the lock ID is stored as `clientId#lockId`, so two different clients can independently lock the same logical entity without conflict.
+
+### Lock lifecycle
+
+1. **Initialize** — `lockManager.initialize()` prepares the storage backend (e.g. creates the HBase table).
+2. **Get lock instance** — `lockManager.getLockInstance(id, level)` creates a `Lock` object.
+3. **Acquire** — `tryAcquireLock` / `acquireLock` writes a record to the store with a TTL.
+4. **Release** — `releaseLock` removes the record from the store.
+5. **Destroy** — `lockManager.destroy()` closes the underlying storage connection.
+
+## What to read next
+
+- [Getting Started](getting-started.md) — dependency setup, prerequisites, building locally.
+- [Usage](usage.md) — initialization, acquisition, release, cleanup.
+- [Locking Semantics](locking.md) — API reference, defaults, retry behavior, error codes.
+- [Storage Backends](storages/aerospike.md) — Aerospike and HBase internals.
diff --git a/docs/docs/locking.md b/docs/docs/locking.md
new file mode 100644
index 0000000..10a6edb
--- /dev/null
+++ b/docs/docs/locking.md
@@ -0,0 +1,120 @@
+# Locking Semantics
+
+## Lock levels
+
+DLM supports two lock levels that control the scope of lock isolation:
+
+### `DC` — Data Center
+
+Locks scoped to a single data center. The storage key includes the `farmId`:
+
+```
+DC#<farmId>#<lockId>
+```
+
+Use `DC` locks when all competing instances are within the same data center. This is the **recommended default** for most workloads.
+
+### `XDC` — Cross Data Center
+
+Locks scoped across data centers. The storage key omits the `farmId`:
+
+```
+XDC#<lockId>
+```
+
+Use `XDC` when instances in different data centers must coordinate on the same entity.
+
+!!! warning "XDC consistency"
+ Concurrently acquiring an `XDC` lock from multiple data centers may produce unexpected behavior due to storage replication lag. For strong consistency with `XDC` locks:
+
+ - **Aerospike** — use a strong-consistency namespace or a multi-site cluster.
+ - **HBase** — ensure a single-region deployment or synchronous replication.
+
+## Lock modes
+
+Currently, only `LockMode.EXCLUSIVE` is supported — at most one holder per lock identity at any given time.
+
+The `LockMode` parameter is accepted by the builder for forward compatibility. Future versions may introduce shared / limited-protected modes.
+
+## API reference
+
+### `tryAcquireLock` — non-blocking
+
+| Signature | Description |
+|-----------|-------------|
+| `tryAcquireLock(Lock lock)` | Acquire with TTL from `LockConfiguration.lockTtl` (default **90s**). Throws immediately if unavailable. |
+| `tryAcquireLock(Lock lock, Duration duration)` | Acquire with custom TTL. Throws immediately if unavailable. |
+
+Both methods perform a **single** write attempt. If the lock is held, they throw `DLMException` with `ErrorCode.LOCK_UNAVAILABLE` without retrying.
+
+### `acquireLock` — blocking with retry
+
+| Signature | Description |
+|-----------|-------------|
+| `acquireLock(Lock lock)` | TTL from `lockConfiguration.lockTtl`, timeout from `lockConfiguration.waitForLock`. |
+| `acquireLock(Lock lock, Duration duration)` | Custom TTL, timeout from `lockConfiguration.waitForLock`. |
+| `acquireLock(Lock lock, Duration duration, Duration timeout)` | Custom TTL and custom timeout. |
+
+These methods enter a **retry loop**. The sleep interval between retries is always read from `lockConfiguration.sleepBetweenRetries`:
+
+```mermaid
+flowchart TD
+ A[acquireLock called] --> B[Try write to store]
+ B -->|Success| C[Lock acquired ✓]
+ B -->|DLMException| D{Timeout expired?}
+ D -->|Yes| F[Throw DLMException]
+ D -->|No| H{LOCK_UNAVAILABLE?}
+ H -->|Yes| E["Sleep (sleepBetweenRetries)"]
+ E --> B
+ H -->|No| G[Throw DLMException immediately]
+```
+
+### `releaseLock`
+
+```java
+boolean released = lockManager.releaseLock(lock);
+```
+
+| Return value | Meaning |
+|-------------|---------|
+| `true` | Lock was held by this instance and has been removed from the store. |
+| `false` | Lock was not held (`acquiredStatus` was already `false`). No store operation performed. |
+
+### `getLockInstance`
+
+```java
+Lock lock = lockManager.getLockInstance("order-123", LockLevel.DC);
+```
+
+Creates a `Lock` with `lockId` set to `clientId#order-123`. No I/O is performed.
+
+## Defaults
+
+All timing defaults are defined in `LockConfiguration` and can be overridden per `LockBase` instance. See [Configuring lock timing](usage.md#configuring-lock-timing).
+
+| Constant | Value | Description |
+|----------|-------|-------------|
+| `DEFAULT_LOCK_TTL` | `Duration.ofSeconds(90)` | How long the lock record lives in the store before auto-expiring. |
+| `DEFAULT_WAIT_FOR_LOCK` | `Duration.ofSeconds(90)` | Maximum time `acquireLock` will retry before giving up. |
+| `DEFAULT_SLEEP_BETWEEN_RETRIES` | `Duration.ofMillis(1000)` | Sleep interval between retry attempts inside `acquireLock`. |
+
+## Error codes
+
+All errors are surfaced as `DLMException` with one of the following codes:
+
+| Error Code | When it occurs |
+|------------|----------------|
+| `LOCK_UNAVAILABLE` | Lock is currently held by another holder (Aerospike generation conflict / HBase `checkAndMutate` returned `false`). |
+| `CONNECTION_ERROR` | Storage backend is unreachable or returned an I/O error. |
+| `RETRIES_EXHAUSTED` | All retry attempts to the storage backend failed. |
+| `TABLE_CREATION_ERROR` | HBase table creation failed during `initialize()`. |
+| `INTERNAL_ERROR` | Catch-all for unexpected failures. |
+
+### Exception propagation
+
+`DLMException.propagate(throwable)` unwraps nested `DLMException` instances so you always receive the original error code rather than a wrapped `INTERNAL_ERROR`.
+
+## Thread safety
+
+- `DistributedLockManager` is **thread-safe** — it can be shared across threads.
+- Each `Lock` instance tracks its own `acquiredStatus` via `AtomicBoolean`. Do **not** share a single `Lock` object across threads for concurrent acquisitions; create a new instance per thread via `getLockInstance`.
diff --git a/docs/docs/storages/aerospike.md b/docs/docs/storages/aerospike.md
new file mode 100644
index 0000000..cfe072a
--- /dev/null
+++ b/docs/docs/storages/aerospike.md
@@ -0,0 +1,87 @@
+# Aerospike Backend
+
+Use `AerospikeStore` when your workload needs low-latency lock operations backed by Aerospike's in-memory storage.
+
+## Configuration
+
+```java
+AerospikeStore store = AerospikeStore.builder()
+ .aerospikeClient(aerospikeClient) // IAerospikeClient instance
+ .namespace("locks") // Aerospike namespace
+ .setSuffix("distributed_lock") // suffix used in set name
+ .build();
+```
+
+| Parameter | Type | Description |
+|--------------------|--------------------|-------------|
+| `aerospikeClient` | `IAerospikeClient` | An already-connected Aerospike client. DLM does **not** create the client, but will close it on `destroy()`. |
+| `namespace` | `String` | The Aerospike namespace where lock records are stored. Must already exist on the cluster. |
+| `setSuffix` | `String` | Suffix appended to the set name. The full set name is constructed from the lock level, farm ID, and this suffix. |
+
+## How locking works
+
+Aerospike's **generation-based optimistic concurrency (MVCC)** is used to guarantee mutual exclusion:
+
+1. A `WritePolicy` is created with `GenerationPolicy.EXPECT_GEN_EQUAL` and `generation = 0`.
+2. The `put` call succeeds only if the record **does not already exist** (generation 0).
+3. If the record exists (another holder), Aerospike returns a `GENERATION_ERROR`, which DLM maps to `ErrorCode.LOCK_UNAVAILABLE`.
+4. The record's `expiration` is set to the requested TTL — the lock auto-expires even if the holder crashes.
+
+```mermaid
+flowchart LR
+ A[put with gen=0] -->|Record absent| B[Created → lock acquired]
+ A -->|Record exists| C[GENERATION_ERROR → LOCK_UNAVAILABLE]
+```
+
+### Write policy details
+
+| Policy field | Value | Reason |
+|------------------|------------------|--------|
+| `generationPolicy` | `EXPECT_GEN_EQUAL` | Only succeed if the record's generation matches (0 = does not exist). |
+| `generation` | `0` | Expect the record to be absent. |
+| `expiration` | TTL in seconds | Auto-expire the lock record. |
+| `commitLevel` | `COMMIT_MASTER` | Commit to master only — avoids replica round-trips since no reads are performed. |
+
+## Set naming
+
+The Aerospike set name is constructed based on lock level:
+
+| Lock Level | Set name format |
+|------------|----------------|
+| `DC` | `DC#<farmId>#<setSuffix>` |
+| `XDC` | `XDC#<setSuffix>` |
+
+This means `DC` locks from different farms are stored in different sets, providing natural isolation.
+
+## Retry behavior
+
+All Aerospike operations are wrapped in a `guava-retrying` retryer:
+
+| Setting | Value |
+|---------|-------|
+| Retry on | `AerospikeException` |
+| Max attempts | 5 |
+| Wait between attempts | 80 ms (fixed) |
+| Block strategy | Thread sleep |
+
+If all retries are exhausted, a `DLMException` with `ErrorCode.RETRIES_EXHAUSTED` (for `remove`) or `ErrorCode.CONNECTION_ERROR` (for `write`) is thrown.
+
+## Bin layout
+
+Each lock record contains two bins:
+
+| Bin name | Format | Content |
+|----------|--------|---------|
+| `##data` | Integer | `1` (marker) |
+| `##uat` | Long | Timestamp of lock acquisition (`System.currentTimeMillis()`) |
+
+## Initialization
+
+`AerospikeStore.initialize()` is a **no-op** — Aerospike sets are created on first write automatically.
+
+## Cleanup
+
+`AerospikeStore.close()` calls `aerospikeClient.close()`. This is invoked when you call `lockManager.destroy()`.
+
+!!! note
+ If you share the `IAerospikeClient` instance with other parts of your application, be aware that `destroy()` will close it.
diff --git a/docs/docs/storages/hbase.md b/docs/docs/storages/hbase.md
new file mode 100644
index 0000000..f283a16
--- /dev/null
+++ b/docs/docs/storages/hbase.md
@@ -0,0 +1,104 @@
+# HBase Backend
+
+Use `HBaseStore` when your platform standard is Apache HBase and you need lock operations backed by HBase's atomic `checkAndMutate`.
+
+## Configuration
+
+```java
+HBaseStore store = HBaseStore.builder()
+ .connection(connection) // org.apache.hadoop.hbase.client.Connection
+ .tableName("dlm_locks") // HBase table name
+ .build();
+```
+
+| Parameter | Type | Description |
+|--------------|--------------|-------------|
+| `connection` | `Connection` | An already-established HBase connection. DLM does **not** create the connection, but will close it on `destroy()`. |
+| `tableName` | `String` | Name of the HBase table used for lock storage. Created automatically by `initialize()` if it does not exist. |
+
+## Initialization — auto table creation
+
+When you call `lockManager.initialize()`, `HBaseStore` checks whether the table exists and creates it if needed:
+
+```java
+TableDescriptor tableDescriptor = TableDescriptorBuilder
+ .newBuilder(TableName.valueOf(tableName))
+ .setColumnFamily(ColumnFamilyDescriptorBuilder
+ .newBuilder("D")
+ .setCompressionType(Compression.Algorithm.GZ)
+ .setMaxVersions(1)
+ .build())
+ .build();
+```
+
+The table is **pre-split** using a 256-bucket one-byte hash prefix to distribute writes evenly across regions.
+
+!!! warning
+ For best performance, **do not** pre-create the HBase table manually.
+ Let DLM create it with the correct schema, column family, compression, and pre-split configuration.
+
+If table creation fails, a `DLMException` with `ErrorCode.TABLE_CREATION_ERROR` is thrown.
+
+## How locking works
+
+HBase's **`checkAndMutate`** provides atomic compare-and-set semantics:
+
+1. A `Put` is created with the lock data and a **TTL** (cell-level TTL in milliseconds).
+2. `checkAndMutate` checks if the column `D:L` **does not exist** on the row.
+3. If absent → the `Put` succeeds → lock acquired.
+4. If present → returns `false` → DLM throws `DLMException` with `ErrorCode.LOCK_UNAVAILABLE`.
+
+```mermaid
+flowchart LR
+ A["checkAndMutate\n(ifNotExists)"] -->|Column absent| B["Put succeeds → lock acquired"]
+ A -->|Column exists| C["Returns false → LOCK_UNAVAILABLE"]
+```
+
+### TTL behavior
+
+The lock TTL is set as the cell-level TTL on the `Put`:
+
+```java
+new Put(rowKey, System.currentTimeMillis())
+ .setTTL(ttlSeconds * 1_000L) // milliseconds
+ .addColumn(COLUMN_FAMILY, COLUMN_NAME, COLUMN_DATA);
+```
+
+After the TTL expires, HBase automatically removes the cell, making the lock available for re-acquisition.
+
+!!! note
+ In HBase, physical removal of expired cells depends on the region server's compaction cycle: the cell becomes invisible to reads as soon as its TTL expires, but it is only deleted from disk during the next major compaction.
+
+## Row key design
+
+Row keys are **hash-prefixed** using `RowKeyDistributorByHashPrefix` with a `OneByteSimpleHash(256)` hasher to prevent hotspotting:
+
+```
+<1-byte-hash-prefix> + <logical-key>
+```
+
+The logical key varies by lock level:
+
+| Lock Level | Logical key format |
+|------------|-------------------|
+| `DC` | `DC#<farmId>#<lockId>` |
+| `XDC` | `XDC#<lockId>` |
+
+## Column layout
+
+| Column Family | Qualifier | Value |
+|---------------|-----------|-------|
+| `D` | `L` | `M` (marker byte) |
+
+A single column family (`D`) with GZ compression and `maxVersions=1` keeps the storage footprint minimal.
+
+## Release
+
+`HBaseStore.remove()` issues a `Delete` on the row key. This removes the lock record immediately, without waiting for TTL expiry.
+
+## Cleanup
+
+`HBaseStore.close()` calls `connection.close()`. This is invoked when you call `lockManager.destroy()`.
+
+!!! note
+ If you share the `Connection` instance with other parts of your application, be aware that `destroy()` will close it.
diff --git a/docs/docs/usage.md b/docs/docs/usage.md
new file mode 100644
index 0000000..1f81782
--- /dev/null
+++ b/docs/docs/usage.md
@@ -0,0 +1,283 @@
+# Usage
+
+## Builder parameters
+
+| Parameter | Type | Required | Description |
+|------------|------------|----------|-------------|
+| `clientId` | `String` | Yes | Unique identifier for the calling service. Lock IDs are scoped as `clientId#lockId`, so different clients can lock the same entity independently. |
+| `farmId` | `String` | Yes | Data center / farm identifier. Used in key construction for `DC`-level locks. |
+| `lockBase` | `LockBase` | Yes | The lock engine — wraps the storage backend and lock mode. |
+
+## Initialize lock manager
+
+=== "Aerospike"
+
+ ```java
+ DistributedLockManager lockManager = DistributedLockManager.builder()
+ .clientId("CLIENT_ID")
+ .farmId("FA1")
+ .lockBase(LockBase.builder()
+ .mode(LockMode.EXCLUSIVE)
+ .lockStore(AerospikeStore.builder()
+ .aerospikeClient(aerospikeClient)
+ .namespace("NAMESPACE")
+ .setSuffix("distributed_lock")
+ .build())
+ .build())
+ .build();
+
+ lockManager.initialize();
+ ```
+
+=== "HBase"
+
+ ```java
+ DistributedLockManager lockManager = DistributedLockManager.builder()
+ .clientId("CLIENT_ID")
+ .farmId("FA1")
+ .lockBase(LockBase.builder()
+ .mode(LockMode.EXCLUSIVE)
+ .lockStore(HBaseStore.builder()
+ .connection(connection)
+ .tableName("table_name")
+ .build())
+ .build())
+ .build();
+
+ lockManager.initialize();
+ ```
+
+=== "With custom lock timing"
+
+ ```java
+ LockConfiguration config = LockConfiguration.builder()
+ .lockTtl(Duration.ofSeconds(30))
+ .waitForLock(Duration.ofSeconds(10))
+ .sleepBetweenRetries(Duration.ofMillis(500))
+ .build();
+
+ DistributedLockManager lockManager = DistributedLockManager.builder()
+ .clientId("CLIENT_ID")
+ .farmId("FA1")
+ .lockBase(LockBase.builder()
+ .mode(LockMode.EXCLUSIVE)
+ .lockConfiguration(config)
+ .lockStore(AerospikeStore.builder()
+ .aerospikeClient(aerospikeClient)
+ .namespace("NAMESPACE")
+ .setSuffix("distributed_lock")
+ .build())
+ .build())
+ .build();
+
+ lockManager.initialize();
+ ```
+
+!!! warning
+ Always call `lockManager.initialize()` before any lock operations.
+ For HBase, this creates the table if it does not exist.
+
+## Configuring lock timing
+
+By default, `LockBase` uses the library's built-in timing constants from `LockConfiguration`. You can override any or all of them by supplying a custom `LockConfiguration` to the `LockBase` builder.
+
+### `LockConfiguration` parameters
+
+| Parameter | Type | Default | Description |
+|-----------------------|------------|------------|-------------|
+| `lockTtl` | `Duration` | 90 seconds | How long the lock is held before the storage layer expires it automatically. |
+| `waitForLock` | `Duration` | 90 seconds | Maximum time a blocking `acquireLock` call waits for a contended lock. |
+| `sleepBetweenRetries` | `Duration` | 1000 ms | Sleep interval between successive acquisition attempts when a lock is unavailable. |
+
+### Default configuration
+
+Omitting `lockConfiguration(...)` from the builder is fully supported — `LockBase` internally creates a default `LockConfiguration` with all defaults applied:
+
+```java
+// These two are equivalent:
+LockBase.builder().mode(LockMode.EXCLUSIVE).lockStore(store).build();
+LockBase.builder().mode(LockMode.EXCLUSIVE).lockStore(store)
+ .lockConfiguration(LockConfiguration.builder().build()) // all defaults
+ .build();
+```
+
+### Custom configuration examples
+
+=== "Tight SLO service"
+
+ ```java
+ LockConfiguration config = LockConfiguration.builder()
+ .lockTtl(Duration.ofSeconds(30)) // short-lived locks
+ .waitForLock(Duration.ofSeconds(10)) // fail fast on contention
+ .sleepBetweenRetries(Duration.ofMillis(500)) // poll twice as fast
+ .build();
+ ```
+
+=== "Long-running batch job"
+
+ ```java
+ LockConfiguration config = LockConfiguration.builder()
+ .lockTtl(Duration.ofMinutes(10)) // hold lock for batch duration
+ .waitForLock(Duration.ofMinutes(5)) // willing to wait longer
+ .sleepBetweenRetries(Duration.ofSeconds(5)) // poll less frequently
+ .build();
+ ```
+
+=== "Override only TTL"
+
+ ```java
+ // waitForLock and sleepBetweenRetries keep their defaults (90s and 1000ms)
+ LockConfiguration config = LockConfiguration.builder()
+ .lockTtl(Duration.ofSeconds(60))
+ .build();
+ ```
+
+!!! info "How timing flows through the API"
+ When you omit the duration and/or timeout arguments, the missing values are read from `LockConfiguration`:
+
+ - `tryAcquireLock(lock)` → uses `lockConfiguration.getLockTtl()` as TTL.
+ - `acquireLock(lock)` → uses `lockConfiguration.getLockTtl()` as TTL and `lockConfiguration.getWaitForLock()` as timeout.
+ - `acquireLock(lock, duration)` → uses the explicit `duration` but `lockConfiguration.getWaitForLock()` as timeout.
+ - `acquireLock(lock, duration, timeout)` → uses both explicit values; config is not consulted.
+ - Retry sleep always uses `lockConfiguration.getSleepBetweenRetries()`.
+
+## Get a lock instance
+
+```java
+Lock lock = lockManager.getLockInstance("order-123", LockLevel.DC);
+```
+
+The returned `Lock` object contains:
+
+- **lockId** — composed as `clientId#order-123`.
+- **lockLevel** — `DC` or `XDC`.
+- **farmId** — inherited from the manager.
+- **acquiredStatus** — an `AtomicBoolean` tracking whether this instance currently holds the lock.
+
+!!! info
+ The `Lock` object is lightweight and does not perform any I/O on creation.
+ Actual storage interaction happens only on `acquire` / `release`.
+
+## Acquiring locks
+
+### Non-blocking — `tryAcquireLock`
+
+Attempts to acquire immediately. Throws `DLMException` with `ErrorCode.LOCK_UNAVAILABLE` if the lock is already held.
+
+```java
+// Default TTL (90 seconds)
+lockManager.tryAcquireLock(lock);
+
+// Custom TTL
+lockManager.tryAcquireLock(lock, Duration.ofSeconds(120));
+```
+
+### Blocking — `acquireLock`
+
+Retries in a loop, sleeping for `sleepBetweenRetries` (1 second by default) between attempts, until the lock is acquired or the timeout expires.
+
+```java
+// Default TTL (90s) and default timeout (90s)
+lockManager.acquireLock(lock);
+
+// Custom TTL, default timeout (90s)
+lockManager.acquireLock(lock, Duration.ofSeconds(30));
+
+// Custom TTL and custom timeout
+lockManager.acquireLock(lock, Duration.ofSeconds(30), Duration.ofSeconds(10));
+```
+
+!!! note "Default values"
+ These defaults come from `LockConfiguration` and can be overridden per `LockBase` instance.
+ See [Configuring lock timing](#configuring-lock-timing) for details.
+
+ | Constant | Default |
+ |--------------------------------|------------|
+ | `DEFAULT_LOCK_TTL` | 90 seconds |
+ | `DEFAULT_WAIT_FOR_LOCK` | 90 seconds |
+ | `DEFAULT_SLEEP_BETWEEN_RETRIES`| 1000 ms |
+
+## Releasing locks
+
+```java
+boolean released = lockManager.releaseLock(lock);
+```
+
+- Returns `true` if the lock was held and successfully released.
+- Returns `false` if the lock was not held by this instance (i.e. `acquiredStatus` was already `false`).
+
+!!! warning
+ Always release in a `finally` block to avoid lock leaks.
+
+## Error handling
+
+All lock operations throw `DLMException`. Use `getErrorCode()` to distinguish failure reasons:
+
+```java
+Lock lock = lockManager.getLockInstance("order-123", LockLevel.DC);
+try {
+ lockManager.tryAcquireLock(lock, Duration.ofSeconds(60));
+ // critical section
+} catch (DLMException e) {
+ switch (e.getErrorCode()) {
+ case LOCK_UNAVAILABLE -> log.warn("Lock held by another holder");
+ case CONNECTION_ERROR -> log.error("Storage backend unreachable", e);
+ case RETRIES_EXHAUSTED -> log.error("All retry attempts failed", e);
+ default -> log.error("Unexpected error", e);
+ }
+} finally {
+ lockManager.releaseLock(lock);
+}
+```
+
+See [Error Codes](locking.md#error-codes) for the full list.
+
+## Complete lifecycle example
+
+```java
+// ── Setup (application startup) ──
+DistributedLockManager lockManager = DistributedLockManager.builder()
+ .clientId("payment-service")
+ .farmId("dc1")
+ .lockBase(LockBase.builder()
+ .mode(LockMode.EXCLUSIVE)
+ .lockStore(AerospikeStore.builder()
+ .aerospikeClient(aerospikeClient)
+ .namespace("locks")
+ .setSuffix("distributed_lock")
+ .build())
+ .build())
+ .build();
+lockManager.initialize();
+
+// ── Use (request handling) ──
+Lock lock = lockManager.getLockInstance("txn-456", LockLevel.DC);
+try {
+ lockManager.acquireLock(lock, Duration.ofSeconds(30), Duration.ofSeconds(10));
+ processPayment("txn-456");
+} catch (DLMException e) {
+ if (e.getErrorCode() == ErrorCode.LOCK_UNAVAILABLE) {
+ // another instance is processing this transaction
+ }
+} finally {
+ lockManager.releaseLock(lock);
+}
+
+// ── Teardown (application shutdown) ──
+lockManager.destroy();
+```
+
+## Cleanup
+
+When the application shuts down, call `destroy()` to close the underlying store connection and release resources:
+
+```java
+lockManager.destroy();
+```
+
+- For **Aerospike**, this calls `aerospikeClient.close()`.
+- For **HBase**, this calls `connection.close()`.
+
+!!! danger
+ Failing to call `destroy()` may leave dangling connections to the storage backend.
+ Invoke it in a shutdown hook or your framework's lifecycle callback.
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..1af7a7a
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,5 @@
+zensical  # site generator; the docs workflow runs `zensical build --clean`
+markdown-include  # NOTE(review): no matching extension is enabled in zensical.toml - confirm this is still needed
+pymdown-extensions  # provides the pymdownx.* extensions enabled in zensical.toml
+mkdocs-awesome-pages-plugin  # presumably backs [project.plugins.awesome-pages] - confirm
+mkdocs-glightbox  # presumably backs [project.plugins.glightbox] - confirm
diff --git a/docs/zensical.toml b/docs/zensical.toml
new file mode 100644
index 0000000..8b2ba04
--- /dev/null
+++ b/docs/zensical.toml
@@ -0,0 +1,85 @@
+[project]
+site_name = "Distributed Lock Manager (DLM)"
+site_url = "https://phonepe.github.io/DLM"  # NOTE(review): presumably the GitHub Pages URL the docs workflow deploys to - confirm
+repo_url = "https://github.com/PhonePe/DLM"
+copyright = "Copyright © 2024-2026 PhonePe India Pvt. Ltd."
+
+nav = [  # navigation tree: each inline table maps a page title to its Markdown file
+ {"Home" = "index.md"},
+ {"Getting Started" = "getting-started.md"},
+ {"Usage" = "usage.md"},
+ {"Locking Semantics" = "locking.md"},
+ {"Storage Backends" = [  # title mapped to a nested list of title -> file entries
+ {"Aerospike" = "storages/aerospike.md"},
+ {"HBase" = "storages/hbase.md"}
+ ]}
+]
+
+[project.theme]
+features = [  # opt-in theme feature flags, one string per feature (navigation, toc, code blocks, search)
+ "navigation.tracking",
+ "navigation.path",
+ "navigation.tabs",
+ "navigation.tabs.sticky",
+ "navigation.expand",
+ "navigation.top",
+ "toc.follow",
+ "content.code.copy",
+ "content.code.annotate",
+ "search.suggest",
+ "search.highlight",
+ "search.share"
+]
+
+[[project.theme.palette]]  # automatic mode; order matters: each toggle switches to the NEXT palette entry
+media = "(prefers-color-scheme)"
+
+[project.theme.palette.toggle]
+icon = "material/brightness-auto"
+name = "Switch to light mode"
+
+[[project.theme.palette]]  # light mode
+media = "(prefers-color-scheme: light)"
+scheme = "default"
+
+[project.theme.palette.toggle]
+icon = "material/brightness-7"
+name = "Switch to dark mode"
+
+[[project.theme.palette]]  # dark mode; its toggle wraps around to the first (automatic) entry
+media = "(prefers-color-scheme: dark)"
+scheme = "slate"
+
+[project.theme.palette.toggle]
+icon = "material/brightness-4"
+name = "Switch to system preference"
+
+[project.plugins.glightbox]  # no options set; NOTE(review): presumably backed by mkdocs-glightbox - confirm Zensical loads MkDocs plugins
+
+[project.plugins.awesome-pages]  # no options set; NOTE(review): presumably backed by mkdocs-awesome-pages-plugin - confirm it is honoured alongside the explicit nav
+
+[project.markdown_extensions.toc]
+permalink = true
+
+[project.markdown_extensions.attr_list]
+
+[project.markdown_extensions.md_in_html]
+
+[project.markdown_extensions.admonition]
+
+[project.markdown_extensions.tables]
+
+[project.markdown_extensions."pymdownx.details"]  # pymdownx.* names are quoted so the dot stays inside a single TOML key
+
+[project.markdown_extensions."pymdownx.superfences"]
+custom_fences = [
+ { name = "mermaid", class = "mermaid", format = "pymdownx.superfences.fence_code_format" }  # fences named "mermaid" are formatted by fence_code_format with class "mermaid"
+]
+
+[project.markdown_extensions."pymdownx.tabbed"]
+alternate_style = true
+
+[project.markdown_extensions."pymdownx.emoji"]
+emoji_index = "zensical.extensions.emoji.twemoji"
+emoji_generator = "zensical.extensions.emoji.to_svg"
+