TinyKVS

A low-memory, sorted key-value store for Go built on LSM-tree architecture with configurable compression (zstd, snappy, or none).

Features

Sorted storage - Lexicographic key ordering, efficient range scans
Ultra-low memory - Runs 1B+ records on t4g.micro (1GB RAM) with swap
Configurable memory - Block cache, memtable size, bloom filters all tunable
Concurrent access - Concurrent reads and writes, optimized for read-heavy workloads
Durability - Write-ahead log with configurable sync modes
Compression - zstd (default), snappy, or none with configurable levels
Bloom filters - Fast negative lookups (can be disabled to save memory)

Installation

go get github.com/freeeve/tinykvs

Quick Start

package main

import (
    "fmt"
    "log"

    "github.com/freeeve/tinykvs"
)

func main() {
    // Open a store
    store, err := tinykvs.Open("/tmp/mydb", tinykvs.DefaultOptions("/tmp/mydb"))
    if err != nil {
        log.Fatal(err)
    }
    defer store.Close()

    // Write values
    store.PutString([]byte("name"), "Alice")
    store.PutInt64([]byte("age"), 30)
    store.PutFloat64([]byte("score"), 95.5)
    store.PutBool([]byte("active"), true)

    // Read values
    name, _ := store.GetString([]byte("name"))
    age, _ := store.GetInt64([]byte("age"))

    fmt.Printf("Name: %s, Age: %d\n", name, age)

    // Flush to disk
    store.Flush()
}

API

Store Operations

// Open or create a store
func Open(path string, opts Options) (*Store, error)

// Close the store
func (s *Store) Close() error

// Flush all data to disk
func (s *Store) Flush() error

Read/Write

// Generic value operations
func (s *Store) Put(key []byte, value Value) error
func (s *Store) Get(key []byte) (Value, error)
func (s *Store) Delete(key []byte) error

// Typed convenience methods
func (s *Store) PutString(key []byte, value string) error
func (s *Store) PutInt64(key []byte, value int64) error
func (s *Store) PutFloat64(key []byte, value float64) error
func (s *Store) PutBool(key []byte, value bool) error
func (s *Store) PutBytes(key []byte, value []byte) error

func (s *Store) GetString(key []byte) (string, error)
func (s *Store) GetInt64(key []byte) (int64, error)
func (s *Store) GetFloat64(key []byte) (float64, error)
func (s *Store) GetBool(key []byte) (bool, error)
func (s *Store) GetBytes(key []byte) ([]byte, error)

// Struct and map storage (uses msgpack internally)
func (s *Store) PutStruct(key []byte, v any) error
func (s *Store) GetStruct(key []byte, dest any) error
func (s *Store) PutMap(key []byte, fields map[string]any) error
func (s *Store) GetMap(key []byte) (map[string]any, error)

// JSON storage (stores as string, queryable in shell)
func (s *Store) PutJson(key []byte, v any) error
func (s *Store) GetJson(key []byte, dest any) error

Batch Operations

// Create a batch for atomic writes
batch := tinykvs.NewBatch()
batch.Put(key, value)
batch.PutString(key, "value")
batch.PutInt64(key, 42)
batch.PutStruct(key, myStruct)
batch.PutMap(key, map[string]any{"field": "value"})
batch.Delete(key)

// Apply atomically
store.WriteBatch(batch)

Range Scans

// Iterate over all keys with a given prefix (sorted order)
// Return false from callback to stop iteration
func (s *Store) ScanPrefix(prefix []byte, fn func(key []byte, value Value) bool) error

Value Types

type Value struct {
    Type    ValueType
    Int64   int64
    Float64 float64
    Bool    bool
    Bytes   []byte
    Record  map[string]any  // For struct/map storage
}

// Value constructors
func Int64Value(v int64) Value
func Float64Value(v float64) Value
func BoolValue(v bool) Value
func StringValue(v string) Value
func BytesValue(v []byte) Value
func RecordValue(v map[string]any) Value

Configuration

type Options struct {
    Dir              string          // Data directory
    MemtableSize     int64           // Max memtable size before flush (default: 4MB)
    BlockCacheSize   int64           // LRU cache size (default: 64MB, 0 to disable)
    BlockSize        int             // Target block size (default: 16KB)
    CompressionType  CompressionType // zstd, snappy, or none (default: zstd)
    CompressionLevel int             // zstd level 1-4 (default: 1 = fastest)
    BloomFPRate      float64         // Bloom filter false positive rate (default: 0.01)
    WALSyncMode      WALSyncMode     // WAL sync behavior
    VerifyChecksums  bool            // Verify on read (default: true)
}

// Compression types
const (
    CompressionZstd   // Default, good compression and speed
    CompressionSnappy // Faster, less compression
    CompressionNone   // No compression
)

// Preset configurations
func DefaultOptions(dir string) Options          // Balanced defaults
func LowMemoryOptions(dir string) Options        // Minimal memory (4MB memtable, no cache, no bloom)
func HighPerformanceOptions(dir string) Options  // Max throughput

Architecture

┌─────────────────────────────────────────────────────────┐
│                        Store                            │
├─────────────────────────────────────────────────────────┤
│  Write Path                    Read Path                │
│  ┌─────────┐                   ┌─────────────────────┐  │
│  │   WAL   │                   │ Memtable (newest)   │  │
│  └────┬────┘                   ├─────────────────────┤  │
│       │                        │ Immutable Memtables │  │
│       v                        ├─────────────────────┤  │
│  ┌─────────┐                   │ L0 SSTables         │  │
│  │Memtable │                   ├─────────────────────┤  │
│  └────┬────┘                   │ L1+ SSTables        │  │
│       │ flush                  └─────────┬───────────┘  │
│       v                                  │              │
│  ┌─────────┐    ┌───────────┐            │              │
│  │ SSTable │◄───│ LRU Cache │◄───────────┘              │
│  └─────────┘    └───────────┘                           │
│       │                                                 │
│       v compaction                                      │
│  ┌─────────────────────────────────────────────────┐    │
│  │ L0 → L1 → L2 → ... → L6 (leveled compaction)    │    │
│  └─────────────────────────────────────────────────┘    │
└─────────────────────────────────────────────────────────┘

Key Design Decisions

Component	Choice	Rationale
Compression	zstd/snappy/none	Configurable speed vs size tradeoff
I/O	Explicit syscalls	Control over caching
Index	Sparse (per block)	Low memory footprint
Compaction	Leveled	Read-optimized
Concurrency	RWMutex	Simple, read-optimized
L1+ Scans	Lazy loading	Only load tables when needed for LIMIT queries

SSTable Format

┌────────────────────────────────────┐
│ Data Block 0 (compressed)          │
│ Data Block 1                       │
│ ...                                │
│ Data Block N                       │
├────────────────────────────────────┤
│ Bloom Filter                       │
├────────────────────────────────────┤
│ Index Block (sparse)               │
├────────────────────────────────────┤
│ Metadata Block                     │
├────────────────────────────────────┤
│ Footer (64 bytes)                  │
└────────────────────────────────────┘

Version Compatibility

Store files are not compatible between minor versions (e.g., v0.3.x stores cannot be read by v0.4.x). If upgrading, export your data first or recreate the store.

Memory Usage

Component	Memory
Block cache	Configurable (default 64MB)
Memtable	Configurable (default 4MB)
Bloom filters	~1.2MB per 1M keys
Sparse index	~140KB per 1M keys (with 16KB blocks)

For minimal memory (billions of records), use LowMemoryOptions():

4MB memtable
No block cache
No bloom filters
Index: ~140MB for 1B keys

Performance

Apple M3 Max

Operation	Latency	Throughput
Sequential read	304 ns	3.3M ops/sec
Sequential write	465 ns	2.2M ops/sec
Mixed (80% read)	392 ns	2.6M ops/sec
SSTable read (cached)	300 ns	3.3M ops/sec

Block cache impact (random reads, 100K keys):

Cache	Latency	Hit Rate
0 MB	42 µs	0%
64 MB	300 ns	99.9%

AWS t4g.micro (1GB RAM, ARM64)

1 billion record benchmark with GOMEMLIMIT=700MiB:

zstd compression (default), 100M records

Operation	Throughput
Sequential write	579K ops/sec
Random read (no cache)	16K ops/sec
Random read (64MB cache)	16K ops/sec
Full scan	1.4M keys/sec
Random prefix scan	15K scans/sec
Prefix scan with LIMIT 100	7K scans/sec

Prefix scans with LIMIT benefit from lazy loading: L1+ tables are sorted and non-overlapping, so only the tables actually needed are loaded.

Write time: ~3 min for 100M records, ~1.5h for 1B records

Memory usage during benchmark:

Heap: 50-200 MB
Sys: 450-700 MB
Index: ~35 MB (for 1B records)

Configuration:

4MB memtable
16KB block size
No block cache
No bloom filters
WAL sync disabled (for throughput)

Complexity

Writes: O(log n) memtable insert, sequential I/O for WAL
Reads: O(L × log n) where L is number of levels (max 7), bloom filters skip levels without matches
Space: Varies by data - sequential keys compress to ~0.1x with zstd, random data ~0.5-0.8x

Examples

Persistence and Recovery

// Data persists across restarts
store, _ := tinykvs.Open("/tmp/mydb", tinykvs.DefaultOptions("/tmp/mydb"))
store.PutString([]byte("key"), "value")
store.Flush() // Ensure durability
store.Close()

// Reopen - data is still there
store, _ = tinykvs.Open("/tmp/mydb", tinykvs.DefaultOptions("/tmp/mydb"))
val, _ := store.GetString([]byte("key"))
fmt.Println(val) // "value"

Low Memory Configuration

opts := tinykvs.LowMemoryOptions("/tmp/mydb")
opts.MemtableSize = 512 * 1024 // 512KB
store, _ := tinykvs.Open("/tmp/mydb", opts)

Low Memory (Billions of Records)

For running on memory-constrained systems like t4g.micro (1GB RAM) with billions of records:

// Use LowMemoryOptions: 4MB memtable, no cache, no bloom filters
opts := tinykvs.LowMemoryOptions("/data/mydb")
store, _ := tinykvs.Open("/data/mydb", opts)

// Combined with GOMEMLIMIT for Go runtime memory control:
// GOMEMLIMIT=600MiB ./myapp

This configuration can handle 1B+ records while staying within tight memory limits.

Prefix Scanning

// Store user data with prefixed keys
store.PutString([]byte("user:001:name"), "Alice")
store.PutInt64([]byte("user:001:age"), 30)
store.PutString([]byte("user:002:name"), "Bob")
store.PutInt64([]byte("user:002:age"), 25)

// Scan all keys for user:001
store.ScanPrefix([]byte("user:001:"), func(key []byte, value tinykvs.Value) bool {
    fmt.Printf("%s = %v\n", key, value)
    return true // continue scanning
})

// Scan all users (returns keys in sorted order)
store.ScanPrefix([]byte("user:"), func(key []byte, value tinykvs.Value) bool {
    fmt.Printf("%s\n", key)
    return true
})

Statistics

stats := store.Stats()
fmt.Printf("Memtable: %d bytes, %d keys\n", stats.MemtableSize, stats.MemtableCount)
fmt.Printf("Cache hit rate: %.1f%%\n", stats.CacheStats.HitRate())
for _, level := range stats.Levels {
    fmt.Printf("L%d: %d tables, %d keys\n", level.Level, level.NumTables, level.NumKeys)
}

Storing Structs and Maps

TinyKVS has built-in support for storing Go structs and maps using msgpack serialization:

type Address struct {
    City    string `msgpack:"city"`
    Country string `msgpack:"country"`
}

type User struct {
    Name    string  `msgpack:"name"`
    Email   string  `msgpack:"email"`
    Age     int     `msgpack:"age"`
    Address Address `msgpack:"address"`
}

// Store a struct
user := User{
    Name:    "Alice",
    Email:   "[email protected]",
    Age:     30,
    Address: Address{City: "NYC", Country: "USA"},
}
store.PutStruct([]byte("user:1"), user)

// Retrieve into a struct
var retrieved User
store.GetStruct([]byte("user:1"), &retrieved)

// Store a map directly
store.PutMap([]byte("config:app"), map[string]any{
    "debug":   true,
    "timeout": 30,
})

// Retrieve as map
config, _ := store.GetMap([]byte("config:app"))

Nested structs are fully supported and can be queried in the interactive shell.

JSON Storage

For human-readable storage or shell querying:

// Store as JSON string
store.PutJson([]byte("user:2"), User{Name: "Bob", Age: 25})

// Retrieve from JSON
var user User
store.GetJson([]byte("user:2"), &user)

Manual Serialization

For other formats (Gob, Protobuf, etc.), serialize to bytes:

// Gob
var buf bytes.Buffer
gob.NewEncoder(&buf).Encode(user)
store.PutBytes([]byte("user:1"), buf.Bytes())

// Protobuf
data, _ := proto.Marshal(user)
store.PutBytes([]byte("user:1"), data)

Interactive Shell

TinyKVS includes an interactive SQL-like shell for exploring and manipulating data:

go install github.com/freeeve/tinykvs/cmd/tinykvs@latest
tinykvs shell -dir /path/to/db

# Or use environment variable
export TINYKVS_STORE=/path/to/db
tinykvs shell

Results are displayed in a formatted table:

┌────────┬───────────────────────────┐
│ k      │ v                         │
├────────┼───────────────────────────┤
│ user:1 │ {"age":30,"name":"Alice"} │
│ user:2 │ {"age":25,"name":"Bob"}   │
└────────┴───────────────────────────┘
(2 rows) scanned 2 keys, 0 blocks, 0ms

SQL Commands

-- Query data
SELECT * FROM kv WHERE k = 'user:1'
SELECT * FROM kv WHERE k LIKE 'user:%'
SELECT * FROM kv WHERE k BETWEEN 'a' AND 'z' LIMIT 10
SELECT * FROM kv LIMIT 100

-- Extract record fields
SELECT v.name, v.age FROM kv WHERE k = 'user:1'
SELECT v.address.city FROM kv WHERE k = 'user:1'

-- ORDER BY (buffers results for sorting)
SELECT * FROM kv ORDER BY k DESC LIMIT 10
SELECT v.name, v.age FROM kv ORDER BY v.age DESC, v.name
SELECT * FROM kv WHERE k LIKE 'user:%' ORDER BY v.score LIMIT 100

-- Insert data (JSON auto-detected as records)
INSERT INTO kv VALUES ('user:1', '{"name":"Alice","age":30}')
INSERT INTO kv VALUES ('key', 'simple string value')
INSERT INTO kv VALUES ('bin', x'deadbeef')  -- hex bytes

-- Update and delete
UPDATE kv SET v = 'newvalue' WHERE k = 'key'
DELETE FROM kv WHERE k = 'key'
DELETE FROM kv WHERE k LIKE 'temp:%'

Shell Commands

\help, \h, \?      Show help
\stats             Show store statistics
\flush             Flush memtable to disk
\compact           Run compaction
\tables            Show table schema
\export <file>     Export to CSV
\import <file>     Import from CSV
\q, \quit          Exit shell

Binary Key Functions

The shell supports functions for constructing binary keys:

-- uint64_be(n) - 8-byte big-endian encoding
SELECT * FROM kv WHERE k = x'14' || uint64_be(28708)

-- uint64_le(n) - 8-byte little-endian encoding
-- uint32_be(n) - 4-byte big-endian encoding
-- uint32_le(n) - 4-byte little-endian encoding

-- byte(n) - single byte (0-255)
SELECT * FROM kv WHERE k = byte(0x14) || uint64_be(12345)

-- fnv64(s) - FNV-1a 64-bit hash of string
SELECT * FROM kv WHERE k LIKE byte(0x10) || fnv64('user-123') || '%'

-- Hex concatenation
SELECT * FROM kv WHERE k = x'14' || uint64_be(28708) || fnv64('item-456')

These are useful for querying data with composite binary keys.

CSV Import/Export

Export creates a simple key,value CSV:

key,value
user:1,{"name":"Alice","age":30}
counter,42
flag,true

Import auto-detects the format:

2 columns (key,value) - values auto-detect type:

key,value
user:1,hello
user:2,42
user:3,{"name":"Bob"}

3+ columns - first column is key, rest become record fields:

id,name,age,active
user:1,Alice,30,true
user:2,Bob,25,false

This creates records like {"name":"Alice","age":30,"active":true}

Type hints - prevent unwanted auto-detection (e.g., zip codes):

id,zip:string,count:int,price:float,active:bool,data:json
item:1,02134,100,19.99,true,{"x":1}

Supported hints: string, int, float, bool, json

Nested Field Access

Records with nested structures support dot notation for field access:

-- Given: {"name":"Alice","address":{"city":"NYC","geo":{"lat":40.7}}}

SELECT v.name FROM kv WHERE k = 'user:1'           -- Alice
SELECT v.address.city FROM kv WHERE k = 'user:1'   -- NYC
SELECT v.`address.geo.lat` FROM kv WHERE k = 'user:1'  -- 40.7 (3+ levels need backticks)

Streaming Aggregations

Aggregation functions compute results in a single pass with O(1) memory:

SELECT count() FROM kv                              -- count all rows
SELECT count() FROM kv WHERE k LIKE 'user:%'        -- count with filter
SELECT sum(v.age), avg(v.age) FROM kv               -- sum and average
SELECT min(v.score), max(v.score) FROM kv           -- min and max
SELECT count(), sum(v.price), avg(v.price) FROM kv  -- multiple aggregates
SELECT sum(v.stats.count) FROM kv                   -- nested fields work too

License

MIT

Name		Name	Last commit message	Last commit date
Latest commit History 118 Commits
.github/workflows		.github/workflows
cmd		cmd
scripts		scripts
testdata/fuzz		testdata/fuzz
.gitignore		.gitignore
BENCHMARKS.md		BENCHMARKS.md
LICENSE		LICENSE
README.md		README.md
batch.go		batch.go
batch_parallel.go		batch_parallel.go
batch_parallel_test.go		batch_parallel_test.go
batch_test.go		batch_test.go
benchmark_test.go		benchmark_test.go
block.go		block.go
block_test.go		block_test.go
build.sh		build.sh
cache.go		cache.go
cache_test.go		cache_test.go
compaction.go		compaction.go
compaction_edge_test.go		compaction_edge_test.go
compaction_levels_test.go		compaction_levels_test.go
compaction_test.go		compaction_test.go
convenience.go		convenience.go
convenience_test.go		convenience_test.go
flush_bench_test.go		flush_bench_test.go
fuzz_test.go		fuzz_test.go
go.mod		go.mod
go.sum		go.sum
index.go		index.go
index_test.go		index_test.go
lock_unix.go		lock_unix.go
lock_windows.go		lock_windows.go
manifest.go		manifest.go
memtable.go		memtable.go
memtable_test.go		memtable_test.go
merge.go		merge.go
merge_test.go		merge_test.go
msgpack_bench_test.go		msgpack_bench_test.go
options.go		options.go
query.go		query.go
query_test.go		query_test.go
reader.go		reader.go
reader_range_test.go		reader_range_test.go
reader_scan_test.go		reader_scan_test.go
reader_test.go		reader_test.go
sonar-project.properties		sonar-project.properties
sstable.go		sstable.go
sstable_test.go		sstable_test.go
store.go		store.go
store_edge_test.go		store_edge_test.go
store_json_test.go		store_json_test.go
store_levels_test.go		store_levels_test.go
store_test.go		store_test.go
types.go		types.go
types_test.go		types_test.go
wal.go		wal.go
wal_test.go		wal_test.go
writer.go		writer.go
writer_crash_test.go		writer_crash_test.go
writer_edge_test.go		writer_edge_test.go
writer_recovery_test.go		writer_recovery_test.go
writer_scan_test.go		writer_scan_test.go

License

freeeve/tinykvs

Folders and files

Latest commit

History

Repository files navigation

TinyKVS

Features

Installation

Quick Start

API

Store Operations

Read/Write

Batch Operations

Range Scans

Value Types

Configuration

Architecture

Key Design Decisions

SSTable Format

Version Compatibility

Memory Usage

Performance

Apple M3 Max

AWS t4g.micro (1GB RAM, ARM64)

Complexity

Examples

Persistence and Recovery

Low Memory Configuration

Low Memory (Billions of Records)

Prefix Scanning

Statistics

Storing Structs and Maps

JSON Storage

Manual Serialization

Interactive Shell

SQL Commands

Shell Commands

Binary Key Functions

CSV Import/Export

Nested Field Access

Streaming Aggregations

License

About

Resources

License

Uh oh!

Stars

Watchers

Forks

Releases 9

Packages 0

Contributors 2

Uh oh!

Languages

Packages