diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index e5dacf7..2896cca 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -14,7 +14,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - go-version: [1.21.x,1.22.x,1.23.x] + go-version: [1.23.x,1.24.x,1.25.x] os: [ubuntu-latest] steps: - name: Set up Go ${{ matrix.go-version }} on ${{ matrix.os }} @@ -30,6 +30,16 @@ jobs: env: GO111MODULE: on run: | - curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.60.1 - $(go env GOPATH)/bin/golangci-lint run --timeout=5m --config ./.golangci.yml go test -race ./... + golangci: + name: lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v6 + with: + go-version: stable + - name: golangci-lint + uses: golangci/golangci-lint-action@v8 + with: + version: v2.1.6 diff --git a/.github/workflows/vulncheck.yml b/.github/workflows/vulncheck.yml index ea7d308..2ebc56d 100644 --- a/.github/workflows/vulncheck.yml +++ b/.github/workflows/vulncheck.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - go-version: [ 1.22.x ] + go-version: [ 1.25.x ] steps: - name: Check out code into the Go module directory uses: actions/checkout@v3 diff --git a/.golangci.yml b/.golangci.yml index 99142fd..b9970d6 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,27 +1,40 @@ -linters-settings: - staticcheck: - checks: - - all - - '-SA6002' # disable the rule SA6002, slices are fine. 
- misspell: - locale: US - +version: "2" linters: - disable-all: true + default: none enable: - - typecheck - - goimports - - misspell - govet - ineffassign - - gosimple + - misspell - staticcheck - unused - -issues: - exclude-use-default: false - exclude: - - should have a package comment - - comment on exported method - - should have comment or be unexported - - error strings should not be capitalized or end with punctuation or a newline + settings: + misspell: + locale: US + staticcheck: + checks: + - all + - -SA6002 + exclusions: + generated: lax + rules: + - path: (.+)\.go$ + text: should have a package comment + - path: (.+)\.go$ + text: comment on exported method + - path: (.+)\.go$ + text: should have comment or be unexported + - path: (.+)\.go$ + text: error strings should not be capitalized or end with punctuation or a newline + paths: + - third_party$ + - builtin$ + - examples$ +formatters: + enable: + - goimports + exclusions: + generated: lax + paths: + - third_party$ + - builtin$ + - examples$ diff --git a/README.md b/README.md index c2d9f24..279d7a0 100644 --- a/README.md +++ b/README.md @@ -81,17 +81,123 @@ Similar to [stdlib zip](https://golang.org/pkg/archive/zip/), not all methods/fl For expert users, `(*File).OpenRaw` allows access to the compressed data. + ## Compression Methods By default, zipindex keeps files stored uncompressed or deflate compressed. This covers the most commonly seen compression methods. -Furthermore, files compressed with [zstandard](https://facebook.github.io/zstd/) +Furthermore, files compressed with [zstandard](https://facebook.github.io/zstd/) as method 93 will be preserved and can be read back. -Use [`RegisterDecompressor`](https://pkg.go.dev/github.com/minio/zipindex#RegisterDecompressor) +Use [`RegisterDecompressor`](https://pkg.go.dev/github.com/minio/zipindex#RegisterDecompressor) to register non-standard decompressors. 
+# Layered Indexes + +The `LayeredIndex[T]` type provides a way to combine multiple zip indexes into a single searchable entity. This is useful when you need to overlay multiple archives or apply incremental updates without rebuilding the entire index. + +## Key Features + +- **Generic type parameter**: Each layer is associated with a comparable reference type `T` (e.g., version number, timestamp, file path) +- **Override semantics**: Files in newer layers override files with the same path in older layers +- **Delete layers**: Special layers that remove files from previous layers +- **Efficient lookups**: Find files across all layers with proper precedence + +## Basic Usage + +```go +// Create a new layered index with string references +layered := zipindex.NewLayeredIndex[string]() + +// Add base layer +baseFiles, _ := zipindex.ReadFile("base.zip") +err := layered.AddLayer(baseFiles, "v1.0") + +// Add update layer (overrides files from base) +updateFiles, _ := zipindex.ReadFile("update.zip") +err = layered.AddLayer(updateFiles, "v1.1") + +// Add a delete layer (removes specified files) +deleteFiles := zipindex.Files{{Name: "obsolete.txt"}} +err = layered.AddDeleteLayer(deleteFiles, "cleanup") + +// Find a file across all layers +file, found := layered.Find("readme.txt") +if found { + // file.File contains the file info + // file.LayerRef contains the layer reference (e.g., "v1.1") +} + +// Iterate over all files +for ref, file := range layered.FilesIter() { + fmt.Printf("File %s from layer %v\n", file.Name, ref) +} + +// Merge all layers into a single index, this will lose the reference information +merged := layered.ToSingleIndex() +serialized, _ := merged.Serialize() +``` + +## API Reference + +### Creation and Layer Management +- `NewLayeredIndex[T]()` - Create a new empty layered index +- `AddLayer(files, ref)` - Add a layer (returns error if ref already exists) +- `AddDeleteLayer(files, ref)` - Add a delete layer to remove files from previous layers +- 
`RemoveLayer(index)` - Remove layer by index
+- `RemoveLayerByRef(ref)` - Remove all layers with the given reference; returns the number of layers removed
+- `Clear()` - Remove all layers
+
+### File Access
+- `Find(name)` - Find a file across all layers, returns `(*FileWithRef[T], bool)`
+- `FindInLayer(name, ref)` - Find a file in a specific layer only
+- `FilesIter()` - Iterator over the merged result that yields `(T, File)` pairs (layer reference and file) in name order
+- `Files()` - Get all files as `[]FileWithRef[T]` after applying layer operations
+- `HasFile(name)` - Check if a file exists
+
+### Layer Information
+- `LayerCount()` - Number of layers
+- `GetLayerRef(index)` - Get reference for a layer
+- `FileCount()` - Total unique files after applying operations
+- `IsEmpty()` - True if no files remain after applying all operations
+
+### Conversion
+- `ToSingleIndex()` - Merge all layers into a single `Files` collection
+
+### Serialization
+- `SerializeLayered(RefSerializer[T])` - Serialize the layered index preserving all layers
+- `DeserializeLayered[T](data, RefSerializer[T])` - Reconstruct a layered index from serialized data
+
+Serialization requires a `RefSerializer[T]` with functions that convert your reference type to and from bytes:
+
+```go
+// Example for string references
+stringSerializer := zipindex.RefSerializer[string]{
+	Marshal: func(s string) ([]byte, error) {
+		return []byte(s), nil
+	},
+	Unmarshal: func(b []byte) (string, error) {
+		return string(b), nil
+	},
+}
+
+// Serialize
+data, err := layered.SerializeLayered(stringSerializer)
+
+// Deserialize
+layered2, err := zipindex.DeserializeLayered(data, stringSerializer)
+```
+
+### Important Notes
+
+1. **Deletion semantics**: Delete layers only remove files that exist in *previous* layers. Files added in subsequent layers are not affected.
+
+2. **Directory handling**: When a file is deleted, empty parent directories are automatically removed. A directory is kept if it still contains any files.
+
+3. **Duplicate references**: The same reference cannot be used twice.
Attempting to add a layer with an existing reference returns an error. + +4. **Performance**: The layered index maintains files in memory. For large numbers of layers or files, consider merging to a single index periodically. ## License diff --git a/file_gen.go b/file_gen.go index ceb3b50..bc852ee 100644 --- a/file_gen.go +++ b/file_gen.go @@ -1,7 +1,7 @@ -package zipindex - // Code generated by github.com/tinylib/msgp DO NOT EDIT. +package zipindex + import ( "github.com/tinylib/msgp/msgp" ) @@ -62,21 +62,19 @@ func (z *File) DecodeMsg(dc *msgp.Reader) (err error) { if z.Custom == nil { z.Custom = make(map[string]string, zb0002) } else if len(z.Custom) > 0 { - for key := range z.Custom { - delete(z.Custom, key) - } + clear(z.Custom) } var field []byte _ = field for zb0002 > 0 { zb0002-- var za0001 string - var za0002 string za0001, err = dc.ReadString() if err != nil { err = msgp.WrapError(err, "Custom") return } + var za0002 string za0002, err = dc.ReadString() if err != nil { err = msgp.WrapError(err, "Custom", za0001) diff --git a/file_gen_test.go b/file_gen_test.go index 642bd10..adbe40d 100644 --- a/file_gen_test.go +++ b/file_gen_test.go @@ -1,7 +1,7 @@ -package zipindex - // Code generated by github.com/tinylib/msgp DO NOT EDIT. 
+package zipindex + import ( "bytes" "testing" diff --git a/go.mod b/go.mod index 8795fc2..734f36b 100644 --- a/go.mod +++ b/go.mod @@ -1,10 +1,10 @@ module github.com/minio/zipindex -go 1.20 +go 1.23 require ( - github.com/klauspost/compress v1.17.9 - github.com/tinylib/msgp v1.2.0 + github.com/klauspost/compress v1.18.0 + github.com/tinylib/msgp v1.4.0 ) -require github.com/philhofer/fwd v1.1.3-0.20240612014219-fbbf4953d986 // indirect +require github.com/philhofer/fwd v1.2.0 // indirect diff --git a/go.sum b/go.sum index cd21f91..092e07d 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,12 @@ github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/philhofer/fwd v1.1.3-0.20240612014219-fbbf4953d986 h1:jYi87L8j62qkXzaYHAQAhEapgukhenIMZRBKTNRLHJ4= github.com/philhofer/fwd v1.1.3-0.20240612014219-fbbf4953d986/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= +github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM= +github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= github.com/tinylib/msgp v1.2.0 h1:0uKB/662twsVBpYUPbokj4sTSKhWFKB7LopO2kWK8lY= github.com/tinylib/msgp v1.2.0/go.mod h1:2vIGs3lcUo8izAATNobrCHevYZC/LMsJtw4JPiYPHro= +github.com/tinylib/msgp v1.4.0 h1:SYOeDRiydzOw9kSiwdYp9UcBgPFtLU2WDHaJXyHruf8= +github.com/tinylib/msgp v1.4.0/go.mod h1:cvjFkb4RiC8qSBOPMGPSzSAx47nAsfhLVTCZZNuHv5o= diff --git a/layered.go b/layered.go new file mode 100644 index 0000000..49bafaa --- /dev/null +++ b/layered.go @@ -0,0 +1,580 @@ +/* + * zipindex, (C)2025 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package zipindex
+
+import (
+	"fmt"
+	"iter"
+	"sort"
+	"strings"
+	"sync"
+
+	"github.com/tinylib/msgp/msgp"
+)
+
+// LayeredIndex represents multiple indexes layered on top of each other.
+// Files from newer layers override files from older layers with the same path.
+// Layers are kept in the order they were added. The type does no locking of
+// its own.
+type LayeredIndex[T comparable] struct {
+	layers []layer[T]
+}
+
+// layer represents a single index layer with metadata.
+type layer[T comparable] struct {
+	index    Files
+	ref      T
+	isDelete bool // If true, files in this layer are deleted from the result
+}
+
+// NewLayeredIndex creates a new empty layered index.
+func NewLayeredIndex[T comparable]() *LayeredIndex[T] {
+	return &LayeredIndex[T]{
+		layers: make([]layer[T], 0),
+	}
+}
+
+// AddLayer adds a new index layer with the given reference.
+// Files in this layer will override files with the same path in previous layers.
+// Returns an error if a layer with the same reference already exists.
+// Files are sorted by name for efficient lookups; SortByName is called on the
+// slice that was passed in and the layer keeps that slice without copying it.
+func (l *LayeredIndex[T]) AddLayer(index Files, ref T) error {
+	return l.addLayer(index, ref, false)
+}
+
+// AddDeleteLayer adds a deletion layer with the given reference.
+// Files in this layer will be removed from the final result.
+// Returns an error if a layer with the same reference already exists.
+// Files are sorted by name for efficient lookups; SortByName is called on the
+// slice that was passed in and the layer keeps that slice without copying it.
+func (l *LayeredIndex[T]) AddDeleteLayer(index Files, ref T) error {
+	return l.addLayer(index, ref, true)
+}
+
+// addLayer is the shared implementation of AddLayer and AddDeleteLayer.
+// It rejects a reference that is already present, sorts index by name and
+// appends it as the newest layer.
+func (l *LayeredIndex[T]) addLayer(index Files, ref T, isDelete bool) error {
+	// References must be unique across regular and delete layers alike.
+	for i := range l.layers {
+		if l.layers[i].ref == ref {
+			return fmt.Errorf("layer with reference %v already exists", ref)
+		}
+	}
+	// Lookups use binary search, so the layer must be ordered by name.
+	index.SortByName()
+	l.layers = append(l.layers, layer[T]{
+		index:    index,
+		ref:      ref,
+		isDelete: isDelete,
+	})
+	return nil
+}
+
+// LayerCount returns the number of layers in the index.
+func (l *LayeredIndex[T]) LayerCount() int {
+	return len(l.layers)
+}
+
+// GetLayerRef returns the reference for the layer at the given index.
+// Returns the zero value of T and false if the index is out of bounds.
+func (l *LayeredIndex[T]) GetLayerRef(index int) (T, bool) {
+	var zero T
+	if index < 0 || index >= len(l.layers) {
+		return zero, false
+	}
+	return l.layers[index].ref, true
+}
+
+// RemoveLayer removes the layer at the given index.
+// Returns an error if the index is out of bounds.
+func (l *LayeredIndex[T]) RemoveLayer(index int) error {
+	if index < 0 || index >= len(l.layers) {
+		return fmt.Errorf("layer index %d out of bounds [0, %d)", index, len(l.layers))
+	}
+	last := len(l.layers) - 1
+	copy(l.layers[index:], l.layers[index+1:])
+	// Zero the vacated slot: otherwise the backing array keeps a second copy
+	// of the last layer alive and its Files cannot be garbage collected.
+	l.layers[last] = layer[T]{}
+	l.layers = l.layers[:last]
+	return nil
+}
+
+// RemoveLayerByRef removes all layers with the given reference.
+// Returns the number of layers removed.
+func (l *LayeredIndex[T]) RemoveLayerByRef(ref T) int {
+	removed := 0
+	// Build a fresh slice so the old backing array, and with it the removed
+	// layers, is released as a whole.
+	newLayers := make([]layer[T], 0, len(l.layers))
+	for _, layer := range l.layers {
+		if layer.ref != ref {
+			newLayers = append(newLayers, layer)
+		} else {
+			removed++
+		}
+	}
+	l.layers = newLayers
+	return removed
+}
+
+// FileWithRef represents a file with its layer reference.
+type FileWithRef[T comparable] struct {
+	File
+	LayerRef T
+}
+
+// FilesIter returns an iterator over all files in the layered index.
+// Each iteration yields the layer reference and the file.
+// Files are returned in name order after applying all layer operations.
+// The merged view is computed once, when iteration starts.
+func (l *LayeredIndex[T]) FilesIter() iter.Seq2[T, File] {
+	return func(yield func(T, File) bool) {
+		files := l.Files()
+		for _, f := range files {
+			if !yield(f.LayerRef, f.File) {
+				return
+			}
+		}
+	}
+}
+
+// Files returns all files in the layered index after applying layer operations.
+// Files from newer layers override files from older layers with the same path.
+// Delete layers remove files that exist in previous layers; after each delete
+// layer, directory entries (names ending in "/") with no file beneath them
+// are dropped as well. The result is sorted by name.
+func (l *LayeredIndex[T]) Files() []FileWithRef[T] {
+	fileMap := make(map[string]FileWithRef[T])
+
+	// Process layers in order, oldest first.
+	for _, lay := range l.layers {
+		if !lay.isDelete {
+			// Add or override files
+			for _, file := range lay.index {
+				fileMap[file.Name] = FileWithRef[T]{
+					File:     file,
+					LayerRef: lay.ref,
+				}
+			}
+			continue
+		}
+
+		// Remove the listed files, then any directory that became empty.
+		for _, file := range lay.index {
+			delete(fileMap, file.Name)
+		}
+		pruneEmptyDirs(fileMap)
+	}
+
+	// Convert map to slice
+	result := make([]FileWithRef[T], 0, len(fileMap))
+	for _, file := range fileMap {
+		result = append(result, file)
+	}
+
+	// Sort by name for consistent output
+	sort.Slice(result, func(i, j int) bool {
+		return result[i].Name < result[j].Name
+	})
+
+	return result
+}
+
+// pruneEmptyDirs deletes every directory entry (a name ending in "/") from
+// fileMap that has no file entry beneath it. A directory that only contains
+// other empty directories is removed too.
+//
+// It replaces a directory-by-directory scan of the whole map with a single
+// pass that marks the ancestors of each file; the set of surviving entries is
+// the same.
+func pruneEmptyDirs[T comparable](fileMap map[string]FileWithRef[T]) {
+	var dirs []string
+	for name := range fileMap {
+		if strings.HasSuffix(name, "/") {
+			dirs = append(dirs, name)
+		}
+	}
+	if len(dirs) == 0 {
+		return
+	}
+
+	// Every prefix of a file name that ends in "/" is a directory holding
+	// that file, directly or through subdirectories.
+	populated := make(map[string]struct{})
+	for name := range fileMap {
+		if strings.HasSuffix(name, "/") {
+			continue
+		}
+		for i := 0; i < len(name); i++ {
+			if name[i] == '/' {
+				populated[name[:i+1]] = struct{}{}
+			}
+		}
+	}
+
+	for _, dir := range dirs {
+		if _, ok := populated[dir]; !ok {
+			delete(fileMap, dir)
+		}
+	}
+}
+
+// binarySearchFile performs a binary search for a file by name in a sorted Files slice.
+// It compares names with the built-in string ordering, so files must be
+// sorted accordingly (presumably what SortByName produces; confirm against its
+// definition). Returns a pointer into files, or nil when name is absent.
+func binarySearchFile(files Files, name string) *File {
+	left, right := 0, len(files)-1
+	for left <= right {
+		// Written this way so the midpoint cannot overflow.
+		mid := left + (right-left)/2
+		if files[mid].Name == name {
+			return &files[mid]
+		}
+		if files[mid].Name < name {
+			left = mid + 1
+		} else {
+			right = mid - 1
+		}
+	}
+	return nil
+}
+
+// Find searches for a file by name across all layers using binary search.
+// Returns the file and its layer reference if found.
+// Delete layers remove the file if it exists in previous layers.
+// Empty directories are automatically considered deleted.
+func (l *LayeredIndex[T]) Find(name string) (*FileWithRef[T], bool) {
+	var hit *FileWithRef[T]
+
+	// Walk the layers oldest to newest; the last layer naming the file wins.
+	for i := range l.layers {
+		cur := &l.layers[i]
+		match := binarySearchFile(cur.index, name)
+		switch {
+		case match == nil:
+			// This layer does not mention the name.
+		case cur.isDelete:
+			hit = nil
+		default:
+			hit = &FileWithRef[T]{
+				File:     *match,
+				LayerRef: cur.ref,
+			}
+		}
+	}
+
+	if hit == nil {
+		return nil, false
+	}
+	if !strings.HasSuffix(name, "/") {
+		return hit, true
+	}
+
+	// A directory may have been dropped because it became empty, which only
+	// the fully merged view knows about.
+	merged := l.Files()
+	for i := range merged {
+		if merged[i].Name == name {
+			return &merged[i], true
+		}
+	}
+	return nil, false
+}
+
+// FindInLayer looks a file up by name inside the layer carrying ref, using
+// binary search. Other layers are ignored, so overrides and deletions made
+// elsewhere have no effect on the result.
+func (l *LayeredIndex[T]) FindInLayer(name string, ref T) (*File, bool) {
+	for i := range l.layers {
+		if l.layers[i].ref != ref {
+			continue
+		}
+		// Only the first layer with this reference is consulted.
+		match := binarySearchFile(l.layers[i].index, name)
+		return match, match != nil
+	}
+	return nil, false
+}
+
+// ToSingleIndex flattens all layers into one Files collection.
+// Overrides and delete layers are applied first; the layer references are
+// dropped from the result.
+func (l *LayeredIndex[T]) ToSingleIndex() Files {
+	merged := l.Files()
+	flat := make(Files, 0, len(merged))
+	for i := range merged {
+		flat = append(flat, merged[i].File)
+	}
+	return flat
+}
+
+// Clear removes all layers from the index.
+func (l *LayeredIndex[T]) Clear() {
+	// Zero the elements before truncating: a bare [:0] keeps every layer's
+	// Files reachable through the backing array. Capacity is kept for reuse.
+	clear(l.layers)
+	l.layers = l.layers[:0]
+}
+
+// IsEmpty returns true if the index has no files after applying all layer operations.
+// This accounts for files that have been deleted by delete layers.
+func (l *LayeredIndex[T]) IsEmpty() bool {
+	return l.FileCount() == 0
+}
+
+// FileCount returns the total number of unique files after applying all layer operations.
+// The merged view is rebuilt on every call.
+func (l *LayeredIndex[T]) FileCount() int {
+	return len(l.Files())
+}
+
+// HasFile returns true if the file exists in the layered index after applying all operations.
+func (l *LayeredIndex[T]) HasFile(name string) bool {
+	_, found := l.Find(name)
+	return found
+}
+
+// RefSerializer provides functions to convert layer references to/from byte slices.
+//
+// SerializeLayered and DeserializeLayered call these functions from several
+// goroutines at once, so both must be safe for concurrent use.
+type RefSerializer[T comparable] struct {
+	// Marshal converts a reference to bytes
+	Marshal func(T) ([]byte, error)
+	// Unmarshal converts bytes to a reference. DeserializeLayered passes a
+	// slice obtained with msgp.ReadBytesZC, i.e. one that aliases its input
+	// buffer; copy the bytes if they are retained.
+	Unmarshal func([]byte) (T, error)
+}
+
+// SerializeLayered serializes the layered index with all layers preserved.
+// Uses concurrent serialization for better performance with large indexes.
+func (l *LayeredIndex[T]) SerializeLayered(refSerializer RefSerializer[T]) ([]byte, error) {
+	if refSerializer.Marshal == nil {
+		return nil, fmt.Errorf("marshal function is required")
+	}
+
+	// Encode the layers concurrently, one goroutine per layer. Each goroutine
+	// writes only to its own slot, so no channel or lock is needed and the
+	// results are already in layer order.
+	encoded := make([][]byte, len(l.layers))
+	errs := make([]error, len(l.layers))
+	var wg sync.WaitGroup
+	for i := range l.layers {
+		wg.Add(1)
+		go func(idx int) {
+			defer wg.Done()
+			encoded[idx], errs[idx] = serializeLayer(idx, l.layers[idx], refSerializer)
+		}(i)
+	}
+	wg.Wait()
+
+	// Report the failure of the lowest-numbered layer so the returned error
+	// does not depend on goroutine scheduling.
+	size := 16 // room for the header
+	for i, err := range errs {
+		if err != nil {
+			return nil, err
+		}
+		size += len(encoded[i])
+	}
+
+	// Header format: [version:uint8, layers:uint32]
+	result := make([]byte, 0, size)
+	result = msgp.AppendUint8(result, 1)
+	result = msgp.AppendUint32(result, uint32(len(l.layers)))
+
+	// Append all layer data in order
+	for _, data := range encoded {
+		result = append(result, data...)
+	}
+
+	return result, nil
+}
+
+// serializeLayer encodes one layer as three consecutive msgpack values:
+// [refData:bin, isDelete:bool, filesData:bin]. idx is only used in error
+// messages.
+func serializeLayer[T comparable](idx int, lay layer[T], refSerializer RefSerializer[T]) ([]byte, error) {
+	refData, err := refSerializer.Marshal(lay.ref)
+	if err != nil {
+		return nil, fmt.Errorf("failed to marshal ref for layer %d: %w", idx, err)
+	}
+
+	filesData, err := lay.index.Serialize()
+	if err != nil {
+		return nil, fmt.Errorf("failed to serialize files for layer %d: %w", idx, err)
+	}
+
+	buf := make([]byte, 0, len(refData)+len(filesData)+64)
+	buf = msgp.AppendBytes(buf, refData)
+	buf = msgp.AppendBool(buf, lay.isDelete)
+	buf = msgp.AppendBytes(buf, filesData)
+	return buf, nil
+}
+
+// DeserializeLayered reconstructs a layered index from serialized data.
+// Uses concurrent deserialization for better performance with large indexes.
+//
+// data must have been produced by SerializeLayered (format version 1).
+// The layer count in the header is checked against the amount of input
+// before anything is allocated, so corrupt or hostile input cannot trigger
+// a huge allocation. Bytes following the last layer are ignored.
+func DeserializeLayered[T comparable](data []byte, refSerializer RefSerializer[T]) (*LayeredIndex[T], error) {
+	if refSerializer.Unmarshal == nil {
+		return nil, fmt.Errorf("unmarshal function is required")
+	}
+
+	// Header format: [version:uint8, layers:uint32]
+	version, remaining, err := msgp.ReadUint8Bytes(data)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read version: %w", err)
+	}
+	if version != 1 {
+		return nil, fmt.Errorf("unsupported version: %d", version)
+	}
+
+	numLayers, remaining, err := msgp.ReadUint32Bytes(remaining)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read layer count: %w", err)
+	}
+
+	// A layer is three msgpack values of at least one byte each. Reject
+	// counts the input cannot possibly hold instead of allocating for them.
+	if uint64(numLayers)*3 > uint64(len(remaining)) {
+		return nil, fmt.Errorf("layer count %d exceeds available data (%d bytes)", numLayers, len(remaining))
+	}
+
+	// Split the input into layers sequentially; this is cheap because the
+	// binary fields are read without copying.
+	// Layer format: [refData:bin, isDelete:bool, filesData:bin]
+	type rawLayer struct {
+		refData   []byte
+		filesData []byte
+		isDelete  bool
+	}
+	raw := make([]rawLayer, numLayers)
+	for i := range raw {
+		r := &raw[i]
+		if r.refData, remaining, err = msgp.ReadBytesZC(remaining); err != nil {
+			return nil, fmt.Errorf("failed to read ref data for layer %d: %w", i, err)
+		}
+		if r.isDelete, remaining, err = msgp.ReadBoolBytes(remaining); err != nil {
+			return nil, fmt.Errorf("failed to read isDelete for layer %d: %w", i, err)
+		}
+		if r.filesData, remaining, err = msgp.ReadBytesZC(remaining); err != nil {
+			return nil, fmt.Errorf("failed to read files data for layer %d: %w", i, err)
+		}
+	}
+
+	// Decode the layers concurrently; each goroutine owns one slot.
+	layers := make([]layer[T], len(raw))
+	errs := make([]error, len(raw))
+	var wg sync.WaitGroup
+	for i := range raw {
+		wg.Add(1)
+		go func(idx int) {
+			defer wg.Done()
+			r := raw[idx]
+			layers[idx], errs[idx] = decodeLayer(idx, r.refData, r.filesData, r.isDelete, refSerializer)
+		}(i)
+	}
+	wg.Wait()
+
+	// Report the failure of the lowest-numbered layer.
+	for _, err := range errs {
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return &LayeredIndex[T]{layers: layers}, nil
+}
+
+// decodeLayer turns the raw fields of one serialized layer back into a layer.
+// idx is only used in error messages.
+func decodeLayer[T comparable](idx int, refData, filesData []byte, isDelete bool, refSerializer RefSerializer[T]) (layer[T], error) {
+	ref, err := refSerializer.Unmarshal(refData)
+	if err != nil {
+		return layer[T]{}, fmt.Errorf("failed to unmarshal ref for layer %d: %w", idx, err)
+	}
+
+	files, err := DeserializeFiles(filesData)
+	if err != nil {
+		return layer[T]{}, fmt.Errorf("failed to deserialize files for layer %d: %w", idx, err)
+	}
+
+	// Layers are sorted when added, but the input is not trusted to be;
+	// lookups rely on the order.
+	files.SortByName()
+
+	return layer[T]{
+		index:    files,
+		ref:      ref,
+		isDelete: isDelete,
+	}, nil
+}
diff --git a/layered_test.go b/layered_test.go
new file mode 100644
index 0000000..934270c
--- /dev/null
+++ b/layered_test.go
@@ -0,0 +1,1482 @@
+/*
+ * zipindex, (C)2025 MinIO, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package zipindex
+
+import (
+	"encoding/binary"
+	"fmt"
+	"reflect"
+	"slices"
+	"sort"
+	"testing"
+)
+
+// Test basic creation and layer management
+func TestLayeredIndex_Basic(t *testing.T) {
+	l := NewLayeredIndex[string]()
+
+	// Test empty index
+	if !l.IsEmpty() {
+		t.Error("New layered index should be empty")
+	}
+	if l.LayerCount() != 0 {
+		t.Errorf("Expected 0 layers, got %d", l.LayerCount())
+	}
+	if l.FileCount() != 0 {
+		t.Errorf("Expected 0 files, got %d", l.FileCount())
+	}
+
+	// Add first layer
+	files1 := Files{
+		{Name: "file1.txt", CompressedSize64: 100, UncompressedSize64: 150},
+		{Name: "file2.txt", CompressedSize64: 200, UncompressedSize64: 250},
+	}
+	err := l.AddLayer(files1, "layer1")
+	if err != nil {
+		t.Errorf("Failed to add layer: %v", err)
+	}
+
+	if l.IsEmpty() {
+		t.Error("Index should not be empty after adding files")
+	}
+	if l.LayerCount() != 1 {
+		t.Errorf("Expected 1 layer, got %d", l.LayerCount())
+	}
+	if l.FileCount() != 2 {
+		t.Errorf("Expected 2 files, got %d", l.FileCount())
+	}
+
+	// 
Test duplicate reference
+	err = l.AddLayer(Files{}, "layer1")
+	if err == nil {
+		t.Error("Should have failed to add layer with duplicate reference")
+	}
+
+	// Add second layer
+	files2 := Files{
+		{Name: "file3.txt", CompressedSize64: 300, UncompressedSize64: 350},
+	}
+	err = l.AddLayer(files2, "layer2")
+	if err != nil {
+		t.Errorf("Failed to add second layer: %v", err)
+	}
+
+	if l.LayerCount() != 2 {
+		t.Errorf("Expected 2 layers, got %d", l.LayerCount())
+	}
+	if l.FileCount() != 3 {
+		t.Errorf("Expected 3 files, got %d", l.FileCount())
+	}
+}
+
+// Test file override semantics
+func TestLayeredIndex_Override(t *testing.T) {
+	l := NewLayeredIndex[int]()
+
+	// Add base layer
+	files1 := Files{
+		{Name: "file1.txt", CompressedSize64: 100, CRC32: 1111},
+		{Name: "file2.txt", CompressedSize64: 200, CRC32: 2222},
+		{Name: "file3.txt", CompressedSize64: 300, CRC32: 3333},
+	}
+	if err := l.AddLayer(files1, 1); err != nil {
+		t.Fatalf("Failed to add base layer: %v", err)
+	}
+
+	// Add override layer
+	files2 := Files{
+		{Name: "file2.txt", CompressedSize64: 999, CRC32: 9999}, // Override file2
+		{Name: "file4.txt", CompressedSize64: 400, CRC32: 4444}, // New file
+	}
+	if err := l.AddLayer(files2, 2); err != nil {
+		t.Fatalf("Failed to add override layer: %v", err)
+	}
+
+	// Check that file2 was overridden.
+	// Fatal, not Error: file is nil when the lookup fails and is dereferenced below.
+	file, found := l.Find("file2.txt")
+	if !found {
+		t.Fatal("file2.txt should exist")
+	}
+	if file.CRC32 != 9999 {
+		t.Errorf("file2.txt should have been overridden, got CRC %d", file.CRC32)
+	}
+	if file.LayerRef != 2 {
+		t.Errorf("file2.txt should be from layer 2, got layer %d", file.LayerRef)
+	}
+
+	// Check that file1 is still from layer 1
+	file, found = l.Find("file1.txt")
+	if !found {
+		t.Fatal("file1.txt should exist")
+	}
+	if file.LayerRef != 1 {
+		t.Errorf("file1.txt should be from layer 1, got layer %d", file.LayerRef)
+	}
+
+	// Check total file count
+	if l.FileCount() != 4 {
+		t.Errorf("Expected 4 unique files, got %d", l.FileCount())
+	}
+}
+
+// Test delete layer semantics
+func TestLayeredIndex_Delete(t *testing.T) {
+	l := NewLayeredIndex[string]()
+
+	// Add base layer
+	files1 := Files{
+		{Name: "file1.txt", CompressedSize64: 100},
+		{Name: "file2.txt", CompressedSize64: 200},
+		{Name: "file3.txt", CompressedSize64: 300},
+	}
+	if err := l.AddLayer(files1, "base"); err != nil {
+		t.Fatalf("Failed to add base layer: %v", err)
+	}
+
+	// Add delete layer
+	deleteFiles := Files{
+		{Name: "file2.txt"},
+	}
+	err := l.AddDeleteLayer(deleteFiles, "delete1")
+	if err != nil {
+		t.Errorf("Failed to add delete layer: %v", err)
+	}
+
+	// Check that file2 was deleted
+	_, found := l.Find("file2.txt")
+	if found {
+		t.Error("file2.txt should have been deleted")
+	}
+
+	// Check that other files still exist
+	if !l.HasFile("file1.txt") {
+		t.Error("file1.txt should still exist")
+	}
+	if !l.HasFile("file3.txt") {
+		t.Error("file3.txt should still exist")
+	}
+
+	// Check file count
+	if l.FileCount() != 2 {
+		t.Errorf("Expected 2 files after deletion, got %d", l.FileCount())
+	}
+
+	// Add file2 back in a new layer - it should reappear
+	files2 := Files{
+		{Name: "file2.txt", CompressedSize64: 999},
+	}
+	if err := l.AddLayer(files2, "restore"); err != nil {
+		t.Fatalf("Failed to add restore layer: %v", err)
+	}
+
+	// Check that file2 is back.
+	// Fatal, not Error: file is nil when the lookup fails and is dereferenced below.
+	file, found := l.Find("file2.txt")
+	if !found {
+		t.Fatal("file2.txt should exist after being re-added")
+	}
+	if file.LayerRef != "restore" {
+		t.Errorf("file2.txt should be from 'restore' layer, got %s", file.LayerRef)
+	}
+	if file.CompressedSize64 != 999 {
+		t.Errorf("file2.txt should have new size 999, got %d", file.CompressedSize64)
+	}
+}
+
+// Test directory handling with deletions
+func TestLayeredIndex_DirectoryHandling(t *testing.T) {
+	l := NewLayeredIndex[string]()
+
+	// Add files with directory structure
+	files1 := Files{
+		{Name: "dir1/"},
+		{Name: "dir1/file1.txt"},
+		{Name: "dir1/file2.txt"},
+		{Name: "dir1/subdir/"},
+		{Name: "dir1/subdir/file3.txt"},
+		{Name: "dir2/"},
+		{Name: "dir2/file4.txt"},
+	}
+	if err := l.AddLayer(files1, "base"); err != nil {
+		t.Fatalf("Failed to add base layer: %v", err)
+	}
+
+	if l.FileCount() != 7 {
+		t.Errorf("Expected 7 entries, got %d", l.FileCount())
+	}
+
+	// Delete a file from dir1/subdir
+	deleteFiles := Files{
+		{Name: "dir1/subdir/file3.txt"},
+	}
+	if err := l.AddDeleteLayer(deleteFiles, "delete1"); err != nil {
+		t.Fatalf("Failed to add first delete layer: %v", err)
+	}
+
+	// Check that the file is deleted but empty directory is also removed
+	if l.HasFile("dir1/subdir/file3.txt") {
+		t.Error("dir1/subdir/file3.txt should be deleted")
+	}
+	if l.HasFile("dir1/subdir/") {
+		t.Error("dir1/subdir/ should be deleted as it's now empty")
+	}
+
+	// dir1/ should still exist as it has other files
+	if !l.HasFile("dir1/") {
+		t.Error("dir1/ should still exist")
+	}
+	if !l.HasFile("dir1/file1.txt") {
+		t.Error("dir1/file1.txt should still exist")
+	}
+
+	// Delete all files from dir1
+	deleteFiles2 := Files{
+		{Name: "dir1/file1.txt"},
+		{Name: "dir1/file2.txt"},
+	}
+	if err := l.AddDeleteLayer(deleteFiles2, "delete2"); err != nil {
+		t.Fatalf("Failed to add second delete layer: %v", err)
+	}
+
+	// Now dir1/ should also be removed
+	if l.HasFile("dir1/") {
+		t.Error("dir1/ should be deleted as it's now empty")
+	}
+
+	// dir2 should still exist
+	if !l.HasFile("dir2/") {
+		t.Error("dir2/ should still exist")
+	}
+	if !l.HasFile("dir2/file4.txt") {
+		t.Error("dir2/file4.txt should still exist")
+	}
+}
+
+// Test Files() method
+func TestLayeredIndex_Files(t *testing.T) {
+	l := NewLayeredIndex[int]()
+
+	// Add multiple layers
+	l.AddLayer(Files{
+		{Name: "a.txt", CompressedSize64: 1},
+		{Name: "b.txt", CompressedSize64: 2},
+	}, 1)
+
+	l.AddLayer(Files{
+		{Name: "b.txt", CompressedSize64: 22}, // Override
+		{Name: "c.txt", CompressedSize64: 3},
+	}, 2)
+
+	l.AddDeleteLayer(Files{
+		{Name: "a.txt"},
+	}, 3)
+
+	l.AddLayer(Files{
+		{Name: "d.txt", CompressedSize64: 4},
+		{Name: "a.txt", CompressedSize64: 11}, // Re-add after delete
+	}, 4)
+
+	files := l.Files()
+
+	// Should have 4 files: a.txt (from layer 4), b.txt (from layer 2), c.txt (from layer 2), d.txt (from layer 4)
+	if len(files) != 4 {
+		t.Errorf("Expected 4 files, got %d", len(files))
+	}
+
+	// Check files are sorted by name
+	expectedNames := []string{"a.txt", "b.txt", "c.txt", "d.txt"}
+	for i, f := range files {
+		if f.Name != expectedNames[i] {
+			t.Errorf("Expected file %s at index %d, got %s", expectedNames[i], i, f.Name)
+		}
+	}
+
+	// Check 
layer references + fileMap := make(map[string]int) + for _, f := range files { + fileMap[f.Name] = f.LayerRef + } + + if fileMap["a.txt"] != 4 { + t.Errorf("a.txt should be from layer 4, got %d", fileMap["a.txt"]) + } + if fileMap["b.txt"] != 2 { + t.Errorf("b.txt should be from layer 2, got %d", fileMap["b.txt"]) + } + if fileMap["c.txt"] != 2 { + t.Errorf("c.txt should be from layer 2, got %d", fileMap["c.txt"]) + } + if fileMap["d.txt"] != 4 { + t.Errorf("d.txt should be from layer 4, got %d", fileMap["d.txt"]) + } +} + +// Test FilesIter iterator +func TestLayeredIndex_FilesIter(t *testing.T) { + l := NewLayeredIndex[string]() + + l.AddLayer(Files{ + {Name: "file1.txt", CompressedSize64: 100}, + {Name: "file2.txt", CompressedSize64: 200}, + }, "layer1") + + l.AddLayer(Files{ + {Name: "file3.txt", CompressedSize64: 300}, + }, "layer2") + + // Collect all files using iterator + var collected []struct { + ref string + name string + size uint64 + } + + for ref, file := range l.FilesIter() { + collected = append(collected, struct { + ref string + name string + size uint64 + }{ref, file.Name, file.CompressedSize64}) + } + + if len(collected) != 3 { + t.Errorf("Expected 3 files from iterator, got %d", len(collected)) + } + + // Files should be in name order + expectedNames := []string{"file1.txt", "file2.txt", "file3.txt"} + for i, item := range collected { + if item.name != expectedNames[i] { + t.Errorf("Expected %s at index %d, got %s", expectedNames[i], i, item.name) + } + } + + // Test early termination + count := 0 + for range l.FilesIter() { + count++ + if count == 2 { + break + } + } + if count != 2 { + t.Errorf("Expected to break after 2 iterations, got %d", count) + } +} + +// Test layer management methods +func TestLayeredIndex_LayerManagement(t *testing.T) { + l := NewLayeredIndex[string]() + + // Add multiple layers + l.AddLayer(Files{{Name: "f1.txt"}}, "layer1") + l.AddLayer(Files{{Name: "f2.txt"}}, "layer2") + l.AddLayer(Files{{Name: "f3.txt"}}, 
"layer3") + l.AddLayer(Files{{Name: "f4.txt"}}, "layer4") + + // Test GetLayerRef + ref, ok := l.GetLayerRef(1) + if !ok || ref != "layer2" { + t.Errorf("Expected layer2 at index 1, got %s", ref) + } + + _, ok = l.GetLayerRef(10) + if ok { + t.Error("GetLayerRef should return false for out of bounds index") + } + + // Test RemoveLayer + err := l.RemoveLayer(1) // Remove layer2 + if err != nil { + t.Errorf("Failed to remove layer: %v", err) + } + + if l.LayerCount() != 3 { + t.Errorf("Expected 3 layers after removal, got %d", l.LayerCount()) + } + + if l.HasFile("f2.txt") { + t.Error("f2.txt should not exist after removing its layer") + } + + // Test RemoveLayerByRef + l.AddLayer(Files{{Name: "dup1.txt"}}, "duplicate") + l.AddLayer(Files{{Name: "other.txt"}}, "other") + l.AddLayer(Files{{Name: "dup2.txt"}}, "duplicate2") // Different ref + + // Can't add duplicate ref + err = l.AddLayer(Files{{Name: "dup3.txt"}}, "duplicate") + if err == nil { + t.Error("Should not allow duplicate layer reference") + } + + removed := l.RemoveLayerByRef("duplicate") + if removed != 1 { + t.Errorf("Expected to remove 1 layer, removed %d", removed) + } + + if l.HasFile("dup1.txt") { + t.Error("dup1.txt should not exist after removing its layer") + } + + // Test Clear + l.Clear() + if l.LayerCount() != 0 { + t.Errorf("Expected 0 layers after clear, got %d", l.LayerCount()) + } + if !l.IsEmpty() { + t.Error("Index should be empty after clear") + } +} + +// Test FindInLayer +func TestLayeredIndex_FindInLayer(t *testing.T) { + l := NewLayeredIndex[string]() + + l.AddLayer(Files{ + {Name: "file1.txt", CompressedSize64: 100}, + {Name: "shared.txt", CompressedSize64: 200}, + }, "layer1") + + l.AddLayer(Files{ + {Name: "file2.txt", CompressedSize64: 300}, + {Name: "shared.txt", CompressedSize64: 400}, // Override + }, "layer2") + + // Find in specific layer + file, found := l.FindInLayer("shared.txt", "layer1") + if !found { + t.Error("shared.txt should exist in layer1") + } + if 
file.CompressedSize64 != 200 { + t.Errorf("Expected size 200 in layer1, got %d", file.CompressedSize64) + } + + file, found = l.FindInLayer("shared.txt", "layer2") + if !found { + t.Error("shared.txt should exist in layer2") + } + if file.CompressedSize64 != 400 { + t.Errorf("Expected size 400 in layer2, got %d", file.CompressedSize64) + } + + // File only in one layer + _, found = l.FindInLayer("file1.txt", "layer2") + if found { + t.Error("file1.txt should not exist in layer2") + } + + // Non-existent layer + _, found = l.FindInLayer("file1.txt", "nonexistent") + if found { + t.Error("Should not find file in non-existent layer") + } +} + +// Test ToSingleIndex +func TestLayeredIndex_ToSingleIndex(t *testing.T) { + l := NewLayeredIndex[int]() + + l.AddLayer(Files{ + {Name: "a.txt", CompressedSize64: 1, CRC32: 1111}, + {Name: "b.txt", CompressedSize64: 2, CRC32: 2222}, + }, 1) + + l.AddLayer(Files{ + {Name: "b.txt", CompressedSize64: 22, CRC32: 2223}, // Override + {Name: "c.txt", CompressedSize64: 3, CRC32: 3333}, + }, 2) + + l.AddDeleteLayer(Files{{Name: "a.txt"}}, 3) + + single := l.ToSingleIndex() + + // Should have b.txt (overridden) and c.txt, but not a.txt (deleted) + if len(single) != 2 { + t.Errorf("Expected 2 files in single index, got %d", len(single)) + } + + // Check the files + fileMap := make(map[string]File) + for _, f := range single { + fileMap[f.Name] = f + } + + if _, exists := fileMap["a.txt"]; exists { + t.Error("a.txt should not exist in merged index") + } + + if b, exists := fileMap["b.txt"]; !exists { + t.Error("b.txt should exist in merged index") + } else if b.CRC32 != 2223 { + t.Errorf("b.txt should have overridden CRC 2223, got %d", b.CRC32) + } + + if _, exists := fileMap["c.txt"]; !exists { + t.Error("c.txt should exist in merged index") + } +} + +// Test edge cases +func TestLayeredIndex_EdgeCases(t *testing.T) { + t.Run("EmptyLayers", func(t *testing.T) { + l := NewLayeredIndex[int]() + + // Add empty layer + err := 
l.AddLayer(Files{}, 1) + if err != nil { + t.Errorf("Should allow empty layer: %v", err) + } + + if !l.IsEmpty() { + t.Error("Index should still be empty with empty layer") + } + + // Add empty delete layer + err = l.AddDeleteLayer(Files{}, 2) + if err != nil { + t.Errorf("Should allow empty delete layer: %v", err) + } + }) + + t.Run("DeleteNonExistent", func(t *testing.T) { + l := NewLayeredIndex[int]() + + l.AddLayer(Files{{Name: "exists.txt"}}, 1) + + // Delete non-existent file + l.AddDeleteLayer(Files{{Name: "nonexistent.txt"}}, 2) + + // Should not affect existing files + if !l.HasFile("exists.txt") { + t.Error("exists.txt should still exist") + } + if l.FileCount() != 1 { + t.Errorf("Expected 1 file, got %d", l.FileCount()) + } + }) + + t.Run("MultipleOverrides", func(t *testing.T) { + l := NewLayeredIndex[int]() + + // Add same file in multiple layers + for i := 1; i <= 5; i++ { + l.AddLayer(Files{{ + Name: "file.txt", + CompressedSize64: uint64(i * 100), + }}, i) + } + + file, found := l.Find("file.txt") + if !found { + t.Error("file.txt should exist") + } + if file.CompressedSize64 != 500 { + t.Errorf("Expected size 500 from last layer, got %d", file.CompressedSize64) + } + if file.LayerRef != 5 { + t.Errorf("Expected layer 5, got %d", file.LayerRef) + } + }) + + t.Run("DeleteAndReAddMultiple", func(t *testing.T) { + l := NewLayeredIndex[string]() + + // Complex scenario with multiple add/delete cycles + l.AddLayer(Files{{Name: "cycle.txt", CRC32: 1}}, "v1") + l.AddDeleteLayer(Files{{Name: "cycle.txt"}}, "del1") + l.AddLayer(Files{{Name: "cycle.txt", CRC32: 2}}, "v2") + l.AddDeleteLayer(Files{{Name: "cycle.txt"}}, "del2") + l.AddLayer(Files{{Name: "cycle.txt", CRC32: 3}}, "v3") + + file, found := l.Find("cycle.txt") + if !found { + t.Error("cycle.txt should exist after re-adding") + } + if file.CRC32 != 3 { + t.Errorf("Expected CRC 3 from final add, got %d", file.CRC32) + } + }) +} + +// Test with custom comparable types +func 
TestLayeredIndex_CustomTypes(t *testing.T) { + type Version struct { + Major, Minor int + } + + l := NewLayeredIndex[Version]() + + l.AddLayer(Files{{Name: "readme.txt"}}, Version{1, 0}) + l.AddLayer(Files{{Name: "changelog.txt"}}, Version{1, 1}) + l.AddLayer(Files{{Name: "readme.txt", CompressedSize64: 999}}, Version{2, 0}) + + file, found := l.Find("readme.txt") + if !found { + t.Error("readme.txt should exist") + } + if file.LayerRef != (Version{2, 0}) { + t.Errorf("Expected version 2.0, got %v", file.LayerRef) + } + + ref, ok := l.GetLayerRef(0) + if !ok || ref != (Version{1, 0}) { + t.Errorf("Expected version 1.0 at index 0, got %v", ref) + } +} + +// Benchmark Files() method +func BenchmarkLayeredIndex_Files(b *testing.B) { + l := NewLayeredIndex[int]() + + // Create layers with many files + for layer := 0; layer < 10; layer++ { + var files Files + for i := 0; i < 1000; i++ { + files = append(files, File{ + Name: fmt.Sprintf("layer%02d_file_%04d.txt", layer, i), + CompressedSize64: uint64(i), + }) + } + l.AddLayer(files, layer) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = l.Files() + } +} + +// Benchmark Find() method with binary search optimization +func BenchmarkLayeredIndex_Find(b *testing.B) { + l := NewLayeredIndex[int]() + + // Create layers with many files + var testFiles []string + for layer := 0; layer < 10; layer++ { + var files Files + for i := 0; i < 1000; i++ { + name := fmt.Sprintf("layer%02d_file_%04d.txt", layer, i) + files = append(files, File{ + Name: name, + CompressedSize64: uint64(i), + }) + if layer == 5 && i%100 == 0 { + testFiles = append(testFiles, name) + } + } + l.AddLayer(files, layer) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, name := range testFiles { + _, _ = l.Find(name) + } + } +} + +// Benchmark FindInLayer() method with binary search optimization +func BenchmarkLayeredIndex_FindInLayer(b *testing.B) { + l := NewLayeredIndex[int]() + + // Create a layer with many files + var files Files + var 
testFiles []string + for i := 0; i < 10000; i++ { + name := fmt.Sprintf("file_%05d.txt", i) + files = append(files, File{ + Name: name, + CompressedSize64: uint64(i), + }) + if i%100 == 0 { + testFiles = append(testFiles, name) + } + } + l.AddLayer(files, 1) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, name := range testFiles { + _, _ = l.FindInLayer(name, 1) + } + } +} + +// Test complex directory deletion scenarios +func TestLayeredIndex_ComplexDirectoryDeletion(t *testing.T) { + l := NewLayeredIndex[string]() + + // Create a complex directory structure + files := Files{ + {Name: "root/"}, + {Name: "root/file1.txt"}, + {Name: "root/dir1/"}, + {Name: "root/dir1/file2.txt"}, + {Name: "root/dir1/file3.txt"}, + {Name: "root/dir1/subdir/"}, + {Name: "root/dir1/subdir/file4.txt"}, + {Name: "root/dir2/"}, + {Name: "root/dir2/file5.txt"}, + {Name: "root/dir2/subdir/"}, + {Name: "root/dir2/subdir/file6.txt"}, + {Name: "other/"}, + {Name: "other/file7.txt"}, + } + l.AddLayer(files, "base") + + // Delete file from nested directory + l.AddDeleteLayer(Files{{Name: "root/dir1/subdir/file4.txt"}}, "del1") + + // Verify the nested empty directory is removed + if l.HasFile("root/dir1/subdir/file4.txt") { + t.Error("file4.txt should be deleted") + } + if l.HasFile("root/dir1/subdir/") { + t.Error("root/dir1/subdir/ should be deleted as it's empty") + } + + // But parent directories with files should remain + if !l.HasFile("root/dir1/") { + t.Error("root/dir1/ should still exist") + } + if !l.HasFile("root/") { + t.Error("root/ should still exist") + } + + // Delete multiple files that empty different directories + l.AddDeleteLayer(Files{ + {Name: "root/dir2/file5.txt"}, + {Name: "root/dir2/subdir/file6.txt"}, + }, "del2") + + // Both dir2 and its subdir should be gone + if l.HasFile("root/dir2/") { + t.Error("root/dir2/ should be deleted") + } + if l.HasFile("root/dir2/subdir/") { + t.Error("root/dir2/subdir/ should be deleted") + } + + // But root should still exist 
(has dir1 and file1) + if !l.HasFile("root/") { + t.Error("root/ should still exist") + } +} + +// Test that all methods handle nil/empty states correctly +func TestLayeredIndex_NilEmpty(t *testing.T) { + l := NewLayeredIndex[string]() + + // Test all methods on empty index + if !l.IsEmpty() { + t.Error("Empty index should report IsEmpty") + } + + if l.FileCount() != 0 { + t.Error("Empty index should have 0 files") + } + + if l.LayerCount() != 0 { + t.Error("Empty index should have 0 layers") + } + + files := l.Files() + if len(files) != 0 { + t.Error("Empty index should return empty Files slice") + } + + single := l.ToSingleIndex() + if len(single) != 0 { + t.Error("Empty index should return empty single index") + } + + _, found := l.Find("anything") + if found { + t.Error("Empty index should not find any file") + } + + if l.HasFile("anything") { + t.Error("Empty index should not have any file") + } + + _, ok := l.GetLayerRef(0) + if ok { + t.Error("Empty index should not have any layer refs") + } + + err := l.RemoveLayer(0) + if err == nil { + t.Error("RemoveLayer should error on empty index") + } + + removed := l.RemoveLayerByRef("anything") + if removed != 0 { + t.Error("RemoveLayerByRef should remove 0 from empty index") + } + + // Clear should work without error + l.Clear() + + // Iterator should not yield anything + count := 0 + for range l.FilesIter() { + count++ + } + if count != 0 { + t.Error("Empty index iterator should not yield anything") + } +} + +// Test Files() returns consistent order +func TestLayeredIndex_ConsistentOrder(t *testing.T) { + l := NewLayeredIndex[int]() + + // Add files in random order across layers + l.AddLayer(Files{ + {Name: "zebra.txt"}, + {Name: "apple.txt"}, + {Name: "middle.txt"}, + }, 1) + + l.AddLayer(Files{ + {Name: "banana.txt"}, + {Name: "xray.txt"}, + }, 2) + + // Call Files() multiple times and verify order is consistent + var results [][]string + for i := 0; i < 5; i++ { + files := l.Files() + var names []string + for 
_, f := range files { + names = append(names, f.Name) + } + results = append(results, names) + } + + // All results should be identical + for i := 1; i < len(results); i++ { + if !slices.Equal(results[0], results[i]) { + t.Errorf("Files() returned different order on call %d", i+1) + } + } + + // Verify files are sorted + sorted := slices.Clone(results[0]) + sort.Strings(sorted) + if !slices.Equal(results[0], sorted) { + t.Error("Files() should return files in sorted order") + } +} + +// Test that modifications to returned slices don't affect the index +func TestLayeredIndex_ImmutableReturns(t *testing.T) { + l := NewLayeredIndex[string]() + + l.AddLayer(Files{ + {Name: "file1.txt", CompressedSize64: 100}, + {Name: "file2.txt", CompressedSize64: 200}, + }, "layer1") + + // Get files and modify the returned slice + files1 := l.Files() + originalLen := len(files1) + files1[0].CompressedSize64 = 999 + _ = append(files1, FileWithRef[string]{ + File: File{Name: "injected.txt"}, + }) + + // Get files again - should be unaffected + files2 := l.Files() + if len(files2) != originalLen { + t.Error("Modifying returned slice should not affect index") + } + if files2[0].CompressedSize64 == 999 { + t.Error("Modifying returned file should not affect index") + } + + // Same for ToSingleIndex + single1 := l.ToSingleIndex() + single1[0].CompressedSize64 = 888 + _ = append(single1, File{Name: "injected2.txt"}) + + single2 := l.ToSingleIndex() + if len(single2) != originalLen { + t.Error("Modifying returned single index should not affect index") + } + if single2[0].CompressedSize64 == 888 { + t.Error("Modifying returned file should not affect index") + } +} + +// Test RemoveLayer with various indices +func TestLayeredIndex_RemoveLayerBounds(t *testing.T) { + l := NewLayeredIndex[int]() + + // Add some layers + for i := 0; i < 5; i++ { + l.AddLayer(Files{{Name: "file" + string(rune('0'+i)) + ".txt"}}, i) + } + + // Test negative index + err := l.RemoveLayer(-1) + if err == nil { + 
t.Error("RemoveLayer should error on negative index") + } + + // Test index equal to count + err = l.RemoveLayer(5) + if err == nil { + t.Error("RemoveLayer should error on index >= count") + } + + // Test index greater than count + err = l.RemoveLayer(10) + if err == nil { + t.Error("RemoveLayer should error on index > count") + } + + // Remove middle layer + err = l.RemoveLayer(2) + if err != nil { + t.Errorf("Failed to remove middle layer: %v", err) + } + if l.LayerCount() != 4 { + t.Errorf("Expected 4 layers after removal, got %d", l.LayerCount()) + } + + // Verify the right file was removed + if l.HasFile("file2.txt") { + t.Error("file2.txt should be removed") + } + + // Remove first layer + err = l.RemoveLayer(0) + if err != nil { + t.Errorf("Failed to remove first layer: %v", err) + } + + // Remove last layer + err = l.RemoveLayer(l.LayerCount() - 1) + if err != nil { + t.Errorf("Failed to remove last layer: %v", err) + } +} + +// Test with large number of layers +func TestLayeredIndex_ManyLayers(t *testing.T) { + l := NewLayeredIndex[int]() + + const numLayers = 100 + for i := 0; i < numLayers; i++ { + files := Files{ + {Name: "common.txt", CompressedSize64: uint64(i)}, // Will be overridden + {Name: "layer_" + string(rune(i)) + ".txt"}, // Unique to layer + } + err := l.AddLayer(files, i) + if err != nil { + t.Fatalf("Failed to add layer %d: %v", i, err) + } + } + + if l.LayerCount() != numLayers { + t.Errorf("Expected %d layers, got %d", numLayers, l.LayerCount()) + } + + // common.txt should be from the last layer + file, found := l.Find("common.txt") + if !found { + t.Error("common.txt should exist") + } + if file.LayerRef != numLayers-1 { + t.Errorf("common.txt should be from layer %d, got %d", numLayers-1, file.LayerRef) + } + if file.CompressedSize64 != uint64(numLayers-1) { + t.Errorf("common.txt should have size %d, got %d", numLayers-1, file.CompressedSize64) + } +} + +// Test that Find and Files agree +func TestLayeredIndex_FindFilesConsistency(t 
*testing.T) { + l := NewLayeredIndex[string]() + + l.AddLayer(Files{ + {Name: "a.txt", CRC32: 1}, + {Name: "b.txt", CRC32: 2}, + }, "v1") + + l.AddLayer(Files{ + {Name: "b.txt", CRC32: 22}, + {Name: "c.txt", CRC32: 3}, + }, "v2") + + l.AddDeleteLayer(Files{{Name: "a.txt"}}, "del") + + l.AddLayer(Files{ + {Name: "d.txt", CRC32: 4}, + }, "v3") + + // Get all files + allFiles := l.Files() + + // For each file in Files(), Find() should return the same data + for _, fileWithRef := range allFiles { + found, ok := l.Find(fileWithRef.Name) + if !ok { + t.Errorf("Find() failed to find %s that was in Files()", fileWithRef.Name) + continue + } + + if found.LayerRef != fileWithRef.LayerRef { + t.Errorf("File %s: Find() returned layer %v but Files() returned %v", + fileWithRef.Name, found.LayerRef, fileWithRef.LayerRef) + } + + if !reflect.DeepEqual(found.File, fileWithRef.File) { + t.Errorf("File %s: Find() and Files() returned different File data", fileWithRef.Name) + } + } + + // Also verify that Find() doesn't find files not in Files() + deletedFile, found := l.Find("a.txt") + if found { + t.Errorf("Find() found deleted file a.txt: %+v", deletedFile) + } + + // Check this is consistent with HasFile + for _, fileWithRef := range allFiles { + if !l.HasFile(fileWithRef.Name) { + t.Errorf("HasFile() returned false for %s that was in Files()", fileWithRef.Name) + } + } + + if l.HasFile("a.txt") { + t.Error("HasFile() returned true for deleted file a.txt") + } +} + +// Test serialization with string references +func TestLayeredIndex_SerializationString(t *testing.T) { + // Create serializer for string references + stringSerializer := RefSerializer[string]{ + Marshal: func(s string) ([]byte, error) { + return []byte(s), nil + }, + Unmarshal: func(b []byte) (string, error) { + return string(b), nil + }, + } + + // Create a layered index + l := NewLayeredIndex[string]() + + // Add various layers + l.AddLayer(Files{ + {Name: "file1.txt", CompressedSize64: 100, CRC32: 1111}, + {Name: 
"file2.txt", CompressedSize64: 200, CRC32: 2222}, + }, "base") + + l.AddLayer(Files{ + {Name: "file2.txt", CompressedSize64: 250, CRC32: 2223}, // Override + {Name: "file3.txt", CompressedSize64: 300, CRC32: 3333}, + }, "update1") + + l.AddDeleteLayer(Files{ + {Name: "file1.txt"}, + }, "delete1") + + l.AddLayer(Files{ + {Name: "file4.txt", CompressedSize64: 400, CRC32: 4444}, + {Name: "file1.txt", CompressedSize64: 150, CRC32: 1112}, // Re-add + }, "update2") + + // Serialize + data, err := l.SerializeLayered(stringSerializer) + if err != nil { + t.Fatalf("Failed to serialize: %v", err) + } + + // Deserialize + l2, err := DeserializeLayered(data, stringSerializer) + if err != nil { + t.Fatalf("Failed to deserialize: %v", err) + } + + // Verify layer count + if l2.LayerCount() != l.LayerCount() { + t.Errorf("Layer count mismatch: got %d, want %d", l2.LayerCount(), l.LayerCount()) + } + + // Verify layer references + for i := 0; i < l.LayerCount(); i++ { + ref1, _ := l.GetLayerRef(i) + ref2, _ := l2.GetLayerRef(i) + if ref1 != ref2 { + t.Errorf("Layer %d ref mismatch: got %s, want %s", i, ref2, ref1) + } + } + + // Verify files + files1 := l.Files() + files2 := l2.Files() + + if len(files1) != len(files2) { + t.Errorf("File count mismatch: got %d, want %d", len(files2), len(files1)) + } + + for i := range files1 { + if files1[i].Name != files2[i].Name { + t.Errorf("File %d name mismatch: got %s, want %s", i, files2[i].Name, files1[i].Name) + } + if files1[i].CompressedSize64 != files2[i].CompressedSize64 { + t.Errorf("File %d size mismatch: got %d, want %d", i, files2[i].CompressedSize64, files1[i].CompressedSize64) + } + if files1[i].CRC32 != files2[i].CRC32 { + t.Errorf("File %d CRC mismatch: got %d, want %d", i, files2[i].CRC32, files1[i].CRC32) + } + if files1[i].LayerRef != files2[i].LayerRef { + t.Errorf("File %d layer ref mismatch: got %s, want %s", i, files2[i].LayerRef, files1[i].LayerRef) + } + } +} + +// Test serialization with integer references +func 
TestLayeredIndex_SerializationInt(t *testing.T) { + // Create serializer for int references + intSerializer := RefSerializer[int]{ + Marshal: func(i int) ([]byte, error) { + buf := make([]byte, 8) + binary.BigEndian.PutUint64(buf, uint64(i)) + return buf, nil + }, + Unmarshal: func(b []byte) (int, error) { + if len(b) != 8 { + return 0, fmt.Errorf("invalid int data: expected 8 bytes, got %d", len(b)) + } + return int(binary.BigEndian.Uint64(b)), nil + }, + } + + // Create a layered index + l := NewLayeredIndex[int]() + + // Add layers with int references + for i := 0; i < 5; i++ { + var files Files + for j := 0; j < 10; j++ { + files = append(files, File{ + Name: fmt.Sprintf("layer%d_file%d.txt", i, j), + CompressedSize64: uint64(i*100 + j), + CRC32: uint32(i*1000 + j), + }) + } + if i == 3 { + // Make layer 3 a delete layer + l.AddDeleteLayer(Files{{Name: "layer0_file5.txt"}}, i) + } else { + l.AddLayer(files, i) + } + } + + // Serialize + data, err := l.SerializeLayered(intSerializer) + if err != nil { + t.Fatalf("Failed to serialize: %v", err) + } + + // Deserialize + l2, err := DeserializeLayered(data, intSerializer) + if err != nil { + t.Fatalf("Failed to deserialize: %v", err) + } + + // Verify + if l2.LayerCount() != l.LayerCount() { + t.Errorf("Layer count mismatch: got %d, want %d", l2.LayerCount(), l.LayerCount()) + } + + // Check specific file + file, found := l2.Find("layer0_file5.txt") + origFile, origFound := l.Find("layer0_file5.txt") + if found != origFound { + t.Errorf("File existence mismatch for layer0_file5.txt") + } + if found && file.LayerRef != origFile.LayerRef { + t.Errorf("Layer ref mismatch for layer0_file5.txt") + } +} + +// Test serialization with custom struct references +func TestLayeredIndex_SerializationCustomType(t *testing.T) { + type Version struct { + Major int + Minor int + Patch int + } + + // Create serializer for Version + versionSerializer := RefSerializer[Version]{ + Marshal: func(v Version) ([]byte, error) { + buf := 
make([]byte, 12) + binary.BigEndian.PutUint32(buf[0:4], uint32(v.Major)) + binary.BigEndian.PutUint32(buf[4:8], uint32(v.Minor)) + binary.BigEndian.PutUint32(buf[8:12], uint32(v.Patch)) + return buf, nil + }, + Unmarshal: func(b []byte) (Version, error) { + if len(b) != 12 { + return Version{}, fmt.Errorf("invalid version data: expected 12 bytes, got %d", len(b)) + } + return Version{ + Major: int(binary.BigEndian.Uint32(b[0:4])), + Minor: int(binary.BigEndian.Uint32(b[4:8])), + Patch: int(binary.BigEndian.Uint32(b[8:12])), + }, nil + }, + } + + // Create layered index + l := NewLayeredIndex[Version]() + + l.AddLayer(Files{ + {Name: "main.go", CompressedSize64: 1000}, + {Name: "go.mod", CompressedSize64: 100}, + }, Version{1, 0, 0}) + + l.AddLayer(Files{ + {Name: "main.go", CompressedSize64: 1200}, // Updated + {Name: "feature.go", CompressedSize64: 500}, + }, Version{1, 1, 0}) + + l.AddDeleteLayer(Files{ + {Name: "feature.go"}, // Removed in patch + }, Version{1, 1, 1}) + + // Serialize and deserialize + data, err := l.SerializeLayered(versionSerializer) + if err != nil { + t.Fatalf("Failed to serialize: %v", err) + } + + l2, err := DeserializeLayered(data, versionSerializer) + if err != nil { + t.Fatalf("Failed to deserialize: %v", err) + } + + // Verify versions + for i := 0; i < l.LayerCount(); i++ { + v1, _ := l.GetLayerRef(i) + v2, _ := l2.GetLayerRef(i) + if v1 != v2 { + t.Errorf("Version mismatch at layer %d: got %+v, want %+v", i, v2, v1) + } + } + + // Check files + if l2.HasFile("feature.go") { + t.Error("feature.go should have been deleted") + } + + file, found := l2.Find("main.go") + if !found { + t.Error("main.go should exist") + } else if file.LayerRef != (Version{1, 1, 0}) { + t.Errorf("main.go should be from version 1.1.0, got %+v", file.LayerRef) + } +} + +// Test serialization error handling +func TestLayeredIndex_SerializationErrors(t *testing.T) { + l := NewLayeredIndex[string]() + l.AddLayer(Files{{Name: "test.txt"}}, "test") + + // Test with 
nil Marshal function + nilMarshal := RefSerializer[string]{ + Marshal: nil, + Unmarshal: func(b []byte) (string, error) { return "", nil }, + } + + _, err := l.SerializeLayered(nilMarshal) + if err == nil { + t.Error("Should error with nil Marshal function") + } + + // Test with nil Unmarshal function + nilUnmarshal := RefSerializer[string]{ + Marshal: func(s string) ([]byte, error) { return []byte(s), nil }, + Unmarshal: nil, + } + + data, _ := l.SerializeLayered(RefSerializer[string]{ + Marshal: func(s string) ([]byte, error) { return []byte(s), nil }, + Unmarshal: func(b []byte) (string, error) { return string(b), nil }, + }) + + _, err = DeserializeLayered(data, nilUnmarshal) + if err == nil { + t.Error("Should error with nil Unmarshal function") + } + + // Test with Marshal error + errorMarshal := RefSerializer[string]{ + Marshal: func(s string) ([]byte, error) { + return nil, fmt.Errorf("marshal error") + }, + Unmarshal: func(b []byte) (string, error) { return string(b), nil }, + } + + _, err = l.SerializeLayered(errorMarshal) + if err == nil { + t.Error("Should propagate Marshal error") + } + + // Test with Unmarshal error + errorUnmarshal := RefSerializer[string]{ + Marshal: func(s string) ([]byte, error) { return []byte(s), nil }, + Unmarshal: func(b []byte) (string, error) { + return "", fmt.Errorf("unmarshal error") + }, + } + + _, err = DeserializeLayered(data, errorUnmarshal) + if err == nil { + t.Error("Should propagate Unmarshal error") + } +} + +// Test concurrent serialization performance +func TestLayeredIndex_ConcurrentSerialization(t *testing.T) { + // Create a large layered index + l := NewLayeredIndex[int]() + + // Add many layers + for layer := 0; layer < 20; layer++ { + var files Files + for i := 0; i < 500; i++ { + files = append(files, File{ + Name: fmt.Sprintf("layer%02d/file%04d.txt", layer, i), + CompressedSize64: uint64(layer*1000 + i), + CRC32: uint32(layer*10000 + i), + Custom: map[string]string{"layer": fmt.Sprintf("%d", layer)}, + 
}) + } + l.AddLayer(files, layer) + } + + intSerializer := RefSerializer[int]{ + Marshal: func(i int) ([]byte, error) { + buf := make([]byte, 8) + binary.BigEndian.PutUint64(buf, uint64(i)) + return buf, nil + }, + Unmarshal: func(b []byte) (int, error) { + return int(binary.BigEndian.Uint64(b)), nil + }, + } + + // Serialize + data, err := l.SerializeLayered(intSerializer) + if err != nil { + t.Fatalf("Failed to serialize large index: %v", err) + } + + // Deserialize + l2, err := DeserializeLayered(data, intSerializer) + if err != nil { + t.Fatalf("Failed to deserialize large index: %v", err) + } + + // Verify counts + if l2.LayerCount() != 20 { + t.Errorf("Expected 20 layers, got %d", l2.LayerCount()) + } + + if l2.FileCount() != 20*500 { + t.Errorf("Expected %d files, got %d", 20*500, l2.FileCount()) + } + + // Spot check some files + file, found := l2.Find("layer10/file0250.txt") + if !found { + t.Error("Should find layer10/file0250.txt") + } else { + if file.LayerRef != 10 { + t.Errorf("Wrong layer ref: got %d, want 10", file.LayerRef) + } + if file.CompressedSize64 != 10250 { + t.Errorf("Wrong size: got %d, want 10250", file.CompressedSize64) + } + } +} + +// Benchmark serialization +func BenchmarkLayeredIndex_Serialization(b *testing.B) { + // Create a layered index + l := NewLayeredIndex[string]() + + for layer := 0; layer < 10; layer++ { + var files Files + for i := 0; i < 1000; i++ { + files = append(files, File{ + Name: fmt.Sprintf("layer%02d_file_%04d.txt", layer, i), + CompressedSize64: uint64(i), + }) + } + l.AddLayer(files, fmt.Sprintf("layer%d", layer)) + } + + stringSerializer := RefSerializer[string]{ + Marshal: func(s string) ([]byte, error) { + return []byte(s), nil + }, + Unmarshal: func(b []byte) (string, error) { + return string(b), nil + }, + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + data, err := l.SerializeLayered(stringSerializer) + if err != nil { + b.Fatal(err) + } + _ = data + } +} + +// Benchmark deserialization +func 
BenchmarkLayeredIndex_Deserialization(b *testing.B) { + // Create and serialize a layered index + l := NewLayeredIndex[string]() + + for layer := 0; layer < 10; layer++ { + var files Files + for i := 0; i < 1000; i++ { + files = append(files, File{ + Name: fmt.Sprintf("layer%02d_file_%04d.txt", layer, i), + CompressedSize64: uint64(i), + }) + } + l.AddLayer(files, fmt.Sprintf("layer%d", layer)) + } + + stringSerializer := RefSerializer[string]{ + Marshal: func(s string) ([]byte, error) { + return []byte(s), nil + }, + Unmarshal: func(b []byte) (string, error) { + return string(b), nil + }, + } + + data, _ := l.SerializeLayered(stringSerializer) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + l2, err := DeserializeLayered(data, stringSerializer) + if err != nil { + b.Fatal(err) + } + _ = l2 + } +}