diff --git a/Garnet.sln b/Garnet.sln
index c3df1f7c058..1bc3bd80db0 100644
--- a/Garnet.sln
+++ b/Garnet.sln
@@ -118,6 +118,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ETag", "samples\ETag\ETag.c
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Garnet.fuzz", "test\Garnet.fuzz\Garnet.fuzz.csproj", "{7A42F7AA-EE93-49B1-8711-A1D6D948F5FC}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Btree", "playground\BTree\Btree.csproj", "{CE12831B-2805-469E-8208-759DC4B4862C}"
+EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Device.benchmark", "benchmark\Device.benchmark\Device.benchmark.csproj", "{5422F66F-327C-AABE-98B2-9AFC349745D0}"
EndProject
Global
@@ -360,6 +362,14 @@ Global
{7A42F7AA-EE93-49B1-8711-A1D6D948F5FC}.Release|Any CPU.Build.0 = Release|Any CPU
{7A42F7AA-EE93-49B1-8711-A1D6D948F5FC}.Release|x64.ActiveCfg = Release|Any CPU
{7A42F7AA-EE93-49B1-8711-A1D6D948F5FC}.Release|x64.Build.0 = Release|Any CPU
+ {CE12831B-2805-469E-8208-759DC4B4862C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {CE12831B-2805-469E-8208-759DC4B4862C}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {CE12831B-2805-469E-8208-759DC4B4862C}.Debug|x64.ActiveCfg = Debug|Any CPU
+ {CE12831B-2805-469E-8208-759DC4B4862C}.Debug|x64.Build.0 = Debug|Any CPU
+ {CE12831B-2805-469E-8208-759DC4B4862C}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {CE12831B-2805-469E-8208-759DC4B4862C}.Release|Any CPU.Build.0 = Release|Any CPU
+ {CE12831B-2805-469E-8208-759DC4B4862C}.Release|x64.ActiveCfg = Release|Any CPU
+ {CE12831B-2805-469E-8208-759DC4B4862C}.Release|x64.Build.0 = Release|Any CPU
{5422F66F-327C-AABE-98B2-9AFC349745D0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5422F66F-327C-AABE-98B2-9AFC349745D0}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5422F66F-327C-AABE-98B2-9AFC349745D0}.Debug|x64.ActiveCfg = Debug|Any CPU
@@ -406,6 +416,7 @@ Global
{4FBA1587-BAFC-49F8-803A-D1CF431A26F5} = {7068BB97-1958-4060-B5F1-859464592E56}
{7A42F7AA-EE93-49B1-8711-A1D6D948F5FC} = {9A03717A-4E0B-49CA-8579-A02A4C1D003F}
{5422F66F-327C-AABE-98B2-9AFC349745D0} = {346A5A53-51E4-4A75-B7E6-491D950382CE}
+ {CE12831B-2805-469E-8208-759DC4B4862C} = {69A71E2C-00E3-42F3-854E-BE157A24834E}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {2C02C405-4798-41CA-AF98-61EDFEF6772E}
diff --git a/benchmark/BDN.benchmark/Network/StreamOperations.cs b/benchmark/BDN.benchmark/Network/StreamOperations.cs
new file mode 100644
index 00000000000..30949a5f5dd
--- /dev/null
+++ b/benchmark/BDN.benchmark/Network/StreamOperations.cs
@@ -0,0 +1,30 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using BenchmarkDotNet.Attributes;
+using Embedded.server;
+
+namespace BDN.benchmark.Network
+{
+ /// <summary>
+ /// Benchmark for stream operations (XADD)
+ /// </summary>
+ [MemoryDiagnoser]
+ public class StreamOperations : NetworkBase
+ {
+ static ReadOnlySpan<byte> XADD => "*5\r\n$4\r\nXADD\r\n$8\r\nmystream\r\n$1\r\n*\r\n$5\r\nfield\r\n$5\r\nvalue\r\n"u8;
+ Request xadd;
+
+ public override void GlobalSetup()
+ {
+ base.GlobalSetup();
+ SetupOperation(ref xadd, XADD);
+ }
+
+ [Benchmark]
+ public async ValueTask InlineXAdd()
+ {
+ await Send(xadd);
+ }
+ }
+}
\ No newline at end of file
diff --git a/benchmark/Resp.benchmark/OpType.cs b/benchmark/Resp.benchmark/OpType.cs
index b4d7f2fbd80..55b5b82b353 100644
--- a/benchmark/Resp.benchmark/OpType.cs
+++ b/benchmark/Resp.benchmark/OpType.cs
@@ -13,6 +13,7 @@ public enum OpType
READ_TXN, WRITE_TXN, READWRITETX, WATCH_TXN, SAMPLEUPDATETX, SAMPLEDELETETX,
SCRIPTSET, SCRIPTGET, SCRIPTRETKEY,
PUBLISH, SPUBLISH,
+ XADD,
READONLY = 8888,
AUTH = 9999,
}
diff --git a/benchmark/Resp.benchmark/Program.cs b/benchmark/Resp.benchmark/Program.cs
index 6da133408bf..b00755802e5 100644
--- a/benchmark/Resp.benchmark/Program.cs
+++ b/benchmark/Resp.benchmark/Program.cs
@@ -212,6 +212,7 @@ static void WaitForServer(Options opts)
}
break;
}
+ Console.WriteLine($"Successfully connected to server instance at {opts.Address}:{opts.Port}");
}
static void RunBasicCommandsBenchmark(Options opts)
@@ -220,7 +221,7 @@ static void RunBasicCommandsBenchmark(Options opts)
int keyLen = opts.KeyLength;
int valueLen = opts.ValueLength;
- if (opts.Op == OpType.PUBLISH || opts.Op == OpType.SPUBLISH || opts.Op == OpType.ZADD || opts.Op == OpType.ZREM || opts.Op == OpType.ZADDREM || opts.Op == OpType.PING || opts.Op == OpType.GEOADD || opts.Op == OpType.GEOADDREM || opts.Op == OpType.SETEX || opts.Op == OpType.ZCARD || opts.Op == OpType.ZADDCARD)
+ if (opts.Op == OpType.PUBLISH || opts.Op == OpType.SPUBLISH || opts.Op == OpType.ZADD || opts.Op == OpType.ZREM || opts.Op == OpType.ZADDREM || opts.Op == OpType.PING || opts.Op == OpType.GEOADD || opts.Op == OpType.GEOADDREM || opts.Op == OpType.SETEX || opts.Op == OpType.ZCARD || opts.Op == OpType.ZADDCARD || opts.Op == OpType.XADD)
opts.SkipLoad = true;
//if we have scripts ops we need to load them in memory
diff --git a/benchmark/Resp.benchmark/ReqGen.cs b/benchmark/Resp.benchmark/ReqGen.cs
index 20164116135..887b270aad6 100644
--- a/benchmark/Resp.benchmark/ReqGen.cs
+++ b/benchmark/Resp.benchmark/ReqGen.cs
@@ -268,6 +268,11 @@ public static (int, int) OnResponse(byte* buf, int bytesRead, int opType)
for (int i = 0; i < bytesRead; i++)
if (buf[i] == '*') count++;
break;
+ case OpType.XADD:
+ // XADD returns a bulk string with the stream ID
+ for (int i = 0; i < bytesRead; i++)
+ if (buf[i] == '$') count++;
+ break;
default:
break;
}
diff --git a/benchmark/Resp.benchmark/ReqGenLoadBuffers.cs b/benchmark/Resp.benchmark/ReqGenLoadBuffers.cs
index fd452599df1..13cea2d5549 100644
--- a/benchmark/Resp.benchmark/ReqGenLoadBuffers.cs
+++ b/benchmark/Resp.benchmark/ReqGenLoadBuffers.cs
@@ -135,6 +135,7 @@ private bool GenerateBatch(int i, int start, int end, OpType opType)
OpType.SCRIPTRETKEY => System.Text.Encoding.ASCII.GetBytes($"*4\r\n$7\r\nEVALSHA\r\n{BenchUtils.sha1RetKeyScript}\r\n$1\r\n1\r\n"),
OpType.PUBLISH => System.Text.Encoding.ASCII.GetBytes($"*3\r\n$7\r\nPUBLISH\r\n"),
OpType.SPUBLISH => System.Text.Encoding.ASCII.GetBytes($"*3\r\n$8\r\nSPUBLISH\r\n"),
+ OpType.XADD => System.Text.Encoding.ASCII.GetBytes($"*5\r\n$4\r\nXADD\r\n"),
_ => null
};
@@ -178,6 +179,7 @@ private bool GenerateBatch(int i, int start, int end, OpType opType)
case OpType.SCRIPTRETKEY:
case OpType.PUBLISH:
case OpType.SPUBLISH:
+ case OpType.XADD:
writeSuccess = GenerateSingleKeyValueOp(i, opHeader, start, end, opType);
return writeSuccess;
default:
diff --git a/benchmark/Resp.benchmark/ReqGenUtils.cs b/benchmark/Resp.benchmark/ReqGenUtils.cs
index 243aa24b106..f7e920f7dd0 100644
--- a/benchmark/Resp.benchmark/ReqGenUtils.cs
+++ b/benchmark/Resp.benchmark/ReqGenUtils.cs
@@ -96,6 +96,7 @@ private bool WriteOp(ref byte* curr, byte* vend, OpType opType)
case OpType.SCRIPTRETKEY:
case OpType.PUBLISH:
case OpType.SPUBLISH:
+ case OpType.XADD:
if (!WriteKey(ref curr, vend, out keyData))
return false;
break;
@@ -307,6 +308,11 @@ private bool WriteOp(ref byte* curr, byte* vend, OpType opType)
if (!WriteStringBytes(ref curr, vend, valueBuffer))
return false;
break;
+ case OpType.XADD:
+ // Auto-generate ID with *
+ if (!WriteStringBytes(ref curr, vend, System.Text.Encoding.ASCII.GetBytes("*")))
+ return false;
+ break;
default:
break;
}
@@ -319,6 +325,15 @@ private bool WriteOp(ref byte* curr, byte* vend, OpType opType)
if (!WriteInteger(n, ref curr, vend))
return false;
break;
+ case OpType.XADD:
+ // Write field name
+ if (!WriteStringBytes(ref curr, vend, System.Text.Encoding.ASCII.GetBytes("field")))
+ return false;
+ // Write field value
+ RandomString();
+ if (!WriteStringBytes(ref curr, vend, valueBuffer))
+ return false;
+ break;
default:
break;
}
diff --git a/libs/common/RespMemoryWriter.cs b/libs/common/RespMemoryWriter.cs
index ebb864855c1..f3979c9d8d3 100644
--- a/libs/common/RespMemoryWriter.cs
+++ b/libs/common/RespMemoryWriter.cs
@@ -25,7 +25,7 @@ namespace Garnet.common
ref SpanByteAndMemory output;
public readonly bool resp3;
- public unsafe RespMemoryWriter(byte respVersion, ref SpanByteAndMemory output)
+ public RespMemoryWriter(byte respVersion, ref SpanByteAndMemory output)
{
this.output = ref output;
ptrHandle = default;
@@ -519,7 +519,6 @@ private void ReallocateOutput(int extraLenHint = 0, bool lowerMinimum = false)
if (ptrHandle.Pointer != default)
{
ptrHandle.Dispose();
- output.Memory.Dispose();
}
else
{
diff --git a/libs/common/RespReadUtils.cs b/libs/common/RespReadUtils.cs
index d8b351a7c6d..ffc32dc2b2a 100644
--- a/libs/common/RespReadUtils.cs
+++ b/libs/common/RespReadUtils.cs
@@ -640,6 +640,16 @@ public static bool TryReadInt64WithLengthHeader(out long number, ref byte* ptr,
return true;
}
+ /// <summary>
+ /// Tries to read a ulong from the given ASCII-encoded RESP string.
+ /// Note: this does not check for any length headers and is simply an accessor to TryReadUInt64.
+ /// </summary>
+ /// <param name="number">If parsing was successful, contains the parsed ulong value.</param>
+ /// <param name="ptr">The starting position in the RESP string. Will be advanced if parsing is successful.</param>
+ /// <param name="end">The current end of the RESP string.</param>
+ /// <returns>True if a ulong was successfully parsed.</returns>
+ public static bool ReadUlong(out ulong number, ref byte* ptr, byte* end) => TryReadUInt64(ref ptr, end, out number, out _);
+
///
/// Read long with length header
///
diff --git a/libs/host/Configuration/Options.cs b/libs/host/Configuration/Options.cs
index 97344910305..42c7bd6aa31 100644
--- a/libs/host/Configuration/Options.cs
+++ b/libs/host/Configuration/Options.cs
@@ -616,6 +616,10 @@ public IEnumerable LuaAllowedFunctions
[Option("expired-key-deletion-scan-freq", Required = false, HelpText = "Frequency of background scan for expired key deletion, in seconds")]
public int ExpiredKeyDeletionScanFrequencySecs { get; set; }
+ [OptionValidation]
+ [Option("streams", Required = false, HelpText = "Enable streams on server.")]
+ public bool? EnableStreams { get; set; }
+
[IntRangeValidation(0, int.MaxValue, includeMin: true, isRequired: false)]
[Option("cluster-replication-reestablishment-timeout")]
public int ClusterReplicationReestablishmentTimeout { get; set; }
@@ -893,6 +897,7 @@ public GarnetServerOptions GetServerOptions(ILogger logger = null)
UnixSocketPermission = unixSocketPermissions,
MaxDatabases = MaxDatabases,
ExpiredKeyDeletionScanFrequencySecs = ExpiredKeyDeletionScanFrequencySecs,
+ EnableStreams = EnableStreams.GetValueOrDefault(),
ClusterReplicationReestablishmentTimeout = ClusterReplicationReestablishmentTimeout,
ClusterReplicaResumeWithData = ClusterReplicaResumeWithData,
};
diff --git a/libs/host/GarnetServer.cs b/libs/host/GarnetServer.cs
index f655046afc7..21d6972917c 100644
--- a/libs/host/GarnetServer.cs
+++ b/libs/host/GarnetServer.cs
@@ -341,10 +341,10 @@ private TsavoriteKV CreateStore(int dbId, IClust
clusterFactory.CreateCheckpointManager(opts.DeviceFactoryCreator, defaultNamingScheme, isMainStore: true, logger) :
new GarnetCheckpointManager(opts.DeviceFactoryCreator, defaultNamingScheme, removeOutdated: true);
- var store = new TsavoriteKV(kvSettings
- , Tsavorite.core.StoreFunctions.Create(new SpanByteComparer(),
- () => new GarnetObjectSerializer(customCommandManager))
- , (allocatorSettings, storeFunctions) => new(allocatorSettings, storeFunctions));
+ var store = new TsavoriteKV(kvSettings: kvSettings,
+ storeFunctions: Tsavorite.core.StoreFunctions.Create(new SpanByteComparer(),
+ valueSerializerCreator: () => new GarnetObjectSerializer(customCommandManager)),
+ allocatorFactory: (allocatorSettings, storeFunctions) => new(allocatorSettings, storeFunctions));
if (heapMemorySize > 0 || readCacheHeapMemorySize > 0)
sizeTracker = new CacheSizeTracker(store, heapMemorySize, readCacheHeapMemorySize, this.loggerFactory);
diff --git a/libs/host/defaults.conf b/libs/host/defaults.conf
index cffab1cd601..669ed3fa01e 100644
--- a/libs/host/defaults.conf
+++ b/libs/host/defaults.conf
@@ -410,6 +410,9 @@
/* Max number of logical databases allowed in a single Garnet server instance */
"MaxDatabases": 16,
+ /* Enable use of streams inside Garnet */
+ "EnableStreams": false,
+
/* Frequency of background scan for expired key deletion, in seconds */
"ExpiredKeyDeletionScanFrequencySecs": -1,
diff --git a/libs/resources/RespCommandsDocs.json b/libs/resources/RespCommandsDocs.json
index be77703a3ed..8d447cdba38 100644
--- a/libs/resources/RespCommandsDocs.json
+++ b/libs/resources/RespCommandsDocs.json
@@ -7770,6 +7770,326 @@
}
]
},
+ {
+ "Command": "XADD",
+ "Name": "XADD",
+ "Summary": "Appends a new message to a stream. Creates the key if it doesn\u0027t exist.",
+ "Group": "Stream",
+ "Complexity": "O(1) when adding a new entry, O(N) when trimming where N being the number of entries evicted.",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandKeyArgument",
+ "Name": "KEY",
+ "DisplayText": "key",
+ "Type": "Key",
+ "KeySpecIndex": 0
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "NOMKSTREAM",
+ "DisplayText": "nomkstream",
+ "Type": "PureToken",
+ "Token": "NOMKSTREAM",
+ "ArgumentFlags": "Optional"
+ },
+ {
+ "TypeDiscriminator": "RespCommandContainerArgument",
+ "Name": "TRIM",
+ "Type": "Block",
+ "ArgumentFlags": "Optional",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandContainerArgument",
+ "Name": "STRATEGY",
+ "Type": "OneOf",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "MAXLEN",
+ "DisplayText": "maxlen",
+ "Type": "PureToken",
+ "Token": "MAXLEN"
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "MINID",
+ "DisplayText": "minid",
+ "Type": "PureToken",
+ "Token": "MINID"
+ }
+ ]
+ },
+ {
+ "TypeDiscriminator": "RespCommandContainerArgument",
+ "Name": "OPERATOR",
+ "Type": "OneOf",
+ "ArgumentFlags": "Optional",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "EQUAL",
+ "DisplayText": "equal",
+ "Type": "PureToken",
+ "Token": "="
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "APPROXIMATELY",
+ "DisplayText": "approximately",
+ "Type": "PureToken",
+ "Token": "~"
+ }
+ ]
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "THRESHOLD",
+ "DisplayText": "threshold",
+ "Type": "String"
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "COUNT",
+ "DisplayText": "count",
+ "Type": "Integer",
+ "Token": "LIMIT",
+ "ArgumentFlags": "Optional"
+ }
+ ]
+ },
+ {
+ "TypeDiscriminator": "RespCommandContainerArgument",
+ "Name": "ID-SELECTOR",
+ "Type": "OneOf",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "AUTO-ID",
+ "DisplayText": "auto-id",
+ "Type": "PureToken",
+ "Token": "*"
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "ID",
+ "DisplayText": "id",
+ "Type": "String"
+ }
+ ]
+ },
+ {
+ "TypeDiscriminator": "RespCommandContainerArgument",
+ "Name": "DATA",
+ "Type": "Block",
+ "ArgumentFlags": "Multiple",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "FIELD",
+ "DisplayText": "field",
+ "Type": "String"
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "VALUE",
+ "DisplayText": "value",
+ "Type": "String"
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "Command": "XDEL",
+ "Name": "XDEL",
+ "Summary": "Returns the number of messages after removing them from a stream.",
+ "Group": "Stream",
+ "Complexity": "O(1) for each single item to delete in the stream, regardless of the stream size.",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandKeyArgument",
+ "Name": "KEY",
+ "DisplayText": "key",
+ "Type": "Key",
+ "KeySpecIndex": 0
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "ID",
+ "DisplayText": "id",
+ "Type": "String",
+ "ArgumentFlags": "Multiple"
+ }
+ ]
+ },
+ {
+ "Command": "XLEN",
+ "Name": "XLEN",
+ "Summary": "Return the number of messages in a stream.",
+ "Group": "Stream",
+ "Complexity": "O(1)",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandKeyArgument",
+ "Name": "KEY",
+ "DisplayText": "key",
+ "Type": "Key",
+ "KeySpecIndex": 0
+ }
+ ]
+ },
+ {
+ "Command": "XRANGE",
+ "Name": "XRANGE",
+ "Summary": "Returns the messages from a stream within a range of IDs.",
+ "Group": "Stream",
+ "Complexity": "O(N) with N being the number of elements being returned. If N is constant (e.g. always asking for the first 10 elements with COUNT), you can consider it O(1).",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandKeyArgument",
+ "Name": "KEY",
+ "DisplayText": "key",
+ "Type": "Key",
+ "KeySpecIndex": 0
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "START",
+ "DisplayText": "start",
+ "Type": "String"
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "END",
+ "DisplayText": "end",
+ "Type": "String"
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "COUNT",
+ "DisplayText": "count",
+ "Type": "Integer",
+ "Token": "COUNT",
+ "ArgumentFlags": "Optional"
+ }
+ ]
+ },
+ {
+ "Command": "XREVRANGE",
+ "Name": "XREVRANGE",
+ "Summary": "Returns the messages from a stream within a range of IDs in reverse order.",
+ "Group": "Stream",
+ "Complexity": "O(N) with N being the number of elements being returned. If N is constant (e.g. always asking for the first 10 elements with COUNT), you can consider it O(1).",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandKeyArgument",
+ "Name": "KEY",
+ "DisplayText": "key",
+ "Type": "Key",
+ "KeySpecIndex": 0
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "END",
+ "DisplayText": "end",
+ "Type": "String"
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "START",
+ "DisplayText": "start",
+ "Type": "String"
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "COUNT",
+ "DisplayText": "count",
+ "Type": "Integer",
+ "Token": "COUNT",
+ "ArgumentFlags": "Optional"
+ }
+ ]
+ },
+ {
+ "Command": "XTRIM",
+ "Name": "XTRIM",
+ "Summary": "Deletes messages from the beginning of a stream.",
+ "Group": "Stream",
+ "Complexity": "O(N), with N being the number of evicted entries. Constant times are very small however, since entries are organized in macro nodes containing multiple entries that can be released with a single deallocation.",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandKeyArgument",
+ "Name": "KEY",
+ "DisplayText": "key",
+ "Type": "Key",
+ "KeySpecIndex": 0
+ },
+ {
+ "TypeDiscriminator": "RespCommandContainerArgument",
+ "Name": "TRIM",
+ "Type": "Block",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandContainerArgument",
+ "Name": "STRATEGY",
+ "Type": "OneOf",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "MAXLEN",
+ "DisplayText": "maxlen",
+ "Type": "PureToken",
+ "Token": "MAXLEN"
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "MINID",
+ "DisplayText": "minid",
+ "Type": "PureToken",
+ "Token": "MINID"
+ }
+ ]
+ },
+ {
+ "TypeDiscriminator": "RespCommandContainerArgument",
+ "Name": "OPERATOR",
+ "Type": "OneOf",
+ "ArgumentFlags": "Optional",
+ "Arguments": [
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "EQUAL",
+ "DisplayText": "equal",
+ "Type": "PureToken",
+ "Token": "="
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "APPROXIMATELY",
+ "DisplayText": "approximately",
+ "Type": "PureToken",
+ "Token": "~"
+ }
+ ]
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "THRESHOLD",
+ "DisplayText": "threshold",
+ "Type": "String"
+ },
+ {
+ "TypeDiscriminator": "RespCommandBasicArgument",
+ "Name": "COUNT",
+ "DisplayText": "count",
+ "Type": "Integer",
+ "Token": "LIMIT",
+ "ArgumentFlags": "Optional"
+ }
+ ]
+ }
+ ]
+ },
{
"Command": "ZADD",
"Name": "ZADD",
diff --git a/libs/resources/RespCommandsInfo.json b/libs/resources/RespCommandsInfo.json
index daa1acd29d3..2f8431f28c6 100644
--- a/libs/resources/RespCommandsInfo.json
+++ b/libs/resources/RespCommandsInfo.json
@@ -5169,14 +5169,17 @@
]
},
{
- "Command": "ZADD",
- "Name": "ZADD",
- "Arity": -4,
+ "Command": "XADD",
+ "Name": "XADD",
+ "Arity": -5,
"Flags": "DenyOom, Fast, Write",
"FirstKey": 1,
"LastKey": 1,
"Step": 1,
- "AclCategories": "Fast, SortedSet, Write",
+ "AclCategories": "Fast, Stream, Write",
+ "Tips": [
+ "nondeterministic_output"
+ ],
"KeySpecifications": [
{
"BeginSearch": {
@@ -5189,20 +5192,46 @@
"KeyStep": 1,
"Limit": 0
},
+ "Notes": "UPDATE instead of INSERT because of the optional trimming feature",
"Flags": "RW, Update"
}
],
"StoreType": "Object"
},
{
- "Command": "ZCARD",
- "Name": "ZCARD",
+ "Command": "XDEL",
+ "Name": "XDEL",
+ "Arity": -3,
+ "Flags": "Fast, Write",
+ "FirstKey": 1,
+ "LastKey": 1,
+ "Step": 1,
+ "AclCategories": "Fast, Stream, Write",
+ "KeySpecifications": [
+ {
+ "BeginSearch": {
+ "TypeDiscriminator": "BeginSearchIndex",
+ "Index": 1
+ },
+ "FindKeys": {
+ "TypeDiscriminator": "FindKeysRange",
+ "LastKey": 0,
+ "KeyStep": 1,
+ "Limit": 0
+ },
+ "Flags": "RW, Delete"
+ }
+ ]
+ },
+ {
+ "Command": "XLEN",
+ "Name": "XLEN",
"Arity": 2,
"Flags": "Fast, ReadOnly",
"FirstKey": 1,
"LastKey": 1,
"Step": 1,
- "AclCategories": "Fast, Read, SortedSet",
+ "AclCategories": "Fast, Read, Stream",
"KeySpecifications": [
{
"BeginSearch": {
@@ -5221,6 +5250,113 @@
"StoreType": "Object"
},
{
+ "Command": "XRANGE",
+ "Name": "XRANGE",
+ "Arity": -4,
+ "Flags": "ReadOnly",
+ "FirstKey": 1,
+ "LastKey": 1,
+ "Step": 1,
+ "AclCategories": "Read, Slow, Stream",
+ "KeySpecifications": [
+ {
+ "BeginSearch": {
+ "TypeDiscriminator": "BeginSearchIndex",
+ "Index": 1
+ },
+ "FindKeys": {
+ "TypeDiscriminator": "FindKeysRange",
+ "LastKey": 0,
+ "KeyStep": 1,
+ "Limit": 0
+ },
+ "Flags": "RO, Access"
+ }
+ ]
+ },
+ {
+ "Command": "XREVRANGE",
+ "Name": "XREVRANGE",
+ "Arity": -4,
+ "Flags": "ReadOnly",
+ "FirstKey": 1,
+ "LastKey": 1,
+ "Step": 1,
+ "AclCategories": "Read, Slow, Stream",
+ "KeySpecifications": [
+ {
+ "BeginSearch": {
+ "TypeDiscriminator": "BeginSearchIndex",
+ "Index": 1
+ },
+ "FindKeys": {
+ "TypeDiscriminator": "FindKeysRange",
+ "LastKey": 0,
+ "KeyStep": 1,
+ "Limit": 0
+ },
+ "Flags": "RO, Access"
+ }
+ ]
+ },
+ {
+ "Command": "XTRIM",
+ "Name": "XTRIM",
+ "Arity": -4,
+ "Flags": "Write",
+ "FirstKey": 1,
+ "LastKey": 1,
+ "Step": 1,
+ "AclCategories": "Slow, Stream, Write",
+ "Tips": [
+ "nondeterministic_output"
+ ],
+ "KeySpecifications": [
+ {
+ "BeginSearch": {
+ "TypeDiscriminator": "BeginSearchIndex",
+ "Index": 1
+ },
+ "FindKeys": {
+ "TypeDiscriminator": "FindKeysRange",
+ "LastKey": 0,
+ "KeyStep": 1,
+ "Limit": 0
+ },
+ "Flags": "RW, Delete"
+ }
+ ]
+ },
+ {
+ "Command": "ZADD",
+ "Name": "ZADD",
+ "Arity": -4,
+ "Flags": "DenyOom, Fast, Write",
+ "FirstKey": 1,
+ "LastKey": 1,
+ "Step": 1,
+ "AclCategories": "Fast, SortedSet, Write",
+ "KeySpecifications": [
+ {
+ "BeginSearch": {
+ "TypeDiscriminator": "BeginSearchIndex",
+ "Index": 1
+ },
+ "FindKeys": {
+ "TypeDiscriminator": "FindKeysRange",
+ "LastKey": 0,
+ "KeyStep": 1,
+ "Limit": 0
+ },
+ "Flags": "RW, Update"
+ }
+ ]
+ },
+ { "Command": "ZCARD", "Name": "ZCARD", "Arity": 2, "Flags": "Fast, ReadOnly",
+ "FirstKey": 1, "LastKey": 1, "Step": 1, "AclCategories": "Fast, Read, SortedSet",
+ "KeySpecifications": [ { "BeginSearch": { "TypeDiscriminator": "BeginSearchIndex", "Index": 1 }, "FindKeys": { "TypeDiscriminator": "FindKeysRange", "LastKey": 0, "KeyStep": 1, "Limit": 0 }, "Flags": "RO, Access" } ],
+ "StoreType": "Object" },
+ {
"Command": "ZCOLLECT",
"Name": "ZCOLLECT",
"Arity": 2,
diff --git a/libs/server/BTreeIndex/BTree.cs b/libs/server/BTreeIndex/BTree.cs
new file mode 100644
index 00000000000..afb7d38e938
--- /dev/null
+++ b/libs/server/BTreeIndex/BTree.cs
@@ -0,0 +1,192 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+
+namespace Garnet.server.BTreeIndex
+{
+ public unsafe partial class BTree
+ {
+ BTreeNode* root;
+ BTreeNode* head;
+ BTreeNode* tail;
+ byte* tailMinKey;
+ public static readonly int MAX_TREE_DEPTH = 10; // maximum allowed depth of the tree
+ static int DEFAULT_SPLIT_LEAF_POSITION = (BTreeNode.LEAF_CAPACITY + 1) / 2; // position at which leaf node is split
+ static int SPLIT_LEAF_POSITION = BTreeNode.LEAF_CAPACITY; // position at which leaf node is split
+ static int SPLIT_INTERNAL_POSITION = BTreeNode.INTERNAL_CAPACITY; // position at which internal node is split
+
+ BTreeNode*[] rootToTailLeaf; // array of nodes from root to tail leaf
+ public BTreeStats stats; // statistics about the tree
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="BTree"/> class.
+ /// </summary>
+ public BTree(uint sectorSize)
+ {
+ // HK TODO: This lives in memory always, so why allocate on native heap?
+ var memoryBlock = (IntPtr*)NativeMemory.AlignedAlloc((nuint)BTreeNode.PAGE_SIZE, (nuint)BTreeNode.PAGE_SIZE);
+ root = BTreeNode.Create(BTreeNodeType.Leaf, memoryBlock);
+ head = tail = root;
+ root->info->next = root->info->previous = null;
+ root->info->count = 0;
+ tailMinKey = null;
+ rootToTailLeaf = new BTreeNode*[MAX_TREE_DEPTH];
+ stats = new BTreeStats();
+ stats.depth = 1;
+ stats.numLeafNodes = 1;
+ stats.numAllocates = 1;
+ }
+
+ ///
+ /// Frees the memory allocated for a node
+ ///
+ /// BTreeNode to free from memory
+ private void Free(ref BTreeNode* node)
+ {
+ if (node == null)
+ return;
+
+ // If this is an internal node, free all its children first
+ if (node->info->type == BTreeNodeType.Internal)
+ {
+ for (int i = 0; i <= node->info->count; i++)
+ {
+ var child = node->data.children[i];
+ Free(ref child);
+ node->data.children[i] = null;
+ }
+ }
+
+ // Free the memory handle
+ if (node->memoryHandle != null)
+ {
+ NativeMemory.Free(node->memoryHandle);
+ stats.numDeallocates++;
+ node = null;
+ }
+ }
+
+ ///
+ /// Frees the memory allocated for a node
+ ///
+ ///
+ public static void FreeNode(ref BTreeNode* node)
+ {
+ if (node == null)
+ return;
+
+ // If this is an internal node, free all its children first
+ if (node->info->type == BTreeNodeType.Internal)
+ {
+ for (int i = 0; i <= node->info->count; i++)
+ {
+ var child = node->data.children[i];
+ FreeNode(ref child);
+ node->data.children[i] = null;
+ }
+ }
+
+ // Free the memory handle
+ if (node->memoryHandle != null)
+ {
+ NativeMemory.Free(node->memoryHandle);
+ node = null;
+ }
+ }
+
+ public static void Deallocate(ref BTreeNode* node)
+ {
+ // Free the memory handle
+ if (node->memoryHandle != null)
+ {
+ NativeMemory.Free(node->memoryHandle);
+ node->info = null;
+ node->keys = null;
+ node->data.values = null;
+ node->data.children = null;
+ node->memoryHandle = null;
+ }
+ }
+
+ ///
+ /// Deallocates the memory allocated for the B+Tree
+ ///
+ public void Deallocate()
+ {
+ if (root == null)
+ return;
+ Free(ref root);
+ Console.WriteLine("free complete");
+ stats.printStats();
+ root = null;
+ head = null;
+ tail = null;
+ }
+
+ ///
+ /// Destructor for the B+tree
+ ///
+ ~BTree()
+ {
+ Deallocate();
+ }
+
+ public ulong FastInserts => stats.totalFastInserts;
+ public ulong LeafCount => stats.numLeafNodes;
+ public ulong InternalCount => stats.numInternalNodes;
+
+ public ulong ValidCount => StatsValidCount();
+
+ public long RootValidCount => GetValidCount(root);
+
+ public long TailValidCount => GetValidCount(tail);
+
+ public long Count()
+ {
+ return stats.numKeys;
+ }
+ public ulong StatsValidCount()
+ {
+ return stats.numValidKeys;
+ }
+
+ public long GetValidCount(BTreeNode* node)
+ {
+ return node->info->validCount;
+ }
+
+ ///
+ /// Retrieves the first entry in the B+Tree (smallest key)
+ ///
+ /// entry fetched
+ public KeyValuePair<byte[], Value> First()
+ {
+ BTreeNode* leaf = head;
+ if (leaf == null)
+ {
+ return default;
+ }
+ byte[] keyBytes = new ReadOnlySpan<byte>(leaf->GetKey(0), BTreeNode.KEY_SIZE).ToArray();
+ return new KeyValuePair<byte[], Value>(keyBytes, leaf->GetValue(0));
+ }
+
+ ///
+ /// Retrieves the last entry in the B+Tree (largest key)
+ ///
+ /// entry fetched
+ public KeyValuePair<byte[], Value> Last()
+ {
+ BTreeNode* leaf = tail;
+ if (leaf == null)
+ {
+ return default;
+ }
+
+ byte[] keyBytes = new ReadOnlySpan<byte>(leaf->GetKey(leaf->info->count - 1), BTreeNode.KEY_SIZE).ToArray();
+ return new KeyValuePair<byte[], Value>(keyBytes, leaf->GetValue(leaf->info->count - 1));
+ }
+ }
+}
\ No newline at end of file
diff --git a/libs/server/BTreeIndex/BTreeDelete.cs b/libs/server/BTreeIndex/BTreeDelete.cs
new file mode 100644
index 00000000000..07097b04831
--- /dev/null
+++ b/libs/server/BTreeIndex/BTreeDelete.cs
@@ -0,0 +1,32 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+namespace Garnet.server.BTreeIndex
+{
+ public unsafe partial class BTree
+ {
+ /// <summary>
+ /// Delete a key from the B+tree
+ /// </summary>
+ /// <param name="key">key to delete</param>
+ /// <returns>true if key was tombstoned</returns>
+ public bool Delete(byte* key)
+ {
+ BTreeNode* leaf = null;
+ var nodesTraversed = new BTreeNode*[MAX_TREE_DEPTH];
+
+ TraverseToLeaf(ref leaf, ref nodesTraversed, key);
+ var index = leaf->LowerBound(key);
+ if (index >= leaf->info->count || BTreeNode.Compare(key, leaf->GetKey(index)) != 0)
+ {
+ return false;
+ }
+
+ // insert a tombstone for the delete
+ leaf->InsertTombstone(index);
+ leaf->info->validCount--;
+ stats.numValidKeys--;
+ return true;
+ }
+ }
+}
\ No newline at end of file
diff --git a/libs/server/BTreeIndex/BTreeInsert.cs b/libs/server/BTreeIndex/BTreeInsert.cs
new file mode 100644
index 00000000000..d9073dbd930
--- /dev/null
+++ b/libs/server/BTreeIndex/BTreeInsert.cs
@@ -0,0 +1,344 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System;
+using System.Runtime.InteropServices;
+
+namespace Garnet.server.BTreeIndex
+{
+ public unsafe partial class BTree
+ {
+ /// <summary>
+ /// Insert a key-value pair into the B+tree. Directly inserts into the tail leaf node.
+ /// </summary>
+ /// <param name="key">key to insert</param>
+ /// <param name="value">value to associate with the key</param>
+ /// <returns>true if insertion is successful</returns>
+ public bool Insert(byte* key, Value value)
+ {
+ BTreeNode* leaf = null;
+ stats.totalFastInserts++;
+ stats.totalInserts++;
+ stats.numKeys++;
+ stats.numValidKeys++;
+ leaf = tail;
+ return InsertToLeafNode(ref leaf, ref rootToTailLeaf, key, value, true);
+ }
+
+ public bool Insert(byte* key, ReadOnlySpan<byte> keySpan, Value value)
+ {
+ BTreeNode* leaf = null;
+ stats.totalFastInserts++;
+ stats.totalInserts++;
+ stats.numKeys++;
+ stats.numValidKeys++;
+ leaf = tail;
+ return InsertToLeafNode(ref leaf, ref rootToTailLeaf, key, value, true);
+ }
+ public bool InsertToLeafNode(ref BTreeNode* leaf, ref BTreeNode*[] nodesTraversed, byte* key, Value value, bool appendToLeaf = false)
+ {
+ int index;
+ if (appendToLeaf)
+ {
+ // if leaf has space
+ if (leaf->info->count < BTreeNode.LEAF_CAPACITY)
+ {
+ // append to end of leaf node
+ leaf->SetKey(leaf->info->count, key);
+ leaf->SetValue(leaf->info->count, value);
+ leaf->info->count++;
+ leaf->info->validCount++;
+ return true;
+ }
+ index = leaf->info->count;
+ return SplitLeafNode(ref leaf, ref nodesTraversed, key, value, index);
+ }
+
+ // find the index where the key should be inserted
+ index = leaf->LowerBound(key);
+ if (index < leaf->info->count && BTreeNode.Compare(key, leaf->GetKey(index)) == 0)
+ {
+ // insert is actually an update
+ leaf->SetValue(index, value);
+ return false;
+ }
+
+ if (leaf->info->count < BTreeNode.LEAF_CAPACITY)
+ {
+ // move keys to the right of index
+ var sourceSpan = new ReadOnlySpan<byte>(leaf->keys + index * BTreeNode.KEY_SIZE, (leaf->info->count - index) * BTreeNode.KEY_SIZE);
+ var destinationSpan = new Span<byte>(leaf->keys + ((index + 1) * BTreeNode.KEY_SIZE), (leaf->info->count - index) * BTreeNode.KEY_SIZE);
+ sourceSpan.CopyTo(destinationSpan);
+
+ leaf->SetKey(index, key);
+ leaf->SetValue(index, value);
+ leaf->info->count++;
+ leaf->info->validCount++;
+ return true;
+ }
+ return SplitLeafNode(ref leaf, ref nodesTraversed, key, value, index);
+ }
+
+ public bool SplitLeafNode(ref BTreeNode* leaf, ref BTreeNode*[] nodesTraversed, byte* key, Value value, int index)
+ {
+ var memoryBlock = (IntPtr*)NativeMemory.AlignedAlloc((nuint)BTreeNode.PAGE_SIZE, (nuint)BTreeNode.PAGE_SIZE);
+ stats.numAllocates++;
+ BTreeNode* newLeaf = BTreeNode.Create(BTreeNodeType.Leaf, memoryBlock);
+
+ leaf->info->count = SPLIT_LEAF_POSITION;
+ newLeaf->info->previous = leaf;
+ newLeaf->info->next = leaf->info->next;
+ newLeaf->info->count = BTreeNode.LEAF_CAPACITY + 1 - SPLIT_LEAF_POSITION;
+ leaf->info->next = newLeaf;
+ stats.numLeafNodes++;
+
+ // scan the keys from splitLeafPos to get the number of valid keys in the new leaf
+ uint newLeafValidCount = 0;
+ for (var i = SPLIT_LEAF_POSITION; i < BTreeNode.LEAF_CAPACITY; i++)
+ {
+ if (leaf->data.values[i].Valid)
+ {
+ newLeafValidCount++;
+ }
+ }
+ leaf->info->validCount -= newLeafValidCount;
+ newLeaf->info->validCount = newLeafValidCount;
+ // insert the new key to either the old node or the newly created node, based on the index
+ if (index >= leaf->info->count)
+ {
+ // new key goes to the new leaf
+ var newIndex = index - leaf->info->count;
+
+ // move the keys from old node to the new node using ReadOnlySpan
+ var sourceSpan = new ReadOnlySpan(leaf->keys + index * BTreeNode.KEY_SIZE, newIndex * BTreeNode.KEY_SIZE);
+ var destinationSpan = new Span(newLeaf->keys, newIndex * BTreeNode.KEY_SIZE);
+ sourceSpan.CopyTo(destinationSpan);
+
+ // add key to new leaf
+ newLeaf->SetKey(newIndex, key);
+
+ var existingLeafKeysSpan = new ReadOnlySpan(leaf->keys + index * BTreeNode.KEY_SIZE, (BTreeNode.LEAF_CAPACITY - index) * BTreeNode.KEY_SIZE);
+ var newLeafKeysSpan = new Span(newLeaf->keys + (newIndex + 1) * BTreeNode.KEY_SIZE, (BTreeNode.LEAF_CAPACITY - index) * BTreeNode.KEY_SIZE);
+ existingLeafKeysSpan.CopyTo(newLeafKeysSpan);
+
+ var existingLeafValuesSpan = new ReadOnlySpan(leaf->data.values + leaf->info->count, newIndex * sizeof(Value));
+ var newLeafValuesSpan = new Span(newLeaf->data.values, newIndex * sizeof(Value));
+ existingLeafValuesSpan.CopyTo(newLeafValuesSpan);
+ newLeaf->SetValue(newIndex, value);
+
+ var existingLeafValuesSpan2 = new ReadOnlySpan(leaf->data.values + index, (BTreeNode.LEAF_CAPACITY - index) * sizeof(Value));
+ var newLeafValuesSpan2 = new Span(newLeaf->data.values + newIndex + 1, (BTreeNode.LEAF_CAPACITY - index) * sizeof(Value));
+ existingLeafValuesSpan2.CopyTo(newLeafValuesSpan2);
+ newLeaf->info->validCount++;
+ }
+ else
+ {
+ var existingLeafKeysSpan = new ReadOnlySpan(leaf->keys + (leaf->info->count - 1) * BTreeNode.KEY_SIZE, newLeaf->info->count * BTreeNode.KEY_SIZE);
+ var newLeafKeysSpan = new Span(newLeaf->keys, newLeaf->info->count * BTreeNode.KEY_SIZE);
+ existingLeafKeysSpan.CopyTo(newLeafKeysSpan);
+
+ var existingLeafKeysSpan2 = new ReadOnlySpan(leaf->keys + index * BTreeNode.KEY_SIZE, (leaf->info->count - index - 1) * BTreeNode.KEY_SIZE);
+ var newLeafKeysSpan2 = new Span(leaf->keys + ((index + 1) * BTreeNode.KEY_SIZE), (leaf->info->count - index - 1) * BTreeNode.KEY_SIZE);
+ existingLeafKeysSpan2.CopyTo(newLeafKeysSpan2);
+ leaf->SetKey(index, key);
+
+ var existingLeafValuesSpan = new ReadOnlySpan(leaf->data.values + leaf->info->count - 1, newLeaf->info->count * sizeof(Value));
+ var newLeafValuesSpan = new Span(newLeaf->data.values, newLeaf->info->count * sizeof(Value));
+ existingLeafValuesSpan.CopyTo(newLeafValuesSpan);
+
+ var existingLeafValuesSpan2 = new ReadOnlySpan(leaf->data.values + index, (leaf->info->count - index - 1) * sizeof(Value));
+ var newLeafValuesSpan2 = new Span(leaf->data.values + index + 1, (leaf->info->count - index - 1) * sizeof(Value));
+ existingLeafValuesSpan2.CopyTo(newLeafValuesSpan2);
+ leaf->SetValue(index, value);
+ leaf->info->validCount++;
+ }
+
+ uint validCount = 0;
+ // the leaf that is split will also be the tail node; so update the tail pointer
+ if (leaf == tail)
+ {
+ tail = newLeaf;
+ tailMinKey = newLeaf->GetKey(0);
+ rootToTailLeaf[0] = newLeaf;
+ // validCount in internal nodes of the index excludes the validCount of the tail leaf node (optimizing for performance to avoid traversal)
+ // thus, when we split the tail leaf, we push up the validCount of the leaf that we split to the internal node
+ validCount = leaf->info->validCount;
+ }
+
+ // update the parent node with the new key
+ PushUpKeyInInternalNode(ref nodesTraversed, newLeaf->GetKey(0), ref newLeaf, SPLIT_INTERNAL_POSITION, validCount);
+ return true;
+ }
+
+ public void PushUpKeyInInternalNode(ref BTreeNode*[] nodesTraversed, byte* key, ref BTreeNode* child, int splitPos, uint newValidCount)
+ {
+ int i;
+ // starts from parent of leaf node that triggered the push-up.
+ // if the parent has space, insert the key and child pointer, and return. Otherwise, split and cascade up.
+ for (i = 1; i < stats.depth; i++)
+ {
+ var node = nodesTraversed[i];
+ var index = node->UpperBound(key);
+
+ if (node->info->count < BTreeNode.INTERNAL_CAPACITY)
+ {
+ // we can insert
+ InsertToInternalNodeWithinCapacity(ref node, key, ref child, ref nodesTraversed, index, newValidCount);
+
+ // update validCounts in the parent nodes
+ for (var j = i + 1; j < stats.depth; j++)
+ {
+ nodesTraversed[j]->info->validCount += newValidCount;
+ }
+ return;
+ }
+
+ // split internal node
+ node->info->validCount += newValidCount;
+ var newNode = SplitInternalNode(ref node, ref nodesTraversed, ref key, ref child, splitPos, index, i);
+ if (rootToTailLeaf[i] == node && tail != head && BTreeNode.Compare(key, tailMinKey) <= 0)
+ {
+ rootToTailLeaf[i] = newNode;
+ }
+ child = newNode;
+ }
+ // split root
+ CreateNewRoot(key, child);
+ }
+
+ public void InsertToInternalNodeWithinCapacity(ref BTreeNode* node, byte* key, ref BTreeNode* child, ref BTreeNode*[] nodesTraversed, int index, uint newValidCount)
+ {
+ // move all keys to the right
+ var sourceSpan = new ReadOnlySpan(node->keys + index * BTreeNode.KEY_SIZE, (node->info->count - index) * BTreeNode.KEY_SIZE);
+ var destinationSpan = new Span(node->keys + ((index + 1) * BTreeNode.KEY_SIZE), (node->info->count - index) * BTreeNode.KEY_SIZE);
+ sourceSpan.CopyTo(destinationSpan);
+
+ // move all children starting from index+1 to the right using a for loop
+ for (var j = node->info->count; j > index; j--)
+ {
+ node->SetChild(j + 1, node->GetChild(j));
+ }
+
+ // insert
+ node->SetKey(index, key);
+ node->SetChild(index + 1, child);
+ node->info->count++;
+ node->info->validCount += newValidCount;
+ }
+
+ public BTreeNode* CreateInternalNode(ref BTreeNode* node, int splitPos)
+ {
+ var memoryBlock = (IntPtr*)NativeMemory.AlignedAlloc((nuint)BTreeNode.PAGE_SIZE, (nuint)BTreeNode.PAGE_SIZE);
+ stats.numAllocates++;
+ BTreeNode* newNode = BTreeNode.Create(BTreeNodeType.Internal, memoryBlock);
+ stats.numInternalNodes++;
+ node->info->count = splitPos;
+ newNode->info->count = BTreeNode.INTERNAL_CAPACITY - splitPos;
+ newNode->info->next = node->info->next;
+ newNode->info->previous = node;
+ node->info->next = newNode;
+ return newNode;
+ }
+
+ public BTreeNode* SplitInternalNode(ref BTreeNode* nodeToSplit, ref BTreeNode*[] nodesTraversed, ref byte* key, ref BTreeNode* child, int splitPos, int index, int level)
+ {
+ var newNode = CreateInternalNode(ref nodeToSplit, splitPos);
+
+ // scan keys from splitPos to get number of valid keys in the new node
+ uint newValidCount = 0;
+ for (int i = splitPos; i < BTreeNode.INTERNAL_CAPACITY; i++)
+ {
+ if (nodeToSplit->GetChild(i) != null)
+ {
+ newValidCount += nodeToSplit->GetChild(i)->info->validCount;
+ }
+ }
+ newNode->info->validCount = newValidCount;
+
+ if (index > nodeToSplit->info->count)
+ {
+ // child goes to newNode
+ var sourceSpan = new ReadOnlySpan(nodeToSplit->keys + (nodeToSplit->info->count + 1) * BTreeNode.KEY_SIZE, (index - nodeToSplit->info->count - 1) * BTreeNode.KEY_SIZE);
+ var destinationSpan = new Span(newNode->keys, (index - nodeToSplit->info->count - 1) * BTreeNode.KEY_SIZE);
+ sourceSpan.CopyTo(destinationSpan);
+
+ var existingNodeKeysSpan = new ReadOnlySpan(nodeToSplit->keys + index * BTreeNode.KEY_SIZE, (BTreeNode.INTERNAL_CAPACITY - index) * BTreeNode.KEY_SIZE);
+ var newNodeKeysSpan = new Span(newNode->keys + (index - nodeToSplit->info->count) * BTreeNode.KEY_SIZE, (BTreeNode.INTERNAL_CAPACITY - index) * BTreeNode.KEY_SIZE);
+ existingNodeKeysSpan.CopyTo(newNodeKeysSpan);
+ newNode->SetKey(index - nodeToSplit->info->count - 1, key);
+
+ var existingNodeChildrenSpan = new ReadOnlySpan(nodeToSplit->data.children + 1 + nodeToSplit->info->count, (index - nodeToSplit->info->count) * sizeof(BTreeNode*));
+ var newNodeChildrenSpan = new Span(newNode->data.children, (index - nodeToSplit->info->count) * sizeof(BTreeNode*));
+ existingNodeChildrenSpan.CopyTo(newNodeChildrenSpan);
+
+ var existingNodeChildrenSpan2 = new ReadOnlySpan(nodeToSplit->data.children + 1 + index, newNode->info->count * sizeof(BTreeNode*));
+ var newNodeChildrenSpan2 = new Span(newNode->data.children + 1 + index - nodeToSplit->info->count, newNode->info->count * sizeof(BTreeNode*));
+ existingNodeChildrenSpan2.CopyTo(newNodeChildrenSpan2);
+ newNode->SetChild(index - nodeToSplit->info->count, child);
+ key = nodeToSplit->GetKey(nodeToSplit->info->count);
+ }
+ else if (index == nodeToSplit->info->count)
+ {
+ var sourceSpan = new ReadOnlySpan(nodeToSplit->keys + nodeToSplit->info->count * BTreeNode.KEY_SIZE, newNode->info->count * BTreeNode.KEY_SIZE);
+ var destinationSpan = new Span(newNode->keys, newNode->info->count * BTreeNode.KEY_SIZE);
+ sourceSpan.CopyTo(destinationSpan);
+
+ var existingNodeChildrenSpan = new ReadOnlySpan(nodeToSplit->data.children + 1 + nodeToSplit->info->count, newNode->info->count * sizeof(BTreeNode*));
+ var newNodeChildrenSpan = new Span(newNode->data.children + 1, newNode->info->count * sizeof(BTreeNode*));
+ existingNodeChildrenSpan.CopyTo(newNodeChildrenSpan);
+ newNode->SetChild(0, child);
+ }
+ else
+ {
+ // child goes to old node
+ var sourceSpan = new ReadOnlySpan(nodeToSplit->keys + nodeToSplit->info->count * BTreeNode.KEY_SIZE, newNode->info->count * BTreeNode.KEY_SIZE);
+ var destinationSpan = new Span(newNode->keys, newNode->info->count * BTreeNode.KEY_SIZE);
+ sourceSpan.CopyTo(destinationSpan);
+
+ var existingNodeKeysSpan = new ReadOnlySpan(nodeToSplit->keys + index * BTreeNode.KEY_SIZE, (nodeToSplit->info->count - index) * BTreeNode.KEY_SIZE);
+ var newNodeKeysSpan = new Span(nodeToSplit->keys + ((index + 1) * BTreeNode.KEY_SIZE), (nodeToSplit->info->count - index) * BTreeNode.KEY_SIZE);
+ existingNodeKeysSpan.CopyTo(newNodeKeysSpan);
+ nodeToSplit->SetKey(index, key);
+
+ var existingNodeChildrenSpan = new ReadOnlySpan(nodeToSplit->data.children + nodeToSplit->info->count, newNode->info->count * sizeof(BTreeNode*));
+ var newNodeChildrenSpan = new Span(newNode->data.children, newNode->info->count * sizeof(BTreeNode*));
+ existingNodeChildrenSpan.CopyTo(newNodeChildrenSpan);
+
+ var existingNodeChildrenSpan2 = new ReadOnlySpan(nodeToSplit->data.children + index + 1, (nodeToSplit->info->count - index + 1) * sizeof(BTreeNode*));
+ var newNodeChildrenSpan2 = new Span(nodeToSplit->data.children + index + 2, (nodeToSplit->info->count - index + 1) * sizeof(BTreeNode*));
+ existingNodeChildrenSpan2.CopyTo(newNodeChildrenSpan2);
+ nodeToSplit->SetChild(index + 1, child);
+ key = nodeToSplit->GetKey(nodeToSplit->info->count);
+ }
+
+ return newNode;
+ }
+
+
+ public void CreateNewRoot(byte* key, BTreeNode* newlySplitNode)
+ {
+ var memoryBlock = (IntPtr*)NativeMemory.AlignedAlloc((nuint)BTreeNode.PAGE_SIZE, (nuint)BTreeNode.PAGE_SIZE);
+ stats.numAllocates++;
+ BTreeNode* newRoot = BTreeNode.Create(BTreeNodeType.Internal, memoryBlock);
+
+ // Set the new root's key.
+ newRoot->info->count = 1;
+ newRoot->SetKey(0, key);
+
+ // Set children: left child is the old root; right child is the newly split node.
+ newRoot->SetChild(0, root);
+ newRoot->SetChild(1, newlySplitNode);
+
+ newRoot->info->validCount = root->info->validCount;
+ if (newlySplitNode != tail)
+ {
+ newRoot->info->validCount += newlySplitNode->info->validCount;
+ }
+ newRoot->info->next = newRoot->info->previous = null;
+ root = newRoot;
+ rootToTailLeaf[stats.depth] = newRoot;
+ stats.depth++;
+ stats.numInternalNodes++;
+ }
+ }
+}
\ No newline at end of file
diff --git a/libs/server/BTreeIndex/BTreeInternals.cs b/libs/server/BTreeIndex/BTreeInternals.cs
new file mode 100644
index 00000000000..20fefdb0e46
--- /dev/null
+++ b/libs/server/BTreeIndex/BTreeInternals.cs
@@ -0,0 +1,335 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System;
+using System.Numerics;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+
+namespace Garnet.server.BTreeIndex
+{
+
+ public enum BTreeNodeType
+ {
+ Internal,
+ Leaf
+ }
+
+ /// <summary>
+ /// Represents information stored in a node in the B+tree
+ /// </summary>
+ [StructLayout(LayoutKind.Explicit)]
+ public unsafe struct NodeData
+ {
+ [FieldOffset(0)]
+ public Value* values;
+ [FieldOffset(0)]
+ public BTreeNode** children;
+ }
+
+ [StructLayout(LayoutKind.Explicit, Size = sizeof(byte) + sizeof(ulong))]
+ public struct Value
+ {
+ [FieldOffset(0)]
+ public byte valid;
+ [FieldOffset(1)]
+ public ulong address;
+
+ public bool Valid
+ {
+ get
+ {
+ return valid == 1;
+ }
+ set
+ {
+ valid = (byte)(value ? 1 : 0);
+ }
+ }
+
+ public Value(ulong value)
+ {
+ this.valid = 1;
+ this.address = value;
+ }
+ }
+
+ public unsafe struct NodeInfo
+ {
+ public BTreeNodeType type;
+ public int count;
+ public BTreeNode* next;
+ public BTreeNode* previous;
+ public uint validCount; // valid keys (non-tombstone keys) in the node.
+ }
+
+ /// <summary>
+ /// Represents a node in the B+tree
+ /// Memory layout:
+ /// +-----------------------------------+
+ /// | BTreeNode (HEADER_SIZE bytes) |
+ /// | - NodeInfo* info |
+ /// | - NodeData data |
+ /// | - byte* keys |
+ /// | - IntPtr* memoryHandle |
+ /// +-----------------------------------+
+ /// | NodeInfo (METADATA_SIZE bytes) |
+ /// | - BTreeNodeType type |
+ /// | - int count |
+ /// | - BTreeNode* next |
+ /// | - BTreeNode* previous |
+ /// | - uint validCount |
+ /// +-----------------------------------+
+ /// | Keys array: capacity * KEY_SIZE |
+ /// +-----------------------------------+
+ /// | Data array: either Value[] (leaf) |
+ /// | or BTreeNode*[] (internal) |
+ /// +-----------------------------------+
+ /// Expects an allocated block of memory (of size BTreeNode.PAGE_SIZE) to be passed as handle
+ /// Stores handle for deallocation
+ /// BTreeNode struct also contained within the 4KB block to allow pointers to created nodes to be passed around
+ /// as well as allow for on-demand allocation/deallocation.
+ /// NOTE: currently reverted to MemoryMarshal for allocation of handles due to undefined behavior with SectorAlignedMemory.
+ /// </summary>
+ public unsafe struct BTreeNode
+ {
+ public static int HEADER_SIZE = sizeof(BTreeNode);
+ public static int PAGE_SIZE = 4096; // This must be increased if you want to store the BTreeNode header in the block.
+ public static int KEY_SIZE = 16; // key size in bytes.
+ public static int METADATA_SIZE = sizeof(NodeInfo);
+ public static int LEAF_CAPACITY = (PAGE_SIZE - HEADER_SIZE - METADATA_SIZE) / (KEY_SIZE + sizeof(Value));
+ public static int INTERNAL_CAPACITY = (PAGE_SIZE - HEADER_SIZE - METADATA_SIZE - sizeof(BTreeNode*)) / (KEY_SIZE + sizeof(BTreeNode*));
+
+ public NodeInfo* info;
+ public NodeData data;
+ public byte* keys;
+ public IntPtr* memoryHandle;
+
+ public static BTreeNode* Create(BTreeNodeType type, IntPtr* handle)
+ {
+ // Place the node header at the beginning of the block.
+ BTreeNode* node = (BTreeNode*)handle;
+ node->memoryHandle = handle;
+
+ // Define the start of the payload right after the header.
+ byte* payloadPtr = (byte*)(handle) + HEADER_SIZE;
+
+ // The NodeInfo will be stored at the start of the payload.
+ node->info = (NodeInfo*)payloadPtr;
+ node->info->type = type;
+ node->info->count = 0;
+ node->info->next = null;
+ node->info->previous = null;
+ node->info->validCount = 0;
+
+ // Data for keys follows the NodeInfo.
+ byte* keysPtr = payloadPtr + METADATA_SIZE;
+ node->keys = keysPtr;
+
+ int capacity = (type == BTreeNodeType.Leaf) ? LEAF_CAPACITY : INTERNAL_CAPACITY;
+ int keysSize = capacity * KEY_SIZE;
+ byte* dataSectionPtr = keysPtr + keysSize;
+
+ // Set up NodeData in-place.
+ if (type == BTreeNodeType.Leaf)
+ {
+ node->data.values = (Value*)dataSectionPtr;
+ }
+ else
+ {
+ node->data.children = (BTreeNode**)dataSectionPtr;
+ }
+
+ return node;
+ }
+
+ public byte* GetKey(int index)
+ {
+ byte* keyAddress = keys + (index * KEY_SIZE);
+ return keyAddress;
+ }
+
+ public void SetKey(int index, byte* keyData)
+ {
+ var sourceSpan = new ReadOnlySpan<byte>(keyData, KEY_SIZE);
+ var destinationSpan = new Span<byte>(keys + (index * KEY_SIZE), KEY_SIZE);
+ sourceSpan.CopyTo(destinationSpan);
+ }
+
+ public void SetChild(int index, BTreeNode* child)
+ {
+ data.children[index] = child;
+ }
+
+ public BTreeNode* GetChild(int index)
+ {
+ return data.children[index];
+ }
+
+ public void SetValue(int index, Value value)
+ {
+ data.values[index] = value;
+ }
+
+ public Value GetValue(int index)
+ {
+ return data.values[index];
+ }
+
+ public void SetValueValid(int index, bool valid)
+ {
+ data.values[index].Valid = valid;
+ }
+
+ public bool IsValueValid(int index)
+ {
+ return data.values[index].Valid;
+ }
+
+ public void InsertTombstone(int index)
+ {
+ data.values[index].Valid = false;
+ }
+
+ /// <summary>
+ /// Returns the index of the first key strictly greater than the given key
+ /// </summary>
+ /// <param name="key">key to compare against</param>
+ /// <returns>index of the first key > the given key; info->count if no such key</returns>
+ public int UpperBound(byte* key)
+ {
+ if (info->count == 0)
+ {
+ return 0;
+ }
+ int left = 0, right = info->count - 1;
+ while (left <= right)
+ {
+ var mid = left + (right - left) / 2;
+ byte* midKey = GetKey(mid);
+ int cmp = Compare(key, midKey);
+ if (cmp < 0)
+ {
+ right = mid - 1;
+ }
+ else
+ {
+ left = mid + 1;
+ }
+ }
+ return left;
+ }
+
+ /// <summary>
+ /// Returns the index of the first key greater than or equal to the given key
+ /// (exact match index if the key is present)
+ /// </summary>
+ /// <param name="key">key to compare against</param>
+ public int LowerBound(byte* key)
+ {
+ if (info->count == 0)
+ {
+ return 0;
+ }
+ // Binary search for the first key >= given key
+ int left = 0, right = info->count - 1;
+ while (left <= right)
+ {
+ var mid = left + (right - left) / 2;
+ byte* midKey = GetKey(mid);
+ int cmp = Compare(midKey, key);
+ if (cmp == 0)
+ {
+ return mid;
+ }
+ else if (cmp < 0)
+ {
+ left = mid + 1;
+ }
+ else
+ {
+ right = mid - 1;
+ }
+ }
+ return left;
+ }
+
+ /// <summary>
+ /// Compares two fixed-size (KEY_SIZE) keys lexicographically
+ /// </summary>
+ /// <param name="key1">first key</param>
+ /// <param name="key2">second key</param>
+ /// <returns>-1 if key1 is less than key2; 0 if key1 == key2; 1 if key1 > key2</returns>
+ public static int Compare(byte* key1, byte* key2)
+ {
+
+ if (Sse2.IsSupported)
+ {
+ var v1 = Sse2.LoadVector128(key1);
+ var v2 = Sse2.LoadVector128(key2);
+
+ var mask = Sse2.MoveMask(Sse2.CompareEqual(v1, v2));
+
+ if (mask != 0xFFFF) // Not all bytes are equal
+ {
+ // Find the index of the first differing byte
+ int index = BitOperations.TrailingZeroCount(~mask); // Invert mask to find first zero (differing byte)
+ return key1[index] < key2[index] ? -1 : 1;
+ }
+
+ return 0; // Arrays are equal
+ }
+ else
+ {
+ return new Span<byte>(key1, KEY_SIZE).SequenceCompareTo(new Span<byte>(key2, KEY_SIZE));
+ }
+ }
+ }
+
+ /// <summary>
+ /// Statistics about the B+Tree
+ /// </summary>
+ public struct BTreeStats
+ {
+ // general index stats
+ public int depth;
+ public ulong numLeafNodes;
+ public ulong numInternalNodes;
+
+ // workload specific stats
+ public long totalInserts; // cumulative number of inserts to the index
+ public long totalDeletes; // cumulative number of deletes to the index
+ public ulong totalFastInserts; // cumulative number of fast inserts to the index
+ public long numKeys; // number of keys currently indexed
+ public ulong numValidKeys; // number of keys that are not tombstoned
+ public ulong numAllocates;
+ public ulong numDeallocates;
+ public BTreeStats()
+ {
+ depth = 0;
+ numLeafNodes = 0;
+ numInternalNodes = 0;
+ totalInserts = 0;
+ totalDeletes = 0;
+ totalFastInserts = 0;
+ numKeys = 0;
+ numValidKeys = 0;
+ numAllocates = 0;
+ numDeallocates = 0;
+ }
+
+ public void printStats()
+ {
+ Console.WriteLine($"Depth: {depth}");
+ Console.WriteLine($"Number of leaf nodes: {numLeafNodes}");
+ Console.WriteLine($"Number of internal nodes: {numInternalNodes}");
+ Console.WriteLine($"Total inserts: {totalInserts}");
+ Console.WriteLine($"Total deletes: {totalDeletes}");
+ Console.WriteLine($"Total fast inserts: {totalFastInserts}");
+ Console.WriteLine($"Number of keys: {numKeys}");
+ Console.WriteLine($"Number of valid keys: {numValidKeys}");
+ Console.WriteLine($"Number of allocates: {numAllocates}");
+ Console.WriteLine($"Number of deallocates: {numDeallocates}");
+ }
+ }
+}
\ No newline at end of file
diff --git a/libs/server/BTreeIndex/BTreeLookup.cs b/libs/server/BTreeIndex/BTreeLookup.cs
new file mode 100644
index 00000000000..90738960cb5
--- /dev/null
+++ b/libs/server/BTreeIndex/BTreeLookup.cs
@@ -0,0 +1,187 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace Garnet.server.BTreeIndex
+{
+ public unsafe partial class BTree
+ {
+ /// <summary>
+ /// Point lookup in the index
+ /// </summary>
+ /// <param name="key">lookup key</param>
+ /// <returns>value mapped to the key; default if absent or tombstoned</returns>
+ public Value Get(byte* key)
+ {
+ BTreeNode* leaf = null;
+ var nodesTraversed = new BTreeNode*[MAX_TREE_DEPTH];
+ TraverseToLeaf(ref leaf, ref nodesTraversed, key);
+
+ var index = leaf->LowerBound(key);
+ if (index < leaf->info->count && BTreeNode.Compare(key, leaf->GetKey(index)) == 0)
+ {
+ var value = leaf->GetValue(index);
+ if (value.Valid)
+ {
+ return value;
+ }
+ }
+ return default;
+ }
+
+ /// <summary>
+ /// Range lookup in the index
+ /// </summary>
+ /// <param name="start">start key for the range lookup</param>
+ /// <param name="end">end key for the range lookup</param>
+ /// <param name="startVal">address of the start key</param>
+ /// <param name="endVal">address of end key</param>
+ /// <param name="tombstones">list of tombstones</param>
+ /// <param name="limit">limit entries scanned in the range lookup</param>
+ /// <param name="reverse">reverse lookup</param>
+ /// <returns>number of valid entries found in the range</returns>
+ public int Get(byte* start, byte* end, out Value startVal, out Value endVal, out List<Value> tombstones, long limit = -1, bool reverse = false)
+ {
+ Debug.Assert(reverse ?
+ BTreeNode.Compare(start, end) >= 0 : BTreeNode.Compare(start, end) <= 0,
+ "Start key should be less than or equal to end key");
+
+ int count = 0;
+ tombstones = new List<Value>();
+ BTreeNode* startLeaf = null, endLeaf = null;
+ BTreeNode*[] nodesTraversed = new BTreeNode*[MAX_TREE_DEPTH];
+ int startIndex, endIndex;
+
+ // find the leaf node for the start key
+ TraverseToLeaf(ref startLeaf, ref nodesTraversed, start);
+ // find the leaf node for the end key
+ TraverseToLeaf(ref endLeaf, ref nodesTraversed, end);
+
+ if (reverse)
+ {
+ // find the first slot > start and subtract one index to get the start index
+ startIndex = startLeaf->UpperBound(start) - 1;
+ startVal = startLeaf->GetValue(startIndex);
+
+ // find the first slot whose key is greater than or equal to the end key; that will be the last index
+ endIndex = endLeaf->LowerBound(end);
+ endVal = endLeaf->GetValue(endIndex);
+ }
+ else
+ {
+ // find the first key in the start leaf that is greater than or equal to the start key
+ startIndex = startLeaf->LowerBound(start);
+ startVal = startLeaf->GetValue(startIndex);
+ // find the last key in the end leaf that is less than or equal to the end key
+ endIndex = endLeaf->UpperBound(end) - 1;
+ endVal = endLeaf->GetValue(endIndex);
+ }
+
+ // iterate over the leaves between startLeaf[startIndex] and endLeaf[endIndex] (inclusive) and collect all tombstones
+ BTreeNode* leaf = startLeaf;
+ uint numScanned = 0;
+ while (leaf != null)
+ {
+ int first, last;
+ bool scanComplete = false;
+ if (reverse)
+ {
+ // we would like an inverse traversal
+ first = leaf == startLeaf ? startIndex : leaf->info->count - 1;
+ last = leaf == endLeaf ? endIndex : 0;
+ }
+ else
+ {
+ last = leaf == endLeaf ? endIndex : leaf->info->count - 1;
+ first = leaf == startLeaf ? startIndex : 0;
+ }
+
+ for (var i = first; ;)
+ {
+ numScanned++;
+ var value = leaf->GetValue(i);
+ if (!value.Valid)
+ {
+ tombstones.Add(leaf->GetValue(i));
+ }
+ else
+ {
+ // entry will be part of result set
+ count++;
+ if (limit != -1 && count >= limit)
+ {
+ // update address of the last key we iterated till
+ endVal = value;
+ scanComplete = true;
+ break;
+ }
+ }
+
+ if (reverse)
+ {
+ if (i <= last)
+ {
+ break;
+ }
+ i--;
+ }
+ else
+ {
+ if (i >= last)
+ {
+ break;
+ }
+ i++;
+ }
+ }
+
+ if (leaf == endLeaf || scanComplete)
+ {
+ break;
+ }
+
+ leaf = reverse ? leaf->info->previous : leaf->info->next;
+ }
+
+ return count;
+ }
+
+ /// <summary>
+ /// Retrieves the last undeleted entry in the B+Tree (largest non-tombstoned key)
+ /// </summary>
+ /// <returns>entry fetched</returns>
+ public KeyValuePair<byte[], Value> LastAlive()
+ {
+ BTreeNode* leaf = tail;
+ if (leaf == null)
+ {
+ return default;
+ }
+
+ // Traverse backwards from the tail to find the first valid (non-tombstoned) entry.
+ while (leaf != null)
+ {
+ // Iterate backwards through the entries in this leaf
+ for (int i = (int)leaf->info->count - 1; i >= 0; i--)
+ {
+ var value = leaf->GetValue(i);
+ if (value.Valid)
+ {
+ // Found a valid entry, return it
+ byte[] keyBytes = new ReadOnlySpan<byte>(leaf->GetKey(i), BTreeNode.KEY_SIZE).ToArray();
+ return new KeyValuePair<byte[], Value>(keyBytes, value);
+ }
+ }
+
+ // No valid entry found in this leaf, move to the previous leaf
+ leaf = leaf->info->previous;
+ }
+
+ // No valid entry found in the entire tree
+ return default;
+ }
+ }
+}
\ No newline at end of file
diff --git a/libs/server/BTreeIndex/BTreeTraverse.cs b/libs/server/BTreeIndex/BTreeTraverse.cs
new file mode 100644
index 00000000000..8c9d482045f
--- /dev/null
+++ b/libs/server/BTreeIndex/BTreeTraverse.cs
@@ -0,0 +1,50 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+namespace Garnet.server.BTreeIndex
+{
+ public unsafe partial class BTree
+ {
+ public byte* TraverseToLeaf(ref BTreeNode* node, ref BTreeNode*[] nodesTraversed, byte* key)
+ {
+ byte* leafMax = null;
+ BTreeNode* child = root;
+ for (var i = stats.depth - 1; i > 0; --i)
+ {
+ node = child;
+ nodesTraversed[i] = child;
+ var slot = node->UpperBound(key);
+ if (slot != node->info->count)
+ {
+ leafMax = node->GetKey(slot);
+ }
+ child = node->GetChild(slot);
+ }
+ node = child;
+ nodesTraversed[0] = child;
+ return leafMax;
+ }
+
+ public byte* TraverseToLeaf(ref BTreeNode* node, ref BTreeNode*[] nodesTraversed, byte* key, out int[] slots)
+ {
+ slots = new int[MAX_TREE_DEPTH];
+ byte* leafMax = null;
+ BTreeNode* child = root;
+ for (var i = stats.depth - 1; i > 0; --i)
+ {
+ node = child;
+ nodesTraversed[i] = child;
+ var slot = node->UpperBound(key);
+ slots[i] = slot;
+ if (slot != node->info->count)
+ {
+ leafMax = node->GetKey(slot);
+ }
+ child = node->GetChild(slot);
+ }
+ node = child;
+ nodesTraversed[0] = child;
+ return leafMax;
+ }
+ }
+}
\ No newline at end of file
diff --git a/libs/server/BTreeIndex/BTreeTrim.cs b/libs/server/BTreeIndex/BTreeTrim.cs
new file mode 100644
index 00000000000..8085e633f0a
--- /dev/null
+++ b/libs/server/BTreeIndex/BTreeTrim.cs
@@ -0,0 +1,368 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System;
+using System.Diagnostics;
+
+namespace Garnet.server.BTreeIndex
+{
+ public unsafe partial class BTree
+ {
+ public void TrimByID(byte* key, out ulong entriesTrimmed, out Value headValue, out ReadOnlySpan<byte> headValidKey, out uint numLeavesDeleted)
+ => TrimByID(key, out int _, out entriesTrimmed, out headValue, out headValidKey, out numLeavesDeleted);
+
+ public void TrimByLength(ulong length, out ulong entriesTrimmed, out Value headValue, out ReadOnlySpan<byte> headValidKey, out uint numLeavesDeleted, bool approximateTrimming = false)
+ => TrimByLength(ref root, length, out entriesTrimmed, out headValue, out headValidKey, out numLeavesDeleted, approximateTrimming);
+
+ private void TrimByID(byte* key, out int underflowingNodes, out ulong entriesTrimmed, out Value headValidValue, out ReadOnlySpan<byte> headValidKey, out uint numLeavesDeleted)
+ {
+ underflowingNodes = 0;
+ entriesTrimmed = 0;
+ numLeavesDeleted = 0;
+
+ var nodesTraversed = new BTreeNode*[MAX_TREE_DEPTH];
+ BTreeNode* leaf = null;
+ TraverseToLeaf(ref leaf, ref nodesTraversed, key, out int[] internalSlots);
+
+ // find index for key in leaf node - this returns the index of first key >= given key
+ var index = leaf->LowerBound(key);
+ headValidKey = new ReadOnlySpan<byte>(leaf->GetKey(index), BTreeNode.KEY_SIZE);
+ headValidValue = leaf->GetValue(index);
+
+ // insert tombstones until index to mark as deleted
+ for (var i = 0; i < index; i++)
+ {
+ leaf->SetValueValid(i, false);
+ leaf->info->validCount--;
+ entriesTrimmed++;
+ }
+
+ if (leaf == head)
+ {
+ numLeavesDeleted = 0;
+ return;
+ }
+
+ // traverse the leaf level to delete preceding leaf nodes
+ var node = leaf->info->previous;
+ var nodesToTraverseInSubtree = internalSlots[1] - 1;
+ uint deletedValidCount = (uint)(leaf->info->count - leaf->info->validCount);
+ var totalDeletedValidCount = deletedValidCount;
+ while (node != null)
+ {
+ var validCount = node->info->validCount;
+ var count = node->info->count;
+ if (nodesToTraverseInSubtree >= 0)
+ {
+ deletedValidCount += validCount;
+ nodesToTraverseInSubtree--;
+ }
+ totalDeletedValidCount += validCount;
+
+ var prev = node->info->previous;
+ if (prev == null)
+ {
+ Debug.Assert(node == head, "Head node should not have a previous node");
+ }
+
+ stats.numLeafNodes--;
+ stats.numKeys -= count;
+ stats.numValidKeys -= validCount;
+ entriesTrimmed += validCount;
+
+ // deallocate the node
+ Deallocate(ref node);
+ numLeavesDeleted++;
+
+ // continue iteration
+ node = prev;
+ }
+
+ leaf->info->previous = null;
+ head = leaf;
+
+ bool rootReassigned = false;
+ // traverse internal nodes except root and delete preceding internal nodes
+ for (int i = 1; i < stats.depth - 1; i++)
+ {
+ node = nodesTraversed[i];
+ var slotOfKey = internalSlots[i];
+
+ if (slotOfKey > 0)
+ {
+ // shift children leftwards until slotOfKey (inclusive) using ReadOnlySpan
+ var sourceSpan = new ReadOnlySpan(node->keys + (slotOfKey - 1) * BTreeNode.KEY_SIZE, ((slotOfKey - 1)) * BTreeNode.KEY_SIZE);
+ var destinationSpan = new Span(node->keys, ((slotOfKey - 1)) * BTreeNode.KEY_SIZE);
+ sourceSpan.CopyTo(destinationSpan);
+
+ var sourceChildrenSpan = new ReadOnlySpan(node->data.children + (slotOfKey - 1) + 1, ((slotOfKey - 1)) * sizeof(BTreeNode*));
+ var destinationChildrenSpan = new Span(node->data.children, ((slotOfKey - 1)) * sizeof(BTreeNode*));
+ sourceChildrenSpan.CopyTo(destinationChildrenSpan);
+ }
+ var prevCount = node->info->count;
+ node->info->count -= slotOfKey;
+ node->info->validCount -= deletedValidCount;
+
+ if (prevCount > BTreeNode.INTERNAL_CAPACITY / 2 && node->info->count < BTreeNode.INTERNAL_CAPACITY / 2)
+ {
+ underflowingNodes++;
+ }
+
+ node = nodesTraversed[i]->info->previous;
+ deletedValidCount = 0;
+ while (node != null)
+ {
+ var temp = node->info->previous;
+ if (nodesToTraverseInSubtree >= 0)
+ {
+ deletedValidCount += node->info->validCount;
+ nodesToTraverseInSubtree--;
+ }
+ Deallocate(ref node);
+ stats.numInternalNodes--;
+ node = temp;
+ }
+ nodesTraversed[i]->info->previous = null;
+ // corner case: slotOfKey points to last child => after deletion only one child remains
+ // delete all parent levels and re-assign root
+ if (i + 1 < stats.depth)
+ {
+ var nextSlot = internalSlots[i + 1];
+ if (nextSlot == nodesTraversed[i + 1]->info->count)
+ {
+ var newRoot = nodesTraversed[i];
+ var originalDepth = stats.depth;
+ for (int j = i + 1; j < originalDepth; j++)
+ {
+ var curr = nodesTraversed[j];
+ while (curr != null)
+ {
+ var pre = curr->info->previous;
+ Deallocate(ref curr);
+ stats.numInternalNodes--;
+ curr = pre;
+ }
+ stats.depth--;
+ }
+ root = newRoot;
+ rootReassigned = true;
+ break;
+ }
+ }
+ }
+ if (!rootReassigned && stats.depth > 1 && nodesTraversed[stats.depth - 1] != null)
+ {
+ nodesTraversed[stats.depth - 1]->info->validCount -= totalDeletedValidCount;
+ }
+ }
+
+ private void TrimByLength(ref BTreeNode* node, ulong length, out ulong entriesTrimmed, out Value headValidValue, out ReadOnlySpan<byte> headValidKey, out uint numLeavesDeleted, bool approximateTrimming)
+ {
+ var depth = stats.depth - 1;
+ ulong currentValidCount = 0;
+ BTreeNode* current = node;
+ int[] internalSlots = new int[MAX_TREE_DEPTH];
+ int underflowingNodes = 0;
+ entriesTrimmed = 0;
+ numLeavesDeleted = 0;
+ headValidKey = default;
+ BTreeNode*[] nodesTraversed = new BTreeNode*[MAX_TREE_DEPTH];
+
+ // stream is already smaller than desired length to trim to
+ if (length >= stats.numValidKeys)
+ {
+ headValidValue = current->GetValue(0);
+ headValidKey = new ReadOnlySpan<byte>(current->GetKey(0), BTreeNode.KEY_SIZE);
+ return;
+ }
+
+ // set the starting node (root) as the last node in nodes traversed array
+ nodesTraversed[depth] = current;
+ // while we have not traversed the entire depth of the tree
+ while (depth > 0)
+ {
+ // current node is internal node
+ if (current->info->type == BTreeNodeType.Internal)
+ {
+ // iterate over the children in the internal node from right to left, largest to smallest
+ for (var i = current->info->count; i >= 0; i--)
+ {
+ var child = current->GetChild(i);
+
+ // cumulative valid count is less than desired length so just keep accumulating
+ if (currentValidCount + child->info->validCount < length)
+ {
+ currentValidCount += child->info->validCount;
+ }
+ else
+ {
+ // if the cumulative valid count including this child exceeds the desired length, then we have found the node to split at
+ // track the node in nodes traversed and the slot index in internalSlots
+ nodesTraversed[depth - 1] = child;
+ internalSlots[depth] = i;
+ // current holds the node we will continue traversing from to find the split point
+ current = child;
+ break;
+ }
+ }
+ }
+ depth--;
+ }
+
+ // After traversing down from root (depth stats.depth-1) to leaf level (depth 0),
+ // current is guaranteed to be a leaf node
+ Debug.Assert(current->info->type == BTreeNodeType.Leaf, "Current must be a leaf after traversal");
+
+ // In approximate trimming mode, we don't attempt to trim within the node itself.
+ if (approximateTrimming)
+ {
+ headValidValue = current->GetValue(0);
+ headValidKey = new ReadOnlySpan<byte>(current->GetKey(0), BTreeNode.KEY_SIZE);
+ }
+ else
+ {
+ // length is the desired length to trim to and currentValidCount is the cumulative valid count before this node.
+ // since we are in non-approximate mode, we need to trim within this node to reach the exact desired length.
+ // keepInCurrent holds that little diff we may not have accumulated once we reached the current node itself.
+ ulong keepInCurrent = length - currentValidCount;
+ ulong kept = 0;
+ headValidValue = default;
+ headValidKey = default;
+ // iterate over entries in current node right to left (largest to smallest).
+ for (int i = (int)current->info->count - 1; i >= 0; i--)
+ {
+ // only consider valid entries
+ if (current->IsValueValid(i))
+ {
+ // we keep a key in the current node only if we have not yet reached the desired keepInCurrent count
+ if (kept < keepInCurrent)
+ {
+ if (kept == 0)
+ {
+ headValidValue = current->GetValue(i);
+ headValidKey = new ReadOnlySpan<byte>(current->GetKey(i), BTreeNode.KEY_SIZE);
+ }
+ kept++;
+ }
+ else
+ {
+ // once we have reached the desired keepInCurrent count, we mark remaining keys as deleted;
+ // since we iterate right to left, we're now deleting entries to the LEFT (older entries)
+ // Mark as deleted.
+ current->SetValueValid(i, false);
+ current->info->validCount--;
+ entriesTrimmed++;
+ stats.numValidKeys--;
+ }
+ }
+ }
+ }
+
+ // now current node has been trimmed internally. Proceed to removing preceding nodes
+
+ // nodes are linked list at leaf level, so we can traverse backwards
+ var leaf = current->info->previous;
+ uint deletedValidCount = 0;
+ var nodesToTraverseInSubtree = internalSlots[depth + 1] - 1;
+ while (leaf != null)
+ {
+ var count = leaf->info->count;
+ var validCount = leaf->info->validCount;
+
+ if (nodesToTraverseInSubtree >= 0)
+ {
+ deletedValidCount += validCount;
+ nodesToTraverseInSubtree--;
+ }
+ var prev = leaf->info->previous;
+ if (prev == null)
+ {
+ Debug.Assert(leaf == head, "Head node should not have a previous node");
+ }
+ stats.numLeafNodes--;
+ stats.numKeys -= count;
+ stats.numValidKeys -= validCount;
+ entriesTrimmed += validCount;
+
+ // deallocate the node
+ Deallocate(ref leaf);
+ numLeavesDeleted++;
+ leaf = prev;
+ }
+
+ // disconnect current from previous nodes, and make the current the new head node
+ current->info->previous = null;
+ head = current;
+ // traverse the internal nodes except root and delete preceding internal nodes
+ for (int i = 1; i < stats.depth - 1; i++)
+ {
+ var slotOfKey = internalSlots[i];
+ var inner = nodesTraversed[i];
+ if (inner == null)
+ {
+ break;
+ }
+ if (slotOfKey > 0)
+ {
+ // shift keys and children from slotOfKey to beginning
+ var sourceSpan = new ReadOnlySpan(inner->keys + (slotOfKey - 1) * BTreeNode.KEY_SIZE, ((slotOfKey - 1)) * BTreeNode.KEY_SIZE);
+ var destinationSpan = new Span(inner->keys, ((slotOfKey - 1)) * BTreeNode.KEY_SIZE);
+ sourceSpan.CopyTo(destinationSpan);
+
+ var sourceChildrenSpan = new ReadOnlySpan(inner->data.children + (slotOfKey - 1) + 1, ((slotOfKey - 1)) * sizeof(BTreeNode*));
+ var destinationChildrenSpan = new Span(inner->data.children, ((slotOfKey - 1)) * sizeof(BTreeNode*));
+ sourceChildrenSpan.CopyTo(destinationChildrenSpan);
+ }
+ var prevCount = inner->info->count;
+ inner->info->count -= slotOfKey;
+ nodesTraversed[i]->info->validCount -= deletedValidCount;
+
+ if (prevCount > BTreeNode.INTERNAL_CAPACITY / 2 && inner->info->count < BTreeNode.INTERNAL_CAPACITY / 2)
+ {
+ underflowingNodes++;
+ }
+ deletedValidCount = 0;
+ nodesToTraverseInSubtree = slotOfKey - 1;
+ inner = inner->info->previous;
+ while (inner != null && inner != root)
+ {
+ var temp = inner->info->previous;
+ if (nodesToTraverseInSubtree >= 0)
+ {
+ deletedValidCount += inner->info->validCount;
+ nodesToTraverseInSubtree--;
+ }
+ Deallocate(ref inner);
+ stats.numInternalNodes--;
+ inner = temp;
+ }
+ nodesTraversed[i]->info->previous = null;
+ // corner case: slotOfKey points to last child => after deletion only one child remains
+ // delete all parent levels and re-assign root
+ if (i + 1 < stats.depth)
+ {
+ var nextSlot = internalSlots[i + 1];
+ if (nextSlot == nodesTraversed[i + 1]->info->count)
+ {
+ var newRoot = nodesTraversed[i];
+ var originalDepth = stats.depth;
+ for (int j = i + 1; j < originalDepth; j++)
+ {
+ var curr = nodesTraversed[j];
+ while (curr != null)
+ {
+ var pre = curr->info->previous;
+ Deallocate(ref curr);
+ stats.numInternalNodes--;
+ curr = pre;
+ }
+ stats.depth--;
+ }
+ root = newRoot;
+ break;
+ }
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/libs/server/Databases/DatabaseManagerBase.cs b/libs/server/Databases/DatabaseManagerBase.cs
index 5b5a3693b70..78cf01c2625 100644
--- a/libs/server/Databases/DatabaseManagerBase.cs
+++ b/libs/server/Databases/DatabaseManagerBase.cs
@@ -191,6 +191,7 @@ protected void RecoverDatabaseCheckpoint(GarnetDatabase db, out long storeVersio
{
try
{
+ // Compaction trims hybrid log from the back, checkpointing truncates AOF
DoCompaction(db, isFromCheckpoint: true, logger);
var lastSaveStoreTailAddress = db.Store.Log.TailAddress;
@@ -546,6 +547,7 @@ private async Task InitiateCheckpointAsync(GarnetDatabase db, bool full, Checkpo
: Checkpoint.HybridLogOnly(db.Store, checkpointType, out checkpointResult.token);
}
+ // We just hand off the state machine to the state machine driver and await its completion
checkpointResult.success = await db.StateMachineDriver.RunAsync(sm);
// If cluster is enabled the replication manager is responsible for truncating AOF
diff --git a/libs/server/Resp/CmdStrings.cs b/libs/server/Resp/CmdStrings.cs
index 4214aa6e59d..9043c52a499 100644
--- a/libs/server/Resp/CmdStrings.cs
+++ b/libs/server/Resp/CmdStrings.cs
@@ -305,6 +305,15 @@ static partial class CmdStrings
public static ReadOnlySpan RESP_ERR_EXPDELSCAN_INVALID => "ERR Cannot execute EXPDELSCAN with background expired key deletion scan enabled"u8;
public static ReadOnlySpan RESP_ERR_CHECKPOINT_ALREADY_IN_PROGRESS => "ERR checkpoint already in progress"u8;
+ public static ReadOnlySpan<byte> RESP_ERR_STREAMS_DISABLED => "ERR STREAMS is disabled, enable it with --streams option."u8;
+ public static ReadOnlySpan<byte> RESP_ERR_XADD_WRONG_NUM_ARGS => "ERR wrong number of arguments for 'xadd' command"u8;
+ public static ReadOnlySpan<byte> RESP_ERR_XADD_INVALID_STREAM_ID => "ERR Invalid stream ID specified as stream command argument"u8;
+ public static ReadOnlySpan<byte> RESP_ERR_XADD_ID_NOT_GREATER => "ERR The ID specified in XADD is equal or smaller than the target stream top item"u8;
+ public static ReadOnlySpan<byte> RESP_ERR_XLEN_WRONG_NUM_ARGS => "ERR wrong number of arguments for 'xlen' command"u8;
+ public static ReadOnlySpan<byte> RESP_ERR_XRANGE_WRONG_NUM_ARGS => "ERR wrong number of arguments for 'xrange' command"u8;
+ public static ReadOnlySpan<byte> RESP_ERR_XDEL_WRONG_NUM_ARGS => "ERR wrong number of arguments for 'xdel' command"u8;
+ public static ReadOnlySpan<byte> RESP_ERR_XTRIM_WRONG_NUM_ARGS => "ERR wrong number of arguments for 'xtrim' command"u8;
+
///
/// Response string templates
///
@@ -357,6 +366,8 @@ static partial class CmdStrings
public static ReadOnlySpan hash => "hash"u8;
public static ReadOnlySpan STRING => "STRING"u8;
public static ReadOnlySpan stringt => "string"u8;
+ public static ReadOnlySpan<byte> STREAM => "STREAM"u8;
+ public static ReadOnlySpan<byte> stream => "stream"u8;
public static ReadOnlySpan none => "none"u8;
///
diff --git a/libs/server/Resp/Parser/RespCommand.cs b/libs/server/Resp/Parser/RespCommand.cs
index cc81121b1df..66182a72c8f 100644
--- a/libs/server/Resp/Parser/RespCommand.cs
+++ b/libs/server/Resp/Parser/RespCommand.cs
@@ -195,6 +195,13 @@ public enum RespCommand : ushort
SUNIONSTORE,
SWAPDB,
UNLINK,
+ XADD,
+ XLEN,
+ XRANGE,
+ XREVRANGE,
+ XDEL,
+ XTRIM,
+ XLAST,
ZADD,
ZCOLLECT,
ZDIFFSTORE,
@@ -961,6 +968,21 @@ private RespCommand FastParseArrayCommand(ref int count, ref ReadOnlySpan
}
break;
+ case 'X':
+ if (*(ulong*)(ptr + 2) == MemoryMarshal.Read("\r\nXADD\r\n"u8))
+ {
+ return RespCommand.XADD;
+ }
+ else if (*(ulong*)(ptr + 2) == MemoryMarshal.Read("\r\nXLEN\r\n"u8))
+ {
+ return RespCommand.XLEN;
+ }
+ else if (*(ulong*)(ptr + 2) == MemoryMarshal.Read("\r\nXDEL\r\n"u8))
+ {
+ return RespCommand.XDEL;
+ }
+ break;
+
case 'Z':
if (*(ulong*)(ptr + 2) == MemoryMarshal.Read("\r\nZADD\r\n"u8))
{
@@ -1147,6 +1169,16 @@ private RespCommand FastParseArrayCommand(ref int count, ref ReadOnlySpan
return RespCommand.WATCH;
}
break;
+ case 'X':
+ if (*(ulong*)(ptr + 3) == MemoryMarshal.Read("\nXTRIM\r\n"u8))
+ {
+ return RespCommand.XTRIM;
+ }
+ else if (*(ulong*)(ptr + 3) == MemoryMarshal.Read("\nXLAST\r\n"u8))
+ {
+ return RespCommand.XLAST;
+ }
+ break;
case 'Z':
if (*(ulong*)(ptr + 3) == MemoryMarshal.Read("\nZCARD\r\n"u8))
@@ -1335,6 +1367,13 @@ private RespCommand FastParseArrayCommand(ref int count, ref ReadOnlySpan
}
break;
+ case 'X':
+ if (*(ulong*)(ptr + 4) == MemoryMarshal.Read("XRANGE\r\n"u8))
+ {
+ return RespCommand.XRANGE;
+ }
+ break;
+
case 'Z':
if (*(ulong*)(ptr + 4) == MemoryMarshal.Read("ZCOUNT\r\n"u8))
{
@@ -1548,6 +1587,10 @@ private RespCommand FastParseArrayCommand(ref int count, ref ReadOnlySpan
{
return RespCommand.ZEXPIREAT;
}
+ else if (*(ulong*)(ptr + 4) == MemoryMarshal.Read("XREVRANG"u8) && *(uint*)(ptr + 11) == MemoryMarshal.Read("GE\r\n"u8))
+ {
+ return RespCommand.XREVRANGE;
+ }
break;
case 10:
if (*(ulong*)(ptr + 4) == MemoryMarshal.Read("SSUBSCRI"u8) && *(uint*)(ptr + 11) == MemoryMarshal.Read("BE\r\n"u8))
diff --git a/libs/server/Resp/RespServerSession.cs b/libs/server/Resp/RespServerSession.cs
index 698d6d6d6da..574b41dd6b7 100644
--- a/libs/server/Resp/RespServerSession.cs
+++ b/libs/server/Resp/RespServerSession.cs
@@ -226,6 +226,8 @@ public IGarnetServer Server
// Threshold for slow log in ticks (0 means disabled)
readonly long slowLogThreshold;
+ internal readonly SessionStreamCache sessionStreamCache;
+
///
/// Create a new RESP server session
///
@@ -305,6 +307,13 @@ public RespServerSession(
if (this.networkSender.GetMaxSizeSettings?.MaxOutputSize < sizeof(int))
this.networkSender.GetMaxSizeSettings.MaxOutputSize = sizeof(int);
}
+
+ // grab stream manager from storeWrapper
+ if (storeWrapper.serverOptions.EnableStreams)
+ {
+ this.streamManager = storeWrapper.streamManager;
+ sessionStreamCache = new SessionStreamCache();
+ }
}
///
@@ -951,6 +960,14 @@ private bool ProcessArrayCommands(RespCommand cmd, ref TGarnetApi st
RespCommand.SUNIONSTORE => SetUnionStore(ref storageApi),
RespCommand.SDIFF => SetDiff(ref storageApi),
RespCommand.SDIFFSTORE => SetDiffStore(ref storageApi),
+ // Stream Commands
+ RespCommand.XADD => StreamAdd(respProtocolVersion),
+ RespCommand.XLEN => StreamLength(),
+ RespCommand.XDEL => StreamDelete(),
+ RespCommand.XRANGE => StreamRange(respProtocolVersion),
+ RespCommand.XREVRANGE => StreamRange(respProtocolVersion, isReverse: true),
+ RespCommand.XTRIM => StreamTrim(),
+ RespCommand.XLAST => StreamLast(respProtocolVersion),
_ => ProcessOtherCommands(cmd, ref storageApi)
};
return success;
diff --git a/libs/server/Resp/StreamCommands.cs b/libs/server/Resp/StreamCommands.cs
new file mode 100644
index 00000000000..7b464dbca2b
--- /dev/null
+++ b/libs/server/Resp/StreamCommands.cs
@@ -0,0 +1,355 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System;
+using Garnet.common;
+using Tsavorite.core;
+
+namespace Garnet.server
+{
+ internal sealed unsafe partial class RespServerSession : ServerSessionBase
+ {
+ readonly StreamManager streamManager;
+
+ /// <summary>
+ /// Adds a new entry to the stream.
+ /// </summary>
+ /// <returns>true if stream was added successfully; error otherwise</returns>
+ private bool StreamAdd(byte respProtocolVersion)
+ {
+ if (parseState.Count < 4)
+ {
+ return AbortWithErrorMessage(CmdStrings.RESP_ERR_XADD_WRONG_NUM_ARGS);
+ }
+
+ int argsParsed = 0;
+
+ // Parse the stream key.
+ var key = parseState.GetArgSliceByRef(0);
+ argsParsed++;
+
+ bool noMkStream = false;
+ if (argsParsed < parseState.Count && parseState.GetArgSliceByRef(argsParsed).ReadOnlySpan.SequenceEqual("NOMKSTREAM"u8))
+ {
+ noMkStream = true;
+ argsParsed++;
+ }
+
+ // Parse the id. We parse as string for easy pattern matching.
+ var idGiven = parseState.GetArgSliceByRef(argsParsed);
+
+ // parse past the ID
+ argsParsed++;
+
+ // get the number of the remaining key-value pairs
+ var numPairs = parseState.Count - argsParsed;
+
+ // grab the rest of the input that will mainly be k-v pairs as entry to the stream.
+ byte* vPtr = parseState.GetArgSliceByRef(argsParsed).ptr - sizeof(int);
+ int vsize = (int)(recvBufferPtr + endReadHead - vPtr);
+ var streamDataSpan = new ReadOnlySpan<byte>(vPtr, vsize);
+ var _output = SpanByteAndMemory.FromPinnedPointer(dcurr, (int)(dend - dcurr));
+
+ var disabledStreams = streamManager == null;
+ if (disabledStreams)
+ {
+ while (!RespWriteUtils.TryWriteError(CmdStrings.RESP_ERR_STREAMS_DISABLED, ref dcurr, dend))
+ SendAndReset();
+ return true;
+ }
+
+ if (sessionStreamCache.TryGetStreamFromCache(key.Span, out StreamObject cachedStream))
+ {
+ cachedStream.AddEntry(idGiven, numPairs, streamDataSpan, ref _output, respProtocolVersion);
+ }
+ else
+ {
+ streamManager.StreamAdd(key, idGiven, noMkStream, streamDataSpan, numPairs, ref _output, out byte[] lastStreamKey, out StreamObject lastStream, respProtocolVersion);
+ // since we added to a new stream that was not in the cache, try adding it to the cache
+ if (lastStream != null)
+ {
+ sessionStreamCache.TryAddStreamToCache(lastStreamKey, lastStream);
+ }
+ }
+ ProcessOutput(_output);
+ return true;
+ }
+
+ /// <summary>
+ /// Retrieves the length of the stream.
+ /// </summary>
+ /// <returns>true if stream length was retrieved successfully; error otherwise</returns>
+ private bool StreamLength()
+ {
+ if (parseState.Count != 1)
+ {
+ return AbortWithErrorMessage(CmdStrings.RESP_ERR_XLEN_WRONG_NUM_ARGS);
+ }
+ // parse the stream key.
+ var key = parseState.GetArgSliceByRef(0);
+
+ ulong streamLength;
+
+ var disabledStreams = streamManager == null;
+ if (disabledStreams)
+ {
+ while (!RespWriteUtils.TryWriteError(CmdStrings.RESP_ERR_STREAMS_DISABLED, ref dcurr, dend))
+ SendAndReset();
+ return true;
+ }
+
+ // check if the stream exists in cache
+ if (sessionStreamCache.TryGetStreamFromCache(key.Span, out StreamObject cachedStream))
+ {
+ streamLength = cachedStream.Length();
+ }
+ else
+ {
+ streamLength = streamManager.StreamLength(key);
+ }
+ // write back result
+ while (!RespWriteUtils.TryWriteInt64((long)streamLength, ref dcurr, dend))
+ SendAndReset();
+ return true;
+ }
+
+ /// <summary>
+ /// Retrieves a range of stream entries.
+ /// </summary>
+ /// <returns>true if range of stream entries were retrieved successfully; error otherwise</returns>
+ public bool StreamRange(byte respProtocolVersion, bool isReverse = false)
+ {
+ // command is of format: XRANGE key start end [COUNT count]
+ // and for XREVRANGE key end start [COUNT count]
+
+ // we expect at least 3 arguments
+ if (parseState.Count < 3)
+ {
+ return AbortWithErrorMessage(CmdStrings.RESP_ERR_XRANGE_WRONG_NUM_ARGS);
+ }
+
+ // parse the stream key
+ var key = parseState.GetArgSliceByRef(0);
+
+ // parse start and end IDs
+ var startId = parseState.GetArgSliceByRef(1).ToString();
+ var endId = parseState.GetArgSliceByRef(2).ToString();
+
+ int count = -1;
+ if (parseState.Count > 3)
+ {
+ // parse the count argument
+ var countStr = parseState.GetArgSliceByRef(4).ToString();
+ if (!int.TryParse(countStr, out count))
+ {
+ return AbortWithErrorMessage(CmdStrings.RESP_ERR_GENERIC_SYNTAX_ERROR);
+ }
+ }
+
+ var _output = SpanByteAndMemory.FromPinnedPointer(dcurr, (int)(dend - dcurr));
+
+ var disabledStreams = streamManager == null;
+ if (disabledStreams)
+ {
+ while (!RespWriteUtils.TryWriteError(CmdStrings.RESP_ERR_STREAMS_DISABLED, ref dcurr, dend))
+ SendAndReset();
+ return true;
+ }
+
+ bool success = false;
+ // check if the stream exists in cache
+ if (sessionStreamCache.TryGetStreamFromCache(key.Span, out StreamObject cachedStream))
+ {
+ cachedStream.ReadRange(startId, endId, count, ref _output, respProtocolVersion, isReverse);
+ success = true;
+ }
+ else
+ {
+ success = streamManager.StreamRange(key, startId, endId, count, ref _output, respProtocolVersion, isReverse);
+ }
+ if (success)
+ {
+ // _ = ProcessOutputWithHeader(_output);
+ ProcessOutput(_output);
+ }
+ else
+ {
+ //return empty array
+ while (!RespWriteUtils.TryWriteArrayLength(0, ref dcurr, dend))
+ SendAndReset();
+ return true;
+ }
+
+ return true;
+ }
+
+ /// <summary>
+ /// Deletes stream entry(s).
+ /// </summary>
+ /// <returns>true if stream entry(s) was deleted successfully; error otherwise</returns>
+ public bool StreamDelete()
+ {
+ // command is of format: XDEL key id [id ...]
+ // we expect at least 2 arguments
+ if (parseState.Count < 2)
+ {
+ return AbortWithErrorMessage(CmdStrings.RESP_ERR_XDEL_WRONG_NUM_ARGS);
+ }
+
+ // parse the stream key
+ var key = parseState.GetArgSliceByRef(0);
+ int deletedCount = 0;
+
+ var disabledStreams = streamManager == null;
+ if (disabledStreams)
+ {
+ while (!RespWriteUtils.TryWriteError(CmdStrings.RESP_ERR_STREAMS_DISABLED, ref dcurr, dend))
+ SendAndReset();
+ return true;
+ }
+
+ // for every id, parse and delete the stream entry
+ for (int i = 1; i < parseState.Count; i++)
+ {
+ // parse the id as string
+ var idGiven = parseState.GetArgSliceByRef(i);
+
+ bool deleted;
+ // check if the stream exists in cache
+ if (sessionStreamCache.TryGetStreamFromCache(key.Span, out StreamObject cachedStream))
+ {
+ deleted = cachedStream.DeleteEntry(idGiven);
+ }
+ else
+ {
+ // delete the entry in the stream from the streamManager
+ deleted = streamManager.StreamDelete(key, idGiven, out StreamObject lastStream);
+ if (lastStream != null)
+ {
+ // since we deleted from a stream that was not in the cache, try adding it to the cache
+ sessionStreamCache.TryAddStreamToCache(key.ToArray(), lastStream);
+ }
+ }
+
+ deletedCount = deleted ? deletedCount + 1 : deletedCount;
+ }
+
+ // write back the number of entries deleted
+ while (!RespWriteUtils.TryWriteInt64(deletedCount, ref dcurr, dend))
+ SendAndReset();
+ return true;
+ }
+
+ /// <summary>
+ /// Trims the stream to the specified length or ID.
+ /// </summary>
+ /// <returns>returns true if stream was trimmed successfully; error otherwise</returns>
+ public bool StreamTrim()
+ {
+ if (parseState.Count < 3)
+ {
+ return AbortWithErrorMessage(CmdStrings.RESP_ERR_XTRIM_WRONG_NUM_ARGS);
+ }
+
+ var key = parseState.GetArgSliceByRef(0);
+ var trimType = parseState.GetArgSliceByRef(1).ToString().ToUpper();
+ bool approximate = false;
+ int trimArgIndex = 2;
+ // Check for optional ~
+ if (parseState.Count > 3 && parseState.GetArgSliceByRef(2).ToString() == "~")
+ {
+ approximate = true;
+ trimArgIndex++;
+ }
+ var trimArg = parseState.GetArgSliceByRef(trimArgIndex);
+
+ ulong entriesTrimmed = 0;
+ StreamTrimOpts optType = StreamTrimOpts.NONE;
+ switch (trimType)
+ {
+ case "MAXLEN":
+ optType = StreamTrimOpts.MAXLEN;
+ break;
+ case "MINID":
+ optType = StreamTrimOpts.MINID;
+ break;
+ }
+
+ var disabledStreams = streamManager == null;
+ if (disabledStreams)
+ {
+ while (!RespWriteUtils.TryWriteError(CmdStrings.RESP_ERR_STREAMS_DISABLED, ref dcurr, dend))
+ SendAndReset();
+ return true;
+ }
+
+ bool result;
+ if (sessionStreamCache.TryGetStreamFromCache(key.Span, out StreamObject cachedStream))
+ {
+ result = cachedStream.Trim(trimArg, optType, out entriesTrimmed, approximate);
+ }
+ else
+ {
+ result = streamManager.StreamTrim(key, trimArg, optType, out entriesTrimmed, approximate);
+ }
+ if (!result)
+ {
+ return AbortWithErrorMessage(CmdStrings.RESP_ERR_GENERIC_SYNTAX_ERROR);
+ }
+ while (!RespWriteUtils.TryWriteInt64((long)entriesTrimmed, ref dcurr, dend))
+ SendAndReset();
+ return true;
+ }
+
+ /// <summary>
+ /// Gets last entry in the stream.
+ /// XLAST key
+ /// </summary>
+ /// <returns></returns>
+ public bool StreamLast(byte respProtocolVersion)
+ {
+ if (parseState.Count != 1)
+ {
+ return AbortWithErrorMessage(CmdStrings.RESP_ERR_WRONG_NUMBER_OF_ARGUMENTS);
+ }
+
+ var key = parseState.GetArgSliceByRef(0);
+
+ var _output = SpanByteAndMemory.FromPinnedPointer(dcurr, (int)(dend - dcurr));
+
+ var disabledStreams = streamManager == null;
+ if (disabledStreams)
+ {
+ while (!RespWriteUtils.TryWriteError(CmdStrings.RESP_ERR_STREAMS_DISABLED, ref dcurr, dend))
+ SendAndReset();
+ return true;
+ }
+
+ bool success = false;
+ // check if the stream exists in cache
+ if (sessionStreamCache.TryGetStreamFromCache(key.Span, out StreamObject cachedStream))
+ {
+ cachedStream.ReadLastEntry(ref _output, respProtocolVersion);
+ success = true;
+ }
+ else
+ {
+ success = streamManager.StreamLast(key, ref _output, respProtocolVersion);
+ }
+
+ if (success)
+ {
+ ProcessOutput(_output);
+ }
+ else
+ {
+ //return empty array
+ while (!RespWriteUtils.TryWriteArrayLength(0, ref dcurr, dend))
+ SendAndReset();
+ return true;
+ }
+
+ return true;
+ }
+ }
+}
\ No newline at end of file
diff --git a/libs/server/Servers/GarnetServerOptions.cs b/libs/server/Servers/GarnetServerOptions.cs
index d776039ac88..20063dd0f5a 100644
--- a/libs/server/Servers/GarnetServerOptions.cs
+++ b/libs/server/Servers/GarnetServerOptions.cs
@@ -520,6 +520,19 @@ public string GetStoreCheckpointDirectory(int dbId) =>
public string GetAppendOnlyFileDirectory(int dbId) =>
Path.Combine(AppendOnlyFileBaseDirectory, GetAppendOnlyFileDirectoryName(dbId));
+ // Enable STREAMS on server
+ public bool EnableStreams = false;
+
+ /// <summary>
+ /// Page size for BTree index for STREAM
+ /// </summary>
+ public string StreamPageSize = "4m";
+
+ /// <summary>
+ /// Memory for STREAM
+ /// </summary>
+ public string StreamMemorySize = "1g";
+
///
/// Constructor
///
@@ -705,6 +718,32 @@ public KVSettings GetSettings(ILoggerFactory loggerFactory, LightEpoch epoch, St
return kvSettings;
}
+ /// <summary>
+ /// Get stream page size
+ /// </summary>
+ /// <returns></returns>
+ public long StreamPageSizeBytes()
+ {
+ long size = ParseSize(StreamPageSize, out int _);
+ long adjustedSize = PreviousPowerOf2(size);
+ if (size != adjustedSize)
+ logger?.LogInformation($"Warning: using lower stream page size than specified (power of 2)");
+ return adjustedSize;
+ }
+
+ /// <summary>
+ /// Get stream memory size
+ /// </summary>
+ /// <returns></returns>
+ public long StreamMemorySizeBytes()
+ {
+ long size = ParseSize(StreamMemorySize, out int _);
+ long adjustedSize = PreviousPowerOf2(size);
+ if (size != adjustedSize)
+ logger?.LogInformation($"Warning: using lower stream page size than specified (power of 2)");
+ return adjustedSize;
+ }
+
///
/// Get memory size
///
diff --git a/libs/server/Storage/Session/Common/ArrayKeyIterationFunctions.cs b/libs/server/Storage/Session/Common/ArrayKeyIterationFunctions.cs
index 45dd360c1f8..683274e9ea1 100644
--- a/libs/server/Storage/Session/Common/ArrayKeyIterationFunctions.cs
+++ b/libs/server/Storage/Session/Common/ArrayKeyIterationFunctions.cs
@@ -27,6 +27,15 @@ sealed partial class StorageSession : IDisposable
long lastScanCursor;
List Keys;
+
+ private const long ObjectStoreCursorBitMask = (1L << 49) - 1; // bits 0-48 set to 1
+ private const long StreamStoreCursorBitMask = (1L << 50) - 1; // bits 0-49 set to 1
+
+ private long GetObjectStoreCursor(long cursor) => cursor & ObjectStoreCursorBitMask;
+ private long GetStreamStoreCursor(long cursor) => cursor & StreamStoreCursorBitMask;
+ private bool IsNotObjectStoreCursorAndNotStreamCursor(long cursor) => (cursor & (1L << 49)) == 0 && (cursor & (1L << 50)) == 0;
+ private bool IsNotStreamCursor(long cursor) => (cursor & (1L << 50)) == 0;
+
///
/// Gets keys matching the pattern with a limit of count in every iteration
/// when using pattern
diff --git a/libs/server/Storage/Session/StorageSession.cs b/libs/server/Storage/Session/StorageSession.cs
index 43bcc8458f4..90cb4296748 100644
--- a/libs/server/Storage/Session/StorageSession.cs
+++ b/libs/server/Storage/Session/StorageSession.cs
@@ -47,6 +47,7 @@ sealed partial class StorageSession : IDisposable
public readonly ScratchBufferBuilder scratchBufferBuilder;
public readonly FunctionsState functionsState;
+ public readonly StreamManager streamManager;
public TransactionManager txnManager;
public StateMachineDriver stateMachineDriver;
@@ -101,6 +102,8 @@ public StorageSession(StoreWrapper storeWrapper,
unifiedBasicContext = unifiedStoreSession.BasicContext;
unifiedTransactionalContext = unifiedStoreSession.TransactionalContext;
+ streamManager = storeWrapper.streamManager;
+
HeadAddress = db.Store.Log.HeadAddress;
ObjectScanCountLimit = storeWrapper.serverOptions.ObjectScanCountLimit;
}
diff --git a/libs/server/StoreWrapper.cs b/libs/server/StoreWrapper.cs
index 2a40868d91c..78b4a022766 100644
--- a/libs/server/StoreWrapper.cs
+++ b/libs/server/StoreWrapper.cs
@@ -153,6 +153,9 @@ public sealed class StoreWrapper
///
public GarnetCheckpointManager StoreCheckpointManager => (GarnetCheckpointManager)store?.CheckpointManager;
+
+ internal readonly StreamManager streamManager;
+
///
/// Constructor
///
@@ -255,6 +258,10 @@ public StoreWrapper(
StoreCheckpointManager.CurrentHistoryId = runId;
}
}
+ if (serverOptions.EnableStreams)
+ {
+ this.streamManager = new StreamManager(serverOptions.StreamPageSizeBytes(), serverOptions.StreamMemorySizeBytes(), 0);
+ }
}
///
@@ -273,6 +280,11 @@ public StoreWrapper(StoreWrapper storeWrapper, bool recordToAof) : this(storeWra
clusterFactory: null,
loggerFactory: storeWrapper.loggerFactory)
{
+ // initialize stream manager
+ if (serverOptions.EnableStreams)
+ {
+ this.streamManager = new StreamManager(serverOptions.StreamPageSizeBytes(), serverOptions.StreamMemorySizeBytes(), 0);
+ }
}
///
diff --git a/libs/server/Stream/SessionStreamCache.cs b/libs/server/Stream/SessionStreamCache.cs
new file mode 100644
index 00000000000..4f6ee3ba288
--- /dev/null
+++ b/libs/server/Stream/SessionStreamCache.cs
@@ -0,0 +1,59 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System;
+using System.Collections.Generic;
+
+namespace Garnet.server
+{
+ internal class SessionStreamCache
+ {
+ const int DefaultCacheSize = 16;
+ readonly Dictionary streamCache = new Dictionary(DefaultCacheSize, ByteArrayComparer.Instance);
+ readonly byte[][] streamKeysCache = new byte[DefaultCacheSize][];
+ int cachedStreamsCount = 0;
+ int front = 0;
+
+ public SessionStreamCache()
+ { }
+
+ ///
 + /// Lookup a stream in the cache. Since the cache is expected to be small, we can sequentially scan.
+ ///
+ /// name of stream to lookup
+ /// stream found from the cache
+ /// true if stream exists in cache
+ public bool TryGetStreamFromCache(ReadOnlySpan key, out StreamObject stream)
+ {
+ return streamCache.TryGetValue(key.ToArray(), out stream);
+ }
+
+ ///
 + /// Add a stream to the cache. If the cache is full, we evict the oldest cached stream to make room.
+ ///
+ /// name of stream
+ /// reference to stream object
+ /// true if successfully added
+ public bool TryAddStreamToCache(byte[] key, StreamObject stream)
+ {
+ if (cachedStreamsCount < DefaultCacheSize)
+ {
+ streamCache.Add(key, stream);
+ // add to circular array and update front
+ streamKeysCache[front] = key;
+ front = (front + 1) % DefaultCacheSize;
+ cachedStreamsCount++;
+ return true;
+ }
+
+ streamCache.Remove(streamKeysCache[front]);
+ streamCache.Add(key, stream);
+ // add to circular array where we removed the oldest stream
+ streamKeysCache[front] = key;
+ front = (front + 1) % DefaultCacheSize;
+ // we don't need to update cachedStreamsCount since we added and removed a stream
+ return true;
+
+ }
+ }
+}
\ No newline at end of file
diff --git a/libs/server/Stream/Stream.cs b/libs/server/Stream/Stream.cs
new file mode 100644
index 00000000000..9c134836dc6
--- /dev/null
+++ b/libs/server/Stream/Stream.cs
@@ -0,0 +1,713 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System;
+using Tsavorite.core;
+using Garnet.server.BTreeIndex;
+using Garnet.common;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Garnet.server
+{
+ public enum StreamTrimOpts
+ {
+ MAXLEN,
+ MINID,
+ NONE
+ }
+
+ public enum XADDOpts
+ {
+ NOMKSTREAM,
+ NONE
+ }
+
+ public enum ParsedStreamEntryID
+ {
+ VALID,
+ INVALID,
+ NOT_GREATER,
+ }
+
+ // This is the layout that is put in the log for each stream entry
+ [StructLayout(LayoutKind.Sequential, Pack = 1, Size = 20)]
+ public struct StreamLogEntryHeader
+ {
+ public StreamID id;
+ public int numPairs;
+ }
+
+ public class StreamObject : IDisposable
+ {
+ readonly IDevice device;
+ readonly TsavoriteLog log;
+ readonly BTree index;
+ StreamID lastId;
+ long totalEntriesAdded;
+ SingleWriterMultiReaderLock _lock;
+
+ public StreamID LastId
+ {
+ get
+ {
+ // Need locking to prevent torn reads from AddEntry
+ _lock.ReadLock();
+ try
+ {
+ return lastId;
+ }
+ finally
+ {
+ _lock.ReadUnlock();
+ }
+ }
+ }
+
+ ///
+ /// Constructor
+ ///
+ /// Directory where the log will be stored
+ /// Page size of the log used for the stream
+ public StreamObject(string logDir, long pageSize, long memorySize, int safeTailRefreshFreqMs)
+ {
+ device = logDir == null ? new NullDevice() : Devices.CreateLogDevice("streamLogs/" + logDir + "/streamLog", preallocateFile: false);
+ log = new TsavoriteLog(new TsavoriteLogSettings { LogDevice = device, PageSize = pageSize, MemorySize = memorySize, SafeTailRefreshFrequencyMs = safeTailRefreshFreqMs });
+ index = new BTree(device.SectorSize);
+ totalEntriesAdded = 0;
+ lastId = default;
+ _lock = new SingleWriterMultiReaderLock();
+ }
+
+ ///
+ /// Increment the stream ID
+ ///
+ /// carries the incremented stream id
+ public void IncrementID(ref StreamID incrementedID)
+ {
+ var originalMs = lastId.getMS();
+ var originalSeq = lastId.getSeq();
+
+ if (originalMs == long.MaxValue)
+ {
+ incrementedID = default;
+ return;
+ }
+
+ var newMs = originalMs;
+ var newSeq = originalSeq + 1;
+
+ // if seq overflows, increment timestamp and reset seq
+ if (newSeq == 0)
+ {
+ newMs += 1;
+ newSeq = 0;
+ }
+
+ incrementedID.setMS(newMs);
+ incrementedID.setSeq(newSeq);
+
+ }
+
+ ///
+ /// Generate the next stream ID
+ ///
+ /// StreamID generated
+ public unsafe void GenerateNextID(ref StreamID id)
+ {
+ ulong timestamp = (ulong)Stopwatch.GetTimestamp() / (ulong)(Stopwatch.Frequency / 1000);
+
+ // read existing timestamp in big endian format
+ var lastTs = lastId.getMS();
+ // if this is the first entry or timestamp is greater than last added entry
+ if (totalEntriesAdded == 0 || timestamp > lastTs)
+ {
+ // this will write timestamp in big endian format
+ id.setMS(timestamp);
+ id.setSeq(0);
+ return;
+ }
+ // if timestamp is same as last added entry, increment the sequence number
+ // if seq overflows, increment timestamp and reset the sequence number
+ IncrementID(ref id);
+ }
+
+ unsafe ParsedStreamEntryID parseIDString(PinnedSpanByte idSlice, ref StreamID id)
+ {
+ // if we have to auto-generate the whole ID
+ if (*idSlice.ptr == '*' && idSlice.length == 1)
+ {
+ GenerateNextID(ref id);
+ return ParsedStreamEntryID.VALID;
+ }
+
+ var lastIdDecodedTs = lastId.getMS();
+
+ // parse user-defined ID
+ // can be of following formats:
+ // 1. ts (seq = 0)
+ // 2. ts-* (auto-generate seq number)
+ // 3. ts-seq
+
+ // last character is a *
+ if (*(idSlice.ptr + idSlice.length - 1) == '*')
+ {
+ // has to be of format ts-*, check if '-' is the preceding character
+ if (*(idSlice.ptr + idSlice.length - 2) != '-')
+ {
+ return ParsedStreamEntryID.INVALID;
+ }
+ // parse the timestamp
+ // slice the id to remove the last two characters
+ var slicedId = PinnedSpanByte.FromPinnedPointer(idSlice.ptr, idSlice.length - 2);
+ var idEnd = idSlice.ptr + idSlice.length - 2;
+ if (!RespReadUtils.ReadUlong(out ulong timestamp, ref idSlice.ptr, idEnd))
+ {
+ return ParsedStreamEntryID.INVALID;
+ }
+
+ // check if timestamp is greater than last added entry's decoded ts
+ if (totalEntriesAdded != 0 && timestamp < lastIdDecodedTs)
+ {
+ return ParsedStreamEntryID.NOT_GREATER;
+ }
+ else if (totalEntriesAdded != 0 && timestamp == lastIdDecodedTs)
+ {
+ IncrementID(ref id);
+ }
+ else
+ {
+ id.setMS(timestamp);
+ id.setSeq(0);
+ }
+ }
+ else
+ {
+ // find index of '-' in the id
+ int index = -1;
+ for (int i = 0; i < idSlice.length; i++)
+ {
+ if (*(idSlice.ptr + i) == '-')
+ {
+ index = i;
+ break;
+ }
+ }
+ // if '-' is not found, format should be just ts
+ if (index == -1)
+ {
+ if (!RespReadUtils.ReadUlong(out ulong timestamp, ref idSlice.ptr, idSlice.ptr + idSlice.length))
+ {
+ return ParsedStreamEntryID.INVALID;
+ }
+ // check if timestamp is greater than last added entry
+ if (totalEntriesAdded != 0 && timestamp < lastIdDecodedTs)
+ {
+ return ParsedStreamEntryID.NOT_GREATER;
+ }
+ else if (totalEntriesAdded != 0 && timestamp == lastIdDecodedTs)
+ {
+ IncrementID(ref id);
+ }
+ else
+ {
+ id.setMS(timestamp);
+ id.setSeq(0);
+ }
+ }
+ else
+ {
+ // parse the timestamp
+ // slice the id to remove everything after '-'
+ var slicedId = PinnedSpanByte.FromPinnedPointer(idSlice.ptr, index);
+ var slicedSeq = PinnedSpanByte.FromPinnedPointer(idSlice.ptr + index + 1, idSlice.length - index - 1);
+ if (!RespReadUtils.ReadUlong(out ulong timestamp, ref idSlice.ptr, idSlice.ptr + index))
+ {
+ return ParsedStreamEntryID.INVALID;
+ }
+ var seqBegin = idSlice.ptr + index + 1;
+ var seqEnd = idSlice.ptr + idSlice.length;
+ if (!RespReadUtils.ReadUlong(out ulong seq, ref seqBegin, seqEnd))
+ {
+ return ParsedStreamEntryID.INVALID;
+ }
+
+ if (totalEntriesAdded != 0 && timestamp < lastIdDecodedTs)
+ {
+ return ParsedStreamEntryID.NOT_GREATER;
+ }
+ else if (totalEntriesAdded != 0 && timestamp == lastIdDecodedTs)
+ {
+ if (seq <= lastId.seq)
+ {
+ return ParsedStreamEntryID.INVALID;
+ }
+ }
+ // use ID and seq given by user
+ // encode while storing
+ id.setMS(timestamp);
+ id.setSeq(seq);
+ }
+ }
+
+ return ParsedStreamEntryID.VALID;
+ }
+
+ ///
+ /// Adds an entry or item to the stream
+ ///
+ public unsafe void AddEntry(PinnedSpanByte idSlice, int numPairs, ReadOnlySpan rawFieldValuePairs, ref SpanByteAndMemory output, byte respProtocolVersion)
+ {
+ byte* tmpPtr = null;
+ StreamID id = default;
+ using var writer = new RespMemoryWriter(respProtocolVersion, ref output);
+
+ // take a lock to ensure thread safety
+ _lock.WriteLock();
+ try
+ {
+ var parsedIDStatus = parseIDString(idSlice, ref id);
+ if (parsedIDStatus == ParsedStreamEntryID.INVALID)
+ {
+ writer.WriteError(CmdStrings.RESP_ERR_XADD_INVALID_STREAM_ID);
+ return;
+ }
+ else if (parsedIDStatus == ParsedStreamEntryID.NOT_GREATER)
+ {
+ writer.WriteError(CmdStrings.RESP_ERR_XADD_ID_NOT_GREATER);
+ return;
+ }
+
+ // add the entry to the log
+ StreamLogEntryHeader header = new StreamLogEntryHeader
+ {
+ id = id,
+ numPairs = numPairs,
+ };
+
+ log.Enqueue(header, item: rawFieldValuePairs, out long returnedLogicalAddr);
+
+ var streamValue = new Value((ulong)returnedLogicalAddr);
+
+ bool added = index.Insert((byte*)Unsafe.AsPointer(ref id.idBytes[0]), streamValue);
+
+ if (!added)
+ {
+ writer.WriteNull();
+ return;
+ }
+
+ // copy encoded ms and seq
+ lastId.ms = id.ms;
+ lastId.seq = id.seq;
+
+ totalEntriesAdded++;
+
+ ulong idMS = id.getMS();
+ ulong idSeq = id.getSeq();
+ Span outputBuffer = stackalloc byte[(NumUtils.MaximumFormatInt64Length * 2) + 1];
+ int len = NumUtils.WriteInt64((long)idMS, outputBuffer);
+ outputBuffer[len++] = (byte)'-';
+ len += NumUtils.WriteInt64((long)idSeq, outputBuffer.Slice(len));
+
+ writer.WriteBulkString(outputBuffer.Slice(0, len));
+ }
+ finally
+ {
+ // log.Commit();
+ _lock.WriteUnlock();
+ }
+
+ }
+
+ ///
+ /// Get current length of the stream (number of entries in the stream)
+ ///
+ /// length of stream
+ public ulong Length()
+ {
+ ulong len = 0;
+ _lock.ReadLock();
+ try
+ {
+ // get length of the stream from the index excluding tombstones
+ len = index.ValidCount;
+ }
+ finally
+ {
+ _lock.ReadUnlock();
+ }
+ return len;
+ }
+
+ ///
+ /// Deletes an entry from the stream
+ ///
+ /// id of the stream entry to delete
+ /// true if entry was deleted successfully
+ public unsafe bool DeleteEntry(PinnedSpanByte idSlice)
+ {
+ // first parse the idString
+ if (!parseCompleteID(idSlice, out StreamID entryID))
+ {
+ return false;
+ }
+ bool deleted = false;
+ // take a lock to delete from the index
+ _lock.WriteLock();
+ try
+ {
+ deleted = index.Delete((byte*)Unsafe.AsPointer(ref entryID.idBytes[0]));
+ }
+ finally
+ {
+ _lock.WriteUnlock();
+ }
+ return deleted;
+ }
+
+
+ // Read the last entry in the stream and into output
+ internal unsafe void ReadLastEntry(ref SpanByteAndMemory output, byte respProtocolVersion)
+ {
+ var writer = new RespMemoryWriter(respProtocolVersion, ref output);
+ try
+ {
+ _lock.ReadLock();
+ try
+ {
+ if (index.Count() == 0)
+ {
+ writer.WriteNull();
+ return;
+ }
+
+ // LastAlive to skip tombstoned entries
+ long addressOnLog = (long)index.LastAlive().Value.address;
+ (byte[] entry, int len) = (null, 0); // log.Read(addressOnLog, readUncommitted: true);
+
+ if (entry == null)
+ {
+ writer.WriteNull();
+ return;
+ }
+
+ ReadOnlySpan entrySp = entry.AsSpan(sizeof(long), len - sizeof(long)); // skip the previousEntryAddress part
+ // HK TODO: this is broken atm
+ //WriteEntryToWriter(entrySp, ref writer, len);
+ }
+ finally
+ {
+ _lock.ReadUnlock();
+ }
+ }
+ finally
+ {
+ writer.Dispose();
+ }
+ }
+
+ ///
+ /// Read entries from the stream from given range
+ ///
+ /// start of range
+ /// end of range
+ /// threshold to scanning
+ ///
+ public unsafe void ReadRange(string min, string max, int limit, ref SpanByteAndMemory output, byte respProtocolVersion, bool isReverse = false)
+ {
+ var writer = new RespMemoryWriter(respProtocolVersion, ref output);
+ try
+ {
+ _lock.ReadLock();
+ try
+ {
+ if (index.Count() == 0)
+ {
+ return;
+ }
+
+ long startAddr, endAddr;
+ StreamID startID, endID;
+ if (min == "-")
+ {
+ byte[] idBytes = index.First().Key;
+ startID = new StreamID(idBytes);
+ }
+ else if (min == "+") // this can happen in reverse range queries
+ {
+ byte[] idBytes = index.Last().Key;
+ startID = new StreamID(idBytes);
+ }
+ else if (!ParseStreamIDFromString(min, out startID))
+ {
+ return;
+ }
+
+ if (max == "+")
+ {
+ byte[] idBytes = index.Last().Key;
+ endID = new StreamID(idBytes);
+ }
+ else if (max == "-") // this can happen in reverse range queries
+ {
+ byte[] idBytes = index.First().Key;
+ endID = new StreamID(idBytes);
+ }
+ else if (!ParseStreamIDFromString(max, out endID))
+ {
+ return;
+ }
+
+ int count = index.Get((byte*)Unsafe.AsPointer(ref startID.idBytes[0]), (byte*)Unsafe.AsPointer(ref endID.idBytes[0]), out Value startVal, out Value endVal, out var tombstones, limit, isReverse);
+
+ if (isReverse)
+ {
+ startAddr = (long)startVal.address;
+ endAddr = (long)endVal.address;
+ }
+ else
+ {
+ startAddr = (long)startVal.address;
+ endAddr = (long)endVal.address + 1;
+ }
+
+ long readCount = 0;
+ try
+ {
+ using (var iter = log.Scan(startAddr, endAddr, scanUncommitted: true)) // isReverseStreamIter: isReverse))
+ {
+ writer.WriteArrayLength(count);
+
+ while (iter.GetNext(out byte[] entry, out _, out long currentAddress, out long nextAddress))
+ {
+ var current = new Value((ulong)currentAddress);
+ // check if any tombstone t.address matches current
+ var tombstoneFound = false;
+ foreach (var tombstone in tombstones)
+ {
+ if (tombstone.address == current.address)
+ {
+ tombstoneFound = true;
+ break;
+ }
+ }
+ if (tombstoneFound)
+ {
+ continue;
+ }
+
+ WriteEntryToWriter(entry, ref writer);
+
+ readCount++;
+ if (limit != -1 && readCount == limit)
+ {
+ break;
+ }
+ }
+ }
+ }
+ finally
+ { }
+ }
+ finally
+ {
+ _lock.ReadUnlock();
+ }
+ }
+ finally
+ {
+ writer.Dispose();
+ }
+ }
+
+ ///
+ /// Trims the stream based on the specified options.
+ ///
+ /// length or ID specifying the threshold
+ /// MAXLEN or MINID
+ /// number of keys trimmed
+ ///
+ public unsafe bool Trim(PinnedSpanByte trimArg, StreamTrimOpts optType, out ulong entriesTrimmed, bool approximate = false)
+ {
+ uint numLeavesDeleted = 0;
+ Value headValue = default;
+ _lock.WriteLock();
+ try
+ {
+ switch (optType)
+ {
+ case StreamTrimOpts.MAXLEN:
+ if (!RespReadUtils.ReadUlong(out ulong maxLen, ref trimArg.ptr, trimArg.ptr + trimArg.length))
+ {
+ entriesTrimmed = 0;
+ return false;
+ }
+ index.TrimByLength(maxLen, out entriesTrimmed, out headValue, out var headValidKey, out numLeavesDeleted, approximate);
+ break;
+ case StreamTrimOpts.MINID:
+ if (!parseCompleteID(trimArg, out StreamID minID))
+ {
+ entriesTrimmed = 0;
+ return false;
+ }
+ index.TrimByID((byte*)Unsafe.AsPointer(ref minID.idBytes[0]), out entriesTrimmed, out headValue, out headValidKey, out numLeavesDeleted);
+ break;
+ default:
+ entriesTrimmed = 0;
+ break;
+ }
+
+ if (numLeavesDeleted == 0)
+ {
+ // didn't delete any leaf nodes so done here
+ return true;
+ }
+ // truncate log to new head
+ var newHeadAddress = (long)headValue.address;
+ log.TruncateUntil(newHeadAddress);
+ }
+ finally
+ {
+ _lock.WriteUnlock();
+ }
+ return true;
+ }
+
+ unsafe void WriteEntryToWriter(ReadOnlySpan entryBytes, ref RespMemoryWriter writer)
+ {
+ // each response entry is an array of two items: ID and array of key-value pairs
+ writer.WriteArrayLength(2);
+
+ // Read the first 20 bytes into our StreamLogEntryHeader struct
+ StreamLogEntryHeader streamLogEntryHeader = MemoryMarshal.Read(entryBytes.Slice(0, sizeof(StreamLogEntryHeader)));
+ StreamID entryID = streamLogEntryHeader.id;
+
+ // first item in the array is the ID
+ WriteStreamIdToWriter(entryID, ref writer);
+
+ // Second item is an array so write the subarray length
+ int numPairs = streamLogEntryHeader.numPairs;
+ writer.WriteArrayLength(numPairs);
+
+ // this is a serialized ReadOnlySpan of field-value pairs, we want to copy it directly into the writer
+ int serializedSpanLength = MemoryMarshal.Read(entryBytes.Slice(sizeof(StreamLogEntryHeader)));
+ int valueOffset = sizeof(StreamLogEntryHeader) + sizeof(int);
+ ReadOnlySpan value = entryBytes.Slice(valueOffset, serializedSpanLength);
+ writer.WriteDirect(value);
+ }
+
+
+ unsafe bool parseCompleteID(PinnedSpanByte idSlice, out StreamID streamID)
+ {
+ streamID = default;
+ // complete ID is of the format ts-seq in input where both ts and seq are ulong
+ // find the index of '-' in the id
+ int index = -1;
+ for (int i = 0; i < idSlice.length; i++)
+ {
+ if (*(idSlice.ptr + i) == '-')
+ {
+ index = i;
+ break;
+ }
+ }
+ // parse the timestamp
+ if (!RespReadUtils.ReadUlong(out ulong timestamp, ref idSlice.ptr, idSlice.ptr + index))
+ {
+ return false;
+ }
+
+ // after reading the timestamp, the pointer will be at the '-' character
+ var seqBegin = idSlice.ptr + 1;
+ // parse the sequence number
+ if (!RespReadUtils.ReadUlong(out ulong seq, ref seqBegin, idSlice.ptr + idSlice.length - 1))
+ {
+ return false;
+ }
+ streamID.setMS(timestamp);
+ streamID.setSeq(seq);
+ return true;
+ }
+
+ public static bool ParseCompleteStreamIDFromString(ReadOnlySpan idString, out StreamID id)
+ {
+ id = default;
+ int hyphenIdx = -1;
+ for (int i = 0; i < idString.Length; i++)
+ {
+ if (idString[i] == '-')
+ {
+ if (hyphenIdx != -1)
+ {
 + // more than 1 occurrence of hyphen
+ return false;
+ }
+ hyphenIdx = i;
+ }
+ }
+
 + // no occurrence of hyphen
+ if (hyphenIdx == -1)
+ return false;
+
+ if (!ulong.TryParse(idString.Slice(0, hyphenIdx), out ulong timestamp))
+ {
+ return false;
+ }
+ if (!ulong.TryParse(idString.Slice(hyphenIdx + 1), out ulong seq))
+ {
+ return false;
+ }
+
+ id.setMS(timestamp);
+ id.setSeq(seq);
+ return true;
+ }
+
+ public static bool ParseStreamIDFromString(ReadOnlySpan idString, out StreamID id)
+ {
+ id = default;
+ if (idString == "-" || idString == "+")
+ {
+ return false;
+ }
+ if (!idString.Contains('-'))
+ {
+ if (!ulong.TryParse(idString, out ulong ms))
+ {
+ return false;
+ }
+ id.setMS(ms);
+ id.setSeq(0);
+ return true;
+ }
+ return ParseCompleteStreamIDFromString(idString, out id);
+ }
+
+ // Util to write without doing temp heap allocations
+ private static void WriteStreamIdToWriter(StreamID id, ref RespMemoryWriter writer)
+ {
+ Span outputBuffer = stackalloc byte[(NumUtils.MaximumFormatInt64Length * 2) + 1];
+ ulong idMS = id.getMS();
+ ulong idSeq = id.getSeq();
+ int len = NumUtils.WriteInt64((long)idMS, outputBuffer);
+ outputBuffer[len++] = (byte)'-';
+ len += NumUtils.WriteInt64((long)idSeq, outputBuffer.Slice(len));
+ writer.WriteBulkString(outputBuffer.Slice(0, len));
+ }
+
+ ///
+ public void Dispose()
+ {
+ try
+ {
+ log.Dispose();
+ device.Dispose();
+ }
+ finally
+ { }
+ }
+ }
+}
\ No newline at end of file
diff --git a/libs/server/Stream/StreamID.cs b/libs/server/Stream/StreamID.cs
new file mode 100644
index 00000000000..a2ab0a99fb9
--- /dev/null
+++ b/libs/server/Stream/StreamID.cs
@@ -0,0 +1,64 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System;
+using System.Buffers.Binary;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+namespace Garnet.server
+{
+ ///
+ /// Represents a GarnetStreamID, which is a 128-bit identifier for an entry in a stream.
+ ///
+ [StructLayout(LayoutKind.Explicit)]
+ public unsafe struct StreamID
+ {
+ [FieldOffset(0)]
+ public ulong ms;
+ [FieldOffset(8)]
+ public ulong seq;
+ [FieldOffset(0)]
+ public fixed byte idBytes[16];
+
+ public StreamID(ulong ms, ulong seq)
+ {
+ BinaryPrimitives.WriteUInt64BigEndian(new Span(Unsafe.AsPointer(ref this.ms), 8), ms);
+ BinaryPrimitives.WriteUInt64BigEndian(new Span(Unsafe.AsPointer(ref this.seq), 8), seq);
+ }
+ public void setMS(ulong ms)
+ {
+ BinaryPrimitives.WriteUInt64BigEndian(new Span(Unsafe.AsPointer(ref this.ms), 8), ms);
+ }
+
+ public void setSeq(ulong seq)
+ {
+ BinaryPrimitives.WriteUInt64BigEndian(new Span(Unsafe.AsPointer(ref this.seq), 8), seq);
+ }
+
+ public ulong getMS()
+ {
+ return BinaryPrimitives.ReadUInt64BigEndian(new Span(Unsafe.AsPointer(ref this.ms), 8));
+ }
+
+ public ulong getSeq()
+ {
+ return BinaryPrimitives.ReadUInt64BigEndian(new Span(Unsafe.AsPointer(ref this.seq), 8));
+ }
+
+ public unsafe StreamID(byte[] inputBytes)
+ {
+ if (inputBytes.Length != 16)
+ {
+ throw new ArgumentException("idBytes must be 16 bytes");
+ }
+
+ fixed (byte* idBytesPtr = idBytes)
+ {
+ var sourceSpan = new ReadOnlySpan(inputBytes);
+ var destinationSpan = new Span(idBytesPtr, 16);
+ sourceSpan.CopyTo(destinationSpan);
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/libs/server/Stream/StreamManager.cs b/libs/server/Stream/StreamManager.cs
new file mode 100644
index 00000000000..1b99504561a
--- /dev/null
+++ b/libs/server/Stream/StreamManager.cs
@@ -0,0 +1,331 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using Garnet.common;
+using Tsavorite.core;
+
+namespace Garnet.server
+{
+ public sealed class StreamManager : IDisposable
+ {
+ private readonly Dictionary streams;
+
+ long defPageSize;
+ long defMemorySize;
+ int safeTailRefreshFreqMs;
+
+ SingleWriterMultiReaderLock _lock = new SingleWriterMultiReaderLock();
+
+ public StreamManager(long pageSize, long memorySize, int safeTailRefreshFreqMs)
+ {
+ streams = new Dictionary(ByteArrayComparer.Instance);
+ defPageSize = pageSize;
+ defMemorySize = memorySize;
+ this.safeTailRefreshFreqMs = safeTailRefreshFreqMs;
+ }
+
+ /*
+ SCAN semantics:
+ Eventually returns all stable keys: Keys that exist from start to finish of the full scan will be returned at least once
+ No duplicates for stable keys: Keys that don't change during the scan won't be returned multiple times (though this isn't guaranteed if rehashing occurs)
+ May return deleted keys: A key deleted after being scanned but before the cursor is returned can still appear in results
+ May miss new keys: Keys added during the scan may or may not be returned
+ May return modified keys multiple times: If keys are added/deleted causing rehash, some keys might be returned more than once
+ Full scan always terminates: Returns cursor 0 eventually, even with ongoing modifications.
+ Note: Naive locking is okay till I see something in the profiler that suggests otherwise.
+ */
+ public unsafe void KeyScan(byte* patternPtr, int length, ref long cursor, long remainingCount, List keys)
+ {
+ _lock.ReadLock();
+ try
+ {
+ int currentPosition = 0; // Tracks absolute position in dictionary
+ int matchedCount = 0; // Tracks number of keys added to results
+
+ foreach (byte[] key in streams.Keys)
+ {
+ // Skip keys before cursor position
+ if (currentPosition < cursor)
+ {
+ currentPosition++;
+ continue;
+ }
+
+ // Check pattern matching
+ bool matches = true;
+ if (patternPtr != null)
+ {
+ fixed (byte* keyPtr = key)
+ matches = GlobUtils.Match(patternPtr, length, keyPtr, key.Length, true);
+ }
+
+ // If key matches pattern, add it to results
+ if (matches)
+ {
+ keys.Add(key);
+ matchedCount++;
+
+ // Stop if we've reached the requested count
+ if (matchedCount >= remainingCount)
+ {
+ currentPosition++;
+ break;
+ }
+ }
+
+ currentPosition++;
+ }
+
+ // If we've processed all keys, set cursor to 0 (scan complete)
+ // Otherwise, set cursor to current position for next iteration
+ cursor = currentPosition >= streams.Count ? 0 : currentPosition;
+ }
+ finally
+ {
+ _lock.ReadUnlock();
+ }
+ }
+
+ ///
+ /// Get all the stream keys
+ ///
+ /// Array of stream keys as strings
+ public unsafe byte[][] GetKeys(byte* pattern, int len)
+ {
+ _lock.ReadLock();
+ byte[][] keys = new byte[streams.Count][];
+ try
+ {
+ int i = 0;
+ foreach (var key in streams.Keys)
+ {
+ if (pattern != null)
+ {
+ fixed (byte* keyPtr = key)
+ {
+ if (!GlobUtils.Match(pattern, len, keyPtr, key.Length, true))
+ {
+ continue;
+ }
+ }
+ }
+
+ keys[i] = key;
+ i++;
+ }
+ return keys;
+ }
+ finally
+ {
+ _lock.ReadUnlock();
+ }
+ }
+
+ ///
+ /// Add a new entry to the stream
+ ///
+ /// key/name of the stream
+ /// id of the stream entry
+ /// if true, do not create a new stream if it does not exist
+ /// payload to the stream
+ /// # k-v pairs in the payload
+ ///
+ /// key of last stream accessed (for cache)
+ /// reference to last stream accessed (for cache)
+ /// RESP protocol version
+ public void StreamAdd(PinnedSpanByte keySlice, PinnedSpanByte idSlice, bool noMkStream, ReadOnlySpan value, int numPairs, ref SpanByteAndMemory output, out byte[] streamKey, out StreamObject lastStream, byte respProtocolVersion)
+ {
+ // copy key store this key in the dictionary
+ byte[] key = keySlice.ToArray();
+
+ bool foundStream = false;
+ StreamObject stream;
+ lastStream = null;
+ streamKey = null;
+ _lock.ReadLock();
+ try
+ { // HK TODO: wth is this code block doing? Where it calls AddEntry seems weird
+ foundStream = streams.TryGetValue(key, out stream);
+ if (foundStream)
+ {
+ stream.AddEntry(idSlice, numPairs, value, ref output, respProtocolVersion);
+ // update last accessed stream key
+ lastStream = stream;
+ streamKey = key;
+ }
+ }
+ finally
+ {
+ _lock.ReadUnlock();
+ }
+
+ if (foundStream)
+ {
+ return;
+ }
+
+ // take a write lock
+ _lock.WriteLock();
+ try
+ {
+ // retry to validate if some other thread has created the stream
+ foundStream = streams.TryGetValue(key, out stream);
+ if (!foundStream && !noMkStream)
+ {
+ // stream was not found with this key so create a new one
+ StreamObject newStream = new StreamObject(null, defPageSize, defMemorySize, safeTailRefreshFreqMs);
+ newStream.AddEntry(idSlice, numPairs, value, ref output, respProtocolVersion);
+ streams.TryAdd(key, newStream);
+ streamKey = key;
+ lastStream = newStream;
+ }
+ else if (!foundStream && noMkStream)
+ {
+ // stream was not found and noMkStream is set so return an error
+ using var writer = new RespMemoryWriter(respProtocolVersion, ref output);
+ writer.WriteNull();
+ return;
+ }
+ else
+ {
+ stream.AddEntry(idSlice, numPairs, value, ref output, respProtocolVersion);
+ lastStream = stream;
+ streamKey = key;
+ }
+ }
+ finally
+ {
+ _lock.WriteUnlock();
+ }
+ return;
+ }
+
+ ///
+ /// Get the length of a particular stream
+ ///
+ /// key of the stream we want to obtain the length
+ /// length of the stream
+ public ulong StreamLength(PinnedSpanByte keySlice)
+ {
+ var key = keySlice.ToArray();
+ if (streams != null)
+ {
+ bool foundStream = streams.TryGetValue(key, out StreamObject stream);
+ if (foundStream)
+ {
+ return stream.Length();
+ }
+ else
+ {
+ // return 0 if stream does not exist, as if it was empty
+ return 0;
+ }
+ }
+ return 0;
+ }
+
+ ///
+ /// Perform range scan in a stream
+ ///
+ /// key/name of stream
+ /// start of range
+ /// end of range
+ /// threshold to limit scanning
+ ///
+ /// RESP protocol version
+ public bool StreamRange(PinnedSpanByte keySlice, string start, string end, int count, ref SpanByteAndMemory output, byte respProtocolVersion, bool isReverse)
+ {
+ var key = keySlice.ToArray();
+ if (streams != null && streams.Count > 0)
+ {
+ bool foundStream = streams.TryGetValue(key, out StreamObject stream);
+ if (foundStream)
+ {
+ stream.ReadRange(start, end, count, ref output, respProtocolVersion, isReverse);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ ///
+ /// Delete an entry from a stream
+ ///
+ /// key/name of stream to delete
+ /// id of stream entry to delete
+ /// last accessed stream in cache
+ ///
+ public bool StreamDelete(PinnedSpanByte keySlice, PinnedSpanByte idSlice, out StreamObject lastSeenStream)
+ {
+ var key = keySlice.ToArray();
+ StreamObject stream;
+ lastSeenStream = null;
+ if (streams != null)
+ {
+ if (streams.TryGetValue(key, out stream))
+ {
+ lastSeenStream = stream;
+ return stream.DeleteEntry(idSlice);
+ }
+ }
+ return false;
+ }
+
+ public bool StreamTrim(PinnedSpanByte keySlice, PinnedSpanByte trimArg, StreamTrimOpts optType, out ulong validKeysRemoved, bool approximate = false)
+ {
+ bool foundStream;
+ var key = keySlice.ToArray();
+ StreamObject stream;
+ validKeysRemoved = 0;
+ if (streams != null)
+ {
+ foundStream = streams.TryGetValue(key, out stream);
+
+ if (foundStream)
+ {
+ return stream.Trim(trimArg, optType, out validKeysRemoved, approximate);
+ }
+ }
+ return true; // no keys removed so return true
+ }
+
+ public bool StreamLast(PinnedSpanByte key, ref SpanByteAndMemory output, byte respProtocolVersion)
+ {
+ var keyArr = key.ToArray();
+ if (streams != null && streams.Count > 0)
+ {
+ bool foundStream = streams.TryGetValue(keyArr, out StreamObject stream);
+ if (foundStream)
+ {
+ stream.ReadLastEntry(ref output, respProtocolVersion);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ ///
+ public void Dispose()
+ {
+ if (streams != null)
+ {
+ _lock.WriteLock();
+ try
+ {
+ foreach (var stream in streams.Values)
+ {
+ stream.Dispose();
+ }
+
+ streams.Clear();
+ }
+ finally
+ {
+ _lock.WriteUnlock();
+ }
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/libs/server/Transaction/TransactionManager.cs b/libs/server/Transaction/TransactionManager.cs
index 93b4301130f..8fd70edc407 100644
--- a/libs/server/Transaction/TransactionManager.cs
+++ b/libs/server/Transaction/TransactionManager.cs
@@ -225,6 +225,7 @@ internal bool RunTransactionProc(byte id, ref CustomProcedureInput procInput, Cu
return false;
}
+ // State will never be Aborted at this point for non-cluster mode. This code path is only for TransactionProcedure.
if (state == TxnState.Aborted)
{
WriteCachedSlotVerificationMessage(ref output);
@@ -379,10 +380,15 @@ internal bool Run(bool internal_txn = false, bool fail_fast_on_lock = false, Tim
if (!internal_txn)
watchContainer.SaveKeysToLock(this);
- // Acquire transaction version
+ // Acquire transaction version.
+ // Version is associated to the version the state machine runs in. Version is incremented everytime Checkpointing state machine goes from Prepare to in-progress
+ // We acquire the version here to ensure that we have the latest version before acquiring locks.
+ // This call may block if the system is in prepare_grow phase of the index resizing state machine till it moves to In-progress_grow phase.
txnVersion = stateMachineDriver.AcquireTransactionVersion();
+ //stateMachineDriver.WaitForPrepareGrowComplete();
- // Acquire lock sessions
+ // For the ongoing session, mark it as in-transaction. This marking is only used to handle cases during index resizing.
+ // See further explanation in TsavoriteThread.cs [InternalRefresh].
BeginTransaction();
bool lockSuccess;
@@ -409,9 +415,12 @@ internal bool Run(bool internal_txn = false, bool fail_fast_on_lock = false, Tim
return false;
}
- // Verify transaction version
+ // verify and possibly update txn version after locks are acquired
txnVersion = stateMachineDriver.VerifyTransactionVersion(txnVersion);
+ // acquire txn version after locks are acquired
+ //txnVersion = stateMachineDriver.AcquireTransactionVersionFastNoBarrier();
+
// Update sessions with transaction version
LocksAcquired(txnVersion);
diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Checkpointing/HybridLogCheckpointSMTask.cs b/libs/storage/Tsavorite/cs/src/core/Index/Checkpointing/HybridLogCheckpointSMTask.cs
index 9cfa9117a51..7634ab38bdf 100644
--- a/libs/storage/Tsavorite/cs/src/core/Index/Checkpointing/HybridLogCheckpointSMTask.cs
+++ b/libs/storage/Tsavorite/cs/src/core/Index/Checkpointing/HybridLogCheckpointSMTask.cs
@@ -3,6 +3,7 @@
using System;
using System.Diagnostics;
+using System.Threading;
using System.Threading.Tasks;
namespace Tsavorite.core
@@ -112,10 +113,9 @@ public virtual void GlobalAfterEnteringState(SystemState next, StateMachineDrive
if (stateMachineDriver.GetNumActiveTransactions(lastVersion) > 0)
{
stateMachineDriver.lastVersion = lastVersion;
- stateMachineDriver.lastVersionTransactionsDone = new(0);
- }
- if (stateMachineDriver.GetNumActiveTransactions(lastVersion) > 0)
+ stateMachineDriver.lastVersionTransactionsDone = new SemaphoreSlim(0);
stateMachineDriver.AddToWaitingList(stateMachineDriver.lastVersionTransactionsDone);
+ }
break;
}
}
diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Checkpointing/StateMachineDriver.cs b/libs/storage/Tsavorite/cs/src/core/Index/Checkpointing/StateMachineDriver.cs
index e9fceba76cf..72dea6bad4c 100644
--- a/libs/storage/Tsavorite/cs/src/core/Index/Checkpointing/StateMachineDriver.cs
+++ b/libs/storage/Tsavorite/cs/src/core/Index/Checkpointing/StateMachineDriver.cs
@@ -194,8 +194,12 @@ void GlobalStateMachineStep(SystemState expectedState)
if (!SystemState.Equal(expectedState, systemState))
return;
+ // Get the next state from the state machine definition. Each state machine knows its own transition flow; it defines and exposes it
+ // for the driver to query, and the driver then executes the transitions.
var nextState = stateMachine.NextState(systemState);
+ // The state machine internally holds an array of tasks. This will iterate over each of those tasks and call BeforeEnteringState on each of them.
+ // The tasks internally have logic that they may wish to perform before they can transition to the next state, so this is the hook for that.
stateMachine.GlobalBeforeEnteringState(nextState, this);
// Execute any additional registered callbacks
@@ -217,6 +221,10 @@ void GlobalStateMachineStep(SystemState expectedState)
logger?.LogTrace("Moved to {0}, {1}", nextState.Phase, nextState.Version);
+ // Below we register MakeTransitionWorker to be called once all threads have passed the epoch acquired here. That is to say they have all seen the changes so far, and this guarantees that MakeTransitionWorker is only called after
+ // everyone is seeing a view at least as fresh as this point in time.
+ // The same epoch object is shared with the TsavoriteStore internally as the one held here. So we are essentially trying to use it to lazily communicate with all threads, and synchronize when a callback can be called safely.
+
Debug.Assert(!epoch.ThisInstanceProtected());
try
{
@@ -261,6 +269,8 @@ public async Task WaitForCompletion(SystemState currentState)
void MakeTransitionWorker(SystemState nextState)
{
+ // Notify each task within the state machine that we have entered the new state, so it can run any logic it wanted to hook into this transition.
+ // For example, the hybrid log checkpoint task acts after the state machine enters In-Progress.
stateMachine.GlobalAfterEnteringState(nextState, this);
waitForTransitionIn.Release(int.MaxValue);
}
@@ -280,9 +290,12 @@ async Task RunStateMachine(CancellationToken token = default)
Exception ex = null;
try
{
+ // Crux of transitioning through states will run here
do
{
GlobalStateMachineStep(systemState);
+ // Wait for threads to signal they have entered the new state by releasing the waitForTransitionIn semaphore.
+ // This blocks until the callback MakeTransitionWorker is invoked by the epoch system after all threads have seen the new state.
await ProcessWaitingListAsync(token);
} while (systemState.Phase != Phase.REST);
}
diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/TsavoriteThread.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/TsavoriteThread.cs
index d999180388b..89e4c853b8e 100644
--- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/TsavoriteThread.cs
+++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/TsavoriteThread.cs
@@ -40,6 +40,9 @@ internal void InternalRefresh>;
+
+//// Currently works by having the a linked list for previous address scanning. This is adding 8 bytes per entry, and I need to get rid of this.
+
+//// I don't believe this needs to be concurrency control aware since we only go backwards on entries with knowledge of their addresses.
+//internal class TsavoriteStreamLogReverseIterator : TsavoriteLogScanIterator
+//{
+// internal TsavoriteStreamLogReverseIterator(TsavoriteLog tsavoriteLog, BlittableAllocatorImpl hlog, long beginAddress, long endAddress,
+// GetMemory getMemory, ScanBufferingMode scanBufferingMode, LightEpoch epoch, int headerSize, bool scanUncommitted = false, ILogger logger = null)
+// : base(tsavoriteLog, hlog, beginAddress, endAddress, getMemory, scanBufferingMode, epoch, headerSize, scanUncommitted, logger)
+// { }
+
+// ///
+// /// Retrieve physical address of next iterator value. Since this is a reverse iterator, the next one is actually the previous one in log order.
+// /// (under epoch protection if it is from main page buffer)
+// ///
+// ///
+// ///
+// ///
+// ///
+// ///
+// ///
+// ///
+// protected override unsafe bool GetNextInternal(out long physicalAddress, out int entryLength, out long currentAddress, out long outNextAddress, out bool commitRecord, out bool onFrame)
+// {
+// Debug.Assert(!tsavoriteLog.readOnlyMode, "Reverse stream iterator not supported in read-only mode");
+// Debug.Assert(tsavoriteLog.logChecksum == LogChecksumType.None, "Reverse stream iterator does not support logs with checksums");
+
+// while (true)
+// {
+// physicalAddress = 0;
+// entryLength = 0;
+// currentAddress = nextAddress;
+// outNextAddress = currentAddress;
+// commitRecord = false; // reverse iterator never reads commit records so this is always false, but we keep it for signature compatibility
+// onFrame = false;
+
+// var headAddress = allocator.HeadAddress;
+// if (currentAddress < allocator.BeginAddress || // Check for boundary conditions. This is basically someone asking for something before the start of log. So we can say false
+// (allocator.IsNullDevice && currentAddress < headAddress) || // it also may be the case where someone is asking for something before the head address in null device mode.
+// currentAddress < endAddress || // we have gone past the end address we were supposed to scan back till
+// disposed)
+// {
+// return false;
+// }
+
+// // Let's say you are trying to scan back from an address in the uncommitted range but you passed false for scanUncommitted.
+// // then we need to jump back to the committed until address
+// if (!scanUncommitted && currentAddress >= tsavoriteLog.CommittedUntilAddress)
+// {
+// // This seems questionable at best. HK TODO: Review this logic.
+// currentAddress = tsavoriteLog.CommittedUntilAddress;
+// }
+
+// var currentPage = currentAddress >> allocator.LogPageSizeBits;
+// var currentFrame = currentPage % frameSize;
+// var currentOffset = currentAddress & allocator.PageSizeMask;
+
+// // are we below head address? We need to BufferAndLoad
+// if (currentAddress < headAddress)
+// {
+// var endAddr = endAddress;
+// // reverse iterator uses single page buffering only. So BufferAndLoad will always load just one page.
+// // We can later optimize this to load multiple pages if needed. Like we do double buffering in forward iterator.
+// if (BufferAndLoad(currentAddress, currentPage, currentFrame, headAddress, endAddr))
+// continue;
+
+// physicalAddress = frame.GetPhysicalAddress(currentFrame, currentOffset);
+// onFrame = true;
+// }
+// else
+// {
+// // in main log buffer in memory already, no paging needed
+// physicalAddress = allocator.GetPhysicalAddress(currentAddress);
+// }
+
+// // Get and check entry length
+// entryLength = tsavoriteLog.GetLength((byte*)physicalAddress);
+
+// // EntryLength should never be zero or negative in a reverse iterator. This is because unlike forward iterator we know which address to jump to directly.
+// Debug.Assert(entryLength > 0, "Reverse iterator should never commit records. Or zeroed out entries should have been handled above.");
+
+// // parse out the previous address present in stream entries right after the header.
+// outNextAddress = *(long*)(physicalAddress + headerSize);
+
+// // Update nextAddress to point to the previous entry for the next iteration
+// nextAddress = outNextAddress;
+
+// // Return true to indicate we found a valid entry. If the previous address is invalid (0), the next call will return false.
+// return true;
+// }
+// }
+//}
\ No newline at end of file
diff --git a/playground/BTree/Btree.csproj b/playground/BTree/Btree.csproj
new file mode 100644
index 00000000000..b4678381019
--- /dev/null
+++ b/playground/BTree/Btree.csproj
@@ -0,0 +1,18 @@
+
+
+
+ Exe
+ net8.0
+ enable
+ enable
+
+
+
+
+
+
+
+
+
+
+
diff --git a/playground/BTree/Program.cs b/playground/BTree/Program.cs
new file mode 100644
index 00000000000..8c32ae30619
--- /dev/null
+++ b/playground/BTree/Program.cs
@@ -0,0 +1,186 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using Garnet.server;
+using Garnet.server.BTreeIndex;
+class Program
+{
+ ///
+ /// Playground for the B+tree index implementation
+ ///
+ ///
+ ///
+ static unsafe void Main(string[] args)
+ {
+ var tree = new BTree((uint)BTreeNode.PAGE_SIZE);
+ ulong N = 50000;
+ bool verbose = true;
+ if (args.Length > 0)
+ {
+ for (int i = 0; i < args.Length; i++)
+ {
+ if (args[i] == "--verb")
+ {
+ verbose = true;
+ }
+ else if (args[i] == "-N")
+ {
+ N = ulong.Parse(args[i + 1]);
+ break;
+ }
+ }
+ }
+ StreamID[] streamIDs = new StreamID[N];
+ long duration = 0;
+ long dur2 = 0;
+ for (ulong i = 0; i < N; i++)
+ {
+ StreamID x = new StreamID(i + 1, 0);
+ Debug.Assert(x.ms > 0);
+ streamIDs[i] = x;
+ }
+ long start = Stopwatch.GetTimestamp();
+ Stopwatch sw = new Stopwatch();
+ sw.Start();
+ for (ulong i = 0; i < N; i++)
+ {
+ tree.Insert((byte*)Unsafe.AsPointer(ref streamIDs[i].idBytes[0]), new Value(i + 1));
+ var value = tree.Get((byte*)Unsafe.AsPointer(ref streamIDs[i].idBytes[0]));
+ Debug.Assert(value.address == i + 1);
+ }
+ sw.Stop();
+ dur2 = sw.ElapsedTicks;
+ duration += Stopwatch.GetTimestamp() - start;
+ Console.WriteLine(" Number of Fast Inserts = " + tree.FastInserts);
+ double nanosecondsPerTick = (1_000_000_000.0) / Stopwatch.Frequency;
+ if (verbose)
+ {
+ Console.WriteLine("Insertion done");
+ Console.WriteLine(" Number of Fast Inserts = " + tree.FastInserts);
+ Console.WriteLine("Number of Leaves = " + tree.LeafCount);
+ Console.WriteLine("Number of Internal Nodes = " + tree.InternalCount);
+ Console.WriteLine("Time for insertion = " + (double)dur2 * nanosecondsPerTick + " ns");
+ }
+ long insertion_time = (long)(dur2 * nanosecondsPerTick);
+ sw.Reset();
+
+ // point lookups
+ sw.Start();
+ for (ulong i = 0; i < N; i++)
+ {
+ var value = tree.Get((byte*)Unsafe.AsPointer(ref streamIDs[i].idBytes[0]));
+ Debug.Assert(value.address == i + 1);
+ }
+ sw.Stop();
+ long query_time = (long)(sw.ElapsedTicks * nanosecondsPerTick);
+ if (verbose)
+ {
+ Console.WriteLine("Time for querying = " + query_time + " ns");
+ }
+ sw.Reset();
+ Console.WriteLine("All inserted keys found");
+
+ // forward range query
+ double[] selectivities = [0.01, 0.05, 0.1];
+ long[] range_query_times = new long[selectivities.Length];
+ Value[] startVal = new Value[selectivities.Length];
+ Value[] endVal = new Value[selectivities.Length];
+ List[] list = new List[selectivities.Length];
+ for (int i = 0; i < selectivities.Length; i++)
+ {
+ double selectivity = selectivities[i];
+ ulong startIdx, endIdx;
+ do
+ {
+ // get a random start index from 0 to N
+ startIdx = (ulong)new Random().Next(0, (int)N);
+ endIdx = (ulong)(startIdx + (N * selectivity));
+ } while (endIdx >= N);
+ sw.Start();
+ var count = tree.Get((byte*)Unsafe.AsPointer(ref streamIDs[startIdx].idBytes[0]), (byte*)Unsafe.AsPointer(ref streamIDs[endIdx].idBytes[0]), out startVal[i], out endVal[i], out list[i]);
+ Debug.Assert(count == (int)(endIdx - startIdx + 1));
+ sw.Stop();
+ range_query_times[i] = (long)(sw.ElapsedTicks * nanosecondsPerTick);
+ if (verbose)
+ {
+ Console.WriteLine("Time for range query " + (i + 1) + " = " + range_query_times[i] + " ns");
+ }
+ sw.Reset();
+ }
+ if (verbose)
+ Console.WriteLine("Range query check passed ");
+
+ sw.Start();
+ // now do a reverse range query from streamIDs[N-1] to streamIDs[N-500], but lim
+ int count_rev = tree.Get(
+ start: (byte*)Unsafe.AsPointer(ref streamIDs[N - 1].idBytes[0]), // start is ahead of end, but that is okay because we have reverse
+ end: (byte*)Unsafe.AsPointer(ref streamIDs[N - 500].idBytes[0]),
+ startVal: out Value startVal_rev,
+ endVal: out Value endVal_rev,
+ tombstones: out List tombstones_rev,
+ limit: 250,
+ reverse: true);
+
+ sw.Stop();
+ long reverse_query_time = (long)(sw.ElapsedTicks * nanosecondsPerTick);
+ if (verbose)
+ {
+ Console.WriteLine("Time for reverse range query = " + reverse_query_time + " ns");
+ }
+ Debug.Assert(count_rev == 250);
+ Debug.Assert(startVal_rev.address == N); // address for streamIDs[N-1] is N (since we inserted i+1)
+ Debug.Assert(endVal_rev.address == N - 249); // we go back 249 positions from N (limit 250 means 250 items: N, N-1, ..., N-249)
+ Console.WriteLine("Reverse range query check passed ");
+
+ // tree.TrimByID((byte*)Unsafe.AsPointer(ref streamIDs[500].idBytes[0]), out var validKeysRemoved, out var headValue, out var headValidKey, out var numLeavesDeleted);
+ // Console.WriteLine("Trimmed by ID: validKeysRemoved = " + validKeysRemoved);
+ // Console.WriteLine("num leaves deleted = " + numLeavesDeleted);
+
+ // tree.TrimByLength(2000, out var validKeysRemoved2, out var headValue2, out var headValidKey2, out var numLeavesDeleted2);
+ // Console.WriteLine("Trimmed by length: validKeysRemoved = " + validKeysRemoved2);
+ // Console.WriteLine("num leaves deleted = " + numLeavesDeleted2);
+
+ // now let's delete some keys
+ sw.Reset();
+ int num_deletes = 100;
+ int num_successfully_deleted = 0;
+ for (int i = 0; i < num_deletes; i++)
+ {
+ // generate a random index to delete
+ int idx = new Random().Next(0, (int)N);
+ sw.Start();
+ bool val = false;
+ // bool val = tree.Delete((byte*)Unsafe.AsPointer(ref streamIDs[idx].idBytes[0]));
+ sw.Stop();
+ if (val)
+ {
+ num_successfully_deleted++;
+ }
+ }
+ long deleteTime = (long)(sw.ElapsedTicks * nanosecondsPerTick);
+ if (verbose)
+ {
+ Console.WriteLine("Number of keys deleted = " + num_successfully_deleted);
+ Console.WriteLine("Time for deletion = " + deleteTime + " ns");
+ }
+
+ tree.Delete((byte*)Unsafe.AsPointer(ref streamIDs[N - 400].idBytes[0]));
+ tree.Delete((byte*)Unsafe.AsPointer(ref streamIDs[N - 300].idBytes[0]));
+ tree.Delete((byte*)Unsafe.AsPointer(ref streamIDs[N - 200].idBytes[0]));
+ tree.Delete((byte*)Unsafe.AsPointer(ref streamIDs[N - 100].idBytes[0]));
+
+ // do a range query to check again
+ tree.Get((byte*)Unsafe.AsPointer(ref streamIDs[N - 500].idBytes[0]), (byte*)Unsafe.AsPointer(ref streamIDs[N - 1].idBytes[0]), out Value startVal1, out Value endVal1, out List tombstones);
+ Debug.Assert(tombstones.Count == 4);
+ Console.WriteLine("Delete check passed ");
+
+ // print all times collected in a csv format
+ Console.WriteLine(insertion_time + ", " + query_time + ", " + range_query_times[0] + ", " + range_query_times[1] + ", " + range_query_times[2] + ", " + deleteTime);
+ tree.Deallocate();
+ Console.WriteLine("Num allocates = " + tree.stats.numAllocates);
+ Console.WriteLine("Num deallocates = " + tree.stats.numDeallocates);
+ Console.WriteLine("All checks passed");
+ }
+}
\ No newline at end of file
diff --git a/playground/CommandInfoUpdater/SupportedCommand.cs b/playground/CommandInfoUpdater/SupportedCommand.cs
index a1a61b79234..1bc47ec262d 100644
--- a/playground/CommandInfoUpdater/SupportedCommand.cs
+++ b/playground/CommandInfoUpdater/SupportedCommand.cs
@@ -311,6 +311,54 @@ public class SupportedCommand
new("WATCH", RespCommand.WATCH),
new("WATCHMS", RespCommand.WATCHMS),
new("WATCHOS", RespCommand.WATCHOS),
+ new("XADD", RespCommand.XADD),
+ new("XDEL", RespCommand.XDEL),
+ new("XLEN", RespCommand.XLEN),
+ new("XRANGE", RespCommand.XRANGE),
+ new ("XREVRANGE", RespCommand.XREVRANGE),
+ new("XTRIM", RespCommand.XTRIM),
+ new("ZADD", RespCommand.ZADD),
+ new("ZCARD", RespCommand.ZCARD),
+ new("ZCOUNT", RespCommand.ZCOUNT),
+ new("ZDIFF", RespCommand.ZDIFF),
+ new("ZDIFFSTORE", RespCommand.ZDIFFSTORE),
+ new("ZINCRBY", RespCommand.ZINCRBY),
+ new("ZINTER", RespCommand.ZINTER),
+ new("ZINTERCARD", RespCommand.ZINTERCARD),
+ new("ZINTERSTORE", RespCommand.ZINTERSTORE),
+ new("ZLEXCOUNT", RespCommand.ZLEXCOUNT),
+ new("ZMSCORE", RespCommand.ZMSCORE),
+ new("ZMPOP", RespCommand.ZMPOP),
+ new("ZPOPMAX", RespCommand.ZPOPMAX),
+ new("ZPOPMIN", RespCommand.ZPOPMIN),
+ new("ZRANDMEMBER", RespCommand.ZRANDMEMBER),
+ new("ZRANGE", RespCommand.ZRANGE),
+ new("ZRANGEBYLEX", RespCommand.ZRANGEBYLEX),
+ new("ZRANGEBYSCORE", RespCommand.ZRANGEBYSCORE),
+ new("ZRANGESTORE", RespCommand.ZRANGESTORE),
+ new("ZRANK", RespCommand.ZRANK),
+ new("ZREM", RespCommand.ZREM),
+ new("ZREMRANGEBYLEX", RespCommand.ZREMRANGEBYLEX),
+ new("ZREMRANGEBYRANK", RespCommand.ZREMRANGEBYRANK),
+ new("ZREMRANGEBYSCORE", RespCommand.ZREMRANGEBYSCORE),
+ new("ZREVRANGE", RespCommand.ZREVRANGE),
+ new("ZREVRANGEBYLEX", RespCommand.ZREVRANGEBYLEX),
+ new("ZREVRANGEBYSCORE", RespCommand.ZREVRANGEBYSCORE),
+ new("ZREVRANK", RespCommand.ZREVRANK),
+ new("ZSCAN", RespCommand.ZSCAN),
+ new("ZSCORE", RespCommand.ZSCORE),
+ new("ZEXPIRE", RespCommand.HEXPIRE),
+ new("ZPEXPIRE", RespCommand.HPEXPIRE),
+ new("ZEXPIREAT", RespCommand.HEXPIREAT),
+ new("ZPEXPIREAT", RespCommand.HPEXPIREAT),
+ new("ZTTL", RespCommand.HTTL),
+ new("ZPTTL", RespCommand.HPTTL),
+ new("ZEXPIRETIME", RespCommand.HEXPIRETIME),
+ new("ZPEXPIRETIME", RespCommand.HPEXPIRETIME),
+ new("ZPERSIST", RespCommand.HPERSIST),
+ new("ZCOLLECT", RespCommand.HPERSIST),
+ new("ZUNION", RespCommand.ZUNION),
+ new("ZUNIONSTORE", RespCommand.ZUNIONSTORE),
new("ZADD", RespCommand.ZADD, StoreType.Object),
new("ZCARD", RespCommand.ZCARD, StoreType.Object),
new("ZCOUNT", RespCommand.ZCOUNT, StoreType.Object),
diff --git a/test/Garnet.test/BTreeTests.cs b/test/Garnet.test/BTreeTests.cs
new file mode 100644
index 00000000000..8854d30fe15
--- /dev/null
+++ b/test/Garnet.test/BTreeTests.cs
@@ -0,0 +1,171 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using Garnet.server;
+using Garnet.server.BTreeIndex;
+using NUnit.Framework;
+using NUnit.Framework.Legacy;
+
+namespace Garnet.test
+{
+ using Value = Value;
+
+ [TestFixture]
+ public unsafe class BTreeTests
+ {
+ static StreamID[] streamIDs;
+ static ulong N = 50000;
+
+ [SetUp]
+ public void Setup()
+ {
+ streamIDs = new StreamID[N];
+ for (ulong i = 0; i < N; i++)
+ {
+ streamIDs[i] = new StreamID(i + 1, 0);
+ }
+ }
+
+ [TearDown]
+ public void TearDown()
+ { }
+
+ [Test]
+ [Category("INIT")]
+ public void InitBTreeLeafNode()
+ {
+ // var memoryBlock = (IntPtr*)Marshal.AllocHGlobal(BTreeNode.PAGE_SIZE).ToPointer();
+ var memoryBlock = (IntPtr*)NativeMemory.AlignedAlloc((nuint)BTreeNode.PAGE_SIZE, (nuint)BTreeNode.PAGE_SIZE);
+ var leaf = BTreeNode.Create(BTreeNodeType.Leaf, memoryBlock);
+ ClassicAssert.AreEqual(leaf->info->type, BTreeNodeType.Leaf);
+ ClassicAssert.AreEqual(leaf->info->count, 0);
+
+ // free the leaf
+ BTree.FreeNode(ref leaf);
+
+ leaf = null;
+ }
+
+ [Test]
+ [Category("INSERT")]
+ public void Insert()
+ {
+ var tree = new BTree((uint)BTreeNode.PAGE_SIZE);
+ ClassicAssert.AreEqual(tree.FastInserts, 0);
+ ClassicAssert.AreEqual(tree.LeafCount, 1);
+ ClassicAssert.AreEqual(tree.InternalCount, 0);
+
+ for (ulong i = 0; i < N; i++)
+ {
+ tree.Insert((byte*)Unsafe.AsPointer(ref streamIDs[i].idBytes[0]), new Value(i + 1));
+ }
+ ClassicAssert.AreEqual(tree.FastInserts, N);
+ tree.Deallocate();
+ }
+
+ [Test]
+ [Category("LOOKUP")]
+ public void PointLookup()
+ {
+ var tree = new BTree((uint)BTreeNode.PAGE_SIZE);
+
+ for (ulong i = 0; i < N; i++)
+ {
+ tree.Insert((byte*)Unsafe.AsPointer(ref streamIDs[i].idBytes[0]), new Value(streamIDs[i].ms));
+ }
+
+ for (ulong i = 0; i < N; i++)
+ {
+ ClassicAssert.AreEqual(tree.Get((byte*)Unsafe.AsPointer(ref streamIDs[i].idBytes[0])).address, streamIDs[i].ms);
+ }
+
+ tree.Deallocate();
+ }
+
+ [Test]
+ [Category("LOOKUP")]
+ public void RangeLookup()
+ {
+ var tree = new BTree(4096);
+
+ for (ulong i = 0; i < N; i++)
+ {
+ tree.Insert((byte*)Unsafe.AsPointer(ref streamIDs[i].idBytes[0]), new Value(streamIDs[i].ms));
+ }
+
+ int count = tree.Get((byte*)Unsafe.AsPointer(ref streamIDs[N - 200].idBytes[0]), (byte*)Unsafe.AsPointer(ref streamIDs[N - 1].idBytes[0]), out Value startVal, out Value endVal, out List list);
+ ClassicAssert.AreEqual(count, N - 1 - (N - 200) + 1);
+ ClassicAssert.AreEqual(list.Count, 0);
+ ClassicAssert.AreEqual(startVal.address, streamIDs[N - 200].ms);
+ ClassicAssert.AreEqual(endVal.address, streamIDs[N - 1].ms);
+
+ tree.Deallocate();
+ }
+
+ [Test]
+ [Category("Delete")]
+ public void Delete()
+ {
+ var tree = new BTree((uint)BTreeNode.PAGE_SIZE);
+ for (ulong i = 0; i < N; i++)
+ {
+ tree.Insert((byte*)Unsafe.AsPointer(ref streamIDs[i].idBytes[0]), new Value(streamIDs[i].ms));
+ }
+
+ // delete 10% of keys at random
+ Random rand = new Random();
+ uint delCount = 0;
+ for (ulong i = 0; i < N / 10; i++)
+ {
+ ulong idx = (ulong)rand.Next(0, (int)N);
+ bool deleted = tree.Delete((byte*)Unsafe.AsPointer(ref streamIDs[idx].idBytes[0]));
+ if (deleted)
+ {
+ delCount++;
+ }
+ }
+ ClassicAssert.AreEqual(tree.ValidCount, N - delCount);
+ tree.Deallocate();
+ }
+
+ [Test]
+ [Category("Trim")]
+ public void TrimByLength()
+ {
+ var tree = new BTree((uint)BTreeNode.PAGE_SIZE);
+ for (ulong i = 0; i < N; i++)
+ {
+ tree.Insert((byte*)Unsafe.AsPointer(ref streamIDs[i].idBytes[0]), new Value(streamIDs[i].ms));
+ }
+
+ var trimLength = 5000; // trim the tree to half its size
+ tree.TrimByLength((ulong)trimLength, out var validKeysRemoved, out var headValue, out var headValidKey, out var numLeavesDeleted);
+ var validKeysRemaining = tree.RootValidCount + tree.TailValidCount;
+ ClassicAssert.GreaterOrEqual(validKeysRemaining, trimLength);
+
+ tree.Deallocate();
+ }
+
+ [Test]
+ [Category("TrimByID")]
+ public void TrimByID()
+ {
+ var tree = new BTree((uint)BTreeNode.PAGE_SIZE);
+ for (ulong i = 0; i < N; i++)
+ {
+ tree.Insert((byte*)Unsafe.AsPointer(ref streamIDs[i].idBytes[0]), new Value(streamIDs[i].ms));
+ }
+
+ var streamIDToTrim = streamIDs[N - 1000];
+ tree.TrimByID((byte*)Unsafe.AsPointer(ref streamIDToTrim.idBytes[0]), out var validKeysRemoved, out var headValue, out var headValidKey, out var numLeavesDeleted);
+ var validKeysRemaining = tree.RootValidCount + tree.TailValidCount;
+ ClassicAssert.GreaterOrEqual((ulong)validKeysRemaining, N - validKeysRemoved);
+
+ tree.Deallocate();
+ }
+ }
+}
diff --git a/test/Garnet.test/Resp/ACL/RespCommandTests.cs b/test/Garnet.test/Resp/ACL/RespCommandTests.cs
index 3b02ac68853..e94996dae53 100644
--- a/test/Garnet.test/Resp/ACL/RespCommandTests.cs
+++ b/test/Garnet.test/Resp/ACL/RespCommandTests.cs
@@ -34,7 +34,7 @@ public void Setup()
TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true);
server = TestUtils.CreateGarnetServer(TestUtils.MethodTestDir, defaultPassword: DefaultPassword,
useAcl: true, enableLua: true,
- enableModuleCommand: Garnet.server.Auth.Settings.ConnectionProtectionOption.Yes);
+ enableModuleCommand: Garnet.server.Auth.Settings.ConnectionProtectionOption.Yes, enableStreams: true);
// Register custom commands so we can test ACL'ing them
ClassicAssert.IsTrue(TestUtils.TryGetCustomCommandsInfo(out respCustomCommandsInfo));
@@ -6452,6 +6452,87 @@ static async Task DoGeoSearchStoreAsync(GarnetClient client)
}
}
+ [Test]
+ public async Task XADDACLsAsync()
+ {
+ int count = 0;
+ await CheckCommandsAsync(
+ "XADD",
+ [DoXAddAsync]
+ );
+
+ async Task DoXAddAsync(GarnetClient client)
+ {
+ string val = await client.ExecuteForStringResultAsync("XADD", ["foo", "*", $"bar--{count}", "fizz"]);
+ ClassicAssert.IsNotNull(val);
+ }
+ }
+
+ [Test]
+ public async Task XLENACLsAsync()
+ {
+ await CheckCommandsAsync(
+ "XLEN",
+ [DoXLenAsync]
+ );
+
+ async Task DoXLenAsync(GarnetClient client)
+ {
+ long val = await client.ExecuteForLongResultAsync("XLEN", ["foo"]);
+ ClassicAssert.AreEqual(0, val);
+ }
+ }
+
+ [Test]
+ public async Task XRangeACLsAsync()
+ {
+ await CheckCommandsAsync(
+ "XRANGE",
+ [DoXRangeAsync]
+ );
+
+ async Task DoXRangeAsync(GarnetClient client)
+ {
+ var val = await client.ExecuteForStringArrayResultAsync("XRANGE", ["foo", "-", "+"]);
+ ClassicAssert.AreEqual(0, val.Length);
+ }
+ }
+
+ [Test]
+ public async Task XDELACLsAsync()
+ {
+ await CheckCommandsAsync(
+ "XDEL",
+ [DoXDelAsync]
+ );
+
+ async Task DoXDelAsync(GarnetClient client)
+ {
+ long val = await client.ExecuteForLongResultAsync("XDEL", ["foo", "1"]);
+ ClassicAssert.AreEqual(0, val);
+ }
+ }
+
+ [Test]
+ public async Task XTRIMACLsAsync()
+ {
+ await CheckCommandsAsync(
+ "XTRIM",
+ [DoXTrimMinIDAsync, DoXTrimMaxLenAsync]
+ );
+ async Task DoXTrimMinIDAsync(GarnetClient client)
+ {
+ long val = await client.ExecuteForLongResultAsync("XTRIM", ["foo", "MINID", "0-0"]);
+ ClassicAssert.AreEqual(0, val);
+ }
+
+ async Task DoXTrimMaxLenAsync(GarnetClient client)
+ {
+ long val = await client.ExecuteForLongResultAsync("XTRIM", ["foo", "MAXLEN", "0"]);
+ ClassicAssert.AreEqual(0, val);
+ }
+ }
+
[Test]
public async Task ZAddACLsAsync()
{
diff --git a/test/Garnet.test/RespStreamTests.cs b/test/Garnet.test/RespStreamTests.cs
new file mode 100644
index 00000000000..623d9af81ac
--- /dev/null
+++ b/test/Garnet.test/RespStreamTests.cs
@@ -0,0 +1,916 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading.Tasks;
+using NUnit.Framework;
+using NUnit.Framework.Legacy;
+using StackExchange.Redis;
+
+namespace Garnet.test
+{
+ [TestFixture]
+ public class RespStreamTests
+ {
+ protected GarnetServer server;
+ const string chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+ Random random;
+ static ulong N = 5;
+
+ [SetUp]
+ public void Setup()
+ {
+ TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true);
+ server = TestUtils.CreateGarnetServer(TestUtils.MethodTestDir, lowMemory: true, enableStreams: true);
+ server.Start();
+ random = new Random();
+
+ // write to one stream to test for range scans
+ var streamKey = "rangeScan";
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ for (ulong i = 0; i < N; i++)
+ {
+ var entryKey = GenerateRandomString(4); // generate random ascii string of length 4
+ var entryValue = GenerateRandomString(4); // generate random ascii string of length 4
+ var retId = db.StreamAdd(streamKey, entryKey, entryValue);
+ }
+ }
+
+ [TearDown]
+ public void TearDown()
+ {
+ server.Dispose();
+ TestUtils.DeleteDirectory(TestUtils.MethodTestDir);
+ }
+
+ public string GenerateRandomString(int length)
+ {
+ return new string(Enumerable.Repeat(chars, length)
+ .Select(s => s[random.Next(s.Length)]).ToArray());
+ }
+
+ #region STREAMIDTests
+ [Test]
+ public void StreamAddAutoGenIdTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+
+ var streamKey = "add";
+ var entryKey = GenerateRandomString(4); // generate random ascii string of length 4
+ var entryValue = GenerateRandomString(4); // generate random ascii string of length 4
+ var retId = db.StreamAdd(streamKey, entryKey, entryValue);
+ ClassicAssert.IsTrue(retId.ToString().Contains("-"));
+ }
+
+ [Test]
+ public void StreamAddUserDefinedTsTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+
+ var streamKey = "addTs";
+ var entryKey = GenerateRandomString(4); // generate random ascii string of length 4
+ var entryValue = GenerateRandomString(4); // generate random ascii string of length 4
+ var retId = db.StreamAdd(streamKey, entryKey, entryValue, $"{1}");
+ ClassicAssert.IsTrue(retId.ToString().Contains("-"));
+ }
+
+ [Test]
+ public void StreamAddUserDefinedIdTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+
+ var streamKey = "addId";
+ var entryKey = GenerateRandomString(4); // generate random ascii string of length 4
+ var entryValue = GenerateRandomString(4); // generate random ascii string of length 4
+ var retId = db.StreamAdd(streamKey, entryKey, entryValue, $"{1}-0");
+ ClassicAssert.IsTrue(retId.ToString().Contains("-"));
+ }
+ #endregion
+
+ #region STREAMOperationsTests
+ [Test]
+ public void StreamAddAndLengthTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+
+ var streamKey = "length";
+ var count = 0;
+ for (ulong i = 0; i < N; i++)
+ {
+ var entryKey = GenerateRandomString(4); // generate random ascii string of length 4
+ var entryValue = GenerateRandomString(4); // generate random ascii string of length 4
+ var retId = db.StreamAdd(streamKey, entryKey, entryValue);
+ count++;
+ }
+ ClassicAssert.AreEqual(count, N);
+
+ var length = db.StreamLength(streamKey);
+ ClassicAssert.AreEqual(length, N);
+ }
+
+ [Test]
+ public void StreamRangeExistingTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var streamKey = "rangeScan";
+ var range = db.StreamRange(streamKey, "-", "+");
+ ClassicAssert.AreEqual(range.Length, N);
+ }
+
+ [Test]
+ public void StreamRangeNonExistingTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var streamKey = "nonExistingRangeScan";
+ var range = db.StreamRange(streamKey, "-", "+");
+ ClassicAssert.AreEqual(range.Length, 0);
+ }
+
+ [Test]
+ public void StreamRangeWithCountTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var streamKey = "rangeScan";
+ int limit = 2;
+ var range = db.StreamRange(streamKey, "-", "+", limit);
+ ClassicAssert.AreEqual(range.Length, limit);
+ }
+
+ [Test]
+ public void StreamMultipleValuesTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+
+ var streamKey = "x1";
+ var entryKey = GenerateRandomString(4); // generate random ascii string of length 4
+ var entryValue = GenerateRandomString(4); // generate random ascii string of length 4
+ var retId = db.StreamAdd(streamKey, entryKey, entryValue, $"{1}-0"); // currently all 3 pairs are put on the log. each pair is ctrl metadata followed by string atm
+
+ var res = db.StreamRange(streamKey, "-", "+");
+ ClassicAssert.AreEqual(res.Length, 1);
+ foreach (var entry in res)
+ {
+ ClassicAssert.AreEqual(entry.Id.ToString(), retId.ToString());
+ ClassicAssert.AreEqual(entry.Values.Length, 1);
+ ClassicAssert.AreEqual(entry.Values[0].Name.ToString(), entryKey);
+ ClassicAssert.AreEqual(entry.Values[0].Value.ToString(), entryValue);
+ }
+
+ var delCount = db.StreamDelete(streamKey, [retId]);
+ ClassicAssert.AreEqual(delCount, 1);
+
+ // just for messing around, let's add multiple key value pairs for this id?
+ retId = db.StreamAdd(streamKey, [new NameValueEntry("field1", "value1"), new NameValueEntry("field2", "value2")], messageId: $"{2}-0");
+
+ // check if all the values are there
+ res = db.StreamRange(streamKey, "-", "+");
+ ClassicAssert.AreEqual(res.Length, 1);
+ foreach (var entry in res)
+ {
+ ClassicAssert.AreEqual(entry.Id.ToString(), retId.ToString());
+ ClassicAssert.AreEqual(entry.Values.Length, 2);
+ ClassicAssert.AreEqual(entry.Values[0].Name.ToString(), "field1");
+ ClassicAssert.AreEqual(entry.Values[0].Value.ToString(), "value1");
+ ClassicAssert.AreEqual(entry.Values[1].Name.ToString(), "field2");
+ ClassicAssert.AreEqual(entry.Values[1].Value.ToString(), "value2");
+ }
+ }
+
+ [Test]
+ public void StreamDeleteSingleTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+
+ var streamKey = "delOne";
+ var entryKey = GenerateRandomString(4); // generate random ascii string of length 4
+ var entryValue = GenerateRandomString(4); // generate random ascii string of length 4
+ var retId = db.StreamAdd(streamKey, entryKey, entryValue, $"{1}-0");
+
+ var delCount = db.StreamDelete(streamKey, [retId]);
+ ClassicAssert.AreEqual(delCount, 1);
+ }
+
+ [Test]
+ [Category("Delete")]
+ public void StreamDeleteMultipleTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+
+ var streamKey = "delMultiple";
+ var count = 0;
+ for (ulong i = 0; i < N; i++)
+ {
+ var entryKey = GenerateRandomString(4); // generate random ascii string of length 4
+ var entryValue = GenerateRandomString(4); // generate random ascii string of length 4
+ var retId = db.StreamAdd(streamKey, entryKey, entryValue, $"{i + 1}-0");
+ count++;
+ }
+ ClassicAssert.AreEqual(count, N);
+
+ // Pick arbitrary 2 unique indices between 0 and N and store each index in a set
+ int numToDelete = 2;
+            var indices = new HashSet<int>();
+ while (indices.Count < numToDelete)
+ {
+ indices.Add(random.Next(0, (int)N));
+ }
+
+ var eIds = new RedisValue[numToDelete];
+ int c = 0;
+ foreach (var idx in indices)
+ {
+ eIds[c++] = $"{idx + 1}-0";
+ }
+
+ var delCount = db.StreamDelete(streamKey, eIds);
+ ClassicAssert.AreEqual(delCount, indices.Count);
+ }
+
+ [Test]
+ [Category("Trim")]
+ public void StreamTrimMaxLenTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+
+ var streamKey = "trimByMaxLen";
+ long count = 500;
+ for (long i = 0; i < count; i++)
+ {
+ var entryKey = GenerateRandomString(4); // generate random ascii string of length 4
+ var entryValue = GenerateRandomString(4); // generate random ascii string of length 4
+ var retId = db.StreamAdd(streamKey, entryKey, entryValue, $"{i + 1}-0");
+ }
+ var maxLen = 100;
+ var trimCount = db.StreamTrim(streamKey, maxLen);
+ ClassicAssert.GreaterOrEqual(trimCount, 1);
+ ClassicAssert.GreaterOrEqual(count - trimCount, maxLen);
+ }
+
+ [Test]
+ [Category("Trim")]
+ public void StreamTrimFullTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+
+ var streamKey = "trimmer";
+ long count = 1000;
+ string[] ids = new string[count];
+ for (long i = 0; i < count; i++)
+ {
+ var entryKey = GenerateRandomString(4); // generate random ascii string of length 4
+ var entryValue = GenerateRandomString(4); // generate random ascii string of length 4
+ RedisValue retId = db.StreamAdd(streamKey, entryKey, entryValue, $"{i + 1}-0");
+ ids[i] = retId.ToString();
+ }
+
+ // Trim in random steps from 1-150 until we have 0 entries
+ long currentLength = count;
+ var random = new Random(42); // Fixed seed for reproducibility
+
+ while (currentLength > 0)
+ {
+ // Determine how many to keep (trim to this length)
+ long trimAmount = random.Next(1, Math.Min(151, (int)currentLength + 1));
+ long newLength = Math.Max(0, currentLength - trimAmount);
+
+ // Verify stream length before trim
+ long lengthBefore = db.StreamLength(streamKey);
+ ClassicAssert.AreEqual(currentLength, lengthBefore, "Stream length mismatch before trim");
+
+ // Get first and last entries before trim
+ var rangeBefore = db.StreamRange(streamKey, "-", "+");
+ var firstIdBefore = rangeBefore.Length > 0 ? rangeBefore[0].Id.ToString() : null;
+ var lastIdBefore = rangeBefore.Length > 0 ? rangeBefore[rangeBefore.Length - 1].Id.ToString() : null;
+
+ // Perform the trim
+ long trimmed = db.StreamTrim(streamKey, newLength);
+
+ // Verify the correct number of entries were trimmed
+ ClassicAssert.AreEqual(trimAmount, trimmed, $"Expected to trim {trimAmount} entries");
+
+ // Verify new stream length
+ long lengthAfter = db.StreamLength(streamKey);
+ ClassicAssert.AreEqual(newLength, lengthAfter, "Stream length mismatch after trim");
+
+ if (newLength > 0)
+ {
+ // Get first and last entries after trim
+ var rangeAfter = db.StreamRange(streamKey, "-", "+");
+ ClassicAssert.AreEqual(newLength, rangeAfter.Length, "Range length should match stream length");
+
+ var firstIdAfter = rangeAfter[0].Id.ToString();
+ var lastIdAfter = rangeAfter[rangeAfter.Length - 1].Id.ToString();
+
+ // First entry should have changed (oldest entries were removed)
+ ClassicAssert.AreNotEqual(firstIdBefore, firstIdAfter, "First entry should change after trim");
+
+ // Last entry should remain the same (we keep newest entries)
+ ClassicAssert.AreEqual(lastIdBefore, lastIdAfter, "Last entry should not change after trim");
+
+ // Verify the new first entry is from the expected position in ids array
+ long expectedFirstIndex = count - newLength;
+ string expectedFirstId = ids[expectedFirstIndex];
+ ClassicAssert.AreEqual(expectedFirstId, firstIdAfter, "First entry ID should match expected from ids array");
+
+ // Verify the last entry is still the original last entry from ids array
+ string expectedLastId = ids[count - 1];
+ ClassicAssert.AreEqual(expectedLastId, lastIdAfter, "Last entry should still be the original last from ids array");
+ }
+
+ currentLength = newLength;
+ }
+
+ // Final verification: stream should be empty or minimal
+ long finalLength = db.StreamLength(streamKey);
+ ClassicAssert.AreEqual(0, finalLength, "Stream should be empty at the end");
+ }
+
+ [Ignore("Havent fixed this yet")]
+ [Test]
+ [Category("XRANGE_XREVRANGE")]
+ public void StreamRangeAndRevRangeTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var streamKey = "rangeScan1";
+
+ // Store field-value pairs for verification
+ var expectedEntries = new List<(string field, string value)>();
+
+ // first add to stream with known field-value pairs
+ for (int i = 0; i < 10; i++)
+ {
+ var entryKey = $"field{i}";
+ var entryValue = $"value{i}";
+ expectedEntries.Add((entryKey, entryValue));
+ var retId = db.StreamAdd(streamKey, entryKey, entryValue);
+ }
+
+ // Full range tests
+ var range = db.StreamRange(streamKey, "-", "+");
+ ClassicAssert.AreEqual(range.Length, 10);
+ // Verify forward range is in ascending order
+ for (int i = 0; i < range.Length - 1; i++)
+ {
+ ClassicAssert.IsTrue(string.Compare(range[i].Id.ToString(), range[i + 1].Id.ToString()) < 0);
+ }
+ // Verify field-value pairs are in expected order
+ for (int i = 0; i < range.Length; i++)
+ {
+ ClassicAssert.AreEqual(1, range[i].Values.Length);
+ ClassicAssert.AreEqual(expectedEntries[i].field, (string)range[i].Values[0].Name);
+ ClassicAssert.AreEqual(expectedEntries[i].value, (string)range[i].Values[0].Value);
+ }
+
+ var revRange = db.StreamRange(streamKey, "-", "+", messageOrder: Order.Descending);
+ ClassicAssert.AreEqual(revRange.Length, 10);
+ // Verify reverse range is in descending order
+ for (int i = 0; i < revRange.Length - 1; i++)
+ {
+ ClassicAssert.IsTrue(string.Compare(revRange[i].Id.ToString(), revRange[i + 1].Id.ToString()) > 0);
+ }
+ // Verify field-value pairs are in reversed order
+ for (int i = 0; i < revRange.Length; i++)
+ {
+ ClassicAssert.AreEqual(1, revRange[i].Values.Length);
+ var expectedIndex = revRange.Length - 1 - i;
+ ClassicAssert.AreEqual(expectedEntries[expectedIndex].field, (string)revRange[i].Values[0].Name);
+ ClassicAssert.AreEqual(expectedEntries[expectedIndex].value, (string)revRange[i].Values[0].Value);
+ }
+
+ // Verify reverse range has same IDs as forward range (just reversed)
+ for (int i = 0; i < range.Length; i++)
+ {
+ ClassicAssert.AreEqual(range[i].Id, revRange[range.Length - 1 - i].Id);
+ }
+
+ // Partial range tests
+ var startId = range[2].Id;
+ var endId = range[5].Id;
+ var partialRange = db.StreamRange(streamKey, startId, endId);
+ ClassicAssert.AreEqual(partialRange.Length, 4);
+ // Verify partial range starts and ends with correct IDs
+ ClassicAssert.AreEqual(partialRange[0].Id, startId);
+ ClassicAssert.AreEqual(partialRange[3].Id, endId);
+ // Verify entries match the corresponding entries from full range
+ for (int i = 0; i < 4; i++)
+ {
+ ClassicAssert.AreEqual(partialRange[i].Id, range[2 + i].Id);
+ // Verify field-value pairs match expected
+ ClassicAssert.AreEqual(expectedEntries[2 + i].field, (string)partialRange[i].Values[0].Name);
+ ClassicAssert.AreEqual(expectedEntries[2 + i].value, (string)partialRange[i].Values[0].Value);
+ }
+
+ // reverse partial range
+ var partialRevRange = db.StreamRange(streamKey, startId, endId, messageOrder: Order.Descending);
+ ClassicAssert.AreEqual(partialRevRange.Length, 4);
+ // Verify reverse partial range is reversed
+ for (int i = 0; i < 4; i++)
+ {
+ ClassicAssert.AreEqual(partialRevRange[i].Id, partialRange[3 - i].Id);
+ // Verify field-value pairs are reversed
+ ClassicAssert.AreEqual(expectedEntries[5 - i].field, (string)partialRevRange[i].Values[0].Name);
+ ClassicAssert.AreEqual(expectedEntries[5 - i].value, (string)partialRevRange[i].Values[0].Value);
+ }
+
+ // limit tests
+ int limit = 3;
+ var limitedRange = db.StreamRange(streamKey, "-", "+", limit);
+ ClassicAssert.AreEqual(limitedRange.Length, limit);
+ // Verify limited range returns first N entries
+ for (int i = 0; i < limit; i++)
+ {
+ ClassicAssert.AreEqual(limitedRange[i].Id, range[i].Id);
+ // Verify field-value pairs match expected
+ ClassicAssert.AreEqual(expectedEntries[i].field, (string)limitedRange[i].Values[0].Name);
+ ClassicAssert.AreEqual(expectedEntries[i].value, (string)limitedRange[i].Values[0].Value);
+ }
+
+ // reverse limit tests
+ var limitedRevRange = db.StreamRange(streamKey, "-", "+", limit, messageOrder: Order.Descending);
+ ClassicAssert.AreEqual(limitedRevRange.Length, limit);
+ // Verify limited reverse range returns last N entries in reverse order
+ for (int i = 0; i < limit; i++)
+ {
+ ClassicAssert.AreEqual(limitedRevRange[i].Id, range[range.Length - 1 - i].Id);
+ // Verify field-value pairs match expected (from end, reversed)
+ var expectedIndex = range.Length - 1 - i;
+ ClassicAssert.AreEqual(expectedEntries[expectedIndex].field, (string)limitedRevRange[i].Values[0].Name);
+ ClassicAssert.AreEqual(expectedEntries[expectedIndex].value, (string)limitedRevRange[i].Values[0].Value);
+ }
+ }
+
+ [Ignore("Havent fixed this yet")]
+ [Test]
+ public void StreamLastBasicTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var server = redis.GetServers()[0];
+
+ var streamKey = "lastBasic";
+
+ // Add entries with known field-value pairs
+ var id1 = db.StreamAdd(streamKey, "field1", "value1", messageId: "1-0");
+ var id2 = db.StreamAdd(streamKey, "field2", "value2", messageId: "2-0");
+ var id3 = db.StreamAdd(streamKey, "field3", "value3", messageId: "3-0");
+
+ // Execute XLAST
+ var result = server.Execute("XLAST", streamKey);
+ var lastEntry = (RedisResult[])result;
+
+ // Verify the last entry
+ ClassicAssert.AreEqual(2, lastEntry.Length);
+ ClassicAssert.AreEqual("3-0", (string)lastEntry[0]);
+
+ var values = (RedisResult[])lastEntry[1];
+ ClassicAssert.AreEqual(2, values.Length);
+ ClassicAssert.AreEqual("field3", (string)values[0]);
+ ClassicAssert.AreEqual("value3", (string)values[1]);
+ }
+
+ [Ignore("Havent fixed this yet")]
+ [Test]
+ public void StreamLastMultipleFieldsTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var server = redis.GetServers()[0];
+
+ var streamKey = "lastMultiFields";
+
+ // Add an entry with multiple field-value pairs
+ var id = db.StreamAdd(streamKey, [
+ new NameValueEntry("field1", "value1"),
+ new NameValueEntry("field2", "value2"),
+ new NameValueEntry("field3", "value3")
+ ], messageId: "100-0");
+
+ // Execute XLAST
+ var result = server.Execute("XLAST", streamKey);
+ var lastEntry = (RedisResult[])result;
+
+ // Verify the last entry
+ ClassicAssert.AreEqual(2, lastEntry.Length);
+ ClassicAssert.AreEqual("100-0", (string)lastEntry[0]);
+
+ var values = (RedisResult[])lastEntry[1];
+ ClassicAssert.AreEqual(6, values.Length); // 3 fields * 2 (name + value)
+ ClassicAssert.AreEqual("field1", (string)values[0]);
+ ClassicAssert.AreEqual("value1", (string)values[1]);
+ ClassicAssert.AreEqual("field2", (string)values[2]);
+ ClassicAssert.AreEqual("value2", (string)values[3]);
+ ClassicAssert.AreEqual("field3", (string)values[4]);
+ ClassicAssert.AreEqual("value3", (string)values[5]);
+ }
+
+ [Ignore("Havent fixed this yet")]
+ [Test]
+ public void StreamLastEmptyStreamTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var server = redis.GetServers()[0];
+
+ var streamKey = "lastEmpty";
+
+ // Execute XLAST on non-existent stream
+ var result = server.Execute("XLAST", streamKey);
+
+ // Should return empty array
+ ClassicAssert.IsTrue(result.IsNull || ((RedisResult[])result).Length == 0);
+ }
+
+ [Ignore("Havent fixed this yet")]
+ [Test]
+ public void StreamLastSingleEntryTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var server = redis.GetServers()[0];
+
+ var streamKey = "lastSingle";
+
+ // Add only one entry
+ var id = db.StreamAdd(streamKey, "onlyField", "onlyValue");
+
+ // Execute XLAST
+ var result = server.Execute("XLAST", streamKey);
+ var lastEntry = (RedisResult[])result;
+
+ // Verify it returns the only entry
+ ClassicAssert.AreEqual(2, lastEntry.Length);
+ ClassicAssert.AreEqual((string)id, (string)lastEntry[0]);
+
+ var values = (RedisResult[])lastEntry[1];
+ ClassicAssert.AreEqual(2, values.Length);
+ ClassicAssert.AreEqual("onlyField", (string)values[0]);
+ ClassicAssert.AreEqual("onlyValue", (string)values[1]);
+ }
+
+ [Ignore("Havent fixed this yet")]
+ [Test]
+ public void StreamLastAfterDeleteTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var server = redis.GetServers()[0];
+
+ var streamKey = "lastAfterDelete";
+
+ // Add multiple entries
+ var id1 = db.StreamAdd(streamKey, "field1", "value1", messageId: "1-0");
+ var id2 = db.StreamAdd(streamKey, "field2", "value2", messageId: "2-0");
+ var id3 = db.StreamAdd(streamKey, "field3", "value3", messageId: "3-0");
+
+ // Delete the last entry
+ db.StreamDelete(streamKey, [id3]);
+
+ // Execute XLAST
+ var result = server.Execute("XLAST", streamKey);
+ var lastEntry = (RedisResult[])result;
+
+ // Should return the second entry as it's now the last
+ ClassicAssert.AreEqual(2, lastEntry.Length);
+ ClassicAssert.AreEqual("2-0", (string)lastEntry[0]);
+
+ var values = (RedisResult[])lastEntry[1];
+ ClassicAssert.AreEqual(2, values.Length);
+ ClassicAssert.AreEqual("field2", (string)values[0]);
+ ClassicAssert.AreEqual("value2", (string)values[1]);
+ }
+
+ [Ignore("Havent fixed this yet")]
+ [Test]
+ public void StreamLastAfterMultipleDeletesTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var server = redis.GetServers()[0];
+
+ var streamKey = "lastAfterMultipleDeletes";
+
+ // Add multiple entries
+ for (int i = 0; i < 1000; i++)
+ {
+ db.StreamAdd(streamKey, $"field{i}", $"value{i}", messageId: $"{i}-0");
+ }
+
+ for (int i = 999; i > -1; i--)
+ {
+ var result = server.Execute("XLAST", streamKey);
+ var lastEntry = (RedisResult[])result;
+ // Should return the second entry as it's now the last
+ ClassicAssert.AreEqual(2, lastEntry.Length);
+ ClassicAssert.AreEqual($"{i}-0", (string)lastEntry[0]);
+ var values = (RedisResult[])lastEntry[1];
+ ClassicAssert.AreEqual(2, values.Length);
+ ClassicAssert.AreEqual($"field{i}", (string)values[0]);
+ ClassicAssert.AreEqual($"value{i}", (string)values[1]);
+
+ // delete it so next time we get the new last
+ db.StreamDelete(streamKey, [$"{i}-0"]);
+ }
+
+ // Finally, the stream should be empty
+ var finalResult = server.Execute("XLAST", streamKey);
+ ClassicAssert.IsTrue(finalResult.IsNull || ((RedisResult[])finalResult).Length == 0);
+ }
+
+ [Ignore("Havent fixed this yet")]
+ [Test]
+ public void StreamLastAfterTrimTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+
+ var db = redis.GetDatabase(0);
+ var server = redis.GetServers()[0];
+
+ var streamKey = "lastAfterTrim";
+
+ // Add multiple entries
+ for (int i = 0; i < 10; i++)
+ {
+ db.StreamAdd(streamKey, $"field{i}", $"value{i}");
+ }
+
+ // Trim to keep only 3 entries
+ db.StreamTrim(streamKey, 3);
+
+ // Execute XLAST
+ var result = server.Execute("XLAST", streamKey);
+ var lastEntry = (RedisResult[])result;
+
+ // Should return the last entry after trim
+ ClassicAssert.AreEqual(2, lastEntry.Length);
+ var values = (RedisResult[])lastEntry[1];
+ ClassicAssert.AreEqual(2, values.Length);
+ ClassicAssert.AreEqual("field9", (string)values[0]);
+ ClassicAssert.AreEqual("value9", (string)values[1]);
+ }
+
+ [Ignore("Havent fixed this yet")]
+ [Test]
+ public async Task StreamLastAutoGeneratedIdTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var server = redis.GetServers()[0];
+
+ var streamKey = "lastAutoId";
+
+ // Add entries with auto-generated IDs
+ db.StreamAdd(streamKey, "field1", "value1");
+ await Task.Delay(1);
+ db.StreamAdd(streamKey, "field2", "value2");
+ await Task.Delay(1);
+ var lastId = db.StreamAdd(streamKey, "field3", "value3");
+
+ // Execute XLAST
+ var result = server.Execute("XLAST", streamKey);
+ var lastEntry = (RedisResult[])result;
+
+ // Verify it returns the last entry with the correct auto-generated ID
+ ClassicAssert.AreEqual(2, lastEntry.Length);
+ ClassicAssert.AreEqual(lastId.ToString(), (string)lastEntry[0]);
+
+ var values = (RedisResult[])lastEntry[1];
+ ClassicAssert.AreEqual(2, values.Length);
+ ClassicAssert.AreEqual("field3", (string)values[0]);
+ ClassicAssert.AreEqual("value3", (string)values[1]);
+ }
+
+
+ #endregion
+
+ #region StreamCompatabilityTests
+ // check if common things like KEYS, and SCAN work with streams
+
+ [Ignore("Havent fixed this yet")]
+ [Test]
+ public void StreamKeysCommandTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var server = redis.GetServer(TestUtils.GetConfig().EndPoints[0]);
+
+ // Create some streams and other types
+ db.StreamAdd("stream:1", "field1", "value1");
+ db.StreamAdd("stream:2", "field2", "value2");
+ db.StreamAdd("stream:3", "field3", "value3");
+ db.StringSet("string:1", "value");
+ db.HashSet("hash:1", "field", "value");
+
+ // Test KEYS with pattern matching all
+ var allKeys = server.Keys(pattern: "*").ToArray();
+ ClassicAssert.GreaterOrEqual(allKeys.Length, 5);
+
+ // Test KEYS with stream pattern
+ var streamKeys = server.Keys(pattern: "stream:*").ToArray();
+
+ ClassicAssert.AreEqual(3, streamKeys.Length);
+ ClassicAssert.IsTrue(streamKeys.Any(k => k == "stream:1"));
+ ClassicAssert.IsTrue(streamKeys.Any(k => k == "stream:2"));
+ ClassicAssert.IsTrue(streamKeys.Any(k => k == "stream:3"));
+ }
+
+ [Ignore("Havent fixed this yet")]
+ [Test]
+ public void StreamScanCommandTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var server = redis.GetServer(TestUtils.GetConfig().EndPoints[0]);
+
+ // Create multiple streams
+ for (int i = 0; i < 20; i++)
+ {
+ db.StreamAdd($"scan:stream:{i}", "field", "value");
+ }
+
+ // Scan and collect all keys
+            var scannedKeys = new HashSet<string>();
+ var cursor = 0L;
+ var iterations = 0;
+ var maxIterations = 100; // Safety limit
+
+ do
+ {
+ var result = server.Execute("SCAN", cursor.ToString(), "MATCH", "scan:stream:*", "COUNT", "5");
+ var scanResult = (RedisResult[])result;
+
+ cursor = long.Parse((string)scanResult[0]);
+ var keys = (RedisResult[])scanResult[1];
+
+ foreach (var key in keys)
+ {
+ ClassicAssert.IsTrue(scannedKeys.Add((string)key), "Did not expect a duplicate");
+ }
+
+ iterations++;
+ } while (cursor != 0 && iterations < maxIterations);
+
+ // Verify cursor eventually returns to 0
+ ClassicAssert.AreEqual(0, cursor, "SCAN cursor should eventually return to 0");
+
+ // Verify all keys were found
+ ClassicAssert.AreEqual(20, scannedKeys.Count, "All stream keys should be returned by SCAN");
+
+ // Verify specific keys exist
+ for (int i = 0; i < 20; i++)
+ {
+ ClassicAssert.IsTrue(scannedKeys.Contains($"scan:stream:{i}"), $"Key scan:stream:{i} should be in results");
+ }
+ }
+
+ [Ignore("Havent fixed this yet")]
+ [Test]
+ public void StreamScanWithCountTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var server = redis.GetServer(TestUtils.GetConfig().EndPoints[0]);
+
+ // Create streams
+ for (int i = 0; i < 50; i++)
+ {
+ db.StreamAdd($"count:stream:{i}", "field", "value");
+ }
+
+ var cursor = 0L;
+ var result = server.Execute("SCAN", cursor.ToString(), "MATCH", "count:stream:*", "COUNT", "10");
+ var scanResult = (RedisResult[])result;
+
+ var newCursor = long.Parse((string)scanResult[0]);
+ var keys = (RedisResult[])scanResult[1];
+
+ // COUNT is a hint, not a guarantee, but we should get some results
+ ClassicAssert.GreaterOrEqual(keys.Length, 1, "Should return at least one key");
+ // With 50 keys and COUNT 10, we shouldn't return all keys in one scan
+ ClassicAssert.Less(keys.Length, 50, "Should not return all keys in single scan with small COUNT");
+ }
+
+ [Ignore("Havent fixed this yet")]
+ [Test]
+ public void StreamScanByTypeTest()
+ {
+ using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig());
+ var db = redis.GetDatabase(0);
+ var server = redis.GetServer(TestUtils.GetConfig().EndPoints[0]);
+
+ // Create streams and other types with same prefix
+ db.StreamAdd("type:stream:1", "field", "value");
+ db.StreamAdd("type:stream:2", "field", "value");
+ db.StringSet("type:string:1", "value");
+ db.HashSet("type:hash:1", "field", "value");
+ db.ListRightPush("type:list:1", "value");
+
+ // Scan for all keys with type prefix
+            var allKeys = new HashSet<string>();
+ var cursor = 0L;
+
+ do
+ {
+ var result = server.Execute("SCAN", cursor.ToString(), "MATCH", "type:*");
+ var scanResult = (RedisResult[])result;
+ cursor = long.Parse((string)scanResult[0]);
+ var keys = (RedisResult[])scanResult[1];
+
+ foreach (var key in keys)
+ {
+ allKeys.Add((string)key);
+ }
+ } while (cursor != 0);
+
+ // Should find all 5 keys
+ ClassicAssert.AreEqual(5, allKeys.Count);
+
+ // Now scan with TYPE filter for streams only
+            var streamKeys = new HashSet<string>();
+ cursor = 0L;
+
+ do
+ {
+ var result = server.Execute("SCAN", cursor.ToString(), "MATCH", "type:*", "TYPE", "stream");
+ var scanResult = (RedisResult[])result;
+ cursor = long.Parse((string)scanResult[0]);
+ var keys = (RedisResult[])scanResult[1];
+
+ foreach (var key in keys)
+ {
+ streamKeys.Add((string)key);
+ }
+ } while (cursor != 0);
+
+ // Should only find the 2 stream keys
+ ClassicAssert.AreEqual(2, streamKeys.Count);
+ ClassicAssert.IsTrue(streamKeys.Contains("type:stream:1"));
+ ClassicAssert.IsTrue(streamKeys.Contains("type:stream:2"));
+ ClassicAssert.IsFalse(streamKeys.Contains("type:string:1"));
+ ClassicAssert.IsFalse(streamKeys.Contains("type:hash:1"));
+
+ // Verify TYPE string filter doesn't return streams
+            var stringKeys = new HashSet<string>();
+ cursor = 0L;
+
+ do
+ {
+ var result = server.Execute("SCAN", cursor.ToString(), "MATCH", "type:*", "TYPE", "string");
+ var scanResult = (RedisResult[])result;
+ cursor = long.Parse((string)scanResult[0]);
+ var keys = (RedisResult[])scanResult[1];
+
+ foreach (var key in keys)
+ {
+ stringKeys.Add((string)key);
+ }
+ } while (cursor != 0);
+
+ // Should only find the string key
+ ClassicAssert.AreEqual(1, stringKeys.Count);
+ ClassicAssert.IsTrue(stringKeys.Contains("type:string:1"));
+ ClassicAssert.IsFalse(stringKeys.Contains("type:stream:1"));
+ ClassicAssert.IsFalse(stringKeys.Contains("type:stream:2"));
+
+ // Do a full scan without type and see if all keys are present when using cursor
+            var fullScanKeys = new HashSet<string>();
+ cursor = 0L;
+ do
+ {
+ var result = server.Execute("SCAN", cursor.ToString(), "MATCH", "type:*", "COUNT", "2");
+ var scanResult = (RedisResult[])result;
+ cursor = long.Parse((string)scanResult[0]);
+ var keys = (RedisResult[])scanResult[1];
+
+ foreach (var key in keys)
+ {
+ fullScanKeys.Add((string)key);
+ }
+ } while (cursor != 0);
+
+ ClassicAssert.AreEqual(5, fullScanKeys.Count);
+
+ ClassicAssert.IsTrue(fullScanKeys.Contains("type:string:1"));
+ ClassicAssert.IsTrue(fullScanKeys.Contains("type:hash:1"));
+ ClassicAssert.IsTrue(fullScanKeys.Contains("type:list:1"));
+ ClassicAssert.IsTrue(fullScanKeys.Contains("type:stream:1"));
+ ClassicAssert.IsTrue(fullScanKeys.Contains("type:stream:2"));
+ }
+
+ #endregion
+ }
+}
\ No newline at end of file
diff --git a/test/Garnet.test/TestUtils.cs b/test/Garnet.test/TestUtils.cs
index a0a1492318b..dad499eccbb 100644
--- a/test/Garnet.test/TestUtils.cs
+++ b/test/Garnet.test/TestUtils.cs
@@ -265,6 +265,7 @@ public static GarnetServer CreateGarnetServer(
int slowLogThreshold = 0,
TextWriter logTo = null,
bool enableCluster = false,
+ bool enableStreams = false,
int expiredKeyDeletionScanFrequencySecs = -1,
bool useReviv = false,
bool useInChainRevivOnly = false,
@@ -354,6 +355,7 @@ public static GarnetServer CreateGarnetServer(
UnixSocketPermission = unixSocketPermission,
SlowLogThreshold = slowLogThreshold,
ExpiredKeyDeletionScanFrequencySecs = expiredKeyDeletionScanFrequencySecs,
+ EnableStreams = enableStreams,
};
if (!string.IsNullOrEmpty(memorySize))
diff --git a/website/docs/commands/api-compatibility.md b/website/docs/commands/api-compatibility.md
index dfb9ab38087..f674f429c10 100644
--- a/website/docs/commands/api-compatibility.md
+++ b/website/docs/commands/api-compatibility.md
@@ -354,10 +354,10 @@ Note that this list is subject to change as we continue to expand our API comman
| | [ZUNION](data-structures.md#zunion) | ➕ | |
| | [ZUNIONSTORE](data-structures.md#zunionstore) | ➕ | |
| **STREAM** | XACK | ➖ | |
-| | XADD | ➖ | |
+| | XADD | ➕ | (Does not support Capped Streams) |
| | XAUTOCLAIM | ➖ | |
| | XCLAIM | ➖ | |
-| | XDEL | ➖ | |
+| | XDEL | ➕ | |
| | XGROUP CREATE | ➖ | |
| | XGROUP CREATECONSUMER | ➖ | |
| | XGROUP DELCONSUMER | ➖ | |
@@ -368,14 +368,14 @@ Note that this list is subject to change as we continue to expand our API comman
| | XINFO GROUPS | ➖ | |
| | XINFO HELP | ➖ | |
| | XINFO STREAM | ➖ | |
-| | XLEN | ➖ | |
+| | XLEN | ➕ | |
| | XPENDING | ➖ | |
-| | XRANGE | ➖ | |
+| | XRANGE | ➕ | |
| | XREAD | ➖ | |
| | XREADGROUP | ➖ | |
-| | XREVRANGE | ➖ | |
+| | XREVRANGE | ➕ | |
| | XSETID | ➖ | |
-| | XTRIM | ➖ | |
+| | XTRIM | ➕ | (Does not support near-exact trimming) |
| **STRING** | [APPEND](raw-string.md#append) | ➕ | |
| | [DECR](raw-string.md#decr) | ➕ | |
| | [DECRBY](raw-string.md#decrby) | ➕ | |
diff --git a/website/docs/commands/data-structures.md b/website/docs/commands/data-structures.md
index deb459d6ed4..68442933557 100644
--- a/website/docs/commands/data-structures.md
+++ b/website/docs/commands/data-structures.md
@@ -980,6 +980,92 @@ If **destination** already exists, it is overwritten.
---
+## Stream
+
+### XADD
+
+#### Syntax
+
+```bash
+ XADD key [NOMKSTREAM] <* | id> field value [field value ...]
+```
+Appends given stream entry to the stream at specified key. If the key does not exist, it is created when running the command.
+Creation of the stream can be disabled with the `NOMKSTREAM` option.
+
+Every entry in the stream is accompanied by a stream entry ID and consists of field-value pairs that are stored/read in the same order as provided by the user.
+While [XADD](#xadd) can auto-generate a unique ID using the `*` character, it is also possible to provide a user-defined ID consisting of two 64-bit numbers separated by a `-` character.
+The IDs are guaranteed to be incremental.
+
+**Capped Streams** are not currently supported.
+
+---
+
+### XLEN
+
+#### Syntax
+
+```bash
+ XLEN key
+```
+Returns the number of entries inside the stream specified by `key`. If the stream does not exist, returns 0.
+
+---
+
+### XRANGE
+
+#### Syntax
+
+```bash
+ XRANGE key start end [COUNT count]
+```
+Returns stream entries matching a given range of IDs.
+`start` and `end` can be special IDs (i.e., `-` and `+`) to specify the minimum possible ID and the maximum possible ID inside a stream respectively.
+The IDs provided can also be incomplete (i.e., with only the first part of the ID).
+Using the `COUNT` option reduces the number of entries returned.
+
+### XREVRANGE
+
+#### Syntax
+
+```bash
+    XREVRANGE key end start [COUNT count]
+```
+Returns stream entries in the order from end to start matching a given range of IDs.
+`start` and `end` can be special IDs (i.e., `-` and `+`) to specify the minimum possible ID and the maximum possible ID inside a stream respectively.
+The IDs provided can also be incomplete (i.e., with only the first part of the ID).
+Using the `COUNT` option reduces the number of entries returned.
+
+---
+
+### XDEL
+
+#### Syntax
+
+```bash
+ XDEL key id [id ...]
+```
+Removes the specified entries from a stream given by key, and returns the number of entries deleted.
+If any of the specified IDs do not exist, the number of entries returned may be less than the number of IDs provided, as non-existent IDs are not counted as deleted.
+
+---
+
+### XTRIM
+
+#### Syntax
+
+```bash
+ XTRIM key threshold
+```
+Trims the stream by evicting older entries using two strategies:
+
+- MAXLEN: evicts entries as long as stream's length exceeds specified threshold.
+- MINID: evicts entries with IDs lower than threshold where `threshold` is an entry ID.
+
+`LIMIT` clause is not currently supported.
+`MINID` defaults to exact trimming, meaning all entries having IDs lower than threshold will be deleted.
+
+---
+
## Sorted Set
### ZADD