Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 25 additions & 27 deletions benchmarks/zig_sgp4_bench.zig
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const ISS_TLE =
const ScenarioType = enum {
Scalar,
SIMD,
Multithreaded
Multithreaded,
};

const Scenario = struct {
Expand All @@ -25,25 +25,25 @@ const Scenario = struct {
sceneType: ScenarioType,
};

const BatchSize = Sgp4Batch.BatchSize;
const batchSize = Sgp4Batch.batchSize;

const scenarios = [_]Scenario {
const scenarios = [_]Scenario{
.{ .name = "1 day (minute)", .points = 1440, .stepMin = 1.0, .sceneType = .Scalar },
.{ .name = "1 week (minute)", .points = 10080, .stepMin = 1.0, .sceneType = .Scalar },
.{ .name = "2 weeks (minute)", .points = 20160, .stepMin = 1.0, .sceneType = .Scalar },
.{ .name = "2 weeks (second)", .points = 1209600, .stepMin = 1.0 / 60.0, .sceneType = .Scalar },
.{ .name = "1 month (minute)", .points = 43200, .stepMin = 1.0, .sceneType = .Scalar },
};

const scenariosSIMD = [_]Scenario {
const scenariosSIMD = [_]Scenario{
.{ .name = "1 day (minute)", .points = 1440, .stepMin = 1.0, .sceneType = .SIMD },
.{ .name = "1 week (minute)", .points = 10080, .stepMin = 1.0, .sceneType = .SIMD },
.{ .name = "2 weeks (minute)", .points = 20160, .stepMin = 1.0, .sceneType = .SIMD },
.{ .name = "2 weeks (second)", .points = 1209600, .stepMin = 1.0 / 60.0, .sceneType = .SIMD },
.{ .name = "1 month (minute)", .points = 43200, .stepMin = 1.0, .sceneType = .SIMD },
};

const scenariosMT = [_]Scenario {
const scenariosMT = [_]Scenario{
.{ .name = "2 weeks (minute)", .points = 20160, .stepMin = 1.0, .sceneType = .Multithreaded },
.{ .name = "1 month (second)", .points = 2592000, .stepMin = 1.0 / 60.0, .sceneType = .Multithreaded },
.{ .name = "3 months (second)", .points = 7776000, .stepMin = 1.0 / 60.0, .sceneType = .Multithreaded },
Expand All @@ -55,8 +55,8 @@ const iterations = 10;

fn simdWorker(sgp4: *const Sgp4, times: []const f64) void {
var i: usize = 0;
while (i + BatchSize <= times.len) : (i += BatchSize) {
const result = dispatch.sgp4Times8(sgp4, times[i..][0..BatchSize].*) catch continue;
while (i + batchSize <= times.len) : (i += batchSize) {
const result = dispatch.sgp4Times8(sgp4, times[i..][0..batchSize].*) catch continue;
std.mem.doNotOptimizeAway(result);
}
while (i < times.len) : (i += 1) {
Expand All @@ -80,46 +80,44 @@ pub fn main(init: std.process.Init) !void {

std.debug.print("\nastroz SGP4 Benchmark\n", .{});
std.debug.print("==================================================\n", .{});
std.debug.print("SIMD Batch Size: {} (oma runtime dispatch)\n", .{BatchSize});
std.debug.print("SIMD Batch Size: {} (oma runtime dispatch)\n", .{batchSize});
std.debug.print("\n--- Scalar Propagation ---\n", .{});

const num_threads = std.Thread.getCpuCount() catch 1;

try runScenarios(sgp4, &scenarios, num_threads, gpa, io);

std.debug.print("\n--- SIMD Batch{} Propagation ---\n", .{BatchSize});
std.debug.print("\n--- SIMD Batch{} Propagation ---\n", .{batchSize});

try runScenarios(sgp4, &scenariosSIMD, num_threads, gpa, io);

std.debug.print("\n--- Multithreaded SIMD Batch{} Propagation ---\n", .{BatchSize});
std.debug.print("\n--- Multithreaded SIMD Batch{} Propagation ---\n", .{batchSize});
std.debug.print("Threads: {}\n", .{num_threads});


try runScenarios(sgp4, &scenariosMT, num_threads, gpa, io);

std.debug.print("\n", .{});
}

fn getResults(name: []const u8, total_ns: usize, iters: usize, points: usize, totalPropsPerSec: *f64) void {
const avg_ms = @as(f64, @floatFromInt(total_ns / iters)) / 1_000_000.0;
const props_per_sec = @as(f64, @floatFromInt(points)) / (avg_ms / 1000.0);
totalPropsPerSec.* += props_per_sec;

std.debug.print("{s:<25} {d:>10.3} ms ({d:.2} prop/s)\n", .{
name,
avg_ms,
props_per_sec,
});
const avg_ms = @as(f64, @floatFromInt(total_ns / iters)) / 1_000_000.0;
const props_per_sec = @as(f64, @floatFromInt(points)) / (avg_ms / 1000.0);
totalPropsPerSec.* += props_per_sec;

std.debug.print("{s:<25} {d:>10.3} ms ({d:.2} prop/s)\n", .{
name,
avg_ms,
props_per_sec,
});
}

fn reportAvg(totalPropsPerSec: f64, scenarioSize: f64) void {
std.debug.print("{s:<25} {d:>17.2} prop/s\n", .{
"Average",
totalPropsPerSec / scenarioSize,
});
std.debug.print("{s:<25} {d:>17.2} prop/s\n", .{
"Average",
totalPropsPerSec / scenarioSize,
});
}


fn runScenarios(sgp4: Sgp4, scenarioArr: []const Scenario, num_threads: usize, gpa: std.mem.Allocator, io: std.Io) !void {
var totalPropsPerSec: f64 = 0.0;

Expand All @@ -145,8 +143,8 @@ fn runScenarios(sgp4: Sgp4, scenarioArr: []const Scenario, num_threads: usize, g
},
.SIMD => {
var i: usize = 0;
while (i + BatchSize <= scenario.points) : (i += BatchSize) {
const result = dispatch.sgp4Times8(&sgp4, times[i..][0..BatchSize].*) catch continue;
while (i + batchSize <= scenario.points) : (i += batchSize) {
const result = dispatch.sgp4Times8(&sgp4, times[i..][0..batchSize].*) catch continue;
std.mem.doNotOptimizeAway(result);
}
// scalar remainder
Expand Down
4 changes: 2 additions & 2 deletions bindings/python/src/satrec.zig
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ fn buildSuccessResult(pv: [2][3]f64) [*c]c.PyObject {
return result;
}

const SgpSimdN = Sgp4Batch.BatchSize;
const SgpSimdN = Sgp4Batch.batchSize;

/// Generic SIMD batch propagation for a single satellite (SGP4 or SDP4).
/// Processes times in chunks of SgpSimdN via oma dispatch, remainder with scalar propagate.
Expand Down Expand Up @@ -586,7 +586,7 @@ pub fn pySdp4BatchPropagateInto(_: [*c]c.PyObject, args: [*c]c.PyObject, kwds: [
return py.none();
}

const SdpSimdN = Sdp4Batch.BatchSize;
const SdpSimdN = Sdp4Batch.batchSize;

fn sdp4BatchPropagate(
sdp4_ptrs: []const *const Sdp4,
Expand Down
2 changes: 1 addition & 1 deletion bindings/python/src/shared.zig
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ const constants = astroz.constants;
pub const allocator = std.heap.c_allocator;

/// Batch size from core library (4 for AVX2, 8 for AVX512)
pub const BatchSize = astroz.Constellation.BatchSize;
pub const BatchSize = astroz.Constellation.batchSize;

/// Batch elements type for current batch size
pub const BatchElements = astroz.Constellation.Sgp4Batch.BatchElements(BatchSize);
Expand Down
92 changes: 46 additions & 46 deletions src/Constellation.zig
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ const WCS = @import("WorldCoordinateSystem.zig");

const Error = Sgp4.Error;

pub const BatchSize: usize = Sgp4Batch.BatchSize;
const BatchResults = Sgp4.PosVelArray(BatchSize);
const Sgp4Bels = Sgp4Batch.BatchElements(BatchSize);
const Sdp4Bels = Sdp4Batch.Sdp4BatchElements(BatchSize);
const Sdp4Carry = Sdp4Batch.ResonanceCarryBatch(BatchSize);
pub const batchSize: usize = Sgp4Batch.batchSize;
const BatchResults = Sgp4.PosVelArray(batchSize);
const Sgp4Bels = Sgp4Batch.BatchElements(batchSize);
const Sdp4Bels = Sdp4Batch.Sdp4BatchElements(batchSize);
const Sdp4Carry = Sdp4Batch.ResonanceCarryBatch(batchSize);

/// Output coordinate modes for propagation
pub const OutputMode = enum(u8) {
Expand Down Expand Up @@ -83,7 +83,7 @@ numSgp4: usize,

// SDP4 (deep space) batches
sdp4Batches: []Sdp4Bels,
sdp4BatchEpochs: [][BatchSize]f64,
sdp4BatchEpochs: [][batchSize]f64,
sdp4OrigIndices: []u32,
sdp4Carries: []Sdp4Carry,
numSdp4: usize,
Expand Down Expand Up @@ -126,8 +126,8 @@ pub fn init(allocator: Allocator, tles: []const Tle, grav: constants.Sgp4Gravity
}

// Batch SGP4
const nSgp4B = if (nSgp4 == 0) 0 else (nSgp4 + BatchSize - 1) / BatchSize;
const sgp4Pad = nSgp4B * BatchSize;
const nSgp4B = if (nSgp4 == 0) 0 else (nSgp4 + batchSize - 1) / batchSize;
const sgp4Pad = nSgp4B * batchSize;

const sgp4Batches = allocator.alloc(Sgp4Bels, nSgp4B) catch return Error.OutOfMemory;
errdefer allocator.free(sgp4Batches);
Expand All @@ -140,47 +140,47 @@ pub fn init(allocator: Allocator, tles: []const Tle, grav: constants.Sgp4Gravity
if (nSgp4 > 0) refEpoch = tleEpochJd(tles[sgp4Idx[0]]);

for (0..nSgp4B) |bi| {
const base = bi * BatchSize;
var bt: [BatchSize]Tle = undefined;
for (0..BatchSize) |lane| {
const base = bi * batchSize;
var bt: [batchSize]Tle = undefined;
for (0..batchSize) |lane| {
const src = if (base + lane < nSgp4) sgp4Idx[base + lane] else sgp4Idx[nSgp4 - 1];
bt[lane] = tles[src];
}
sgp4Batches[bi] = try Sgp4Batch.initBatchElements(BatchSize, bt, grav);
sgp4Batches[bi] = try Sgp4Batch.initBatchElements(batchSize, bt, grav);

const epochs: [BatchSize]f64 = simdMath.readArray(BatchSize, &sgp4Batches[bi].epochJd);
for (0..BatchSize) |lane| {
const epochs: [batchSize]f64 = simdMath.readArray(batchSize, &sgp4Batches[bi].epochJd);
for (0..batchSize) |lane| {
sgp4EpochOff[base + lane] = (refEpoch - epochs[lane]) * 1440.0;
sgp4Orig[base + lane] = if (base + lane < nSgp4) sgp4Idx[base + lane] else sgp4Idx[nSgp4 - 1];
}
}

// Batch SDP4
const nSdp4B = if (nSdp4 == 0) 0 else (nSdp4 + BatchSize - 1) / BatchSize;
const sdp4Pad = nSdp4B * BatchSize;
const nSdp4B = if (nSdp4 == 0) 0 else (nSdp4 + batchSize - 1) / batchSize;
const sdp4Pad = nSdp4B * batchSize;

const sdp4Batches = allocator.alloc(Sdp4Bels, nSdp4B) catch return Error.OutOfMemory;
errdefer allocator.free(sdp4Batches);
const sdp4Epochs = allocator.alloc([BatchSize]f64, nSdp4B) catch return Error.OutOfMemory;
const sdp4Epochs = allocator.alloc([batchSize]f64, nSdp4B) catch return Error.OutOfMemory;
errdefer allocator.free(sdp4Epochs);
const sdp4Orig = allocator.alloc(u32, sdp4Pad) catch return Error.OutOfMemory;
errdefer allocator.free(sdp4Orig);
const sdp4Carries = allocator.alloc(Sdp4Carry, nSdp4B) catch return Error.OutOfMemory;
errdefer allocator.free(sdp4Carries);

for (0..nSdp4B) |bi| {
const base = bi * BatchSize;
var be: [BatchSize]Sdp4.Elements = undefined;
var ep: [BatchSize]f64 = undefined;
for (0..BatchSize) |lane| {
const base = bi * batchSize;
var be: [batchSize]Sdp4.Elements = undefined;
var ep: [batchSize]f64 = undefined;
for (0..batchSize) |lane| {
const src = if (base + lane < nSdp4) base + lane else nSdp4 - 1;
be[lane] = sdp4Els[src];
ep[lane] = sdp4Els[src].sgp4.epochJd;
sdp4Orig[base + lane] = if (base + lane < nSdp4) sdp4Idx[src] else sdp4Idx[nSdp4 - 1];
}
sdp4Batches[bi] = Sdp4Batch.initFromElements(BatchSize, be, grav);
sdp4Batches[bi] = Sdp4Batch.initFromElements(batchSize, be, grav);
sdp4Epochs[bi] = ep;
sdp4Carries[bi] = Sdp4Batch.initCarry(BatchSize, &sdp4Batches[bi]);
sdp4Carries[bi] = Sdp4Batch.initCarry(batchSize, &sdp4Batches[bi]);
}

return .{
Expand Down Expand Up @@ -213,7 +213,7 @@ pub fn deinit(self: *Constellation) void {
/// Call when switching to a non-monotonic time sequence.
pub fn resetCarry(self: *Constellation) void {
for (self.sdp4Batches, self.sdp4Carries) |*batch, *carry| {
carry.* = Sdp4Batch.initCarry(BatchSize, batch);
carry.* = Sdp4Batch.initCarry(batchSize, batch);
}
}

Expand All @@ -225,7 +225,7 @@ const PropCtx = struct {
numSgp4: usize,

sdp4Batches: []const Sdp4Bels,
sdp4BatchEpochs: [][BatchSize]f64,
sdp4BatchEpochs: [][batchSize]f64,
sdp4OrigIndices: []const u32,
sdp4Carries: []Sdp4Carry,
numSdp4: usize,
Expand Down Expand Up @@ -418,10 +418,10 @@ inline fn sgp4Core(
batchIdx: usize,
timeIdx: usize,
) void {
const base = batchIdx * BatchSize;
const base = batchIdx * batchSize;

var tsince: [BatchSize]f64 = undefined;
inline for (0..BatchSize) |lane| {
var tsince: [batchSize]f64 = undefined;
inline for (0..batchSize) |lane| {
tsince[lane] = ctx.tsinceBase[timeIdx] + ctx.sgp4EpochOffsets[base + lane];
}

Expand All @@ -436,8 +436,8 @@ inline fn sgp4Core(
/// Check if any lane in a batch is active (not masked out)
inline fn sgp4BatchActive(ctx: PropCtx, batchIdx: usize) bool {
const mask = ctx.satelliteMask orelse return true;
const base = batchIdx * BatchSize;
inline for (0..BatchSize) |lane| {
const base = batchIdx * batchSize;
inline for (0..batchSize) |lane| {
if (base + lane < ctx.numSgp4) {
if (mask[ctx.sgp4OrigIndices[base + lane]] != 0) return true;
}
Expand All @@ -457,11 +457,11 @@ fn unifiedSdp4Range(
const jdT = ctx.jdFull[timeIdx];

for (batchStart..batchEnd) |batchIdx| {
const base = batchIdx * BatchSize;
const base = batchIdx * batchSize;

const batchEpochs: [BatchSize]f64 = ctx.sdp4BatchEpochs[batchIdx];
var tArr: [BatchSize]f64 = undefined;
inline for (0..BatchSize) |lane| {
const batchEpochs: [batchSize]f64 = ctx.sdp4BatchEpochs[batchIdx];
var tArr: [batchSize]f64 = undefined;
inline for (0..batchSize) |lane| {
tArr[lane] = (jdT - batchEpochs[lane]) * 1440.0;
}

Expand Down Expand Up @@ -489,7 +489,7 @@ inline fn writeOutput(
const sinG = if (mode != .teme) ctx.gmstSin[timeIdx] else 0.0;
const cosG = if (mode != .teme) ctx.gmstCos[timeIdx] else 0.0;

inline for (0..BatchSize) |lane| {
inline for (0..batchSize) |lane| {
if (base + lane < numReal and laneActive(ctx.satelliteMask, origIndices[base + lane])) {
const origIdx = origIndices[base + lane];
const ob = outBase(layout, @as(usize, origIdx), timeIdx, ctx.numTimes, ctx.numSatellites);
Expand Down Expand Up @@ -517,7 +517,7 @@ inline fn writeZeros(
numReal: usize,
timeIdx: usize,
) void {
inline for (0..BatchSize) |lane| {
inline for (0..batchSize) |lane| {
if (base + lane < numReal and laneActive(ctx.satelliteMask, origIndices[base + lane])) {
const origIdx = origIndices[base + lane];
const ob = outBase(layout, @as(usize, origIdx), timeIdx, ctx.numTimes, ctx.numSatellites);
Expand Down Expand Up @@ -559,7 +559,7 @@ pub fn propagateConstellation(
const pa = std.heap.page_allocator;

// Build identity origIndices: satellite i maps to output position i
const padded = batches.len * BatchSize;
const padded = batches.len * batchSize;
const origIndices = pa.alloc(u32, padded) catch return Error.OutOfMemory;
defer pa.free(origIndices);
for (origIndices, 0..) |*v, i| v.* = @intCast(i);
Expand Down Expand Up @@ -610,7 +610,7 @@ pub fn propagateConstellation(
/// `numSatellites` is the total output stride (columns in the output buffer)
pub fn propagateSdp4Constellation(
batches: []const Sdp4Bels,
batchEpochs: [][BatchSize]f64,
batchEpochs: [][batchSize]f64,
numSdp4: usize,
numSatellites: usize,
origIndices: []const u32,
Expand All @@ -636,7 +636,7 @@ pub fn propagateSdp4Constellation(
// Initialize carries
const carries = pa.alloc(Sdp4Carry, batches.len) catch return Error.OutOfMemory;
defer pa.free(carries);
for (batches, carries) |*b, *carry| carry.* = Sdp4Batch.initCarry(BatchSize, b);
for (batches, carries) |*b, *carry| carry.* = Sdp4Batch.initCarry(batchSize, b);

// Precompute GMST sin/cos if coordinate transform needed
var sinBuf: []f64 = &.{};
Expand Down Expand Up @@ -696,8 +696,8 @@ pub fn screenConstellation(
return Error.SatelliteDecayed;

const thresholdSq = threshold * threshold;
const targetBatchIdx = targetIdx / BatchSize;
const targetLane = targetIdx % BatchSize;
const targetBatchIdx = targetIdx / batchSize;
const targetLane = targetIdx % batchSize;
const targetBatch = &batches[targetBatchIdx];

for (0..numSatellites) |i| {
Expand All @@ -718,21 +718,21 @@ pub fn screenConstellation(
}

for (times, 0..) |time, timeIdx| {
var targetTimeArr: [BatchSize]f64 = undefined;
var targetTimeArr: [batchSize]f64 = undefined;
@memset(&targetTimeArr, time + targetEpochOffset);
const targetResults = dispatch.sgp4Batch8(targetBatch, targetTimeArr) catch continue;
const targetPos = eciToEcef(targetResults[targetLane][0], sinBuf[timeIdx], cosBuf[timeIdx]);

for (batches, 0..) |*batch, batchIdx| {
const satBase = batchIdx * BatchSize;
var batchTimes: [BatchSize]f64 = undefined;
inline for (0..BatchSize) |i| {
const satBase = batchIdx * batchSize;
var batchTimes: [batchSize]f64 = undefined;
inline for (0..batchSize) |i| {
batchTimes[i] = time + epochOffsets[satBase + i];
}

const batchResults = dispatch.sgp4Batch8(batch, batchTimes) catch continue;

for (0..BatchSize) |lane| {
for (0..batchSize) |lane| {
const satIdx = satBase + lane;
if (satIdx >= numSatellites or satIdx == targetIdx) continue;

Expand Down
Loading
Loading