Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions src/graphnet/datasets/snowstorm_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,20 @@


class SnowStormDataset(IceCubeHostedDataset):
"""IceCube SnowStorm simulation dataset.
"""IceCube SnowStorm Monte Carlo simulation dataset.

More information can be found at
https://wiki.icecube.wisc.edu/index.php/SnowStorm_MC#File_Locations
This is an IceCube Collaboration simulation dataset.
Requires a username and password.
This module provides access to certain RunIDs of the SnowStorm
simulation dataset. It prepares the data for training and evaluation
of deep learning models in GraphNeT by parsing it into the
CuratedDataset format.

The data is organized by SnowStorm RunIDs containing pulsemaps input features
along with event-level truth information.

Access to the data requires an IceCube Collaboration account.

References:
SnowStorm documentation: https://wiki.icecube.wisc.edu/index.php/SnowStorm_MC#File_Locations
SnowStorm paper: arXiv:1909.01530
"""

_experiment = "IceCube SnowStorm dataset"
Expand Down Expand Up @@ -91,7 +99,15 @@ def __init__(
def _prepare_args(
self, backend: str, features: List[str], truth: List[str]
) -> Tuple[Dict[str, Any], Union[List[int], None], Union[List[int], None]]:
"""Prepare arguments for dataset."""
"""Prepare arguments for dataset.

Args:
backend: backend of dataset. Only "sqlite" is supported.
features: List of features from user to use as input.
truth: List of event-level truth from user.

Returns: Dataset arguments, train/val selection, test selection
"""
assert backend == "sqlite"
dataset_paths = []
for rid in self._run_ids:
Expand All @@ -106,7 +122,6 @@ def _prepare_args(
# get RunID
pattern = rf"{re.escape(self.dataset_dir)}/(\d+)/.*"
event_counts: Dict[str, int] = {}
event_counts = {}
for path in dataset_paths:

# Extract the ID
Expand Down Expand Up @@ -175,7 +190,7 @@ def _create_comment(cls, event_counts: Dict[str, int] = {}) -> None:
runid_string += f"RunID {k} contains {v:10d} events\n"
tot += v
cls._comments = (
f"Contains ~{tot/1e6:.1f} million events:\n"
f"Contains ~{tot / 1e6:.1f} million events:\n"
+ runid_string
+ fixed_string
)
Expand Down
Loading