Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 89 additions & 113 deletions src/foamlib/_files/_parsing/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ def __call__(
) from e

try:
ret = np.fromstring(data, sep=" ", dtype=self._dtype)
ret = np.array(data.split(), dtype=self._dtype)
except ValueError as e:
raise ParseError(
contents,
Expand Down Expand Up @@ -414,140 +414,112 @@ def __call__(
_parse_ascii_tensor_list = _ASCIINumericListParser(dtype=float, elshape=(9,))


_THREE_FACE_LIKE = re.compile(
rb"3(?:"
+ _SKIP.pattern
+ rb")?\((?:"
+ _SKIP.pattern
+ rb")?(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb"(?:"
+ _SKIP.pattern
+ rb"))(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb"(?:"
+ _SKIP.pattern
+ rb"))(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb")(?:"
+ _SKIP.pattern
+ rb")?\)"
_SUB_LIST_LIKE = re.compile(
rb"(?:" + _POSSIBLE_INTEGER.pattern + rb")(?:" + _SKIP.pattern + rb")?\([^()]*?\)"
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@RamogninoF the problem, to me, is that this doesn't actually check that the sublist is well-formed. E.g. this will readily accept a list with a wrong count, like 2 (1 2 3)...

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately I have no background in parsing logic, and all this is far beyond my capabilities; I just hope this can be a useful starting point for you. In its current state, parsing meshes in ASCII format is simply impossible because of the time required (I gave up even on a 10k-cell mesh after it had spent more than 10 minutes parsing the faces file). I would like to be able to handle ASCII meshes as well, rather than only binary ones.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A simple alternative could be just to add hardcoded parsers for faces of up to 10 vertices or so, which I think would be more than enough for most cases.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or what if the string is parsed twice via regex, once to retrieve the list and once to get the prefix marking its length, and these quantities are compared to validate the parsed data before returning?

)
_UNCOMMENTED_THREE_FACE_LIKE = re.compile(
rb"3\s*\(\s*(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb"\s*)(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb"\s*)(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb")\s*\)",
_UNCOMMENTED_SUB_LIST_LIKE = re.compile(
rb"(?:" + _POSSIBLE_INTEGER.pattern + rb")\s*\([^()]*?\)",
re.ASCII,
)
_FOUR_FACE_LIKE = re.compile(
rb"4(?:"
+ _SKIP.pattern
+ rb")?\((?:"
+ _SKIP.pattern
+ rb")?(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb"(?:"
+ _SKIP.pattern
+ rb"))(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb"(?:"
+ _SKIP.pattern
+ rb"))(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb"(?:"
+ _SKIP.pattern
+ rb"))(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb")(?:"
+ _SKIP.pattern
+ rb")?\)"
)
_UNCOMMENTED_FOUR_FACE_LIKE = re.compile(
rb"4\s*\(\s*(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb"\s*)(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb"\s*)(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb"\s*)(?:"
+ _POSSIBLE_INTEGER.pattern
+ rb")\s*\)",
re.ASCII,
)
_FACES_LIKE_LIST = re.compile(
_LIST_OF_LISTS_LIKE = re.compile(
rb"(?:(?:"
+ _SKIP.pattern
+ rb")?(?:"
+ _THREE_FACE_LIKE.pattern
+ rb"|"
+ _FOUR_FACE_LIKE.pattern
+ rb"))*(?:"
+ rb")?"
+ _SUB_LIST_LIKE.pattern
+ rb")*(?:"
+ _SKIP.pattern
+ rb")?\)"
)
_UNCOMMENTED_FACES_LIKE_LIST = re.compile(
rb"(?:\s*(?:"
+ _UNCOMMENTED_THREE_FACE_LIKE.pattern
+ rb"|"
+ _UNCOMMENTED_FOUR_FACE_LIKE.pattern
+ rb"))*\s*\)",
_UNCOMMENTED_LIST_OF_LISTS_LIKE = re.compile(
rb"(?:\s*" + _UNCOMMENTED_SUB_LIST_LIKE.pattern + rb")*\s*\)",
re.ASCII,
)


class _ASCIINumericListListParser(Generic[_DType]):
    """Parse an ASCII list of counted numeric sub-lists.

    The accepted shape is ``[count] ( n(v ...) n(v ...) ... )``: an optional
    outer element count, then a parenthesised sequence of sub-lists, each
    written as a declared length immediately followed by its parenthesised
    values (for example ``3(0 1 2) 4(3 4 5 6)``).

    Unlike a flat "read the count, take that many values" walk, every declared
    sub-list length is checked against the number of values actually found
    between that sub-list's parentheses, so input such as ``2 (1 2 3)`` is
    rejected instead of being silently mis-split.
    """

    def __init__(self, *, dtype: type[_DType]) -> None:
        """Remember the Python scalar type (``int`` or ``float``) of the values."""
        self._dtype = dtype

    def __call__(
        self,
        contents: bytes | bytearray,
        pos: int,
        *,
        empty_ok: bool = False,
    ) -> tuple[list[np.ndarray[tuple[int], np.dtype[np.float64 | np.int64]]], int]:
        """Parse a list of sub-lists starting at ``pos``.

        Args:
            contents: The raw buffer being parsed.
            pos: Offset in ``contents`` at which parsing starts.
            empty_ok: When no outer count is given, whether an empty list is
                accepted. Ignored when an outer count is present.

        Returns:
            A pair ``(sub_lists, new_pos)``. Each sub-list is a 1-D slice of a
            single shared array of dtype ``int64`` (for ``dtype=int``) or
            ``float64`` (otherwise); ``new_pos`` is the offset just past the
            closing parenthesis of the outer list.

        Raises:
            ParseError: If the input does not look like a list of sub-lists,
                contains non-ASCII bytes or tokens that do not convert to the
                requested dtype, has a sub-list whose declared length differs
                from its actual number of values, or has a number of
                sub-lists that differs from the outer count.
        """
        # Optional outer element count.
        try:
            count, pos = _parse_number(contents, pos, target=int)
        except ParseError:
            count = None
        else:
            if count < 0:
                raise ParseError(contents, pos, expected="non-negative list count")
            pos = _skip(contents, pos)

        pos = _expect(contents, pos, b"(")

        # Try the comment-free pattern first; only when it fails fall back to
        # the comment-aware pattern and blank the comments out afterwards.
        if match := _UNCOMMENTED_LIST_OF_LISTS_LIKE.match(contents, pos):
            data = contents[pos : match.end() - 1]
        elif match := _LIST_OF_LISTS_LIKE.match(contents, pos):
            data = _COMMENTS.sub(b" ", contents[pos : match.end() - 1])
        else:
            raise ParseError(contents, pos, expected="numeric list of lists")
        pos = match.end()

        try:
            text = data.decode("ascii")
        except UnicodeDecodeError as e:
            raise ParseError(contents, pos, expected="numeric list of lists") from e

        # Use an explicit NumPy dtype so the bit width does not depend on the
        # platform's default mapping of Python ``int``.
        np_dtype: type = np.int64 if self._dtype is int else np.float64

        # np.array(tokens) rather than np.fromstring: any token that does not
        # convert to the requested dtype (e.g. '0.1' for int64) raises
        # ValueError immediately, which is how a dtype mismatch is detected.
        try:
            values = np.array(
                text.replace("(", " ").replace(")", " ").split(), dtype=np_dtype
            )
        except ValueError as e:
            raise ParseError(contents, pos, expected="numeric list of lists") from e

        # Actual sub-list sizes, taken from the parentheses themselves: every
        # chunk that ends at a ")" is "<declared length> ( <values...>".
        sizes = [
            len(chunk.partition("(")[2].split()) for chunk in text.split(")")[:-1]
        ]
        # One length token per sub-list plus its values must account for every
        # token; anything else means the structure is not what was matched.
        if len(values) != sum(sizes) + len(sizes):
            raise ParseError(contents, pos, expected="numeric list of lists")

        sizes_arr = np.asarray(sizes, dtype=np.int64)
        # Index of each sub-list's declared-length token in the flat array.
        starts = np.cumsum(sizes_arr + 1) - sizes_arr - 1
        if np.any(values[starts] != sizes_arr):
            raise ParseError(
                contents,
                pos,
                expected="sub-list count matching its number of elements",
            )

        # Slice by the verified sizes, so the walk always advances and never
        # runs past the end of the array.
        ret: list[np.ndarray] = [
            values[start + 1 : start + 1 + size]
            for start, size in zip(starts.tolist(), sizes)
        ]

        if count is None:
            if not empty_ok and not ret:
                raise ParseError(
                    contents, pos, expected="non-empty numeric list of lists"
                )
        elif len(ret) != count:
            raise ParseError(
                contents, pos, expected=f"{count} elements (got {len(ret)})"
            )

        return ret, pos
# Module-level parser instances: one configured with dtype=int and one with
# dtype=float, so callers can invoke them like plain parsing functions.
_parse_ascii_integer_list_list = _ASCIINumericListListParser(dtype=int)
_parse_ascii_float_list_list = _ASCIINumericListListParser(dtype=float)


def _parse_ascii_faces_like_list(
    contents: bytes | bytearray, pos: int
) -> tuple[list[np.ndarray[tuple[int], np.dtype[np.int64]]], int]:
    """Parse a faces-like list via the integer list-of-lists parser.

    Kept under its historical name; all work is delegated to
    ``_parse_ascii_integer_list_list`` with its default options.
    """
    faces, end = _parse_ascii_integer_list_list(contents, pos)
    return faces, end


def _parse_binary_numeric_list(
Expand Down Expand Up @@ -916,6 +888,10 @@ def _parse_standalone_data_entry(
return _parse_ascii_vector_list(contents, pos)
with contextlib.suppress(ParseError):
return _parse_ascii_faces_like_list(contents, pos)
# _parse_ascii_float_list_list is tried after faces-like (integer list-of-lists)
# to handle sparse/non-uniform float lists that look like n(v1 v2 ...) per row.
with contextlib.suppress(ParseError):
return _parse_ascii_float_list_list(contents, pos)

try:
entry1, pos1 = _parse_data(contents, pos)
Expand Down
100 changes: 100 additions & 0 deletions tests/test_files/test_parsing/test_poly_face_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from pathlib import Path

import numpy as np
from foamlib import FoamFile

# Fixture text: an ASCII ``faceList`` file (banner, FoamFile header, body)
# holding three faces -- one triangle, one quad and one pentagon.
faces_contents = r"""
/*--------------------------------*- C++ -*----------------------------------*\
| ========= | |
| \\ / F ield | OpenFOAM: The Open Source CFD Toolbox |
| \\ / O peration | Version: 2206 |
| \\ / A nd | Website: www.openfoam.com |
| \\/ M anipulation | |
\*---------------------------------------------------------------------------*/
FoamFile
{
version 2.0;
format ascii;
class faceList;
location "constant/polyMesh";
object faces;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //

3
(
3(0 1 2)
4(3 4 5 6)
5(7 8 9 10 11)
)

// ************************************************************************* //
"""


def test_parse_poly_faces(tmp_path: Path) -> None:
    """Check that an ASCII faceList mixing 3-, 4- and 5-vertex faces parses face by face."""
    target = tmp_path / "faces"
    target.write_text(faces_contents)

    parsed = FoamFile(target)[None]

    expected = ([0, 1, 2], [3, 4, 5, 6], [7, 8, 9, 10, 11])
    assert len(parsed) == 3
    for idx, want in enumerate(expected):
        assert np.array_equal(parsed[idx], want)


# Fixture text: a header-less list of three counted sub-lists with float
# values, of lengths 2, 3 and 1.
float_list_list_contents = r"""
3
(
2(0.1 0.2)
3(0.3 0.4 0.5)
1(0.6)
)
"""


def test_parse_float_list_list(tmp_path: Path) -> None:
    """Check that a standalone ASCII list of counted float sub-lists parses row by row."""
    target = tmp_path / "floats"
    target.write_text(float_list_list_contents)

    parsed = FoamFile(target)[None]

    expected = ([0.1, 0.2], [0.3, 0.4, 0.5], [0.6])
    assert len(parsed) == 3
    for idx, want in enumerate(expected):
        assert np.allclose(parsed[idx], want)


# Fixture text: the same three faces as a triangle/quad/pentagon list, but
# with a trailing line comment, a block comment between a count and its
# parenthesis, and a line comment inside a multi-line sub-list.
commented_faces_contents = r"""
3
(
3(0 1 2) // triangle
4 /* quad */ (3 4 5 6)
5(
7 // comment inside
8
9
10
11
)
)
"""


def test_parse_commented_faces(tmp_path: Path) -> None:
    """Check that comments around and inside sub-lists do not change the parsed faces."""
    target = tmp_path / "faces_commented"
    target.write_text(commented_faces_contents)

    parsed = FoamFile(target)[None]

    expected = ([0, 1, 2], [3, 4, 5, 6], [7, 8, 9, 10, 11])
    assert len(parsed) == 3
    for idx, want in enumerate(expected):
        assert np.array_equal(parsed[idx], want)
Loading