Skip to content

Commit 7a9aaa7

Browse files
committed
fix: lone surrogate regex matching + Script=Unknown Unicode data
Two fixes for built-ins/RegExp/property-escapes/generated (469/469 pass):

1. Lone surrogate regex matching (fixes 7 tests):
   - Change regex_exec() to take &[u16] instead of &str, preserving lone surrogates (U+D800..U+DFFF) that String::from_utf16_lossy() would replace with U+FFFD
   - Add value_to_u16() helper in value.rs that returns Vec<u16> directly for String values without lossy UTF-8 conversion
   - Update regexp_exec_dispatch / regexp_test_dispatch / regexp_abstract_exec to pass &[u16] through the exec chain

2. Script=Unknown Unicode data (fixes 2 tests):
   - regress crate had UNKNOWN = [Interval; 0] (empty table)
   - Copy regress source to regress-patched/, patch unicodetables.rs with 733 Unicode 17.0 Script=Unknown intervals computed from Scripts.txt
   - Add [patch] to Cargo.toml to use local patched copy
1 parent 55373b8 commit 7a9aaa7

60 files changed

Lines changed: 86939 additions & 14 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,6 @@ harness = false
6363
name = "bigint_bench"
6464
path = "benches/bigint_bench.rs"
6565
harness = false
66+
67+
[patch."https://github.com/ssrlive/regress.git"]
68+
regress = { path = "regress-patched" }

ci/runner.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ const SLOW_TESTS = [
352352

353353
// Skip entire directories whose tests are too slow
354354
const SLOW_DIRS = [
355-
'built-ins/RegExp/property-escapes/generated',
355+
// 'built-ins/RegExp/property-escapes/generated',
356356
];
357357

358358
/*

regress-patched/.cargo-ok

Whitespace-only changes.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
---
2+
version: 2
3+
updates:
4+
- package-ecosystem: "github-actions"
5+
directory: "/"
6+
schedule:
7+
interval: "daily"
8+
- package-ecosystem: "cargo"
9+
directory: "/"
10+
schedule:
11+
interval: "daily"
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
name: autofix.ci
2+
on:
3+
workflow_call:
4+
pull_request:
5+
push:
6+
branches: [ "main" ]
7+
permissions:
8+
contents: read
9+
10+
jobs:
11+
autofix:
12+
runs-on: ubuntu-latest
13+
steps:
14+
- uses: actions/checkout@v5
15+
- uses: actions/cache@v5
16+
with:
17+
path: |
18+
~/.cargo/bin/
19+
~/.cargo/registry/index/
20+
~/.cargo/registry/cache/
21+
~/.cargo/git/db/
22+
target/
23+
key: autofix-${{ hashFiles('**/Cargo.lock') }}
24+
25+
- run: rustup toolchain install ${{ env.rust_clippy }} --profile minimal --component rustfmt --component clippy
26+
- run: rustup default ${{ env.rust_clippy }}
27+
28+
- run: cargo clippy --fix --workspace
29+
- run: cargo fmt --all
30+
31+
- uses: autofix-ci/action@7a166d7532b277f34e16238930461bf77f9d7ed8
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
on:
2+
pull_request:
3+
branches:
4+
- "**"
5+
push:
6+
branches:
7+
- "**"
8+
9+
name: Continuous integration
10+
11+
jobs:
12+
check:
13+
name: Check
14+
strategy:
15+
matrix:
16+
os: [ubuntu-latest, windows-latest, macos-latest]
17+
runs-on: ${{ matrix.os }}
18+
steps:
19+
- uses: actions/checkout@v5
20+
- uses: dtolnay/rust-toolchain@stable
21+
- uses: Swatinem/rust-cache@v2
22+
- run: cargo check --verbose
23+
24+
test:
25+
name: Test Suite
26+
strategy:
27+
matrix:
28+
os: [ubuntu-latest, windows-latest, macos-latest]
29+
features:
30+
- ""
31+
- "index-positions"
32+
- "prohibit-unsafe"
33+
- "utf16"
34+
- "index-positions,prohibit-unsafe"
35+
- "index-positions,utf16"
36+
- "prohibit-unsafe,utf16"
37+
- "index-positions,prohibit-unsafe,utf16"
38+
runs-on: ${{ matrix.os }}
39+
steps:
40+
- uses: actions/checkout@v5
41+
- uses: dtolnay/rust-toolchain@stable
42+
- uses: Swatinem/rust-cache@v2
43+
- run: cargo test --features "${{ matrix.features }}" --verbose
44+
45+
clippy:
46+
name: Clippy
47+
runs-on: ubuntu-latest
48+
steps:
49+
- uses: actions/checkout@v5
50+
- uses: dtolnay/rust-toolchain@stable
51+
with:
52+
components: clippy
53+
- uses: Swatinem/rust-cache@v2
54+
- run: cargo clippy --all-targets --features "backend-pikevm,std,index-positions,prohibit-unsafe,utf16" -- -D warnings
55+
56+
test-nightly:
57+
name: Test (Nightly)
58+
strategy:
59+
matrix:
60+
os: [ubuntu-latest, windows-latest, macos-latest]
61+
runs-on: ${{ matrix.os }}
62+
steps:
63+
- uses: actions/checkout@v5
64+
- uses: dtolnay/rust-toolchain@nightly
65+
- uses: Swatinem/rust-cache@v2
66+
- run: cargo test --all-features --verbose
67+
68+
doc:
69+
name: Documentation
70+
runs-on: ubuntu-latest
71+
steps:
72+
- uses: actions/checkout@v5
73+
- uses: dtolnay/rust-toolchain@stable
74+
- uses: Swatinem/rust-cache@v2
75+
- run: cargo doc --verbose --document-private-items
76+
env:
77+
RUSTDOCFLAGS: -D warnings

regress-patched/.gitignore

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/target
2+
**/*.rs.bk
3+
Cargo.lock
4+
.vscode
5+
.vscode/*
6+
.DS_Store
7+
regexp-pcre.js
8+
9+
.claude/
10+
CLAUDE.md
11+
12+
# Unicode Database Files
13+
DerivedCoreProperties.txt
14+
DerivedBinaryProperties.txt
15+
DerivedGeneralCategory.txt
16+
DerivedNormalizationProps.txt
17+
CaseFolding.txt
18+
emoji-data.txt
19+
PropList.txt
20+
Scripts.txt

regress-patched/Cargo.toml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
[package]
2+
name = "regress"
3+
version = "0.11.0"
4+
authors = ["ridiculousfish <corydoras@ridiculousfish.com>"]
5+
description = "A regular expression engine targeting EcmaScript syntax"
6+
license = "MIT OR Apache-2.0"
7+
repository = "https://github.com/ridiculousfish/regress"
8+
keywords = ["regex", "regexp"]
9+
edition = "2024"
10+
readme = "README.md"
11+
12+
[workspace]
13+
members = ["regress-tool", "gen-unicode", "."]
14+
default-members = ["regress-tool", "."]
15+
16+
[profile.release]
17+
18+
[features]
19+
default = ["backend-pikevm", "std"]
20+
21+
std = ["memchr/std"]
22+
23+
# Enables the PikeVM backend.
24+
backend-pikevm = []
25+
26+
# Prefers indexes to pointers for bytecode IP and string positions, for the paranoid.
27+
index-positions = []
28+
29+
# Prohibits all uses of unsafe code, for the paranoid.
30+
prohibit-unsafe = []
31+
32+
# Enables UTF-16 support. This disables some optimizations, so it should only be used when necessary.
33+
utf16 = []
34+
35+
# Enables Pattern trait implementation for str::find, str::contains, etc.
36+
pattern = []
37+
38+
[dependencies]
39+
hashbrown = "0.16.0"
40+
memchr = { version = "2.4.0", default-features = false }

0 commit comments

Comments
 (0)