Skip to content

Commit 01575c3

Browse files
committed
canonicalize: rewrote merge_whitespace. The main change is that whitespace adjacent to an mtext node is now preferentially moved onto that mtext node.
Nemeth_Rules.yaml: changed the fill-in-the-blank rules so they no longer use the "fill in the blank" symbol for whitespace around mtext, because people often add that whitespace for spacing around a connective word. Fixes daisy#468
1 parent 7ed19c0 commit 01575c3

File tree

3 files changed

+111
-29
lines changed

3 files changed

+111
-29
lines changed

Rules/Braille/Nemeth/Nemeth_Rules.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,16 @@
88
variables: [MatchingWhitespace: "true()"]
99
replace:
1010
- test:
11-
- if: "@data-previous-space-width > 1.1"
11+
# people sometimes use mtext for spaces around text -- see no_omission_for_spaces_in_middle()
12+
- if: "@data-previous-space-width > 1.1 and not(self::m:mtext)"
1213
then: [t: "⠿"]
1314
# non-breaking space width; also avoid spaces added after commas in scripts (78_1)
1415
- else_if: "@data-previous-space-width >= 0.7 and
1516
($NewScriptContext='' or not(preceding-sibling::*[1][self::m:mo and .=',']))"
1617
then: [t: "W"]
1718
- x: "."
1819
- test:
19-
- if: "@data-following-space-width > 1.1"
20+
- if: "@data-following-space-width > 1.1 and not(self::m:mtext)"
2021
then: [t: "⠿"]
2122
# non-breaking space width; also avoid spaces added after commas in scripts (78_1)
2223
- else_if: "@data-following-space-width >= 0.7 and

src/canonicalize.rs

Lines changed: 48 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1840,40 +1840,61 @@ impl CanonicalizeContext {
18401840
return;
18411841
}
18421842

1843-
let mut i = 1; // we look back at previous child if we detect appropriate mtext -- prev child will always exist
1844-
let mut previous_child = as_element(children[0]);
1845-
let mut is_previous_child_whitespace = name(previous_child) == "mtext" && as_text(previous_child) == "\u{00A0}";
1843+
let mut i = 0;
1844+
let mut previous_mtext_with_width: Option<Element<'_>> = None; // prefer to spacing on previous mtext
1845+
let mut whitespace: Option<f64> = None;
18461846
while i < children.len() {
18471847
let child = as_element(children[i]);
18481848
let is_child_whitespace = name(child) == "mtext" && as_text(child) == "\u{00A0}";
1849-
// debug!("merge_whitespace: {}", mml_to_string(child));
1850-
if is_child_whitespace && is_previous_child_whitespace {
1851-
// grab the width of the previous and current child, add them together
1852-
let previous_width = previous_child.attribute_value("data-width").unwrap_or("0");
1853-
let child_width = child.attribute_value("data-width").unwrap_or("0");
1854-
let width = previous_width.parse::<f64>().unwrap_or(0.0)
1855-
+ child_width.parse::<f64>().unwrap_or(0.0);
1856-
// set the combined width on the previous child and remove the current child (don't inc 'i')
1857-
previous_child.set_attribute_value("data-width", &width.to_string());
1858-
children.remove(i);
1859-
// previous child is unchanged
1860-
} else if is_previous_child_whitespace {
1849+
debug!("merge_whitespace: i={}, whitespace={:?}, mtext set={} {}",
1850+
i, whitespace, previous_mtext_with_width.is_some(), mml_to_string(child));
1851+
if is_child_whitespace {
1852+
// update the running total of whitespace
1853+
let child_width = child.attribute_value("data-width").unwrap_or("0")
1854+
.parse::<f64>().unwrap_or(0.0) ;
1855+
whitespace = match whitespace {
1856+
None => Some(child_width),
1857+
Some(w) => Some(w + child_width),
1858+
};
1859+
if children.len() == 1 {
1860+
i += 1; // don't remove only child
1861+
} else {
1862+
children.remove(i); // remove the current child (don't inc 'i')
1863+
}
1864+
} else if let Some(ws) = whitespace {
18611865
// done with sequence of whitespaces
1862-
child.set_attribute_value("data-previous-space-width", previous_child.attribute_value("data-width").unwrap());
1863-
children.remove(i-1);
1864-
previous_child = child;
1865-
is_previous_child_whitespace = false;
1866+
if let Some(prev_mtext) = previous_mtext_with_width {
1867+
// prefer to set on previous mtext
1868+
prev_mtext.set_attribute_value("data-following-space-width", (ws).to_string().as_str());
1869+
previous_mtext_with_width = None;
1870+
} else {
1871+
child.set_attribute_value("data-previous-space-width", ws.to_string().as_str());
1872+
if name(child) == "mtext" {
1873+
previous_mtext_with_width = Some(child);
1874+
}
1875+
}
1876+
whitespace = None;
1877+
i += 1;
18661878
} else {
18671879
i += 1;
1868-
previous_child = child;
1869-
is_previous_child_whitespace = is_child_whitespace;
1880+
previous_mtext_with_width = None;
18701881
}
18711882
}
1872-
if children.len() > 1 && is_previous_child_whitespace {
1873-
// last child in mrow (= previous_child) is white space -- mark space *after*
1874-
let non_space_child = as_element(children[children.len()-2]);
1875-
non_space_child.set_attribute_value("data-following-space-width", previous_child.attribute_value("data-width").unwrap());
1876-
children.remove(children.len()-1);
1883+
debug!(" after loop: whitespace={:?}, {}", whitespace, mml_to_string(as_element(children[children.len()-1])));
1884+
if let Some(mut ws) = whitespace {
1885+
// last child in mrow is white space -- mark with space *after*
1886+
if children.len() == 1 {
1887+
// only child -- check to see if we need to set the space-width
1888+
let child = as_element(children[0]);
1889+
let child_width = child.attribute_value("data-width").unwrap_or("0").parse::<f64>().unwrap_or(0.0);
1890+
if (child_width - ws).abs() > 0.001 {
1891+
ws += child_width;
1892+
child.set_attribute_value("data-following-space-width", ws.to_string().as_str());
1893+
}
1894+
} else {
1895+
let non_space_child = as_element(children[children.len()-1]);
1896+
non_space_child.set_attribute_value("data-following-space-width", ws.to_string().as_str());
1897+
}
18771898
}
18781899
}
18791900

@@ -5572,7 +5593,7 @@ mod canonicalize_tests {
55725593
<mrow data-changed='added'>
55735594
<mi>cos</mi>
55745595
<mo data-changed='added'>&#x2061;</mo>
5575-
<mi data-previous-space-width='0.700'>x</mi>
5596+
<mi data-previous-space-width='0.7'>x</mi>
55765597
</mrow>
55775598
</math>";
55785599
assert!(are_strs_canonically_equal(test_str, target_str, &[]));

tests/braille/Nemeth/other.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,63 @@ fn find_baseline_indicator_bug_364() {
9393
};
9494
}
9595

96+
#[test]
97+
fn no_omission_for_spaces_at_start_or_end_single() {
98+
// http://github.com/TalkingCatSW/MathCAT/issues/468
99+
let expr = r#" <math><mtext> </mtext><mtext> </mtext><mi>x</mi><mtext> </mtext><mtext> </mtext></math>"#;
100+
test_braille("Nemeth", expr, "⠰⠭");
101+
}
102+
103+
#[test]
104+
fn no_omission_for_spaces_at_start() {
105+
// http://github.com/TalkingCatSW/MathCAT/issues/468
106+
let expr = r#"<math><mtext> </mtext><mtext> </mtext><mn>2</mn><mo>+</mo><mi>x</mi></math>"#;
107+
test_braille("Nemeth", expr, "⠼⠆⠬⠭");
108+
}
109+
110+
#[test]
111+
fn no_omission_for_spaces_at_end() {
112+
// http://github.com/TalkingCatSW/MathCAT/issues/468
113+
let expr = r#"<math><msup><mi>x</mi><mn>2</mn></msup><mo>+</mo><mn>9</mn>
114+
<mtext> </mtext><mtext> </mtext><mtext> </mtext><mtext> </mtext><mtext> </mtext></math>"#;
115+
test_braille("Nemeth", expr, "⠭⠘⠆⠐⠬⠔");
116+
}
117+
118+
#[test]
119+
fn no_omission_for_spaces_in_middle() {
120+
// http://github.com/TalkingCatSW/MathCAT/issues/468
121+
let expr = r#"<math xmlns="http://www.w3.org/1998/Math/MathML" display="block">
122+
<mstyle displaystyle="true" scriptlevel="0">
123+
<mfrac>
124+
<mn>1</mn>
125+
<mn>2</mn>
126+
</mfrac>
127+
</mstyle>
128+
<mo stretchy="false">(</mo>
129+
<mi>p</mi>
130+
<mo>+</mo>
131+
<mi>q</mi>
132+
<mo stretchy="false">)</mo>
133+
<mtext>&#xA0;</mtext>
134+
<mtext>&#xA0;</mtext>
135+
<mtext>&#xA0;</mtext>
136+
<mtext>&#xA0;</mtext>
137+
<mtext>or</mtext>
138+
<mtext>&#xA0;</mtext>
139+
<mtext>&#xA0;</mtext>
140+
<mtext>&#xA0;</mtext>
141+
<mtext>&#xA0;</mtext>
142+
<mstyle displaystyle="true" scriptlevel="0">
143+
<mfrac>
144+
<mrow>
145+
<mi>p</mi>
146+
<mo>+</mo>
147+
<mi>q</mi>
148+
</mrow>
149+
<mn>2</mn>
150+
</mfrac>
151+
</mstyle>
152+
</math>"#;
153+
test_braille("Nemeth", expr, "⠹⠂⠌⠆⠼⠷⠏⠬⠟⠾⠀⠕⠗⠀⠹⠏⠬⠟⠌⠆⠼");
154+
}
155+

0 commit comments

Comments
 (0)