@@ -458,7 +458,7 @@ pub fn scan_and_mark_chemistry(mathml: Element) -> bool {
458458 }
459459 }
460460 }
461- debug ! ( "...after marking:\n {}" , mml_to_string( child) ) ;
461+ // debug!("...after marking:\n{}", mml_to_string(child));
462462
463463 if child. attribute ( CHEM_FORMULA ) . is_none ( ) && child. attribute ( CHEM_EQUATION ) . is_none ( ) {
464464 if !has_maybe_chemistry ( mathml) {
@@ -590,48 +590,52 @@ fn is_changed_after_unmarking_chemistry(mathml: Element) -> bool {
590590 // there could be no preceding children due to canonicalization creating mrows (see issue #303), so we might need to use the parent, etc.
591591 while preceding_children. is_empty ( ) {
592592 preceding_children = parent. preceding_siblings ( ) ;
593- parent = get_parent ( parent) ;
594593 if name ( parent) == "math" {
595- panic ! ( "is_changed_after_unmarking_chemistry: error no preceding children to merge. mathml= \n {}" , mml_to_string ( mathml ) ) ;
594+ break ; // consider {SIN}^{-1} -- no preceding child
596595 }
596+ parent = get_parent ( parent) ;
597597 }
598598
599- // deal with the first element (if it needs unwrapping, it has only prescripts)
600- let first_element_of_split = as_element ( preceding_children[ preceding_children. len ( ) -1 ] ) ;
601- // debug!("first_element_of_split: \n{}", mml_to_string(first_element_of_split));
602- if name ( first_element_of_split) == "mmultiscripts" {
603- // take the base and make it the first child of preceding_children (what will get merged)
604- // put the rest of the elements (the prescripts) at the end of the parent last element (mathml) which must be an mmultiscripts
605- let first_element_children = first_element_of_split. children ( ) ;
606- assert_eq ! ( name( mathml) , "mmultiscripts" ) ;
607- let mut script_children = mathml. children ( ) ;
608- assert_eq ! ( name( as_element( script_children[ 0 ] ) ) , "mi" ) ;
609- assert ! ( !script_children. len( ) . is_multiple_of( 2 ) ) ; // doesn't have <mprescripts/>
610- script_children. push ( first_element_children[ 1 ] ) ; // mprescripts
611- script_children. push ( first_element_children[ 2 ] ) ; // prescripts subscript
612- script_children. push ( first_element_children[ 3 ] ) ; // prescripts superscript
613-
614- let base_of_first_element = first_element_children[ 0 ] ; // base
615- assert_eq ! ( name( as_element( base_of_first_element) ) , "mi" ) ;
616- let script_base = as_element ( script_children[ 0 ] ) ;
617- let mut merged_base_text = as_text ( as_element ( base_of_first_element) ) . to_string ( ) ;
618- merged_base_text. push_str ( as_text ( script_base) ) ;
619- script_base. set_text ( & merged_base_text) ;
620- script_base. remove_attribute ( "mathvariant" ) ;
621- script_base. remove_attribute ( ADDED_ATTR_VALUE ) ;
622- script_base. remove_attribute ( MAYBE_CHEMISTRY ) ;
623- script_base. remove_attribute ( SPLIT_TOKEN ) ;
624- mathml. replace_children ( script_children) ;
625-
626- first_element_of_split. remove_from_parent ( ) ;
627- return true ;
599+ let mut new_script_children = vec ! [ ] ;
600+ if !preceding_children. is_empty ( ) {
601+ // deal with the first element (if it needs unwrapping, it has only prescripts)
602+ let first_element_of_split = as_element ( preceding_children[ preceding_children. len ( ) -1 ] ) ;
603+ // debug!("first_element_of_split: \n{}", mml_to_string(first_element_of_split));
604+ if name ( first_element_of_split) == "mmultiscripts" {
605+ // take the base and make it the first child of preceding_children (what will get merged)
606+ // put the rest of the elements (the prescripts) at the end of the parent last element (mathml) which must be an mmultiscripts
607+ let first_element_children = first_element_of_split. children ( ) ;
608+ assert_eq ! ( name( mathml) , "mmultiscripts" ) ;
609+ let mut script_children = mathml. children ( ) ;
610+ assert_eq ! ( name( as_element( script_children[ 0 ] ) ) , "mi" ) ;
611+ assert ! ( !script_children. len( ) . is_multiple_of( 2 ) ) ; // doesn't have <mprescripts/>
612+ script_children. push ( first_element_children[ 1 ] ) ; // mprescripts
613+ script_children. push ( first_element_children[ 2 ] ) ; // prescripts subscript
614+ script_children. push ( first_element_children[ 3 ] ) ; // prescripts superscript
615+
616+ let base_of_first_element = first_element_children[ 0 ] ; // base
617+ assert_eq ! ( name( as_element( base_of_first_element) ) , "mi" ) ;
618+ let script_base = as_element ( script_children[ 0 ] ) ;
619+ let mut merged_base_text = as_text ( as_element ( base_of_first_element) ) . to_string ( ) ;
620+ merged_base_text. push_str ( as_text ( script_base) ) ;
621+ script_base. set_text ( & merged_base_text) ;
622+ script_base. remove_attribute ( "mathvariant" ) ;
623+ script_base. remove_attribute ( ADDED_ATTR_VALUE ) ;
624+ script_base. remove_attribute ( MAYBE_CHEMISTRY ) ;
625+ script_base. remove_attribute ( SPLIT_TOKEN ) ;
626+ mathml. replace_children ( script_children) ;
627+
628+ first_element_of_split. remove_from_parent ( ) ;
629+ return true ;
630+ }
631+ new_script_children. push ( ChildOfElement :: Element ( first_element_of_split) ) ;
628632 }
633+ debug ! ( "mathml after handling preceding children:\n {}" , mml_to_string( mathml) ) ;
629634 let mut children_of_script = mathml. children ( ) ;
630635 let split_child = as_element ( children_of_script[ 0 ] ) ;
631- let mut new_script_children = vec ! [ ChildOfElement :: Element ( first_element_of_split) ] ;
632636 new_script_children. append ( & mut children_of_script) ;
633637 mathml. replace_children ( new_script_children) ; // temporarily has bad number of children
634- // debug!("After making bad script:\n{}", mml_to_string(mathml));
638+ debug ! ( "After making bad script:\n {}" , mml_to_string( mathml) ) ;
635639 if let Err ( err) = merge_element ( split_child) {
636640 panic ! ( "{}" , err) ;
637641 }
@@ -1370,7 +1374,7 @@ pub fn likely_adorned_chem_formula(mathml: Element) -> isize {
13701374
13711375 let mut empty_superscript = false ;
13721376 if tag_name == "msup" || tag_name == "msubsup" {
1373- // debug!("likely_adorned_chem_formula: mathml\n{}", mml_to_string(mathml));
1377+ debug ! ( "likely_adorned_chem_formula: mathml\n {}" , mml_to_string( mathml) ) ;
13741378 let superscript = as_element ( children[ if tag_name == "msup" { 1 } else { 2 } ] ) ;
13751379 empty_superscript = name ( superscript) == "mtext" && as_text ( superscript) . trim ( ) . is_empty ( ) ;
13761380 if !empty_superscript {
@@ -1411,25 +1415,30 @@ pub fn likely_adorned_chem_formula(mathml: Element) -> isize {
14111415 if is_adorned_electron ( children[ 0 ] , prescripts) {
14121416 return 100 ; // very likely chemistry
14131417 }
1414-
1418+ let base = as_element ( children[ 0 ] ) ;
1419+ let base_name = name ( base) ;
1420+ let atomic_number = if matches ! ( base_name, "mi" | "mtext" ) &&
1421+ let Some ( atomic_number) = CHEMICAL_ELEMENT_ATOMIC_NUMBER . get ( as_text ( base) ) {
1422+ * atomic_number
1423+ } else {
1424+ return NOT_CHEMISTRY ;
1425+ } ;
14151426 if pre_superscript_name == "mo" {
14161427 // Lewis dot prescript case
14171428 if pre_subscript_name != "none" {
14181429 return NOT_CHEMISTRY ;
14191430 }
14201431 likelihood += likely_chem_superscript ( pre_superscript) ;
14211432 } else if pre_superscript_name == "mn" { // must have a pre-superscript (neutrons + protons)
1422- // fix could make sure they are integers
1423- likelihood += 1 ; // looking like an atomic number
1424- if pre_subscript_name == "mn" {
1425- // make sure the atomic number matches the base
1426- let base = as_element ( children[ 0 ] ) ;
1427- let base_name = name ( base) ;
1428- if ( base_name == "mi" || base_name == "mtext" ) &&
1429- let Some ( atomic_number) = CHEMICAL_ELEMENT_ATOMIC_NUMBER . get ( as_text ( base) ) &&
1430- as_text ( pre_subscript) == atomic_number. to_string ( ) {
1431- likelihood = CHEMISTRY_THRESHOLD ;
1432- }
1433+ if let Some ( mass) = as_text ( pre_superscript) . parse :: < u32 > ( ) . ok ( ) {
1434+ // "drip line" is 1.5 * atomic_number < mass < 3.5 * atomic_number -- it is possible to be outside of this range, but VERY unlikely
1435+ // to avoid floating point, we multiply by 2 and compare to 3 and 7
1436+ if 3 * atomic_number < 2 * mass && 2 * mass < 7 * atomic_number {
1437+ likelihood += 3 ;
1438+ }
1439+ }
1440+ if pre_subscript_name == "mn" && as_text ( pre_subscript) == atomic_number. to_string ( ) {
1441+ likelihood = CHEMISTRY_THRESHOLD ;
14331442 }
14341443 } else {
14351444 return NOT_CHEMISTRY ;
@@ -1476,6 +1485,7 @@ pub fn likely_adorned_chem_formula(mathml: Element) -> isize {
14761485 likelihood += likely_chem_formula ( base) ;
14771486 }
14781487
1488+ debug ! ( "returning from likely_adorned_chem_formula: likelihood={}, mathml\n {}" , likelihood, mml_to_string( mathml) ) ;
14791489 return likelihood;
14801490
14811491
@@ -2803,14 +2813,14 @@ mod chem_tests {
28032813 </mrow>
28042814 </mrow>
28052815 </math>" ;
2806- let target = "<math>
2807- <mmultiscripts data-previous-space-width='-0.083'>
2808- <mi mathvariant='normal'>U</mi>
2809- <mprescripts></mprescripts>
2810- <none/ >
2811- <mn>238</mn>
2816+ let target = " <math>
2817+ <mmultiscripts data-previous-space-width='-0.083' data-chem-formula='5' >
2818+ <mi mathvariant='normal' data-chem-element='2 '>U</mi>
2819+ <mprescripts></mprescripts>
2820+ <none></none >
2821+ <mn>238</mn>
28122822 </mmultiscripts>
2813- </math>" ;
2823+ </math>" ;
28142824 assert ! ( are_strs_canonically_equal( test, target, & [ ] ) ) ;
28152825 }
28162826
@@ -3024,15 +3034,15 @@ mod chem_tests {
30243034 </mrow>
30253035 </math>" ;
30263036 let target = "<math>
3027- <mrow data-chem-formula='7 '>
3028- <mmultiscripts data-previous-space-width='-0.083' data-chem-formula='3 '>
3037+ <mrow data-chem-formula='11 '>
3038+ <mmultiscripts data-previous-space-width='-0.083' data-chem-formula='5 '>
30293039 <mi mathvariant='normal' data-chem-element='2'>O</mi>
30303040 <mprescripts></mprescripts>
30313041 <none></none>
30323042 <mn>18</mn>
30333043 </mmultiscripts>
30343044 <mo data-changed='added' data-chem-formula-op='0'>⁣</mo>
3035- <mmultiscripts data-previous-space-width='0.027999999999999997' data-chem-formula='3 '>
3045+ <mmultiscripts data-previous-space-width='0.027999999999999997' data-chem-formula='5 '>
30363046 <mi mathvariant='normal' data-chem-element='2'>O</mi>
30373047 <mprescripts></mprescripts>
30383048 <none></none>
0 commit comments