diff --git a/README.md b/README.md
index a879ff5..b88f0bb 100644
--- a/README.md
+++ b/README.md
@@ -81,6 +81,15 @@ The engine should support every construction the parser does (see below).
 
 Currently the only font set supported is Computer Modern.
 
+# Advanced Customization
+
+The global `MathTeXEngine.UNSPACE_BINARY_OPERATORS_HEURISTIC` is a `Ref` holding a boolean flag that enables or disables a heuristic
+which removes the spacing around binary operator symbols when they are used as unary prefixes instead:
+```julia
+MathTeXEngine.UNSPACE_BINARY_OPERATORS_HEURISTIC[] = true # default
+```
+If the flag is set to `true`, `L"+1"` has no additional spacing around `+`, while `L"1+2"` keeps it.
+
 ## Engine examples
 
 ### Basic examples
diff --git a/src/MathTeXEngine.jl b/src/MathTeXEngine.jl
index ffb03f3..91da575 100644
--- a/src/MathTeXEngine.jl
+++ b/src/MathTeXEngine.jl
@@ -28,6 +28,13 @@ export glyph_index
 # Reexport from LaTeXStrings
 export @L_str
 
+# Global settings flags
+
+## parser flag to indicate whether or not to (try to) remove space for
+## binary operator symbols if they are used as unary prefixes
+## (only in math mode)
+const UNSPACE_BINARY_OPERATORS_HEURISTIC = Ref(true)
+
 include("parser/tokenizer.jl")
 include("parser/texexpr.jl")
 include("parser/commands_data.jl")
diff --git a/src/parser/parser.jl b/src/parser/parser.jl
index ed13f3a..42bc780 100644
--- a/src/parser/parser.jl
+++ b/src/parser/parser.jl
@@ -59,7 +59,7 @@ end
 
 show_stack(stack) = show_stack(stdout, stack)
 
-function push_down!(stack)
+function push_down!(stack, inside_math=false)
     top = pop!(stack)
     if head(top) == :group
         # Replace empty groups by 0 spaces
@@ -70,6 +70,13 @@ function push_down!(stack)
             top = only(top.args)
         end
     end
+    if UNSPACE_BINARY_OPERATORS_HEURISTIC[] && inside_math && head(top) == :spaced
+        # A `:spaced` expression (mainly a binary operator) that has no plausible left
+        # operand before it is used as a unary prefix: drop its symmetric spacing.
+        if isempty(stack) || !_is_plausible_left_arg(first(stack))
+            top = only(top.args)
+        end
+    end
     push!(first(stack), top)
 
     if head(first(stack)) in [:subscript, :superscript]
@@ -79,10 +86,10 @@ function push_down!(stack)
         push!(first(stack).args, decorated)
     end
 
-    conclude_command!!(stack)
+    conclude_command!!(stack, inside_math)
 end
 
-function conclude_command!!(stack)
+function conclude_command!!(stack, inside_math=false)
     com = first(stack)
     head(com) != :command && return false
     nargs = length(com.args) - 1
@@ -90,7 +97,7 @@ function conclude_command!!(stack)
     if required_args(first(com.args)) == nargs
         pop!(stack)
         push!(stack, command_expr(com.args[1], com.args[2:end]))
-        push_down!(stack)
+        push_down!(stack, inside_math)
     end
 end
 
@@ -145,7 +152,7 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false)
             if token == dollar
                 if head(first(stack)) == :inline_math
                     inside_math = false
-                    push_down!(stack)
+                    push_down!(stack, inside_math)
                 else
                     inside_math = true
                     push!(stack, TeXExpr(:inline_math))
@@ -155,7 +162,7 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false)
                     throw(TeXParseError("unexpected new line", stack, length(tex), tex))
                 end
 
-                push_down!(stack)
+                push_down!(stack, inside_math)
                 push!(stack, TeXExpr(:line))
             elseif token == lcurly
                 push!(stack, TeXExpr(:group))
@@ -163,7 +170,7 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false)
                 if head(first(stack)) != :group
                     throw(TeXParseError("missing closing '}'", stack, pos, tex))
                 end
-                push_down!(stack)
+                push_down!(stack, inside_math)
             elseif token == left
                 push!(stack, TeXExpr(:delimited, delimiter(raw"\left", tex[pos:pos+len-1])))
             elseif token == right
@@ -183,7 +190,7 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false)
             elseif token == command
                 com_str = tex[pos:pos+len-1]
                 push!(stack, TeXExpr(:command, [com_str]))
-                conclude_command!!(stack)
+                conclude_command!!(stack, inside_math)
             elseif token == underscore || token == caret || token == primes
                 dec = (token == underscore) ? :subscript : :superscript
 
@@ -222,8 +229,9 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false)
                 else
                     expr = canonical_expr(c)
                 end
+
                 push!(stack, expr)
-                push_down!(stack)
+                push_down!(stack, inside_math)
             end
         catch err
             throw(TeXParseError("unexpected error", stack, pos, tex))
@@ -231,7 +239,7 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false)
     end
 
     if head(first(stack)) == :line
-        push_down!(stack)
+        push_down!(stack, inside_math)
     end
 
     if length(stack) > 1
@@ -245,3 +253,41 @@ function texparse(tex ; root = TeXExpr(:lines), showdebug = false)
         return lines
     end
 end
+
+"""
+    _is_plausible_left_arg(texpr)
+
+Return `true` if `texpr` can plausibly act as the left operand of a binary operator.
+
+Heads `:punctuation`, `:space`, `:function`, `:integral`, `:underover`, `:superscript`
+and `:subscript` return `false`, as does a `:delimiter` whose single argument is `(`,
+`[` or `<`. For `:inline_math`, `:group` and `:delimited` the last argument is
+inspected recursively; an empty one returns `false`. Anything else returns `true`.
+"""
+function _is_plausible_left_arg(texpr)
+    if head(texpr) in (:punctuation, :space)
+        ## punctuation or explicit spacing likely does not precede symbol for binary op
+        return false
+    elseif head(texpr) in (:function, :integral, :underover)
+        ## function without parentheses `\sin +1`
+        ## integral sign `∫ -1`
+        ## other unary symbols `∑ ± 1`
+        return false
+    elseif head(texpr) in (:superscript, :subscript)
+        ## sub- or superscripts without parenthesis
+        return false
+    elseif head(texpr) == :delimiter
+        ## beginning of parentheses group
+        if length(texpr.args)==1 && texpr.args[1] in ('(', '[', '<')
+            return false
+        end
+    elseif head(texpr) in (:inline_math, :group, :delimited)
+        ## look at last element within group expressions
+        if isempty(texpr.args)
+            return false # consistent with TeXExpr(:space, 0) in case of :group
+        else
+            return _is_plausible_left_arg(last(texpr.args))
+        end
+    end
+    return true
+end
diff --git a/test/parser.jl b/test/parser.jl
index 6370ec3..fb663df 100644
--- a/test/parser.jl
+++ b/test/parser.jl
@@ -187,6 +187,91 @@ end
             (:char, 'd')))
     end
 
+    @testset "Unspaced symbol" begin
+        MathTeXEngine.UNSPACE_BINARY_OPERATORS_HEURISTIC[] = true
+        ## keep space in binary operations
+        test_parse(raw"$2-1$", (
+            :inline_math,
+            (:digit, '2'),
+            (:spaced, (:symbol, '−')),
+            (:digit, '1')
+        ))
+        ## but remove if used as unary symbol
+        test_parse(raw"$-1$", (
+            :inline_math,
+            (:symbol, '−'),
+            (:digit, '1')
+        ))
+        ## same for spaced commands
+        test_parse(raw"$2\pm1$", (
+            :inline_math,
+            (:digit, '2'),
+            (:spaced, (:symbol, '±')),
+            (:digit, '1')
+        ))
+        test_parse(raw"$\pm1$", (
+            :inline_math,
+            (:symbol, '±'),
+            (:digit, '1')
+        ))
+
+        ## within parentheses:
+        test_parse(raw"$(2-1)$", (
+            :inline_math,
+            (:delimiter, "("),
+            (:digit, '2'),
+            (:spaced, (:symbol, '−')),
+            (:digit, '1'),
+            (:delimiter, ")"),
+        ))
+        test_parse(raw"$(-1)$", (
+            :inline_math,
+            (:delimiter, "("),
+            (:symbol, '−'),
+            (:digit, '1'),
+            (:delimiter, ")"),
+        ))
+        ## zero space/empty group removes binary spacing
+        test_parse(raw"$(2{}-1)$", (
+            :inline_math,
+            (:delimiter, "("),
+            (:digit, '2'),
+            (:space, 0.0),
+            (:symbol, '−'),
+            (:digit, '1'),
+            (:delimiter, ")"),
+        ))
+        ## exponents
+        test_parse(raw"$a^+$", (
+            :inline_math,
+            (:decorated,
+                (:char, 'a'),
+                nothing,
+                (:symbol, '+')
+            )
+        ))
+        test_parse(raw"$a^{+}$", (
+            :inline_math,
+            (:decorated,
+                (:char, 'a'),
+                nothing,
+                (:symbol, '+')
+            )
+        ))
+        test_parse(raw"$a^{1+2}$", (
+            :inline_math,
+            (:decorated,
+                (:char, 'a'),
+                nothing,
+                (:group,
+                    (:digit, '1'),
+                    (:spaced, (:symbol, '+')),
+                    (:digit, '2'),
+                )
+            )
+        ))
+    end
+
     @testset "Subscript and superscript" begin
         @test texparse(raw"a^2_3") == texparse(raw"a_3^2")
         @test texparse(raw"^7_b") == texparse(raw"{}^7_b")