Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion experimental/internal/lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/bufbuild/protocompile/experimental/source"
"github.com/bufbuild/protocompile/experimental/token"
"github.com/bufbuild/protocompile/experimental/token/keyword"
"github.com/bufbuild/protocompile/internal/decimal"
"github.com/bufbuild/protocompile/internal/ext/stringsx"
)

Expand Down Expand Up @@ -109,7 +110,8 @@ type lexer struct {
cursor, count int
braces []token.ID
scratch []byte
scratchFloat *big.Float
scratchInt big.Int
Comment thread
doriable marked this conversation as resolved.
scratchDec *decimal.Decimal

// Used for determining longest runs of unrecognized tokens.
badBytes int
Expand Down
21 changes: 4 additions & 17 deletions experimental/internal/lexer/math.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,31 +15,22 @@
package lexer

import (
"math"
"math/big"
"math/bits"

"github.com/bufbuild/protocompile/internal/ext/unicodex"
)

// log2Table caches log2(k) for k in 1..16; the value for k is stored at
// index k-1, so callers index it with base-1.
var log2Table = func() [16]float64 {
	var table [16]float64
	for idx := 0; idx < len(table); idx++ {
		table[idx] = math.Log2(float64(idx + 1))
	}
	return table
}()

type parseIntResult struct {
small uint64
big *big.Float
big *big.Int
hasThousands bool
}

// parseInt parses an integer into a uint64 or, on overflow, into a big.Int.
//
// This function ignores any thousands separator underscores in digits.
func parseInt(digits string, base byte) (result parseIntResult, ok bool) {
var bigBase, bigDigit *big.Float
for _, r := range digits {
if r == '_' {
result.hasThousands = true
Expand All @@ -61,16 +52,12 @@ func parseInt(digits string, base byte) (result parseIntResult, ok bool) {
}

// We overflowed, so we need to spill into a big.Int.
result.big = new(big.Float)
result.big = new(big.Int)
result.big.SetUint64(result.small)

bigBase = new(big.Float).SetUint64(uint64(base)) // Memoize converting the base.
bigDigit = new(big.Float)
}

result.big.Mul(result.big, bigBase)
result.big.Add(result.big, bigDigit.SetUint64(uint64(digit)))
result.big.Mul(result.big, big.NewInt(int64(base)))
result.big.Add(result.big, big.NewInt(int64(digit)))
}

return result, true
}
91 changes: 23 additions & 68 deletions experimental/internal/lexer/number.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,15 @@ package lexer
import (
"fmt"
"math"
"math/big"
"regexp"
"strconv"
"strings"
"unicode"

"github.com/bufbuild/protocompile/experimental/internal/errtoken"
"github.com/bufbuild/protocompile/experimental/internal/taxa"
"github.com/bufbuild/protocompile/experimental/internal/tokenmeta"
"github.com/bufbuild/protocompile/experimental/token"
"github.com/bufbuild/protocompile/internal/decimal"
"github.com/bufbuild/protocompile/internal/ext/unicodex"
"github.com/bufbuild/protocompile/internal/ext/unsafex"
)
Expand Down Expand Up @@ -145,11 +144,11 @@ func lexNumber(l *lexer) token.Token {
result, ok := parseInt(digits, base)
switch {
case !ok:
if l.scratchFloat == nil {
l.scratchFloat = new(big.Float)
if l.scratchDec == nil {
l.scratchDec = new(decimal.Decimal)
}

v := l.scratchFloat
v := l.scratchDec
meta := token.MutateMeta[tokenmeta.Number](tok)

// Convert legacyOctal values that are *not* pure integers into decimal
Expand All @@ -162,56 +161,14 @@ func lexNumber(l *lexer) token.Token {
meta.IsFloat = strings.ContainsAny(digits, ".-") // Positive exponents are not necessarily floats.
meta.ThousandsSep = strings.Contains(digits, "_")

// Wrapper over big.Float.Parse that ensures we never round. big.Float
// does not have a parse mode that uses maximum precision for that
// input, so we infer the correct precision from the size of the
// mantissa and exponent.
parse := func(v *big.Float, digits string) (*big.Float, error) {
n := tok.AsNumber()
mant := n.Mantissa().Text()
exp := n.Exponent().Text()

// Convert digits into binary digits. Note that digits in base b
// is log_b(n). Note that, per the log base change formula:
//
// log_2(n) = log_b(n)/log_b(2)
// log_b(2) = log_2(2)/log_2(b) = 1/log_2(b)
//
// Thus, we want to multiply by log_2(b), which we precompute in
// a table.
prec := float64(len(mant)) * log2Table[base-1]
// If there is an exponent, add it to the precision.
if exp != "" {
// Convert to the right base and add it to prec.
exp, _ := strconv.ParseInt(exp, 0, 64)
prec += math.Abs(float64(exp)) * log2Table[expBase-1]
}

v.SetPrec(uint(math.Ceil(prec)))
_, _, err := v.Parse(digits, 0)
return v, err
}

var err error
switch base {
case 10:
match := decFloat.FindStringSubmatch(digits)
if match == nil {
if !decFloat.MatchString(digits) {
goto fail
}

if expBase != 2 {
v, err = parse(v, digits)
break
}

v, err = parse(v, match[1])
exp, err := strconv.ParseInt(match[3], 10, 64)
if err != nil {
exp = math.MaxInt
}
exp += int64(v.MantExp(nil))
v.SetMantExp(v, int(exp))
v, err = v.Parse(digits)

case 16:
if !hexFloat.MatchString(digits) {
Expand All @@ -223,7 +180,7 @@ func lexNumber(l *lexer) token.Token {
l.scratch = append(l.scratch, digits...)
digits := unsafex.StringAlias(l.scratch)

v, err = parse(v, digits)
v, err = v.Parse(digits)

default:
goto fail
Expand All @@ -233,36 +190,34 @@ func lexNumber(l *lexer) token.Token {
goto fail
}

// We want this to overflow to Infinity as needed, which ParseFloat
// We want this to overflow to Infinity as needed, which Float64
// will do for us. Otherwise it will ties-to-even as the
// protobuf.com spec requires.
//
// ParseFloat itself says it "returns the nearest floating-point
// number rounded using IEEE754 unbiased rounding", which is just a
// weird, non-standard way to say "ties-to-even".
if meta.IsFloat {
f64, acc := v.Float64()
if acc != big.Exact {
meta.Big = v
l.scratchFloat = nil
} else {
meta.Word = math.Float64bits(f64)
l.scratchFloat.SetUint64(0)
switch {
case !meta.IsFloat && v.IsInt():
n := v.Int(&l.scratchInt)
if n.IsUint64() {
meta.Word = n.Uint64()
return tok
}
} else {
u64, acc := v.Uint64()
if acc != big.Exact {
meta.Big = v
l.scratchFloat = nil
} else {
meta.Word = u64
l.scratchFloat.SetUint64(0)
case v.IsFloat():

if f, exact := v.Float64(); exact {
meta.Word = math.Float64bits(f)
return tok
}
}

meta.Big = v
l.scratchDec = nil
return tok

case result.big != nil:
token.MutateMeta[tokenmeta.Number](tok).Big = result.big
token.MutateMeta[tokenmeta.Number](tok).Big = new(decimal.Decimal).ReuseInt(result.big)

case base == 10 && !result.hasThousands:
// We explicitly do not call SetValue for the most common case of base
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
17 Space Unknown 122:123 009:005 "\n"
18 Number Unknown 123:125 010:001 "08" int:0/8/1 pre:"0"
19 Space Unknown 125:126 010:003 "\n"
20 Number Unknown 126:130 011:001 "08.9" fp:8.900390625/10/1
20 Number Unknown 126:130 011:001 "08.9" fp:8.9/10/1
21 Space Unknown 130:131 011:005 "\n"
22 Number Unknown 131:135 012:001 "0x0G" int:0/16/1 pre:"0x"
23 Space Unknown 135:136 012:005 "\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@
19 Space Unknown 130:131 010:003 "\n"
20 Number Unknown 131:134 011:001 "0.0" fp:0/10/1
21 Space Unknown 134:135 011:004 "\n"
22 Number Unknown 135:139 012:001 ".123" fp:0.1230010986328125/10/1
22 Number Unknown 135:139 012:001 ".123" fp:0.123/10/1
23 Space Unknown 139:140 012:005 "\n"
24 Number Unknown 140:147 013:001 "555.555" fp:555.5549926757812/10/1
24 Number Unknown 140:147 013:001 "555.555" fp:555.555/10/1
25 Space Unknown 147:148 013:008 "\n"
26 Number Unknown 148:157 014:001 "1.234e-12" fp:1.234e-12/10/10
27 Space Unknown 157:158 014:010 "\n"
28 Number Unknown 158:165 015:001 ".953e20" fp:9.53e+19/10/10
28 Number Unknown 158:165 015:001 ".953e20" fp:95300000000000000000/10/10
29 Space Unknown 165:166 015:008 "\n"
30 Number Unknown 166:171 016:001 "5E+40" int:50000000000000000000000000000000000000000/10/10
31 Space Unknown 171:172 016:006 "\n"
32 Number Unknown 172:177 017:001 "1p100" int:2535301200456458802993406410752/10/2
32 Number Unknown 172:177 017:001 "1p100" int:1267650600228229401496703205376/10/2
33 Space Unknown 177:178 017:006 "\n"
34 Number Unknown 178:188 018:001 "0x1.fffp10" fp:2047.75/16/2 pre:"0x"
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# kind keyword offsets linecol text
0 Number Unknown 000:034 001:001 "0xffffffffffffffffffffffffffffffff" int:340282366920938463463374607431768211456/16/1 pre:"0x"
0 Number Unknown 000:034 001:001 "0xffffffffffffffffffffffffffffffff" int:340282366920938463463374607431768211455/16/1 pre:"0x"
7 changes: 2 additions & 5 deletions experimental/internal/tokenmeta/meta.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package tokenmeta

import (
"math/big"
"github.com/bufbuild/protocompile/internal/decimal"
)

// Meta is a type defined in this package.
Expand All @@ -26,10 +26,7 @@ type Meta interface{ meta() }
type Number struct {
// Inlined storage for small int/float values.
Word uint64

// big.Float can represent any uint64 or float64 (except NaN), and any
// *big.Int, too.
Big *big.Float
Big *decimal.Decimal

// Length of a prefix or suffix on this integer.
// The prefix is the base prefix; the suffix is any identifier
Expand Down
3 changes: 2 additions & 1 deletion experimental/ir/lower_eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/bufbuild/protocompile/experimental/source"
"github.com/bufbuild/protocompile/experimental/token"
"github.com/bufbuild/protocompile/experimental/token/keyword"
"github.com/bufbuild/protocompile/internal/decimal"
"github.com/bufbuild/protocompile/internal/ext/iterx"
"github.com/bufbuild/protocompile/internal/ext/slicesx"
"github.com/bufbuild/protocompile/internal/ext/stringsx"
Expand Down Expand Up @@ -878,7 +879,7 @@ func (e *evaluator) checkIntBounds(args evalArgs, signed bool, bits int, neg boo
switch n := got.(type) {
case uint64:
v = n
case *big.Float:
case *decimal.Decimal:
// We assume that a *decimal.Decimal is always larger than a uint64.
tooLarge = true
default:
Expand Down
4 changes: 2 additions & 2 deletions experimental/ir/testdata/options/values/float.proto.fds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ file:
}
2: {
`0000000000000000000000000000000000000000000000800000000000000080000000205fa00242`
`0000d8d9df7cdb3d0000000000002440000000000000f07f000000000000f0ff000000000000f87f`
`bbbdd7d9df7cdb3d0000000000002440000000000000f07f000000000000f0ff000000000000f87f`
`000000000000f8ff000000000000f07f000000000000f0ff000000000000f87f000000000000f8ff`
`182d4454fb210940ffffffffffffef7f0000000000001000000000000000b03c000000000000f07f`
`182d4454fb210940feffffffffffef7f0000000000001000000000000000b03c000000000000f07f`
`000000000000f0ff`
}
}
Expand Down
2 changes: 1 addition & 1 deletion experimental/parser/legalize_option.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ func legalizeLiteral(p *parser, value ast.ExprLiteral) {
if what == taxa.Int {
switch base {
case 2:
v, _ := n.Value().Int(nil)
v := n.Value().Int(nil)
d.Apply(
report.SuggestEdits(value, "use a hexadecimal literal instead", report.Edit{
Start: 0,
Expand Down
5 changes: 2 additions & 3 deletions experimental/parser/testdata/parser/option/values.proto.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ decls:
- def:
kind: KIND_OPTION
name.components: [{ ident: "x" }]
value.literal.float_value: 42.3984375
value.literal.float_value: 42.4
- def:
kind: KIND_OPTION
name.components: [{ ident: "x" }]
Expand Down Expand Up @@ -94,8 +94,7 @@ decls:
name.components: [{ ident: "x" }]
value.dict.entries:
- { key.path.components: [{ ident: "x" }], value.literal.int_value: 0 }
- key.path.components: [{ ident: "x" }]
value.literal.float_value: 42.3984375
- { key.path.components: [{ ident: "x" }], value.literal.float_value: 42.4 }
- key.path.components: [{ ident: "x" }]
value.path.components: [{ ident: "inf" }]
- key.path.components: [{ ident: "x" }]
Expand Down
Loading
Loading