Skip to content

Commit 11f7d8f

Browse files
authored
Refactor Float.round/2 algorithm for better performance (#15329)
The previous implementation noted the trade-off: ```elixir # This implementation is slow since it relies on big integers. # Faster implementations are available on more recent papers # and could be implemented in the future. ``` It computed `m * power_of_5(count)` where `count` could grow to ~104, producing 250-bit bignums for a single division. The cost grew with both float magnitude and target precision. ### Benchmark results 11 different float workloads, measured with Benchee on Apple M2. Reported times are the median μs of one iteration over all 11 workloads. precision | current | cox (this PR) | native_double | cox vs current | native_double vs current | native_double vs cox | |---|---|---|---|---|---|---| | 0 | 0.25 μs | 0.25 μs | 0.25 μs | 1.1× | 1.1× | ~tied | | 1 | 2.42 μs | 0.33 μs | 0.33 μs | **7.1×** | 7.0× | ~tied | | 2 | 2.67 μs | 0.63 μs | 0.33 μs | **3.8×** | 7.7× | 2.0× | | 3 | 4.29 μs | 0.63 μs | 0.33 μs | **5.8×** | 11.9× | 2.1× | | 5 | 5.50 μs | 0.67 μs | 0.33 μs | **6.8×** | 15.6× | 2.3× | | 8 | 7.21 μs | 0.96 μs | 0.38 μs | **7.5×** | 19.5× | 2.6× | | 12 | 7.75 μs | 0.96 μs | 0.38 μs | **8.2×** | 20.3× | 2.5× | | 15 | 8.58 μs | 1.58 μs | 0.38 μs | **5.3×** | 22.8× | 4.3× | Median μs per iteration over the 11-float mixed workload (per-call median ≈ value / 11). - **`current`** is the bignum-based implementation we're replacing - **`cox`** is this PR - **`native_double`** is included as a "what does this look like in pure native float arithmetic" reference (`:erlang.round(f * pow) / pow`) - it's 2-4× faster than Cox but incorrect on tie inputs because `5.5675 * 1000` rounds to `5567.5` in IEEE due to double-rounding (multiplication round + integer round), then bumps to `5568`. It's why we can't simply use the obvious approach. Cox preserves exact semantics by working on the float's true binary representation. #### Floats benchmarked ``` floats = [ {"small_pi", 3.141592653589793}, # typical small value {"common_money", 5.5675}, # tie-boundary case {"large_e10", 1.2345e10}, # large magnitude {"tiny_e_neg8", 1.2345e-8}, # very small magnitude {"near_zero", 0.00001}, # close to zero {"negative", -123.456789}, # negative sign {"integer_valued", 12.0}, # exactly representable integer {"tie_half", 12.5}, # half-tie {"tie_5675", 5.5675}, # documented tie example {"large_e30", 1.234e30}, # very large {"max_finite_ish", 1.7e308} # near max double ] ```
1 parent f4e1b34 commit 11f7d8f

2 files changed

Lines changed: 240 additions & 101 deletions

File tree

lib/elixir/lib/float.ex

Lines changed: 133 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -344,15 +344,12 @@ defmodule Float do
344344
345345
"""
346346
@spec round(float, precision_range) :: float
347-
# This implementation is slow since it relies on big integers.
348-
# Faster implementations are available on more recent papers
349-
# and could be implemented in the future.
350347
def round(float, precision \\ 0)
351348

352349
def round(float, 0) when float == 0.0, do: float
353350

354351
def round(float, 0) when is_float(float) do
355-
case float |> :erlang.round() |> :erlang.float() do
352+
case :erlang.round(float) * 1.0 do
356353
zero when zero == 0.0 and float < 0.0 -> -0.0
357354
rounded -> rounded
358355
end
@@ -366,135 +363,170 @@ defmodule Float do
366363
raise ArgumentError, invalid_precision_message(precision)
367364
end
368365

366+
# Decimal-place rounding via exact rational scaling. This is the bignum
367+
# core used by reference implementations like David M. Gay's "Correctly
368+
# Rounded Binary-Decimal and Decimal-Binary Conversions" (cited in the
369+
# @doc above), Python's round(), and Java's BigDecimal.setScale.
370+
#
371+
# 1. Decompose float exactly: |float| = mantissa / 2^shift.
372+
# 2. Scale exactly: |float| * 10^precision = mantissa * 10^precision / 2^shift.
373+
# Because precision is bounded to 0..15, the product fits in ~103 bits
374+
# (53-bit mantissa + ~50-bit power of ten) and BEAM bignums handle it
375+
# directly without approximation.
376+
# 3. Round the exact rational to an integer per the requested mode
377+
# (half_up / floor / ceil) using quotient and remainder.
378+
# 4. Emit the float closest to rounded_int / 10^precision:
379+
# - fast path: when rounded_int < 2^53, both operands are exactly
380+
# representable as floats and IEEE division is correctly rounded.
381+
# - slow path: bignum alignment + manual mantissa extraction with
382+
# round-to-nearest-even for the trailing bit.
383+
#
384+
# The integer-rounding decision (step 3) and the binary-emission decision
385+
# (step 4) are deliberately independent: step 3 picks the exact rational
386+
# the user asked for, step 4 picks the closest float to that rational.
387+
# Conflating them is the classic source of double-rounding bugs.
388+
#
389+
# Faster algorithms exist (Cox 2026's table-based uscale; Ryū / Schubfach
390+
# for round-trip printing) but target different problems or assume
391+
# fixed-width machine arithmetic that BEAM doesn't expose efficiently.
392+
# At precision <= 15, the exact path is small, easy to audit, and fast
393+
# enough that a more complex algorithm has not been justified by benchmarks.
369394
defp round(num, _precision, _rounding) when is_float(num) and num == 0.0, do: num
370395

371-
defp round(float, precision, rounding) do
372-
<<sign::1, exp::11, significant::52-bitstring>> = <<float::float>>
373-
{num, count} = decompose(significant, 1)
374-
count = count - exp + 1023
396+
defp round(float, precision, mode) do
397+
<<sign::1, exp::11, mantissa::52>> = <<float::float>>
375398

376399
cond do
377-
# Precision beyond 15 digits
378-
count >= 104 ->
379-
case rounding do
380-
:ceil when sign === 0 -> 1 / power_of_10(precision)
381-
:floor when sign === 1 -> -1 / power_of_10(precision)
382-
:ceil when sign === 1 -> -0.0
383-
:half_up when sign === 1 -> -0.0
384-
_ -> 0.0
385-
end
400+
# Subnormal — tiny but non-zero; treat per-mode (ceil(+) and floor(-) bump
401+
# to 10^-precision; everything else rounds to signed zero).
402+
exp == 0 ->
403+
tiny_round(sign, precision, mode)
386404

387-
# We are asking more precision than we have
388-
count <= precision ->
405+
# |float| >= 2^52 — has no fractional bits, return unchanged.
406+
exp - 1075 >= 0 ->
389407
float
390408

391409
true ->
392-
# Difference in precision between float and asked precision
393-
# We subtract 1 because we need to calculate the remainder too
394-
diff = count - precision - 1
395-
396-
# Get up to latest so we calculate the remainder
397-
power_of_10 = power_of_10(diff)
398-
399-
# Convert the numerand to decimal base
400-
num = num * power_of_5(count)
401-
402-
# Move to the given precision - 1
403-
num = div(num, power_of_10)
404-
div = div(num, 10)
405-
num = rounding(rounding, sign, num, div)
406-
407-
# Convert back to float without loss
408-
# https://www.exploringbinary.com/correct-decimal-to-floating-point-using-big-integers/
409-
den = power_of_10(precision)
410-
boundary = den <<< 52
411-
412-
cond do
413-
num == 0 and sign == 1 ->
414-
-0.0
415-
416-
num == 0 ->
417-
0.0
418-
419-
num >= boundary ->
420-
{den, exp} = scale_down(num, boundary, 52)
421-
decimal_to_float(sign, num, den, exp)
422-
423-
true ->
424-
{num, exp} = scale_up(num, boundary, 52)
425-
decimal_to_float(sign, num, den, exp)
426-
end
410+
mantissa = @power_of_2_to_52 ||| mantissa
411+
shift = 1075 - exp
412+
do_round(sign, mantissa, shift, precision, mode)
427413
end
428414
end
429415

430-
defp decompose(significant, initial) do
431-
decompose(significant, 1, 0, initial)
432-
end
433-
434-
defp decompose(<<1::1, bits::bitstring>>, count, last_count, acc) do
435-
decompose(bits, count + 1, count, (acc <<< (count - last_count)) + 1)
416+
# |float * 10^precision| < 0.5 — integer round is 0; ceil/floor still bump per sign.
417+
defp do_round(sign, _mantissa, shift, precision, mode) when shift >= 104 do
418+
tiny_round(sign, precision, mode)
436419
end
437420

438-
defp decompose(<<0::1, bits::bitstring>>, count, last_count, acc) do
439-
decompose(bits, count + 1, last_count, acc)
440-
end
441-
442-
defp decompose(<<>>, _count, last_count, acc) do
443-
{acc, last_count}
444-
end
421+
defp do_round(sign, mantissa, shift, precision, mode) do
422+
power = power_of_10(precision)
423+
product = mantissa * power
424+
half = 1 <<< (shift - 1)
425+
quotient = product >>> shift
426+
remainder = product - (quotient <<< shift)
427+
rounded_int = round_step(mode, sign, quotient, remainder, half)
445428

446-
defp scale_up(num, boundary, exp) when num >= boundary, do: {num, exp}
447-
defp scale_up(num, boundary, exp), do: scale_up(num <<< 1, boundary, exp - 1)
429+
cond do
430+
rounded_int == 0 ->
431+
signed_zero(sign)
448432

449-
defp scale_down(num, den, exp) do
450-
new_den = den <<< 1
433+
rounded_int < @power_of_2_to_52 <<< 1 ->
434+
# Both rounded_int and power fit in 53 bits, so IEEE float division
435+
# is correctly rounded.
436+
result = rounded_int / power
437+
if sign == 1, do: -result, else: result
451438

452-
if num < new_den do
453-
{den >>> 52, exp}
454-
else
455-
scale_down(num, new_den, exp + 1)
439+
true ->
440+
bignum_to_float(sign, rounded_int, power)
456441
end
457442
end
458443

459-
defp decimal_to_float(sign, num, den, exp) do
460-
quo = div(num, den)
461-
rem = num - quo * den
444+
defp round_step(:half_up, _sign, quotient, remainder, half) do
445+
if remainder >= half, do: quotient + 1, else: quotient
446+
end
462447

463-
tmp =
464-
case den >>> 1 do
465-
den when rem > den -> quo + 1
466-
den when rem < den -> quo
467-
_ when (quo &&& 1) === 1 -> quo + 1
468-
_ -> quo
448+
defp round_step(:floor, 0, quotient, _remainder, _half), do: quotient
449+
defp round_step(:floor, 1, quotient, remainder, _half) when remainder > 0, do: quotient + 1
450+
defp round_step(:floor, 1, quotient, _remainder, _half), do: quotient
451+
452+
defp round_step(:ceil, 0, quotient, remainder, _half) when remainder > 0, do: quotient + 1
453+
defp round_step(:ceil, 0, quotient, _remainder, _half), do: quotient
454+
defp round_step(:ceil, 1, quotient, _remainder, _half), do: quotient
455+
456+
defp signed_zero(0), do: 0.0
457+
defp signed_zero(1), do: -0.0
458+
459+
# Result of rounding a non-zero float whose |float * 10^precision| < 0.5.
460+
# ceil(+) → +10^-precision, floor(-) → -10^-precision, others → signed 0.
461+
defp tiny_round(0, precision, :ceil), do: 1.0 / power_of_10(precision)
462+
defp tiny_round(1, precision, :floor), do: -1.0 / power_of_10(precision)
463+
defp tiny_round(sign, _precision, _mode), do: signed_zero(sign)
464+
465+
# Slow path: emit float closest to `sign * rounded_int / power` when
466+
# rounded_int >= 2^53. The binary emission step is always IEEE
467+
# round-to-nearest-even, regardless of the integer-rounding mode.
468+
defp bignum_to_float(sign, rounded_int, power) do
469+
shift_adjust = bit_length(rounded_int) - bit_length(power) - 53
470+
{numerator, denominator, exp} = align(rounded_int, power, shift_adjust)
471+
472+
quotient = div(numerator, denominator)
473+
remainder = numerator - quotient * denominator
474+
half = denominator >>> 1
475+
476+
mantissa =
477+
cond do
478+
remainder > half -> quotient + 1
479+
remainder < half -> quotient
480+
(quotient &&& 1) === 1 -> quotient + 1
481+
true -> quotient
469482
end
470483

471-
tmp = tmp - @power_of_2_to_52
472-
<<tmp::float>> = <<sign::1, exp + 1023::11, tmp::52>>
473-
tmp
484+
# Carry-bit normalization: `mantissa` lives in [2^52, 2^53]. The upper
485+
# bound `2^53` is reachable when `align/3` returns an upper-bound quotient
486+
# or when rounding carries. Rebalance into the canonical [2^52, 2^53)
487+
# range so the 52-bit packing below doesn't silently truncate.
488+
{mantissa, exp} =
489+
if mantissa == @power_of_2_to_52 <<< 1,
490+
do: {@power_of_2_to_52, exp + 1},
491+
else: {mantissa, exp}
492+
493+
<<result::float>> = <<sign::1, exp + 1023::11, mantissa - @power_of_2_to_52::52>>
494+
result
474495
end
475496

476-
defp rounding(:floor, 1, _num, div), do: div + 1
477-
defp rounding(:ceil, 0, _num, div), do: div + 1
497+
# Pick (numerator, denominator, exp) so that numerator/denominator ∈ [2^52, 2^53)
498+
# and the resulting float = numerator/denominator * 2^(exp-52).
499+
defp align(rounded_int, power, shift_adjust) when shift_adjust >= 0 do
500+
new_power = power <<< shift_adjust
478501

479-
defp rounding(:half_up, _sign, num, div) do
480-
case rem(num, 10) do
481-
rem when rem < 5 -> div
482-
rem when rem >= 5 -> div + 1
502+
if rounded_int < new_power <<< 53,
503+
do: {rounded_int, new_power, 52 + shift_adjust},
504+
else: {rounded_int, new_power <<< 1, 53 + shift_adjust}
505+
end
506+
507+
defp align(rounded_int, power, shift_adjust) do
508+
shifted = rounded_int <<< -shift_adjust
509+
510+
cond do
511+
shifted >= power <<< 53 -> {shifted, power <<< 1, 53 + shift_adjust}
512+
shifted >= power <<< 52 -> {shifted, power, 52 + shift_adjust}
513+
true -> {shifted <<< 1, power, 51 + shift_adjust}
483514
end
484515
end
485516

486-
defp rounding(_, _, _, div), do: div
517+
defp bit_length(0), do: 0
518+
defp bit_length(integer) when integer > 0, do: bit_length(integer, 0)
519+
defp bit_length(integer, acc) when integer >= 1 <<< 64, do: bit_length(integer >>> 64, acc + 64)
520+
defp bit_length(integer, acc) when integer >= 1 <<< 16, do: bit_length(integer >>> 16, acc + 16)
521+
defp bit_length(integer, acc) when integer >= 1 <<< 4, do: bit_length(integer >>> 4, acc + 4)
522+
defp bit_length(integer, acc) when integer >= 1, do: bit_length(integer >>> 1, acc + 1)
523+
defp bit_length(_integer, acc), do: acc
487524

488-
Enum.reduce(0..104, 1, fn x, acc ->
489-
defp power_of_10(unquote(x)), do: unquote(acc)
525+
Enum.reduce(0..15, 1, fn exponent, acc ->
526+
defp power_of_10(unquote(exponent)), do: unquote(acc)
490527
acc * 10
491528
end)
492529

493-
Enum.reduce(0..104, 1, fn x, acc ->
494-
defp power_of_5(unquote(x)), do: unquote(acc)
495-
acc * 5
496-
end)
497-
498530
@doc """
499531
Returns a pair of integers whose ratio is exactly equal
500532
to the original float and with a positive denominator.

0 commit comments

Comments
 (0)