@@ -344,15 +344,12 @@ defmodule Float do
344344
345345 """
346346 @ spec round ( float , precision_range ) :: float
347- # This implementation is slow since it relies on big integers.
348- # Faster implementations are available on more recent papers
349- # and could be implemented in the future.
350347 def round ( float , precision \\ 0 )
351348
352349 def round ( float , 0 ) when float == 0.0 , do: float
353350
354351 def round ( float , 0 ) when is_float ( float ) do
355- case float |> :erlang . round ( ) |> :erlang . float ( ) do
352+ case :erlang . round ( float ) * 1.0 do
356353 zero when zero == 0.0 and float < 0.0 -> - 0.0
357354 rounded -> rounded
358355 end
@@ -366,135 +363,170 @@ defmodule Float do
366363 raise ArgumentError , invalid_precision_message ( precision )
367364 end
368365
366+ # Decimal-place rounding via exact rational scaling. This is the bignum
367+ # core used by reference implementations like David M. Gay's "Correctly
368+ # Rounded Binary-Decimal and Decimal-Binary Conversions" (cited in the
369+ # @doc above), Python's round(), and Java's BigDecimal.setScale.
370+ #
371+ # 1. Decompose float exactly: |float| = mantissa / 2^shift.
372+ # 2. Scale exactly: |float| * 10^precision = mantissa * 10^precision / 2^shift.
373+ # Because precision is bounded to 0..15, the product fits in ~103 bits
374+ # (53-bit mantissa + ~50-bit power of ten) and BEAM bignums handle it
375+ # directly without approximation.
376+ # 3. Round the exact rational to an integer per the requested mode
377+ # (half_up / floor / ceil) using quotient and remainder.
378+ # 4. Emit the float closest to rounded_int / 10^precision:
379+ # - fast path: when rounded_int < 2^53, both operands are exactly
380+ # representable as floats and IEEE division is correctly rounded.
381+ # - slow path: bignum alignment + manual mantissa extraction with
382+ # round-to-nearest-even for the trailing bit.
383+ #
384+ # The integer-rounding decision (step 3) and the binary-emission decision
385+ # (step 4) are deliberately independent: step 3 picks the exact rational
386+ # the user asked for, step 4 picks the closest float to that rational.
387+ # Conflating them is the classic source of double-rounding bugs.
388+ #
389+ # Faster algorithms exist (Cox 2026's table-based uscale; Ryū / Schubfach
390+ # for round-trip printing) but target different problems or assume
391+ # fixed-width machine arithmetic that BEAM doesn't expose efficiently.
392+ # At precision <= 15, the exact path is small, easy to audit, and fast
393+ # enough that a more complex algorithm has not been justified by benchmarks.
369394 defp round ( num , _precision , _rounding ) when is_float ( num ) and num == 0.0 , do: num
370395
371- defp round ( float , precision , rounding ) do
372- << sign :: 1 , exp :: 11 , significant :: 52 - bitstring >> = << float :: float >>
373- { num , count } = decompose ( significant , 1 )
374- count = count - exp + 1023
396+ defp round ( float , precision , mode ) do
397+ << sign :: 1 , exp :: 11 , mantissa :: 52 >> = << float :: float >>
375398
376399 cond do
377- # Precision beyond 15 digits
378- count >= 104 ->
379- case rounding do
380- :ceil when sign === 0 -> 1 / power_of_10 ( precision )
381- :floor when sign === 1 -> - 1 / power_of_10 ( precision )
382- :ceil when sign === 1 -> - 0.0
383- :half_up when sign === 1 -> - 0.0
384- _ -> 0.0
385- end
400+ # Subnormal — tiny but non-zero; treat per-mode (ceil(+) and floor(-) bump
401+ # to 10^-precision; everything else rounds to signed zero).
402+ exp == 0 ->
403+ tiny_round ( sign , precision , mode )
386404
387- # We are asking more precision than we have
388- count <= precision ->
405+ # |float| >= 2^52 — has no fractional bits, return unchanged.
406+ exp - 1075 >= 0 ->
389407 float
390408
391409 true ->
392- # Difference in precision between float and asked precision
393- # We subtract 1 because we need to calculate the remainder too
394- diff = count - precision - 1
395-
396- # Get up to latest so we calculate the remainder
397- power_of_10 = power_of_10 ( diff )
398-
399- # Convert the numerand to decimal base
400- num = num * power_of_5 ( count )
401-
402- # Move to the given precision - 1
403- num = div ( num , power_of_10 )
404- div = div ( num , 10 )
405- num = rounding ( rounding , sign , num , div )
406-
407- # Convert back to float without loss
408- # https://www.exploringbinary.com/correct-decimal-to-floating-point-using-big-integers/
409- den = power_of_10 ( precision )
410- boundary = den <<< 52
411-
412- cond do
413- num == 0 and sign == 1 ->
414- - 0.0
415-
416- num == 0 ->
417- 0.0
418-
419- num >= boundary ->
420- { den , exp } = scale_down ( num , boundary , 52 )
421- decimal_to_float ( sign , num , den , exp )
422-
423- true ->
424- { num , exp } = scale_up ( num , boundary , 52 )
425- decimal_to_float ( sign , num , den , exp )
426- end
410+ mantissa = @ power_of_2_to_52 ||| mantissa
411+ shift = 1075 - exp
412+ do_round ( sign , mantissa , shift , precision , mode )
427413 end
428414 end
429415
430- defp decompose ( significant , initial ) do
431- decompose ( significant , 1 , 0 , initial )
432- end
433-
434- defp decompose ( << 1 :: 1 , bits :: bitstring >> , count , last_count , acc ) do
435- decompose ( bits , count + 1 , count , ( acc <<< ( count - last_count ) ) + 1 )
416+ # |float * 10^precision| < 0.5 — integer round is 0; ceil/floor still bump per sign.
417+ defp do_round ( sign , _mantissa , shift , precision , mode ) when shift >= 104 do
418+ tiny_round ( sign , precision , mode )
436419 end
437420
438- defp decompose ( << 0 :: 1 , bits :: bitstring >> , count , last_count , acc ) do
439- decompose ( bits , count + 1 , last_count , acc )
440- end
441-
442- defp decompose ( << >> , _count , last_count , acc ) do
443- { acc , last_count }
444- end
421+ defp do_round ( sign , mantissa , shift , precision , mode ) do
422+ power = power_of_10 ( precision )
423+ product = mantissa * power
424+ half = 1 <<< ( shift - 1 )
425+ quotient = product >>> shift
426+ remainder = product - ( quotient <<< shift )
427+ rounded_int = round_step ( mode , sign , quotient , remainder , half )
445428
446- defp scale_up ( num , boundary , exp ) when num >= boundary , do: { num , exp }
447- defp scale_up ( num , boundary , exp ) , do: scale_up ( num <<< 1 , boundary , exp - 1 )
429+ cond do
430+ rounded_int == 0 ->
431+ signed_zero ( sign )
448432
449- defp scale_down ( num , den , exp ) do
450- new_den = den <<< 1
433+ rounded_int < @ power_of_2_to_52 <<< 1 ->
434+ # Both rounded_int and power fit in 53 bits, so IEEE float division
435+ # is correctly rounded.
436+ result = rounded_int / power
437+ if sign == 1 , do: - result , else: result
451438
452- if num < new_den do
453- { den >>> 52 , exp }
454- else
455- scale_down ( num , new_den , exp + 1 )
439+ true ->
440+ bignum_to_float ( sign , rounded_int , power )
456441 end
457442 end
458443
459- defp decimal_to_float ( sign , num , den , exp ) do
460- quo = div ( num , den )
461- rem = num - quo * den
444+ defp round_step ( :half_up , _sign , quotient , remainder , half ) do
445+ if remainder >= half , do: quotient + 1 , else: quotient
446+ end
462447
463- tmp =
464- case den >>> 1 do
465- den when rem > den -> quo + 1
466- den when rem < den -> quo
467- _ when ( quo &&& 1 ) === 1 -> quo + 1
468- _ -> quo
448+ defp round_step ( :floor , 0 , quotient , _remainder , _half ) , do: quotient
449+ defp round_step ( :floor , 1 , quotient , remainder , _half ) when remainder > 0 , do: quotient + 1
450+ defp round_step ( :floor , 1 , quotient , _remainder , _half ) , do: quotient
451+
452+ defp round_step ( :ceil , 0 , quotient , remainder , _half ) when remainder > 0 , do: quotient + 1
453+ defp round_step ( :ceil , 0 , quotient , _remainder , _half ) , do: quotient
454+ defp round_step ( :ceil , 1 , quotient , _remainder , _half ) , do: quotient
455+
456+ defp signed_zero ( 0 ) , do: 0.0
457+ defp signed_zero ( 1 ) , do: - 0.0
458+
459+ # Result of rounding a non-zero float whose |float * 10^precision| < 0.5.
460+ # ceil(+) → +10^-precision, floor(-) → -10^-precision, others → signed 0.
461+ defp tiny_round ( 0 , precision , :ceil ) , do: 1.0 / power_of_10 ( precision )
462+ defp tiny_round ( 1 , precision , :floor ) , do: - 1.0 / power_of_10 ( precision )
463+ defp tiny_round ( sign , _precision , _mode ) , do: signed_zero ( sign )
464+
465+ # Slow path: emit float closest to `sign * rounded_int / power` when
466+ # rounded_int >= 2^53. The binary emission step is always IEEE
467+ # round-to-nearest-even, regardless of the integer-rounding mode.
468+ defp bignum_to_float ( sign , rounded_int , power ) do
469+ shift_adjust = bit_length ( rounded_int ) - bit_length ( power ) - 53
470+ { numerator , denominator , exp } = align ( rounded_int , power , shift_adjust )
471+
472+ quotient = div ( numerator , denominator )
473+ remainder = numerator - quotient * denominator
474+ half = denominator >>> 1
475+
476+ mantissa =
477+ cond do
478+ remainder > half -> quotient + 1
479+ remainder < half -> quotient
480+ ( quotient &&& 1 ) === 1 -> quotient + 1
481+ true -> quotient
469482 end
470483
471- tmp = tmp - @ power_of_2_to_52
472- << tmp :: float >> = << sign :: 1 , exp + 1023 :: 11 , tmp :: 52 >>
473- tmp
484+ # Carry-bit normalization: `mantissa` lives in [2^52, 2^53]. The upper
485+ # bound `2^53` is reachable when `align/3` returns an upper-bound quotient
486+ # or when rounding carries. Rebalance into the canonical [2^52, 2^53)
487+ # range so the 52-bit packing below doesn't silently truncate.
488+ { mantissa , exp } =
489+ if mantissa == @ power_of_2_to_52 <<< 1 ,
490+ do: { @ power_of_2_to_52 , exp + 1 } ,
491+ else: { mantissa , exp }
492+
493+ << result :: float >> = << sign :: 1 , exp + 1023 :: 11 , mantissa - @ power_of_2_to_52 :: 52 >>
494+ result
474495 end
475496
476- defp rounding ( :floor , 1 , _num , div ) , do: div + 1
477- defp rounding ( :ceil , 0 , _num , div ) , do: div + 1
497+ # Pick (numerator, denominator, exp) so that numerator/denominator ∈ [2^52, 2^53)
498+ # and the resulting float = numerator/denominator * 2^(exp-52).
499+ defp align ( rounded_int , power , shift_adjust ) when shift_adjust >= 0 do
500+ new_power = power <<< shift_adjust
478501
479- defp rounding ( :half_up , _sign , num , div ) do
480- case rem ( num , 10 ) do
481- rem when rem < 5 -> div
482- rem when rem >= 5 -> div + 1
502+ if rounded_int < new_power <<< 53 ,
503+ do: { rounded_int , new_power , 52 + shift_adjust } ,
504+ else: { rounded_int , new_power <<< 1 , 53 + shift_adjust }
505+ end
506+
507+ defp align ( rounded_int , power , shift_adjust ) do
508+ shifted = rounded_int <<< - shift_adjust
509+
510+ cond do
511+ shifted >= power <<< 53 -> { shifted , power <<< 1 , 53 + shift_adjust }
512+ shifted >= power <<< 52 -> { shifted , power , 52 + shift_adjust }
513+ true -> { shifted <<< 1 , power , 51 + shift_adjust }
483514 end
484515 end
485516
486- defp rounding ( _ , _ , _ , div ) , do: div
517+ defp bit_length ( 0 ) , do: 0
518+ defp bit_length ( integer ) when integer > 0 , do: bit_length ( integer , 0 )
519+ defp bit_length ( integer , acc ) when integer >= 1 <<< 64 , do: bit_length ( integer >>> 64 , acc + 64 )
520+ defp bit_length ( integer , acc ) when integer >= 1 <<< 16 , do: bit_length ( integer >>> 16 , acc + 16 )
521+ defp bit_length ( integer , acc ) when integer >= 1 <<< 4 , do: bit_length ( integer >>> 4 , acc + 4 )
522+ defp bit_length ( integer , acc ) when integer >= 1 , do: bit_length ( integer >>> 1 , acc + 1 )
523+ defp bit_length ( _integer , acc ) , do: acc
487524
488- Enum . reduce ( 0 .. 104 , 1 , fn x , acc ->
489- defp power_of_10 ( unquote ( x ) ) , do: unquote ( acc )
525+ Enum . reduce ( 0 .. 15 , 1 , fn exponent , acc ->
526+ defp power_of_10 ( unquote ( exponent ) ) , do: unquote ( acc )
490527 acc * 10
491528 end )
492529
493- Enum . reduce ( 0 .. 104 , 1 , fn x , acc ->
494- defp power_of_5 ( unquote ( x ) ) , do: unquote ( acc )
495- acc * 5
496- end )
497-
498530 @ doc """
499531 Returns a pair of integers whose ratio is exactly equal
500532 to the original float and with a positive denominator.
0 commit comments