improve bignum parsing

9il · 9il · commit 2a0fd33ef8e1 · 2022-06-01T15:05:06.000+04:00
diff --git a/dub.sdl b/dub.sdl
@@ -9,9 +9,12 @@ dependency "mir-core" version=">=1.1.106"
 
 // versions "TeslAlgoM"
 
+
 buildType "unittest" {
     buildOptions "unittests" "debugMode" "debugInfo"
-    versions "mir_bignum_test" "mir_bignum_test_llv" // "mir_ndslice_test" "mir_test"
+    versions "mir_bignum_test" "mir_bignum_test_llv"
+    versions "mir_ndslice_test"
+    versions "mir_test"
     dflags "-lowmem"
 }
 buildType "unittest-dip1008" {
diff --git a/source/mir/bignum/decimal.d b/source/mir/bignum/decimal.d
@@ -213,11 +213,11 @@ struct Decimal(uint size64)
                 }
             }
 
-            uint d = str[0] - '0';
+            ulong d = str[0] - C('0');
             str = str[1 .. $];
             exponent = 0;
 
-            ulong v;
+            ulong v, multplier = void;
 
             if (_expect(d >= 10, false))
             {
@@ -228,7 +228,7 @@ struct Decimal(uint size64)
                         if (str.length == 0)
                             return false;
                         key = DecimalExponentKey.dot;
-                        d = str[0] - '0';
+                        d = str[0] - C('0');
                         str = str[1 .. $];
                         if (_expect(d < 10, true))
                             goto FI;
@@ -247,7 +247,7 @@ struct Decimal(uint size64)
                 {
                     if (str.length == 0)
                         goto R;
-                    d = str[0] - '0';
+                    d = str[0] - C('0');
                     str = str[1 .. $];
                     if (d < 10)
                         return false;
@@ -260,15 +260,15 @@ struct Decimal(uint size64)
         S:
             if (str.length == 0)
                 goto R;
-            d = str[0] - '0';
+            d = str[0] - C('0');
             str = str[1 .. $];
 
             if (d < 10)
             {
         F0:
                 import mir.checkedint: mulu, addu;
                 bool overflow;
-                v = mulu(v, cast(uint)10, overflow);
+                v = mulu(v, 10u, overflow);
                 if (overflow)
                     return false;
                 v = addu(v, d, overflow);
@@ -283,7 +283,7 @@ struct Decimal(uint size64)
                 {
                     if (str.length == 0)
                         return false;
-                    d = str[0] - '0';
+                    d = str[0] - C('0');
                     str = str[1 .. $];
                     if (_expect(d < 10, true))
                         goto F0;
@@ -314,51 +314,76 @@ struct Decimal(uint size64)
                     import mir.bignum.internal.parse: isMadeOfEightDigits, parseEightDigits;
                     if (str.length >= 8 && isMadeOfEightDigits(str[0 .. 8]))
                     {
-                        ulong multplier = 100000000;
-                        ulong value = parseEightDigits(str[0 .. 8]);
+                        multplier = 100000000;
+                        d = parseEightDigits(str[0 .. 8]);
                         str = str[8 .. $];
                         exponentShift -= 8;
-                        if (str.length >= 8 && isMadeOfEightDigits(str[0 .. 8]))
+                        if (str.length >= 7)
                         {
-                            multplier = 100000000 * 100000000;
-                            value *= 100000000;
-                            value += parseEightDigits(str[0 .. 8]);
-                            str = str[8 .. $];
-                            exponentShift -= 8;
-                        }
-
-                        {
-                            import mir.checkedint: mulu, addu;
-                            bool overflow;
-                            v = mulu(v, multplier, overflow);
-                            if (overflow)
-                                return false;
-                            v = addu(v, value, overflow);
-                            if (overflow)
-                                return false;
+                            if (isMadeOfEightDigits((str.ptr - 1)[0 .. 8]))
+                            {
+                                multplier = 100000000 * 10000000;
+                                d -= str.ptr[-1] - '0';
+                                d *= 10000000;
+                                d += parseEightDigits((str.ptr - 1)[0 .. 8]);
+                                str = str[7 .. $];
+                                exponentShift -= 7;
+                                if (str.length)
+                                {
+                                    auto md = str[0] - C('0');
+                                    if (md < 10)
+                                    {
+                                        d *= 10;
+                                        multplier = 100000000 * 100000000;
+                                        d += md;
+                                        str = str[1 .. $];
+                                    }
+                                }
+                            }
+                            else
+                            {
+                            TrySix:
+                                if (isMadeOfEightDigits((str.ptr - 2)[0 .. 8]))
+                                {
+                                    multplier = 100000000 * 1000000;
+                                    d -= str.ptr[-1] - '0';
+                                    d -= (str.ptr[-2] - '0') * 10;
+                                    d *= 1000000;
+                                    d += parseEightDigits((str.ptr - 2)[0 .. 8]);
+                                    str = str[6 .. $];
+                                    exponentShift -= 6;
+                                }
+                            }
+      
                         }
+                        else
+                        if (str.length == 6)
+                            goto TrySix;
+                        goto FIL;
                     }
                 }
 
-                d = str[0] - '0';
+                d = str[0] - C('0');
                 str = str[1 .. $];
                 if (_expect(d >= 10, false))
                     goto DOB;
             FI:
+                exponentShift--;
+                multplier = 10;
+            FIL:
                 {
                     import mir.checkedint: mulu, addu;
                     bool overflow;
-                    v = mulu(v, cast(uint)10, overflow);
+                    v = mulu(v, multplier, overflow);
                     if (overflow)
                         return false;
                     v = addu(v, d, overflow);
                     if (overflow)
                         return false;
                 }
-                exponentShift--;
                 if (str.length == 0)
                     goto E;
-                d = str[0] - '0';
+                d = str[0] - C('0');
                 str = str[1 .. $];
                 if (d < 10)
                     goto FI;
@@ -369,7 +394,7 @@ struct Decimal(uint size64)
                     {
                         if (str.length == 0)
                             return false;
-                        d = str[0] - '0';
+                        d = str[0] - C('0');
                         str = str[1 .. $];
                         if (_expect(d < 10, true))
                             goto FI;
@@ -817,42 +842,43 @@ unittest
 
     // Check precise percentage parsing
     assert(decimal.fromStringImpl("71.7", key, -2));
-    assert(key == DecimalExponentKey.dot);
+    key.should == DecimalExponentKey.dot;
     // The result is exact value instead of 0.7170000000000001 = 71.7 / 100
-    assert(cast(double) decimal == 0.717);
+    (cast(double) decimal).should == 0.717;
 
     assert(decimal.fromStringImpl("+0.334e-5"w, key));
-    assert(key == DecimalExponentKey.e);
-    assert(cast(double) decimal == 0.334e-5);
+    key.should == DecimalExponentKey.e;
+    (cast(double) decimal).should == 0.334e-5;
 
     assert(decimal.fromStringImpl("100_000_000"w, key));
-    assert(key == DecimalExponentKey.none);
-    assert(cast(double) decimal == 1e8);
+    key.should == DecimalExponentKey.none;
+    (cast(double) decimal).should == 1e8;
 
     assert(decimal.fromStringImpl("-334D-5"d, key));
-    assert(key == DecimalExponentKey.D);
-    assert(cast(double) decimal == -334e-5);
+    key.should == DecimalExponentKey.D;
+    (cast(double) decimal).should == -334e-5;
 
     assert(decimal.fromStringImpl("2482734692817364218734682973648217364981273648923423", key));
-    assert(key == DecimalExponentKey.none);
-    assert(cast(double) decimal == 2482734692817364218734682973648217364981273648923423.0);
+    key.should == DecimalExponentKey.none;
+    (cast(double) decimal).should == 2482734692817364218734682973648217364981273648923423.0;
 
     assert(decimal.fromStringImpl(".023", key));
-    assert(key == DecimalExponentKey.dot);
-    assert(cast(double) decimal == .023);
+    key.should == DecimalExponentKey.dot;
+    (cast(double) decimal).should == .023;
 
     assert(decimal.fromStringImpl("0E100", key));
-    assert(key == DecimalExponentKey.E);
-    assert(cast(double) decimal == 0);
+    key.should == DecimalExponentKey.E;
+    (cast(double) decimal).should == 0;
 
     foreach (str; ["-nan", "-NaN", "-NAN"])
     {
         assert(decimal.fromStringImpl(str, key));
         assert(decimal.coefficient.length > 0);
         assert(decimal.exponent == decimal.exponent.max);
         assert(decimal.coefficient.sign);
-        assert(key == DecimalExponentKey.nan);
-        assert(cast(double) decimal != cast(double) decimal);
+        key.should == DecimalExponentKey.nan;
+        auto nan = cast(double) decimal;
+        (cast(double) decimal).should == double.nan;
     }
 
     foreach (str; ["inf", "Inf", "INF"])
@@ -861,7 +887,7 @@ unittest
         assert(decimal.coefficient.length == 0);
         assert(decimal.exponent == decimal.exponent.max);
         assert(key == DecimalExponentKey.infinity);
-        assert(cast(double) decimal == double.infinity);
+        (cast(double) decimal).should == double.infinity;
     }
 
     assert(decimal.fromStringImpl("-inf", key));
diff --git a/source/mir/bignum/internal/dec2float.d b/source/mir/bignum/internal/dec2float.d
@@ -258,9 +258,6 @@ private T algorithmM(T)(scope const size_t[] coefficients, long exponent)
 {
     pragma(inline, false);
 
-    // import mir.stdio;
-    // debug dump("algorithmM", coefficients, exponent);
-
     import mir.bitop: ctlz;
     import mir.bignum.fp: Fp;
     import mir.bignum.integer: BigInt;
diff --git a/source/mir/bignum/internal/parse.d b/source/mir/bignum/internal/parse.d
@@ -1,12 +1,18 @@
 module mir.bignum.internal.parse;
 
+/+
+https://arxiv.org/abs/2101.11408
+Number Parsing at a Gigabyte per Second
+Daniel Lemire
++/
 // Tests eight characters at once for being ASCII decimal digits, using
 // word-wide (SWAR) arithmetic instead of eight separate comparisons.
 //
 // Params:
 //     chars = exactly eight `char` code units, taken by reference.
 // Returns:
 //     `true` when neither of the two byte-wise probes below leaves a top bit
 //     set in any of the eight byte lanes; `false` otherwise.
 bool isMadeOfEightDigits()(ref const char[8] chars)
 {
     pragma(inline, true);
     // Reinterpret the eight bytes as a single 64-bit word (no per-character loop).
     ulong val = (cast(ulong[1]) cast(ubyte[8]) chars)[0];
     // Per byte lane: adding 0x46 raises the top bit for bytes >= 0x3A (above '9'),
     // subtracting 0x30 raises it for bytes < 0x30 (below '0'). OR-ing both and
     // masking with 0x80 per lane leaves zero only if every lane passed.
     // NOTE(review): this per-lane reading ignores inter-byte carries/borrows;
     // see the cited paper for why the combined test is still exact.
     return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & 0x8080808080808080));
 }
 
+// ditto
 uint parseEightDigits()(ref const char[8] chars)
 {
     pragma(inline, true);
diff --git a/source/mir/parse.d b/source/mir/parse.d
diff --git a/source/mir/test.d b/source/mir/test.d

Original file line number	Diff line number	Diff line change
`@@ -258,9 +258,6 @@ private T algorithmM(T)(scope const size_t[] coefficients, long exponent)`
`258`	`258`	`{`
`259`	`259`	`pragma(inline, false);`
`260`	`260`
`261`		`- // import mir.stdio;`
`262`		`- // debug dump("algorithmM", coefficients, exponent);`
`263`		`-`
`264`	`261`	`import mir.bitop: ctlz;`
`265`	`262`	`import mir.bignum.fp: Fp;`
`266`	`263`	`import mir.bignum.integer: BigInt;`
Original file line number	Diff line number	Diff line change
`@@ -1,12 +1,18 @@`
`1`	`1`	`module mir.bignum.internal.parse;`
`2`	`2`
	`3`	`+/+`
	`4`	`+https://arxiv.org/abs/2101.11408`
	`5`	`+Number Parsing at a Gigabyte per Second`
	`6`	`+Daniel Lemire`
	`7`	`++/`
`3`	`8`	`bool isMadeOfEightDigits()(ref const char[8] chars)`
`4`	`9`	`{`
`5`	`10`	`pragma(inline, true);`
`6`	`11`	`ulong val = (cast(ulong[1]) cast(ubyte[8]) chars)[0];`
`7`	`12`	`return !((((val + 0x4646464646464646) \| (val - 0x3030303030303030)) & 0x8080808080808080));`
`8`	`13`	`}`
`9`	`14`
	`15`	`+// ditto`
`10`	`16`	`uint parseEightDigits()(ref const char[8] chars)`
`11`	`17`	`{`
`12`	`18`	`pragma(inline, true);`