Skip to content

Commit 91c8d6f

Browse files
gururaj1512kgryte
andauthored
feat: add number/float64/base/to-float16
PR-URL: #8207 Co-authored-by: Athan Reines <kgryte@gmail.com> Reviewed-by: Athan Reines <kgryte@gmail.com>
1 parent 90af5e7 commit 91c8d6f

24 files changed

Lines changed: 1191 additions & 0 deletions
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
<!--
2+
3+
@license Apache-2.0
4+
5+
Copyright (c) 2025 The Stdlib Authors.
6+
7+
Licensed under the Apache License, Version 2.0 (the "License");
8+
you may not use this file except in compliance with the License.
9+
You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
19+
-->
20+
21+
# toFloat16
22+
23+
> Convert a [double-precision floating-point number][ieee754] to the nearest [half-precision floating-point number][half-precision-floating-point-format].
24+
25+
<section class="usage">
26+
27+
## Usage
28+
29+
```javascript
30+
var float64ToFloat16 = require( '@stdlib/number/float64/base/to-float16' );
31+
```
32+
33+
#### float64ToFloat16( x )
34+
35+
Converts a [double-precision floating-point number][ieee754] to the nearest [half-precision floating-point number][half-precision-floating-point-format].
36+
37+
```javascript
38+
var y = float64ToFloat16( 1.337 );
39+
// returns 1.3369140625
40+
```
41+
42+
</section>
43+
44+
<!-- /.usage -->
45+
46+
<section class="notes">
47+
48+
## Notes
49+
50+
- This function may be used as a polyfill for the ES2025 built-in [`Math.f16round`][math-f16round].
51+
52+
</section>
53+
54+
<!-- /.notes -->
55+
56+
<section class="examples">
57+
58+
## Examples
59+
60+
<!-- eslint no-undef: "error" -->
61+
62+
```javascript
63+
var uniform = require( '@stdlib/random/array/uniform' );
64+
var logEachMap = require( '@stdlib/console/log-each-map' );
65+
var float64ToFloat16 = require( '@stdlib/number/float64/base/to-float16' );
66+
67+
// Generate an array of random numbers:
68+
var x = uniform( 100, 0.0, 100.0 );
69+
70+
// Convert each double-precision floating-point number to the nearest half-precision floating-point number:
71+
logEachMap( 'float64: %f => float16: %f', x, float64ToFloat16 );
72+
```
73+
74+
</section>
75+
76+
<!-- /.examples -->
77+
78+
<!-- Section for related `stdlib` packages. Do not manually edit this section, as it is automatically populated. -->
79+
80+
<section class="related">
81+
82+
</section>
83+
84+
<!-- /.related -->
85+
86+
<!-- Section for all links. Make sure to keep an empty line after the `section` element and another before the `/section` close. -->
87+
88+
<section class="links">
89+
90+
[ieee754]: https://en.wikipedia.org/wiki/IEEE_754-1985
91+
92+
[half-precision-floating-point-format]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
93+
94+
[math-f16round]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/f16round
95+
96+
</section>
97+
98+
<!-- /.links -->
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
// MODULES //
22+
23+
var bench = require( '@stdlib/bench' );
24+
var uniform = require( '@stdlib/random/array/uniform' );
25+
var isnan = require( '@stdlib/math/base/assert/is-nan' );
26+
var pkg = require( './../package.json' ).name;
27+
var float64ToFloat16 = require( './../lib' );
28+
var polyfill = require( './../lib/polyfill.js' );
29+
30+
31+
// VARIABLES //
32+
33+
var opts = {
34+
'skip': ( typeof Math.f16round === 'undefined' ) // eslint-disable-line stdlib/no-builtin-math
35+
};
36+
37+
38+
// MAIN //
39+
40+
// Benchmark registered under the bare package name; it times `float64ToFloat16`, the package's main export loaded from `./../lib`.
bench( pkg, function benchmark( b ) {
41+
var x;
42+
var y;
43+
var i;
44+
45+
// Build 100 input values up front, outside the timed region (bounds -5.0e4 and 5.0e4; presumably sampled uniformly over that interval - confirm against `@stdlib/random/array/uniform`):
x = uniform( 100, -5.0e4, 5.0e4 );
46+
47+
// Everything between `tic` and `toc` is the timed region:
b.tic();
48+
for ( i = 0; i < b.iterations; i++ ) {
49+
// Wrap the index so the fixed-size input array can serve any number of iterations:
y = float64ToFloat16( x[ i%x.length ] );
50+
// Inspect every result inside the loop; NOTE(review): presumably this also keeps the call from being optimized away - confirm.
if ( isnan( y ) ) {
51+
b.fail( 'should not return NaN' );
52+
}
53+
}
54+
b.toc();
55+
// Re-check the last computed value after timing has stopped:
if ( isnan( y ) ) {
56+
b.fail( 'should not return NaN' );
57+
}
58+
b.pass( 'benchmark finished' );
59+
b.end();
60+
});
61+
62+
// Benchmark registered as `<pkg>::polyfill`; it times the implementation loaded directly from `./../lib/polyfill.js`, bypassing the main export.
bench( pkg+'::polyfill', function benchmark( b ) {
63+
var x;
64+
var y;
65+
var i;
66+
67+
// Build 100 input values up front, outside the timed region (same arguments as the other benchmarks in this file):
x = uniform( 100, -5.0e4, 5.0e4 );
68+
69+
// Everything between `tic` and `toc` is the timed region:
b.tic();
70+
for ( i = 0; i < b.iterations; i++ ) {
71+
// Wrap the index so the fixed-size input array can serve any number of iterations:
y = polyfill( x[ i%x.length ] );
72+
// Inspect every result inside the loop; NOTE(review): presumably this also keeps the call from being optimized away - confirm.
if ( isnan( y ) ) {
73+
b.fail( 'should not return NaN' );
74+
}
75+
}
76+
b.toc();
77+
// Re-check the last computed value after timing has stopped:
if ( isnan( y ) ) {
78+
b.fail( 'should not return NaN' );
79+
}
80+
b.pass( 'benchmark finished' );
81+
b.end();
82+
});
83+
84+
// Benchmark registered as `<pkg>::builtin`; it times the native `Math.f16round` directly. The `opts` object sets `skip` to true when `Math.f16round` is undefined, so this benchmark is skipped on runtimes lacking the built-in.
bench( pkg+'::builtin', opts, function benchmark( b ) {
85+
var x;
86+
var y;
87+
var i;
88+
89+
// Build 100 input values up front, outside the timed region (same arguments as the other benchmarks in this file):
x = uniform( 100, -5.0e4, 5.0e4 );
90+
91+
// Everything between `tic` and `toc` is the timed region:
b.tic();
92+
for ( i = 0; i < b.iterations; i++ ) {
93+
// Wrap the index so the fixed-size input array can serve any number of iterations:
y = Math.f16round( x[ i%x.length ] ); // eslint-disable-line stdlib/no-builtin-math
94+
// Inspect every result inside the loop; NOTE(review): presumably this also keeps the call from being optimized away - confirm.
if ( isnan( y ) ) {
95+
b.fail( 'should not return NaN' );
96+
}
97+
}
98+
b.toc();
99+
// Re-check the last computed value after timing has stopped:
if ( isnan( y ) ) {
100+
b.fail( 'should not return NaN' );
101+
}
102+
b.pass( 'benchmark finished' );
103+
b.end();
104+
});
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
2+
{{alias}}( x )
3+
Converts a double-precision floating-point number to the nearest half-
4+
precision floating-point number.
5+
6+
Parameters
7+
----------
8+
x: number
9+
Double-precision floating-point number.
10+
11+
Returns
12+
-------
13+
out: float
14+
Nearest half-precision floating-point number.
15+
16+
Examples
17+
--------
18+
> var y = {{alias}}( 1.337 )
19+
1.3369140625
20+
21+
See Also
22+
--------
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
// TypeScript Version: 4.1
20+
21+
/**
22+
* Converts a double-precision floating-point number to the nearest half-precision floating-point number.
23+
*
24+
* @param x - double-precision floating-point number
25+
* @returns nearest half-precision floating-point number
26+
*
27+
* @example
28+
* var y = float64ToFloat16( 1.337 );
29+
* // returns 1.3369140625
30+
*/
31+
declare function float64ToFloat16( x: number ): number;
32+
33+
34+
// EXPORTS //
35+
36+
export = float64ToFloat16;
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
import float64ToFloat16 = require( './index' );
20+
21+
22+
// TESTS //
23+
24+
// The function returns a number...
25+
{
26+
float64ToFloat16( 3.14 ); // $ExpectType number
27+
float64ToFloat16( 0 ); // $ExpectType number
28+
}
29+
30+
// The compiler throws an error if the function is provided a value other than a number...
31+
{
32+
float64ToFloat16( true ); // $ExpectError
33+
float64ToFloat16( false ); // $ExpectError
34+
float64ToFloat16( '5' ); // $ExpectError
35+
float64ToFloat16( [] ); // $ExpectError
36+
float64ToFloat16( {} ); // $ExpectError
37+
float64ToFloat16( ( x: number ): number => x ); // $ExpectError
38+
}
39+
40+
// The compiler throws an error if the function is provided insufficient arguments...
41+
{
42+
float64ToFloat16(); // $ExpectError
43+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
var uniform = require( '@stdlib/random/array/uniform' );
22+
var logEachMap = require( '@stdlib/console/log-each-map' );
23+
var float64ToFloat16 = require( './../lib' );
24+
25+
// Generate an array of random numbers:
26+
var x = uniform( 100, 0.0, 100.0 );
27+
28+
// Convert each double-precision floating-point number to the nearest half-precision floating-point number:
29+
logEachMap( 'float64: %f => float16: %f', x, float64ToFloat16 );
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2025 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
/**
22+
* Convert a double-precision floating-point number to the nearest half-precision floating-point number.
23+
*
24+
* @module @stdlib/number/float64/base/to-float16
25+
*
26+
* @example
27+
* var float64ToFloat16 = require( '@stdlib/number/float64/base/to-float16' );
28+
*
29+
* var y = float64ToFloat16( 1.337 );
30+
* // returns 1.3369140625
31+
*/
32+
33+
// MODULES //
34+
35+
var builtin = require( './main.js' );
36+
var polyfill = require( './polyfill.js' );
37+
38+
39+
// MAIN //
40+
41+
// Select the implementation to export: use the value exported by `./main.js` when it is a function, and otherwise fall back to `./polyfill.js`.
// NOTE(review): `./main.js` is not shown here; presumably it exports the native `Math.f16round` when the runtime provides it (and a non-function otherwise) - confirm.
var main;
42+
if ( typeof builtin === 'function' ) {
43+
main = builtin;
44+
} else {
45+
main = polyfill;
46+
}
47+
48+
49+
// EXPORTS //
50+
51+
module.exports = main;

0 commit comments

Comments
 (0)