@@ -265,9 +265,10 @@ int_to_hash(npy_int64 v) {
265265 return hash ;
266266}
267267
268+ // This is adapted from https://github.com/python/cpython/blob/ba65a065cf07a7a9f53be61057a090f7311a5ad7/Python/pyhash.c#L92
268269#define HASH_MODULUS (((size_t)1 << 61) - 1)
269270#define HASH_BITS 61
270- Py_hash_t
271+ static inline Py_hash_t
271272double_to_hash (double v )
272273{
273274 int e , sign ;
@@ -966,38 +967,19 @@ lookup_hash_string(
966967static Py_ssize_t
967968lookup_int (FAMObject * self , PyObject * key ) {
968969 npy_int64 v = 0 ;
969- // NOTE: we handle PyArray Scalar Byte, Short with PyNumber_Check, below, saving four branches here
970- if (PyArray_IsScalar (key , Int )) {
971- v = (npy_int64 )PyArrayScalar_VAL (key , Int );
970+ // NOTE: we handle PyArray Scalar Byte, Short, UByte, UShort with PyNumber_Check, below, saving four branches here
971+ if (PyArray_IsScalar (key , LongLong )) {
972+ v = (npy_int64 )PyArrayScalar_VAL (key , LongLong );
972973 }
973974 else if (PyArray_IsScalar (key , Long )) {
974975 v = (npy_int64 )PyArrayScalar_VAL (key , Long );
975976 }
976- else if (PyArray_IsScalar (key , LongLong )) {
977- v = (npy_int64 )PyArrayScalar_VAL (key , LongLong );
978- }
979- else if (PyArray_IsScalar (key , UInt )) {
980- v = (npy_int64 )PyArrayScalar_VAL (key , UInt );
981- }
982- else if (PyArray_IsScalar (key , ULong )) {
983- v = (npy_int64 )PyArrayScalar_VAL (key , ULong );
984- }
985- else if (PyArray_IsScalar (key , ULongLong )) {
986- v = (npy_int64 )PyArrayScalar_VAL (key , ULongLong );
987- }
988- else if (PyArray_IsScalar (key , Half )) {
989- double dv = npy_half_to_double (PyArrayScalar_VAL (key , Half ));
990- if (floor (dv ) != dv ) {
991- return -1 ;
992- }
993- v = (npy_int64 )dv ;
994- }
995- else if (PyArray_IsScalar (key , Float )) {
996- double dv = (double )PyArrayScalar_VAL (key , Float );
997- if (floor (dv ) != dv ) {
977+ else if (PyLong_Check (key )) {
978+ v = PyLong_AsLongLong (key );
979+ if (v == -1 && PyErr_Occurred ()) {
980+ PyErr_Clear ();
998981 return -1 ;
999982 }
1000- v = (npy_int64 )dv ;
1001983 }
1002984 else if (PyArray_IsScalar (key , Double )) {
1003985 double dv = PyArrayScalar_VAL (key , Double );
@@ -1017,12 +999,31 @@ lookup_int(FAMObject *self, PyObject* key) {
1017999 return -1 ;
10181000 }
10191001 }
1020- else if (PyLong_Check (key )) {
1021- v = PyLong_AsLongLong (key );
1022- if (v == -1 && PyErr_Occurred ()) {
1023- PyErr_Clear ();
1002+ else if (PyArray_IsScalar (key , ULongLong )) {
1003+ v = (npy_int64 )PyArrayScalar_VAL (key , ULongLong );
1004+ }
1005+ else if (PyArray_IsScalar (key , ULong )) {
1006+ v = (npy_int64 )PyArrayScalar_VAL (key , ULong );
1007+ }
1008+ else if (PyArray_IsScalar (key , Int )) {
1009+ v = (npy_int64 )PyArrayScalar_VAL (key , Int );
1010+ }
1011+ else if (PyArray_IsScalar (key , UInt )) {
1012+ v = (npy_int64 )PyArrayScalar_VAL (key , UInt );
1013+ }
1014+ else if (PyArray_IsScalar (key , Float )) {
1015+ double dv = (double )PyArrayScalar_VAL (key , Float );
1016+ if (floor (dv ) != dv ) {
10241017 return -1 ;
10251018 }
1019+ v = (npy_int64 )dv ;
1020+ }
1021+ else if (PyArray_IsScalar (key , Half )) {
1022+ double dv = npy_half_to_double (PyArrayScalar_VAL (key , Half ));
1023+ if (floor (dv ) != dv ) {
1024+ return -1 ;
1025+ }
1026+ v = (npy_int64 )dv ;
10261027 }
10271028 else if (PyBool_Check (key )) {
10281029 v = PyObject_IsTrue (key );
@@ -1046,20 +1047,12 @@ static Py_ssize_t
10461047lookup_uint (FAMObject * self , PyObject * key ) {
10471048 npy_uint64 v = 0 ;
10481049
1049- // NOTE: we handle PyArray Scalar UByte, UShort with PyNumber_Check, below, saving four branches here
1050- if (PyArray_IsScalar (key , Int )) {
1051- npy_int64 si = (npy_int64 )PyArrayScalar_VAL (key , Int );
1052- if (si < 0 ) {
1053- return -1 ;
1054- }
1055- v = (npy_uint64 )si ;
1050+ // NOTE: we handle PyArray Scalar Byte, Short, UByte, UShort with PyNumber_Check, below, saving four branches here
1051+ if (PyArray_IsScalar (key , ULongLong )) {
1052+ v = (npy_uint64 )PyArrayScalar_VAL (key , ULongLong );
10561053 }
1057- else if (PyArray_IsScalar (key , Long )) {
1058- npy_int64 si = (npy_int64 )PyArrayScalar_VAL (key , Long );
1059- if (si < 0 ) {
1060- return -1 ;
1061- }
1062- v = (npy_uint64 )si ;
1054+ else if (PyArray_IsScalar (key , ULong )) {
1055+ v = (npy_uint64 )PyArrayScalar_VAL (key , ULong );
10631056 }
10641057 else if (PyArray_IsScalar (key , LongLong )) {
10651058 npy_int64 si = (npy_int64 )PyArrayScalar_VAL (key , LongLong );
@@ -1068,28 +1061,19 @@ lookup_uint(FAMObject *self, PyObject* key) {
10681061 }
10691062 v = (npy_uint64 )si ;
10701063 }
1071- else if (PyArray_IsScalar (key , UInt )) {
1072- v = (npy_uint64 )PyArrayScalar_VAL (key , UInt );
1073- }
1074- else if (PyArray_IsScalar (key , ULong )) {
1075- v = (npy_uint64 )PyArrayScalar_VAL (key , ULong );
1076- }
1077- else if (PyArray_IsScalar (key , ULongLong )) {
1078- v = (npy_uint64 )PyArrayScalar_VAL (key , ULongLong );
1079- }
1080- else if (PyArray_IsScalar (key , Half )) {
1081- double dv = npy_half_to_double (PyArrayScalar_VAL (key , Half ));
1082- if (dv < 0 || floor (dv ) != dv ) {
1064+ else if (PyArray_IsScalar (key , Long )) {
1065+ npy_int64 si = (npy_int64 )PyArrayScalar_VAL (key , Long );
1066+ if (si < 0 ) {
10831067 return -1 ;
10841068 }
1085- v = (npy_uint64 )dv ;
1069+ v = (npy_uint64 )si ;
10861070 }
1087- else if (PyArray_IsScalar (key , Float )) {
1088- double dv = (double )PyArrayScalar_VAL (key , Float );
1089- if (dv < 0 || floor (dv ) != dv ) {
1071+ else if (PyLong_Check (key )) {
1072+ v = PyLong_AsUnsignedLongLong (key );
1073+ if (v == (unsigned long long )-1 && PyErr_Occurred ()) {
1074+ PyErr_Clear ();
10901075 return -1 ;
10911076 }
1092- v = (npy_uint64 )dv ;
10931077 }
10941078 else if (PyArray_IsScalar (key , Double )) {
10951079 double dv = PyArrayScalar_VAL (key , Double );
@@ -1112,12 +1096,29 @@ lookup_uint(FAMObject *self, PyObject* key) {
11121096 return -1 ;
11131097 }
11141098 }
1115- else if (PyLong_Check (key )) {
1116- v = PyLong_AsUnsignedLongLong (key );
1117- if (v == (unsigned long long )-1 && PyErr_Occurred ()) {
1118- PyErr_Clear ();
1099+ else if (PyArray_IsScalar (key , Int )) {
1100+ npy_int64 si = (npy_int64 )PyArrayScalar_VAL (key , Int );
1101+ if (si < 0 ) {
11191102 return -1 ;
11201103 }
1104+ v = (npy_uint64 )si ;
1105+ }
1106+ else if (PyArray_IsScalar (key , UInt )) {
1107+ v = (npy_uint64 )PyArrayScalar_VAL (key , UInt );
1108+ }
1109+ else if (PyArray_IsScalar (key , Float )) {
1110+ double dv = (double )PyArrayScalar_VAL (key , Float );
1111+ if (dv < 0 || floor (dv ) != dv ) {
1112+ return -1 ;
1113+ }
1114+ v = (npy_uint64 )dv ;
1115+ }
1116+ else if (PyArray_IsScalar (key , Half )) {
1117+ double dv = npy_half_to_double (PyArrayScalar_VAL (key , Half ));
1118+ if (dv < 0 || floor (dv ) != dv ) {
1119+ return -1 ;
1120+ }
1121+ v = (npy_uint64 )dv ;
11211122 }
11221123 else if (PyBool_Check (key )) {
11231124 v = PyObject_IsTrue (key );
@@ -1141,6 +1142,7 @@ lookup_uint(FAMObject *self, PyObject* key) {
11411142 return lookup_hash_uint (self , v , hash );
11421143}
11431144
1145+
11441146static Py_ssize_t
11451147lookup_double (FAMObject * self , PyObject * key ) {
11461148 double v = 0 ;
@@ -1511,7 +1513,7 @@ grow_table(FAMObject *self, Py_ssize_t keys_size)
15111513}
15121514
15131515
1514- // Given a new, possibly un-initialized FAMObject, copy attrs from self to new. Note that if fam_init calls this, it will only do this routine. Return 0 on success, -1 on error.
1516+ // Given a new, possibly un-initialized FAMObject, copy attrs from self to new. Return 0 on success, -1 on error.
15151517int
15161518copy_to_new (PyTypeObject * cls , FAMObject * self , FAMObject * new )
15171519{
@@ -1553,7 +1555,7 @@ static PyObject *
15531555fam_new (PyTypeObject * cls , PyObject * args , PyObject * kwargs );
15541556
15551557
1556- // Create a copy of self. Returns NULL on error.
1558+ // Create a copy of self. Used in `fam_or()`. Returns a new FAMObject on success, NULL on error.
15571559static FAMObject *
15581560copy (PyTypeObject * cls , FAMObject * self )
15591561{
@@ -1830,32 +1832,32 @@ fam_values(FAMObject *self)
18301832}
18311833
18321834
1833- // This macro can be used with integer and floating point NumPy types, given an `npy_type` and a specialized `insert_func`. Uses context of `fam_init` to get `fam`, `contiguous`, `a`, `keys_size`, and `i`.
1834- # define INSERT_SCALARS (npy_type , insert_func , pre_insert ) \
1835- if (contiguous) { \
1836- npy_type* b = (npy_type*)PyArray_DATA(a); \
1837- npy_type* b_end = b + keys_size; \
1838- while (b < b_end) { \
1839- if (insert_func(fam, pre_insert(*b), i, -1)) { \
1840- goto error; \
1841- } \
1842- b++; \
1843- i++; \
1844- } \
1845- } \
1846- else { \
1847- for (; i < keys_size; i++) { \
1848- if (insert_func(fam, \
1849- pre_insert(*(npy_type*)PyArray_GETPTR1(a, i)), \
1850- i, \
1851- -1)) { \
1852- goto error; \
1853- } \
1854- } \
1855- } \
1856-
1857-
1858- // This macro is for inserting flexible-sized types, Unicode (Py_UCS4) or strings (char). Uses context of `fam_init`.
1835+ // This macro can be used with integer and floating point NumPy types, given an `npy_type` and a specialized `insert_func`. Uses context of `fam_init` to get `fam`, `contiguous`, `a`, `keys_size`, and `i`. An optional `pre_insert` function can be supplied to transform extracted values before calling the insert function.
1836+ # define INSERT_SCALARS (npy_type , insert_func , pre_insert ) \
1837+ if (contiguous) { \
1838+ npy_type* b = (npy_type*)PyArray_DATA(a); \
1839+ npy_type* b_end = b + keys_size; \
1840+ while (b < b_end) { \
1841+ if (insert_func(fam, pre_insert(*b), i, -1)) { \
1842+ goto error; \
1843+ } \
1844+ b++; \
1845+ i++; \
1846+ } \
1847+ } \
1848+ else { \
1849+ for (; i < keys_size; i++) { \
1850+ if (insert_func(fam, \
1851+ pre_insert(*(npy_type*)PyArray_GETPTR1(a, i)),\
1852+ i, \
1853+ -1)) { \
1854+ goto error; \
1855+ } \
1856+ } \
1857+ } \
1858+
1859+
1860+ // This macro is for inserting flexible-sized types, Unicode (Py_UCS4) or strings (char). Uses context of `fam_init`.
18591861# define INSERT_FLEXIBLE (char_type , insert_func , get_end_func ) \
18601862char_type* p = NULL; \
18611863if (contiguous) { \
@@ -1883,8 +1885,7 @@ else { \
18831885static PyObject *
18841886fam_new (PyTypeObject * cls , PyObject * args , PyObject * kwargs )
18851887{
1886- // NOTE: original fam_new used to be able to provide a same reference back if a fam was in the args; this is tricky now that we have fam_init
1887-
1888+ // NOTE: The original fam_new used to be able to provide the same reference back if a fam was in the args; this is tricky now that we have fam_init
18881889 FAMObject * self = (FAMObject * )cls -> tp_alloc (cls , 0 );
18891890 if (!self ) {
18901891 return NULL ;
@@ -1911,9 +1912,11 @@ fam_init(PyObject *self, PyObject *args, PyObject *kwargs)
19111912 return -1 ;
19121913 }
19131914
1914- int keys_array_type = KAT_LIST ;
1915+ int keys_array_type = KAT_LIST ; // default, will override if necessary
19151916
19161917 PyObject * keys = NULL ;
1918+ Py_ssize_t keys_size = 0 ;
1919+
19171920 if (!PyArg_UnpackTuple (args , name , 0 , 1 , & keys )) {
19181921 return -1 ;
19191922 }
@@ -1933,18 +1936,15 @@ fam_init(PyObject *self, PyObject *args, PyObject *kwargs)
19331936 }
19341937 int array_t = PyArray_TYPE (a );
19351938 if (cls != & AMType &&
1936- (PyTypeNum_ISSIGNED (array_t )
1937- || PyTypeNum_ISUNSIGNED (array_t )
1939+ (PyTypeNum_ISINTEGER (array_t ) // signed and unsigned
19381940 || PyTypeNum_ISFLOAT (array_t )
1939- || array_t == NPY_UNICODE
1940- || array_t == NPY_STRING )
1941+ || PyTypeNum_ISFLEXIBLE (array_t ))
19411942 ){
19421943 if ((PyArray_FLAGS (a ) & NPY_ARRAY_WRITEABLE )) {
19431944 PyErr_Format (PyExc_TypeError , "Arrays must be immutable when given to a %s" , name );
19441945 return -1 ;
19451946 }
19461947 keys_array_type = at_to_kat (array_t );
1947- assert (keys_array_type ); // must be truthy
19481948 Py_INCREF (keys );
19491949 }
19501950 else { // if an AutoMap or an array that we do not custom-hash, we create a list
@@ -1955,9 +1955,11 @@ fam_init(PyObject *self, PyObject *args, PyObject *kwargs)
19551955 keys = PyArray_ToList (a ); // converts to objs
19561956 }
19571957 }
1958+ keys_size = PyArray_SIZE (a );
19581959 }
19591960 else { // assume an arbitrary iterable
19601961 keys = PySequence_List (keys );
1962+ keys_size = PyList_GET_SIZE (keys );
19611963 }
19621964
19631965 if (!keys ) {
@@ -1966,10 +1968,6 @@ fam_init(PyObject *self, PyObject *args, PyObject *kwargs)
19661968
19671969 fam -> keys = keys ;
19681970 fam -> keys_array_type = keys_array_type ;
1969-
1970- Py_ssize_t keys_size = keys_array_type
1971- ? PyArray_SIZE ((PyArrayObject * )keys )
1972- : PyList_GET_SIZE (keys );
19731971 fam -> keys_size = keys_size ;
19741972 fam -> key_buffer = NULL ;
19751973 key_count_global += keys_size ;
@@ -2015,7 +2013,6 @@ fam_init(PyObject *self, PyObject *args, PyObject *kwargs)
20152013 INSERT_SCALARS (npy_float , insert_double , );
20162014 break ;
20172015 case KAT_FLOAT16 :
2018- // conversion to double requires special handling
20192016 INSERT_SCALARS (npy_half , insert_double , npy_half_to_double );
20202017 break ;
20212018 case KAT_UNICODE : {
@@ -2026,7 +2023,6 @@ fam_init(PyObject *self, PyObject *args, PyObject *kwargs)
20262023 break ;
20272024 }
20282025 case KAT_STRING : {
2029- // Over allocate buffer by 1 so there is room for null at end. This buffer is only used in lookup();
20302026 Py_ssize_t dt_size = PyArray_DESCR (a )-> elsize ;
20312027 INSERT_FLEXIBLE (char , insert_string , char_get_end_p );
20322028 break ;
0 commit comments