Skip to content

Commit ee8150b

Browse files
authored
drop Python2 support (#67)
* drop Python2 support * version bump
1 parent 4b3e7f4 commit ee8150b

8 files changed

Lines changed: 2713 additions & 2343 deletions

File tree

pip-freeze.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
appnope==0.1.2
21
attrs==21.4.0
32
backcall==0.2.0
43
Cython==0.29.26

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def get_system_bits():
127127
)
128128

129129

130-
VERSION = "0.3.8"
130+
VERSION = "0.4.0"
131131
URL = "https://github.com/escherba/python-cityhash"
132132

133133

src/cityhash.cpp

Lines changed: 829 additions & 715 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cityhash.pyx

Lines changed: 69 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#cython: infer_types=True
22
#cython: embedsignature=True
33
#cython: binding=False
4-
#cython: language_level=2
4+
#cython: language_level=3
55
#distutils: language=c++
66

77
"""
@@ -10,7 +10,7 @@ Python wrapper for CityHash
1010

1111
__author__ = "Eugene Scherba"
1212
__email__ = "escherba+cityhash@gmail.com"
13-
__version__ = '0.3.8'
13+
__version__ = '0.4.0'
1414
__all__ = [
1515
"CityHash32",
1616
"CityHash64",
@@ -41,16 +41,24 @@ cdef extern from "<utility>" namespace "std" nogil:
4141
bint operator >= (pair&, pair&)
4242

4343

44+
cdef extern from "Python.h":
45+
# Note that the following function can raise an exception,
46+
# so it cannot be declared 'nogil'. Also, PyUnicode_AsUTF8AndSize() may
47+
# allocate memory internally in the unlikely case where the underlying
48+
# unicode object is not stored as UTF-8 and its UTF-8 form was not requested before.
49+
const char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t* length) except NULL
50+
51+
4452
cdef extern from "city.h" nogil:
4553
ctypedef uint32_t uint32
4654
ctypedef uint64_t uint64
4755
ctypedef pair[uint64, uint64] uint128
48-
cdef uint32 c_CityHash32 "CityHash32" (const char *buff, size_t length)
49-
cdef uint64 c_CityHash64 "CityHash64" (const char *buff, size_t length)
50-
cdef uint64 c_CityHash64WithSeed "CityHash64WithSeed" (const char *buff, size_t length, uint64 seed)
51-
cdef uint64 c_CityHash64WithSeeds "CityHash64WithSeeds" (const char *buff, size_t length, uint64 seed0, uint64 seed1)
52-
cdef uint128 c_CityHash128 "CityHash128" (const char *s, size_t length)
53-
cdef uint128 c_CityHash128WithSeed "CityHash128WithSeed" (const char *s, size_t length, uint128 seed)
56+
cdef uint32 c_Hash32 "CityHash32" (const char *buff, size_t length)
57+
cdef uint64 c_Hash64 "CityHash64" (const char *buff, size_t length)
58+
cdef uint64 c_Hash64WithSeed "CityHash64WithSeed" (const char *buff, size_t length, uint64 seed)
59+
cdef uint64 c_Hash64WithSeeds "CityHash64WithSeeds" (const char *buff, size_t length, uint64 seed0, uint64 seed1)
60+
cdef uint128 c_Hash128 "CityHash128" (const char *s, size_t length)
61+
cdef uint128 c_Hash128WithSeed "CityHash128WithSeed" (const char *s, size_t length, uint128 seed)
5462

5563

5664
from cpython cimport long
@@ -61,7 +69,6 @@ from cpython.buffer cimport PyBuffer_Release
6169
from cpython.buffer cimport PyBUF_SIMPLE
6270

6371
from cpython.unicode cimport PyUnicode_Check
64-
from cpython.unicode cimport PyUnicode_AsUTF8String
6572

6673
from cpython.bytes cimport PyBytes_Check
6774
from cpython.bytes cimport PyBytes_GET_SIZE
@@ -88,19 +95,20 @@ Raises:
8895
ValueError: if input buffer is not C-contiguous
8996
"""
9097
cdef Py_buffer buf
91-
cdef bytes obj
9298
cdef uint32 result
99+
cdef const char* encoding
100+
cdef Py_ssize_t encoding_size = 0
101+
93102
if PyUnicode_Check(data):
94-
obj = PyUnicode_AsUTF8String(data)
95-
PyObject_GetBuffer(obj, &buf, PyBUF_SIMPLE)
96-
result = c_CityHash32(<const char*>buf.buf, buf.len)
97-
PyBuffer_Release(&buf)
103+
encoding = PyUnicode_AsUTF8AndSize(data, &encoding_size)
104+
result = c_Hash32(encoding, encoding_size)
98105
elif PyBytes_Check(data):
99-
result = c_CityHash32(<const char*>PyBytes_AS_STRING(data),
100-
PyBytes_GET_SIZE(data))
106+
result = c_Hash32(
107+
<const char*>PyBytes_AS_STRING(data),
108+
PyBytes_GET_SIZE(data))
101109
elif PyObject_CheckBuffer(data):
102110
PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
103-
result = c_CityHash32(<const char*>buf.buf, buf.len)
111+
result = c_Hash32(<const char*>buf.buf, buf.len)
104112
PyBuffer_Release(&buf)
105113
else:
106114
raise _type_error("data", ["basestring", "buffer"], data)
@@ -120,19 +128,20 @@ Raises:
120128
ValueError: if input buffer is not C-contiguous
121129
"""
122130
cdef Py_buffer buf
123-
cdef bytes obj
124131
cdef uint64 result
132+
cdef const char* encoding
133+
cdef Py_ssize_t encoding_size = 0
134+
125135
if PyUnicode_Check(data):
126-
obj = PyUnicode_AsUTF8String(data)
127-
PyObject_GetBuffer(obj, &buf, PyBUF_SIMPLE)
128-
result = c_CityHash64(<const char*>buf.buf, buf.len)
129-
PyBuffer_Release(&buf)
136+
encoding = PyUnicode_AsUTF8AndSize(data, &encoding_size)
137+
result = c_Hash64(encoding, encoding_size)
130138
elif PyBytes_Check(data):
131-
result = c_CityHash64(<const char*>PyBytes_AS_STRING(data),
132-
PyBytes_GET_SIZE(data))
139+
result = c_Hash64(
140+
<const char*>PyBytes_AS_STRING(data),
141+
PyBytes_GET_SIZE(data))
133142
elif PyObject_CheckBuffer(data):
134143
PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
135-
result = c_CityHash64(<const char*>buf.buf, buf.len)
144+
result = c_Hash64(<const char*>buf.buf, buf.len)
136145
PyBuffer_Release(&buf)
137146
else:
138147
raise _type_error("data", ["basestring", "buffer"], data)
@@ -154,19 +163,20 @@ Raises:
154163
OverflowError: if seed cannot be converted to unsigned int64
155164
"""
156165
cdef Py_buffer buf
157-
cdef bytes obj
158166
cdef uint64 result
167+
cdef const char* encoding
168+
cdef Py_ssize_t encoding_size = 0
169+
159170
if PyUnicode_Check(data):
160-
obj = PyUnicode_AsUTF8String(data)
161-
PyObject_GetBuffer(obj, &buf, PyBUF_SIMPLE)
162-
result = c_CityHash64WithSeed(<const char*>buf.buf, buf.len, seed)
163-
PyBuffer_Release(&buf)
171+
encoding = PyUnicode_AsUTF8AndSize(data, &encoding_size)
172+
result = c_Hash64WithSeed(encoding, encoding_size, seed)
164173
elif PyBytes_Check(data):
165-
result = c_CityHash64WithSeed(<const char*>PyBytes_AS_STRING(data),
166-
PyBytes_GET_SIZE(data), seed)
174+
result = c_Hash64WithSeed(
175+
<const char*>PyBytes_AS_STRING(data),
176+
PyBytes_GET_SIZE(data), seed)
167177
elif PyObject_CheckBuffer(data):
168178
PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
169-
result = c_CityHash64WithSeed(<const char*>buf.buf, buf.len, seed)
179+
result = c_Hash64WithSeed(<const char*>buf.buf, buf.len, seed)
170180
PyBuffer_Release(&buf)
171181
else:
172182
raise _type_error("data", ["basestring", "buffer"], data)
@@ -189,19 +199,20 @@ Raises:
189199
OverflowError: if seed cannot be converted to unsigned int64
190200
"""
191201
cdef Py_buffer buf
192-
cdef bytes obj
193202
cdef uint64 result
203+
cdef const char* encoding
204+
cdef Py_ssize_t encoding_size = 0
205+
194206
if PyUnicode_Check(data):
195-
obj = PyUnicode_AsUTF8String(data)
196-
PyObject_GetBuffer(obj, &buf, PyBUF_SIMPLE)
197-
result = c_CityHash64WithSeeds(<const char*>buf.buf, buf.len, seed0, seed1)
198-
PyBuffer_Release(&buf)
207+
encoding = PyUnicode_AsUTF8AndSize(data, &encoding_size)
208+
result = c_Hash64WithSeeds(encoding, encoding_size, seed0, seed1)
199209
elif PyBytes_Check(data):
200-
result = c_CityHash64WithSeeds(<const char*>PyBytes_AS_STRING(data),
201-
PyBytes_GET_SIZE(data), seed0, seed1)
210+
result = c_Hash64WithSeeds(
211+
<const char*>PyBytes_AS_STRING(data),
212+
PyBytes_GET_SIZE(data), seed0, seed1)
202213
elif PyObject_CheckBuffer(data):
203214
PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
204-
result = c_CityHash64WithSeeds(<const char*>buf.buf, buf.len, seed0, seed1)
215+
result = c_Hash64WithSeeds(<const char*>buf.buf, buf.len, seed0, seed1)
205216
PyBuffer_Release(&buf)
206217
else:
207218
raise _type_error("data", ["basestring", "buffer"], data)
@@ -221,19 +232,20 @@ Raises:
221232
TypeError: if input data is not a string or a buffer
222233
"""
223234
cdef Py_buffer buf
224-
cdef bytes obj
225235
cdef pair[uint64, uint64] result
236+
cdef const char* encoding
237+
cdef Py_ssize_t encoding_size = 0
238+
226239
if PyUnicode_Check(data):
227-
obj = PyUnicode_AsUTF8String(data)
228-
PyObject_GetBuffer(obj, &buf, PyBUF_SIMPLE)
229-
result = c_CityHash128(<const char*>buf.buf, buf.len)
230-
PyBuffer_Release(&buf)
240+
encoding = PyUnicode_AsUTF8AndSize(data, &encoding_size)
241+
result = c_Hash128(encoding, encoding_size)
231242
elif PyBytes_Check(data):
232-
result = c_CityHash128(<const char*>PyBytes_AS_STRING(data),
233-
PyBytes_GET_SIZE(data))
243+
result = c_Hash128(
244+
<const char*>PyBytes_AS_STRING(data),
245+
PyBytes_GET_SIZE(data))
234246
elif PyObject_CheckBuffer(data):
235247
PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
236-
result = c_CityHash128(<const char*>buf.buf, buf.len)
248+
result = c_Hash128(<const char*>buf.buf, buf.len)
237249
PyBuffer_Release(&buf)
238250
else:
239251
raise _type_error("data", ["basestring", "buffer"], data)
@@ -255,24 +267,24 @@ Raises:
255267
OverflowError: if seed cannot be converted to unsigned int64
256268
"""
257269
cdef Py_buffer buf
258-
cdef bytes obj
259270
cdef pair[uint64, uint64] result
260271
cdef pair[uint64, uint64] tseed
272+
cdef const char* encoding
273+
cdef Py_ssize_t encoding_size = 0
261274
262275
tseed.first = seed >> 64ULL
263276
tseed.second = seed & ((1ULL << 64ULL) - 1ULL)
264277
265278
if PyUnicode_Check(data):
266-
obj = PyUnicode_AsUTF8String(data)
267-
PyObject_GetBuffer(obj, &buf, PyBUF_SIMPLE)
268-
result = c_CityHash128WithSeed(<const char*>buf.buf, buf.len, tseed)
269-
PyBuffer_Release(&buf)
279+
encoding = PyUnicode_AsUTF8AndSize(data, &encoding_size)
280+
result = c_Hash128WithSeed(encoding, encoding_size, tseed)
270281
elif PyBytes_Check(data):
271-
result = c_CityHash128WithSeed(<const char*>PyBytes_AS_STRING(data),
272-
PyBytes_GET_SIZE(data), tseed)
282+
result = c_Hash128WithSeed(
283+
<const char*>PyBytes_AS_STRING(data),
284+
PyBytes_GET_SIZE(data), tseed)
273285
elif PyObject_CheckBuffer(data):
274286
PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
275-
result = c_CityHash128WithSeed(<const char*>buf.buf, buf.len, tseed)
287+
result = c_Hash128WithSeed(<const char*>buf.buf, buf.len, tseed)
276288
PyBuffer_Release(&buf)
277289
else:
278290
raise _type_error("data", ["basestring", "buffer"], data)

0 commit comments

Comments
 (0)