11# cython: infer_types=True
22# cython: embedsignature=True
33# cython: binding=False
4- # cython: language_level=2
4+ # cython: language_level=3
55# distutils: language=c++
66
77"""
@@ -10,7 +10,7 @@ Python wrapper for CityHash
1010
1111__author__ = " Eugene Scherba"
1212__email__ = " escherba+cityhash@gmail.com"
13- __version__ = ' 0.3.8 '
13+ __version__ = ' 0.4.0 '
1414__all__ = [
1515 " CityHash32" ,
1616 " CityHash64" ,
@@ -41,16 +41,24 @@ cdef extern from "<utility>" namespace "std" nogil:
4141 bint operator >= (pair& , pair& )
4242
4343
44+ cdef extern from " Python.h" :
45+ # Note that the following functions can raise an exception, so they
46+ # cannot be declared 'nogil'. Also, PyUnicode_AsUTF8AndSize() may allocate
47+ # memory internally in the unlikely case that the underlying unicode object
48+ # is not stored as UTF-8 and its UTF-8 form has not been requested before.
49+ const char * PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t* length) except NULL
50+
51+
4452cdef extern from " city.h" nogil:
4553 ctypedef uint32_t uint32
4654 ctypedef uint64_t uint64
4755 ctypedef pair[uint64, uint64] uint128
48- cdef uint32 c_CityHash32 " CityHash32" (const char * buff, size_t length)
49- cdef uint64 c_CityHash64 " CityHash64" (const char * buff, size_t length)
50- cdef uint64 c_CityHash64WithSeed " CityHash64WithSeed" (const char * buff, size_t length, uint64 seed)
51- cdef uint64 c_CityHash64WithSeeds " CityHash64WithSeeds" (const char * buff, size_t length, uint64 seed0, uint64 seed1)
52- cdef uint128 c_CityHash128 " CityHash128" (const char * s, size_t length)
53- cdef uint128 c_CityHash128WithSeed " CityHash128WithSeed" (const char * s, size_t length, uint128 seed)
56+ cdef uint32 c_Hash32 " CityHash32" (const char * buff, size_t length)
57+ cdef uint64 c_Hash64 " CityHash64" (const char * buff, size_t length)
58+ cdef uint64 c_Hash64WithSeed " CityHash64WithSeed" (const char * buff, size_t length, uint64 seed)
59+ cdef uint64 c_Hash64WithSeeds " CityHash64WithSeeds" (const char * buff, size_t length, uint64 seed0, uint64 seed1)
60+ cdef uint128 c_Hash128 " CityHash128" (const char * s, size_t length)
61+ cdef uint128 c_Hash128WithSeed " CityHash128WithSeed" (const char * s, size_t length, uint128 seed)
5462
5563
5664from cpython cimport long
@@ -61,7 +69,6 @@ from cpython.buffer cimport PyBuffer_Release
6169from cpython.buffer cimport PyBUF_SIMPLE
6270
6371from cpython.unicode cimport PyUnicode_Check
64- from cpython.unicode cimport PyUnicode_AsUTF8String
6572
6673from cpython.bytes cimport PyBytes_Check
6774from cpython.bytes cimport PyBytes_GET_SIZE
@@ -88,19 +95,20 @@ Raises:
8895 ValueError : if input buffer is not C- contiguous
8996 """
9097 cdef Py_buffer buf
91- cdef bytes obj
9298 cdef uint32 result
99+ cdef const char* encoding
100+ cdef Py_ssize_t encoding_size = 0
101+
93102 if PyUnicode_Check(data):
94- obj = PyUnicode_AsUTF8String(data)
95- PyObject_GetBuffer(obj, &buf, PyBUF_SIMPLE)
96- result = c_CityHash32(<const char*>buf.buf, buf.len)
97- PyBuffer_Release(&buf)
103+ encoding = PyUnicode_AsUTF8AndSize(data, &encoding_size)
104+ result = c_Hash32(encoding, encoding_size)
98105 elif PyBytes_Check(data):
99- result = c_CityHash32(<const char*>PyBytes_AS_STRING(data),
100- PyBytes_GET_SIZE(data))
106+ result = c_Hash32(
107+ <const char*>PyBytes_AS_STRING(data),
108+ PyBytes_GET_SIZE(data))
101109 elif PyObject_CheckBuffer(data):
102110 PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
103- result = c_CityHash32 (<const char*>buf.buf, buf.len)
111+ result = c_Hash32 (<const char*>buf.buf, buf.len)
104112 PyBuffer_Release(&buf)
105113 else:
106114 raise _type_error("data", ["basestring", "buffer"], data)
@@ -120,19 +128,20 @@ Raises:
120128 ValueError : if input buffer is not C- contiguous
121129 """
122130 cdef Py_buffer buf
123- cdef bytes obj
124131 cdef uint64 result
132+ cdef const char* encoding
133+ cdef Py_ssize_t encoding_size = 0
134+
125135 if PyUnicode_Check(data):
126- obj = PyUnicode_AsUTF8String(data)
127- PyObject_GetBuffer(obj, &buf, PyBUF_SIMPLE)
128- result = c_CityHash64(<const char*>buf.buf, buf.len)
129- PyBuffer_Release(&buf)
136+ encoding = PyUnicode_AsUTF8AndSize(data, &encoding_size)
137+ result = c_Hash64(encoding, encoding_size)
130138 elif PyBytes_Check(data):
131- result = c_CityHash64(<const char*>PyBytes_AS_STRING(data),
132- PyBytes_GET_SIZE(data))
139+ result = c_Hash64(
140+ <const char*>PyBytes_AS_STRING(data),
141+ PyBytes_GET_SIZE(data))
133142 elif PyObject_CheckBuffer(data):
134143 PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
135- result = c_CityHash64 (<const char*>buf.buf, buf.len)
144+ result = c_Hash64 (<const char*>buf.buf, buf.len)
136145 PyBuffer_Release(&buf)
137146 else:
138147 raise _type_error("data", ["basestring", "buffer"], data)
@@ -154,19 +163,20 @@ Raises:
154163 OverflowError : if seed cannot be converted to unsigned int64
155164 """
156165 cdef Py_buffer buf
157- cdef bytes obj
158166 cdef uint64 result
167+ cdef const char* encoding
168+ cdef Py_ssize_t encoding_size = 0
169+
159170 if PyUnicode_Check(data):
160- obj = PyUnicode_AsUTF8String(data)
161- PyObject_GetBuffer(obj, &buf, PyBUF_SIMPLE)
162- result = c_CityHash64WithSeed(<const char*>buf.buf, buf.len, seed)
163- PyBuffer_Release(&buf)
171+ encoding = PyUnicode_AsUTF8AndSize(data, &encoding_size)
172+ result = c_Hash64WithSeed(encoding, encoding_size, seed)
164173 elif PyBytes_Check(data):
165- result = c_CityHash64WithSeed(<const char*>PyBytes_AS_STRING(data),
166- PyBytes_GET_SIZE(data), seed)
174+ result = c_Hash64WithSeed(
175+ <const char*>PyBytes_AS_STRING(data),
176+ PyBytes_GET_SIZE(data), seed)
167177 elif PyObject_CheckBuffer(data):
168178 PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
169- result = c_CityHash64WithSeed (<const char*>buf.buf, buf.len, seed)
179+ result = c_Hash64WithSeed (<const char*>buf.buf, buf.len, seed)
170180 PyBuffer_Release(&buf)
171181 else:
172182 raise _type_error("data", ["basestring", "buffer"], data)
@@ -189,19 +199,20 @@ Raises:
189199 OverflowError : if seed cannot be converted to unsigned int64
190200 """
191201 cdef Py_buffer buf
192- cdef bytes obj
193202 cdef uint64 result
203+ cdef const char* encoding
204+ cdef Py_ssize_t encoding_size = 0
205+
194206 if PyUnicode_Check(data):
195- obj = PyUnicode_AsUTF8String(data)
196- PyObject_GetBuffer(obj, &buf, PyBUF_SIMPLE)
197- result = c_CityHash64WithSeeds(<const char*>buf.buf, buf.len, seed0, seed1)
198- PyBuffer_Release(&buf)
207+ encoding = PyUnicode_AsUTF8AndSize(data, &encoding_size)
208+ result = c_Hash64WithSeeds(encoding, encoding_size, seed0, seed1)
199209 elif PyBytes_Check(data):
200- result = c_CityHash64WithSeeds(<const char*>PyBytes_AS_STRING(data),
201- PyBytes_GET_SIZE(data), seed0, seed1)
210+ result = c_Hash64WithSeeds(
211+ <const char*>PyBytes_AS_STRING(data),
212+ PyBytes_GET_SIZE(data), seed0, seed1)
202213 elif PyObject_CheckBuffer(data):
203214 PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
204- result = c_CityHash64WithSeeds (<const char*>buf.buf, buf.len, seed0, seed1)
215+ result = c_Hash64WithSeeds (<const char*>buf.buf, buf.len, seed0, seed1)
205216 PyBuffer_Release(&buf)
206217 else:
207218 raise _type_error("data", ["basestring", "buffer"], data)
@@ -221,19 +232,20 @@ Raises:
221232 TypeError : if input data is not a string or a buffer
222233 """
223234 cdef Py_buffer buf
224- cdef bytes obj
225235 cdef pair[uint64, uint64] result
236+ cdef const char* encoding
237+ cdef Py_ssize_t encoding_size = 0
238+
226239 if PyUnicode_Check(data):
227- obj = PyUnicode_AsUTF8String(data)
228- PyObject_GetBuffer(obj, &buf, PyBUF_SIMPLE)
229- result = c_CityHash128(<const char*>buf.buf, buf.len)
230- PyBuffer_Release(&buf)
240+ encoding = PyUnicode_AsUTF8AndSize(data, &encoding_size)
241+ result = c_Hash128(encoding, encoding_size)
231242 elif PyBytes_Check(data):
232- result = c_CityHash128(<const char*>PyBytes_AS_STRING(data),
233- PyBytes_GET_SIZE(data))
243+ result = c_Hash128(
244+ <const char*>PyBytes_AS_STRING(data),
245+ PyBytes_GET_SIZE(data))
234246 elif PyObject_CheckBuffer(data):
235247 PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
236- result = c_CityHash128 (<const char*>buf.buf, buf.len)
248+ result = c_Hash128 (<const char*>buf.buf, buf.len)
237249 PyBuffer_Release(&buf)
238250 else:
239251 raise _type_error("data", ["basestring", "buffer"], data)
@@ -255,24 +267,24 @@ Raises:
255267 OverflowError : if seed cannot be converted to unsigned int64
256268 """
257269 cdef Py_buffer buf
258- cdef bytes obj
259270 cdef pair[uint64, uint64] result
260271 cdef pair[uint64, uint64] tseed
272+ cdef const char* encoding
273+ cdef Py_ssize_t encoding_size = 0
261274
262275 tseed.first = seed >> 64ULL
263276 tseed.second = seed & ((1ULL << 64ULL) - 1ULL)
264277
265278 if PyUnicode_Check(data):
266- obj = PyUnicode_AsUTF8String(data)
267- PyObject_GetBuffer(obj, &buf, PyBUF_SIMPLE)
268- result = c_CityHash128WithSeed(<const char*>buf.buf, buf.len, tseed)
269- PyBuffer_Release(&buf)
279+ encoding = PyUnicode_AsUTF8AndSize(data, &encoding_size)
280+ result = c_Hash128WithSeed(encoding, encoding_size, tseed)
270281 elif PyBytes_Check(data):
271- result = c_CityHash128WithSeed(<const char*>PyBytes_AS_STRING(data),
272- PyBytes_GET_SIZE(data), tseed)
282+ result = c_Hash128WithSeed(
283+ <const char*>PyBytes_AS_STRING(data),
284+ PyBytes_GET_SIZE(data), tseed)
273285 elif PyObject_CheckBuffer(data):
274286 PyObject_GetBuffer(data, &buf, PyBUF_SIMPLE)
275- result = c_CityHash128WithSeed (<const char*>buf.buf, buf.len, tseed)
287+ result = c_Hash128WithSeed (<const char*>buf.buf, buf.len, tseed)
276288 PyBuffer_Release(&buf)
277289 else:
278290 raise _type_error("data", ["basestring", "buffer"], data)
0 commit comments