Skip to content

Commit 7ba3ac0

Browse files
authored
update readme (#16)
* get rid of separate include/ dir * update readme * version bump
1 parent d5c6afc commit 7ba3ac0

13 files changed

Lines changed: 471 additions & 460 deletions

MANIFEST.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
include LICENSE
22
include README.rst
33
include cpuinfo.py
4-
include include/*.h
4+
include src/*.h
55
include src/*.cc
66
include src/*.cpp
77
include src/*.pyx

README.rst

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,12 @@ MetroHash algorithm. For stateless hashing, it exports ``metrohash64`` and
6060
Incremental hashing
6161
~~~~~~~~~~~~~~~~~~~
6262

63-
For incremental hashing, use ``MetroHash64`` and ``MetroHash128`` classes.
64-
Incremental hashing is associative and guarantees that any combination of input
65-
slices will result in the same final hash value. This is useful for processing
66-
large inputs and stream data. Example with two slices:
63+
Unlike its cousins CityHash and FarmHash, MetroHash allows incremental
64+
(stateful) hashing. For incremental hashing, use ``MetroHash64`` and
65+
``MetroHash128`` classes. Incremental hashing is associative and guarantees
66+
that any combination of input slices will result in the same final hash value.
67+
This is useful for processing large inputs and stream data. Example with two
68+
slices:
6769

6870
.. code-block:: python
6971
@@ -85,10 +87,14 @@ Note that the resulting hash value above is the same as in:
8587
Buffer protocol support
8688
~~~~~~~~~~~~~~~~~~~~~~~
8789

88-
The methods in this module support Python `Buffer Protocol
89-
<https://docs.python.org/3/c-api/buffer.html>`__, which allows them to be used
90-
on any object that exports a buffer interface. Here is an example showing
91-
hashing of a 4D NumPy array:
90+
The Python `Buffer Protocol <https://docs.python.org/3/c-api/buffer.html>`__
91+
allows Python objects to expose their data as raw byte arrays to other objects,
92+
for fast access without copying to a separate location in memory. Among
93+
others, NumPy is a major framework that supports this protocol.
94+
95+
All hashing functions in this package will read byte arrays from objects that
96+
expose them via the buffer protocol. Here is an example showing hashing of a 4D
97+
NumPy array:
9298

9399
.. code-block:: python
94100
@@ -97,8 +103,8 @@ hashing of a 4D NumPy array:
97103
>>> metrohash.hash64_int(arr)
98104
12125832280816116063
99105
100-
Note that arrays need to be contiguous for this to work. To convert a
101-
non-contiguous array, use ``np.ascontiguousarray()`` method.
106+
The arrays need to be contiguous for this to work. To convert a non-contiguous
107+
array, use NumPy's ``ascontiguousarray()`` function.
102108

103109
Development
104110
-----------

cpp.mk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ CXX := g++
22
CXXFLAGS := -std=c++11 -O3 -msse4.2
33
LDFLAGS :=
44
SRCEXT := cc
5-
INC := -I include
5+
INC := -I src
66
LIB := -L lib
77

88
INPUT := ./data/sample_100k.txt

pip-freeze.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ ipdb==0.13.9
88
ipython==7.30.1
99
jedi==0.18.1
1010
matplotlib-inline==0.1.3
11-
-e git+https://github.com/escherba/python-metrohash@03ec8e4b0b21bf4a9726b3625d5ebc6e791b2a82#egg=metrohash
11+
-e git+https://github.com/escherba/python-metrohash@cffe9bc1c0b48c2269c9e0abe1fb564ccf86d41f#egg=metrohash
1212
numpy==1.21.5
1313
packaging==21.3
1414
parso==0.8.3
@@ -18,7 +18,7 @@ pluggy==1.0.0
1818
prompt-toolkit==3.0.24
1919
ptyprocess==0.7.0
2020
py==1.11.0
21-
Pygments==2.11.0
21+
Pygments==2.11.1
2222
pyparsing==3.0.6
2323
pytest==6.2.5
2424
toml==0.10.2

setup.py

Lines changed: 35 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ def is_pure(self):
3131

3232
CXXFLAGS = []
3333

34-
print(f"building for platform: {os.name}")
34+
print("building for platform: %s" % os.name)
35+
print("available CPU flags: %s" % CPU_FLAGS)
36+
3537
if os.name == "nt":
3638
CXXFLAGS.extend(["/O2"])
3739
else:
@@ -42,58 +44,58 @@ def is_pure(self):
4244
])
4345

4446

47+
if 'ssse3' in CPU_FLAGS:
48+
print("Compiling with SSSE3 enabled")
49+
CXXFLAGS.append('-mssse3')
50+
else:
51+
print("compiling without SSE3 support")
52+
53+
4554
if 'sse4_2' in CPU_FLAGS:
4655
print("Compiling with SSE4.2 enabled")
4756
CXXFLAGS.append('-msse4.2')
4857
else:
4958
print("compiling without SSE4.2 support")
5059

5160

52-
INCLUDE_DIRS = ['include']
61+
INCLUDE_DIRS = ['src']
5362
CXXHEADERS = [
54-
"include/metro.h",
55-
"include/metrohash.h",
56-
"include/metrohash128.h",
57-
"include/metrohash128crc.h",
58-
"include/metrohash64.h",
59-
"include/platform.h",
63+
"src/metro.h",
64+
"src/metrohash.h",
65+
"src/metrohash128.h",
66+
"src/metrohash128crc.h",
67+
"src/metrohash64.h",
68+
"src/platform.h",
6069
]
6170
CXXSOURCES = [
6271
"src/metrohash64.cc",
6372
"src/metrohash128.cc",
6473
]
6574

66-
CMDCLASS = {}
6775
EXT_MODULES = []
6876

6977
if USE_CYTHON:
7078
print("building extension using Cython")
71-
CMDCLASS['build_ext'] = build_ext
72-
EXT_MODULES.append(
73-
Extension(
74-
"metrohash",
75-
CXXSOURCES + ["src/metrohash.pyx"],
76-
depends=CXXHEADERS,
77-
language="c++",
78-
extra_compile_args=CXXFLAGS,
79-
include_dirs=INCLUDE_DIRS,
80-
)
81-
)
79+
CMDCLASS = {'build_ext': build_ext}
80+
SRC_EXT = ".pyx"
8281
else:
8382
print("building extension w/o Cython")
84-
EXT_MODULES.append(
85-
Extension(
86-
"metrohash",
87-
CXXSOURCES + ["src/metrohash.cpp"],
88-
depends=CXXHEADERS,
89-
language="c++",
90-
extra_compile_args=CXXFLAGS,
91-
include_dirs=INCLUDE_DIRS,
92-
)
93-
)
94-
95-
96-
VERSION = '0.1.1.post2'
83+
CMDCLASS = {}
84+
SRC_EXT = ".cpp"
85+
86+
87+
EXT_MODULES = [
88+
Extension(
89+
"metrohash",
90+
CXXSOURCES + ["src/metrohash" + SRC_EXT],
91+
depends=CXXHEADERS,
92+
language="c++",
93+
extra_compile_args=CXXFLAGS,
94+
include_dirs=INCLUDE_DIRS,
95+
),
96+
]
97+
98+
VERSION = '0.1.1.post3'
9799
URL = "https://github.com/escherba/python-metrohash"
98100

99101

File renamed without changes.

0 commit comments

Comments
 (0)