Skip to content

Commit 717e215

Browse files
authored
hierarchy (#7)
add hierarchical module
1 parent 81c892f commit 717e215

1 file changed

Lines changed: 61 additions & 0 deletions

File tree

dotplot/hierarchical.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import warnings
2+
import pandas as pd
3+
from scipy.cluster.hierarchy import linkage, dendrogram, cut_tree
4+
5+
6+
def fast_cluster(array, method, metric):
    """
    Compute a linkage with the optional ``fastcluster`` package.

    ``fastcluster.linkage_vector`` is used for the 'single' method, and for
    the 'centroid', 'median' and 'ward' methods when the metric is
    'euclidean'; every other combination goes through
    ``fastcluster.linkage``.

    Parameters
    ----------
    array : array-like
        Observations to cluster, forwarded unchanged to ``fastcluster``.
    method : str
        Linkage method name.
    metric : str
        Distance metric name.

    Returns
    -------
    The linkage returned by the selected ``fastcluster`` function.

    Raises
    ------
    ImportError
        If ``fastcluster`` is not installed.
    """
    # Imported lazily so a missing optional dependency surfaces as an
    # ImportError at call time rather than when this module is imported.
    import fastcluster

    use_vector_variant = (
        (metric == 'euclidean' and method in ('centroid', 'median', 'ward'))
        or method == 'single'
    )
    if use_vector_variant:
        compute = fastcluster.linkage_vector
    else:
        compute = fastcluster.linkage
    return compute(array, method=method, metric=metric)
18+
19+
20+
def make_linkage(array, method, metric):
    """
    Build a linkage for ``array``, preferring ``fastcluster`` over scipy.

    ``fast_cluster`` is tried first. If it raises ``ImportError``, the
    linkage is computed with ``scipy.cluster.hierarchy.linkage`` instead,
    and a warning is issued when ``array`` has 10000 or more entries along
    its first dimension.

    Parameters
    ----------
    array : array-like
        Observations to cluster.
    method : str
        Linkage method name.
    metric : str
        Distance metric name.

    Returns
    -------
    The linkage produced by whichever backend was used.
    """
    try:
        result = fast_cluster(array, method, metric)
    except ImportError:
        large_input = len(array) >= 10000
        if large_input:
            warnings.warn("Clustering large matrix with scipy. Installing "
                          "`fastcluster` may give better performance.")
        # scipy fallback when the optional accelerator is unavailable.
        result = linkage(array, method=method, metric=metric)
    return result
31+
32+
33+
def cluster_hierarchy(data, method, axis, metric='euclidean', n_clusters=None):
    """
    Reorder the labels of one axis of ``data`` by hierarchical clustering.

    Parameters
    ----------
    data : pandas.DataFrame
        Rectangular data. It is not modified.
    method : str
        Linkage method forwarded to ``make_linkage``, e.g. 'single',
        'centroid', 'median' or 'ward'.
    axis : int
        Which axis to cluster. 1 clusters the columns; any other value
        clusters the rows.
    metric : str, optional
        Distance metric forwarded to ``make_linkage``. Defaults to
        'euclidean'.
    n_clusters : int, optional
        When given, the tree is cut into this many flat clusters and the
        labels are returned grouped by ascending cluster id. When omitted,
        the labels are returned in dendrogram leaf order.

    Returns
    -------
    numpy.ndarray
        The index labels of the clustered axis, reordered.
    """
    # Clustering always runs over rows, so transpose to cluster columns.
    if axis == 1:
        data = data.T
    labels = data.index.values
    _linkage = make_linkage(data.values, method, metric)

    if n_clusters is not None:
        # Cut the computed linkage matrix (not scipy's `linkage` function).
        membership = cut_tree(_linkage, n_clusters=n_clusters).flatten()
        # A stable sort keeps the original relative order of labels that
        # fall in the same cluster, making the result deterministic.
        order = pd.Series(membership).sort_values(
            ascending=True, kind='mergesort').index.values
        return labels[order]

    # no_plot=True: only the computed leaf ordering is needed, not a figure.
    leaves = dendrogram(_linkage, no_plot=True)['leaves']
    return labels[leaves]

0 commit comments

Comments
 (0)