Skip to content

Commit 238f566

Browse files
committed
add multilens
1 parent b3fcb77 commit 238f566

9 files changed

Lines changed: 240 additions & 158 deletions

File tree

.DS_Store

0 Bytes
Binary file not shown.

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -153,3 +153,6 @@ semb/methods/struc2vec/random_walks.txt
153153

154154
# drne intermediate files
155155
semb/methods/drne/eni/
156+
157+
# riwalk intermediate files
158+
semb/methods/riwalk/walks/

semb/.DS_Store

0 Bytes
Binary file not shown.

semb/methods/multilens/main.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@ def get_combined_feature_sequence(graph, rep_method, current_node, input_dense_m
6767

6868
def get_features(graph, rep_method, input_dense_matrix = None, nodes_to_embed = None):
6969

70+
num_buckets = rep_method.num_buckets
7071
feature_wid_sum, feature_wid_ind = get_feature_n_buckets(input_dense_matrix, num_buckets, rep_method.bucket_max_value)
7172
feature_matrix = np.zeros([graph.num_nodes, feature_wid_sum * len(graph.unique_cat)])
7273

semb/methods/multilens/method.py

Lines changed: 83 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -12,30 +12,94 @@ def get_id(self):
1212
return "multilens"
1313

1414
def train(self):
15+
directed = True
16+
base_features = ['row', 'col', 'row_col']
1517
dim = self.params['dim']
1618
L = self.params['L']
1719
num_buckets = self.params['base']
1820
op = self.params['operators']
21+
22+
dict_id_idx = dict()
23+
dict_idx_id = dict()
24+
25+
raw_ = list()
26+
cur_count_ = 0
27+
for cur_edge in self.graph.edges():
28+
src = cur_edge[0]
29+
dst = cur_edge[1]
30+
if src not in dict_id_idx:
31+
dict_id_idx[src] = cur_count_
32+
dict_idx_id[cur_count_] = src
33+
cur_count_ += 1
34+
if dst not in dict_id_idx:
35+
dict_id_idx[dst] = cur_count_
36+
dict_idx_id[cur_count_] = dst
37+
cur_count_ += 1
38+
raw_ += [[dict_id_idx[src], dict_id_idx[dst]]]
39+
40+
raw = np.array(raw_)
41+
COL = raw.shape[1]
42+
43+
if COL < 2:
44+
sys.exit('[Input format error.]')
45+
elif COL == 2:
46+
print('[unweighted graph detected.]')
47+
rows = raw[:,0]
48+
cols = raw[:,1]
49+
weis = np.ones(len(rows))
50+
51+
elif COL == 3:
52+
print('[weighted graph detected.]')
53+
rows = raw[:,0]
54+
cols = raw[:,1]
55+
weis = raw[:,2]
56+
57+
check_eq = True
58+
max_id = int(max(max(rows), max(cols)))
59+
num_nodes = max_id + 1
60+
61+
nodes_to_embed = range(int(max_id)+1)
62+
63+
if max(rows) != max(cols):
64+
rows = np.append(rows,max(max(rows), max(cols)))
65+
cols = np.append(cols,max(max(rows), max(cols)))
66+
weis = np.append(weis, 0)
67+
check_eq = False
68+
69+
adj_matrix = sps.lil_matrix( sps.csc_matrix((weis, (rows, cols))))
70+
71+
CAT_DICT = defaultdict(set)
72+
ID_CAT_DICT = dict()
73+
for i in range(num_nodes):
74+
CAT_DICT[1].add(i)
75+
ID_CAT_DICT[i] = 1
76+
unique_cat = [1]
77+
1978
######################################################
2079
# Multi-Lens starts.
2180
######################################################
81+
2282
g_sums = []
23-
rep_method = RepMethod(method="hetero", bucket_max_value=30,
24-
num_buckets=num_buckets, operators=op, use_total=len(op))
83+
84+
neighbor_list = construct_neighbor_list(adj_matrix, nodes_to_embed)
85+
neighbor_list_r = construct_neighbor_list(adj_matrix.T, nodes_to_embed)
86+
87+
graph = Graph(adj_matrix = adj_matrix, max_id = max_id, num_nodes = num_nodes, base_features = base_features,
88+
neighbor_list = neighbor_list, directed = directed, cat_dict = CAT_DICT, id_cat_dict = ID_CAT_DICT, unique_cat = unique_cat, check_eq = check_eq)
89+
90+
rep_method = RepMethod(method = "hetero", bucket_max_value = 30, num_buckets = num_buckets, operators = op, use_total = len(op))
2591

2692
########################################
2793
# Step 1: get base features
2894
########################################
29-
init_feature_matrix = get_init_features(
30-
self.graph, base_features, nodes_to_embed)
31-
init_feature_matrix_seq = get_seq_features(
32-
self.graph, rep_method, input_dense_matrix=init_feature_matrix, nodes_to_embed=nodes_to_embed)
95+
init_feature_matrix = get_init_features(graph, base_features, nodes_to_embed)
96+
init_feature_matrix_seq = get_seq_features(graph, rep_method, input_dense_matrix = init_feature_matrix, nodes_to_embed = nodes_to_embed)
3397

34-
Kis = get_Kis(init_feature_matrix_seq, dim, L)
35-
print(Kis)
3698

37-
feature_matrix_emb, g_sum = feature_layer_evaluation_embedding(
38-
self.graph, rep_method, feature_matrix=init_feature_matrix_seq, k=Kis[0])
99+
Kis = get_Kis(init_feature_matrix_seq, dim, L)
100+
# print Kis
101+
102+
feature_matrix_emb, g_sum = feature_layer_evaluation_embedding(graph, rep_method, feature_matrix = init_feature_matrix_seq, k = Kis[0])
39103

40104
g_sums.append(g_sum)
41105

@@ -49,20 +113,20 @@ def train(self):
49113
feature_matrix = init_feature_matrix
50114

51115
for i in range(L):
52-
print('[Current layer] ' + str(i))
53-
print('[feature_matrix shape] ' + str(feature_matrix.shape))
116+
print('[Current layer]', str(i))
117+
print('[feature_matrix shape]', str(feature_matrix.shape))
54118

55-
feature_matrix_new = search_feature_layer(
56-
self.graph, rep_method, base_feature_matrix=feature_matrix)
57-
feature_matrix_new_seq = get_seq_features(
58-
self.graph, rep_method, input_dense_matrix=feature_matrix_new, nodes_to_embed=nodes_to_embed)
59-
feature_matrix_new_emb, g_new_sum = feature_layer_evaluation_embedding(
60-
self.graph, rep_method, feature_matrix=feature_matrix_new_seq, k=Kis[i+1])
119+
feature_matrix_new = search_feature_layer(graph, rep_method, base_feature_matrix = feature_matrix)
120+
feature_matrix_new_seq = get_seq_features(graph, rep_method, input_dense_matrix = feature_matrix_new, nodes_to_embed = nodes_to_embed)
121+
feature_matrix_new_emb, g_new_sum = feature_layer_evaluation_embedding(graph, rep_method, feature_matrix = feature_matrix_new_seq, k = Kis[i+1])
61122

62123
feature_matrix = feature_matrix_new
63124
rep_new = feature_matrix_new_emb
64125
rep = np.concatenate((rep, rep_new), axis=1)
65126

66127
g_sums.append(g_new_sum)
67128

68-
self.embeddings = g_sum
129+
N, K = rep.shape
130+
self.embeddings = dict()
131+
for i in range(N):
132+
self.embeddings[dict_idx_id[i]] = rep[i, :].tolist()

semb/methods/riwalk/RiWalkGraph.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ def simple_log2(x):
110110

111111
def save_random_walks(walks, part, i):
112112
indexes = np.random.permutation(len(walks)).tolist()
113-
with open('lib/RiWalk/walks/__random_walks_{}_{}.txt'.format(part, i), 'w') as f:
113+
with open(WALK_FILES_DIR + '__random_walks_{}_{}.txt'.format(part, i), 'w') as f:
114114
for i in indexes:
115115
walk = walks[i]
116116
f.write(u"{}\n".format(u" ".join(str(v) for v in walk)))

semb/methods/riwalk/method.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ def __iter__(self):
4545
class Method(BaseMethod):
4646

4747
__PARAMS__ = dict(dim=128, walk_length=10, num_walks=80, window_size=10,
48-
until_k=4, iter=5, workers=5, flag='sp')
48+
until_k=4, iter=5, workers=5, flag='wl')
4949

5050
def get_id(self):
5151
return "riwalk"
@@ -55,7 +55,10 @@ def train(self):
5555
os.system('rm -rf %s' %
5656
os.path.join(WALK_FILES_DIR, "__random_walks_*.txt"))
5757
nx_g, mapping = self.preprocess_graph(self.graph)
58-
self.embeddings = self.learn(nx_g, mapping)
58+
reps = self.learn(nx_g, mapping)
59+
self.embeddings = dict()
60+
for cur_node in self.graph.nodes():
61+
self.embeddings[cur_node] = reps.get_vector(str(cur_node)).tolist()
5962

6063
def learn_embeddings(self):
6164
"""

semb/methods/riwalk/utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
from os import path
22

3-
WALK_FILES_DIR = path.join(path.dirname(__file__), 'walks')
3+
WALK_FILES_DIR = path.join(path.dirname(__file__), 'walks/')

0 commit comments

Comments
 (0)