|
| 1 | +# Copyright (c) Microsoft Corporation. |
| 2 | +# Licensed under the MIT License - see LICENSE file in this repo. |
| 3 | + |
| 4 | +import re |
| 5 | +import networkx as nx |
| 6 | +import sys |
| 7 | +from networkx.drawing.nx_pydot import write_dot |
| 8 | +from os.path import splitext |
| 9 | + |
# Matches an already-symbolized frame line, e.g. "00 module!Func+0x1a".
_RGX_SYMBOLIZED_FRAME = re.compile(
    r"((?P<framenum>\d+)\s+)*(?P<module>\w+)(\.(dll|exe))*!(?P<symbolizedfunc>.+?)\s*\+\s*(0[xX])*(?P<offset>[0-9a-fA-F]+)\s*"
)
# Matches a stack header line, e.g. "Slot_3 [count:42]:".
_RGX_SLOT_COUNT = re.compile(
    r"Slot_(?P<slotidx>\d+)\s+\[count\:(?P<slotcount>\d+)\]\:"
)


def _build_call_graph(lines):
    """Build the frame graph from pre-cleaned input lines.

    Each frame line becomes a node keyed by the full line text. Consecutive
    frames within one stack are linked by an edge pointing from the later
    line to the earlier one. Nodes and edges accumulate the count of the
    most recent slot header in their ``SlotCount`` attribute.
    """
    graph = nx.DiGraph()
    prev_node = None
    slotcount = 0

    for line in lines:
        if not line:
            # A blank line ends the current stack.
            prev_node = None
            continue

        slot_match = _RGX_SLOT_COUNT.match(line)
        if slot_match:
            slotcount = int(slot_match.group("slotcount"))
            # A slot header starts a new stack: never link its first frame
            # to the last frame of the previous stack, even when the input
            # has no blank separator line.
            prev_node = None
            continue

        if not _RGX_SYMBOLIZED_FRAME.match(line):
            # Unrecognized lines are ignored and do not break the chain.
            continue

        node_id = line
        if node_id in graph:
            graph.nodes[node_id]["SlotCount"] += slotcount
        else:
            graph.add_node(node_id, SlotCount=slotcount)

        if prev_node is not None:
            if graph.has_edge(node_id, prev_node):
                graph.edges[node_id, prev_node]["SlotCount"] += slotcount
            else:
                graph.add_edge(node_id, prev_node, SlotCount=slotcount)
        prev_node = node_id

    return graph


def _coalesce_linear_chains(graph):
    """Merge each node's sole predecessor into it when that predecessor has
    no other successor, repeating until no further merge is possible.

    The surviving node's label becomes its own label followed by the merged
    predecessor's label; the predecessor's incoming edges are redirected to
    the surviving node.
    """
    merged_any = True
    while merged_any:
        merged_any = False
        for node in list(graph.nodes):
            if node not in graph:
                # Already merged away earlier in this pass.
                continue

            preds = list(graph.predecessors(node))
            if len(preds) != 1:
                continue

            from_node = preds[0]
            if from_node == node:
                # A self-loop is not a chain; merging a node into itself
                # would delete it from the graph.
                continue
            if graph.out_degree(from_node) != 1:
                continue

            graph.nodes[node]["label"] = (
                graph.nodes[node].get("label", node)
                + "\n"
                + graph.nodes[from_node].get("label", from_node)
            )
            # Redirect from_node's in-edges to node; removing from_node
            # afterwards drops the old edges along with it.
            for pred in list(graph.predecessors(from_node)):
                graph.add_edge(pred, node, **graph.edges[pred, from_node])
            graph.remove_node(from_node)
            merged_any = True


def _decorate_for_dot(graph):
    """Attach the label / penwidth attributes and the graph-wide DOT defaults."""
    total_slot_count_from_edges = sum(
        data["SlotCount"] for _, _, data in graph.edges(data=True)
    )

    for node, attrs in graph.nodes(data=True):
        # Include the slot count ("samples") in the node label and restore
        # the "::" that was replaced by "--" while reading the input.
        attrs["label"] = (
            f"{attrs.get('label', node)}\t({attrs.get('SlotCount', 0)} samples)".replace(
                "--", "::"
            )
        )

    for _, _, attrs in graph.edges(data=True):
        count = attrs.get("SlotCount", 0)
        attrs["label"] = "<<I>" + f"{count} switches" + "</I>>"
        if total_slot_count_from_edges:
            pen_width = max(count / total_slot_count_from_edges * 256.0, 1)
        else:
            # Every edge has a zero count (frames seen before any slot
            # header); fall back to the minimum width instead of dividing
            # by zero.
            pen_width = 1
        attrs["penwidth"] = f"{pen_width}"

    graph.graph["graph"] = {"rankdir": "BT"}
    graph.graph["node"] = {
        "shape": "rect",
        "style": "rounded",
        "fontname": "Segoe UI",
        "fontsize": 40.0,
    }
    graph.graph["edge"] = {
        "color": "grey",
        "fillcolor": "grey",
        "fontname": "Segoe UI",
        "fontsize": 32.0,
    }


def process_stack_file(input_path):
    """Parse a file of slot-counted call stacks into a DOT-ready graph.

    The file is read as UTF-8. Lines matching ``Slot_<n> [count:<m>]:`` set
    the count applied to the frames that follow; lines matching the
    symbolized-frame pattern (``module!function+offset``) become nodes.
    Linear chains are then collapsed into single multi-line nodes, and
    label / penwidth attributes are added for Graphviz output.

    Args:
        input_path: Path of the text file to read.

    Returns:
        A ``networkx.DiGraph`` whose nodes and edges carry ``SlotCount`` and
        ``label`` attributes (edges also ``penwidth``), and whose
        ``graph["graph"]``, ``graph["node"]`` and ``graph["edge"]`` entries
        hold default DOT attributes.
    """
    with open(input_path, "r", encoding="utf-8") as sr:
        # "::" is swapped for "--" while the text is used as a node id;
        # _decorate_for_dot restores it in the visible labels.
        lines = [raw.replace("::", "--").strip() for raw in sr]

    G = _build_call_graph(lines)
    _coalesce_linear_chains(G)
    _decorate_for_dot(G)
    return G
| 145 | + |
# Script entry: build the graph for the stack file named on the command line
# and write <input-without-extension>.dot and .svg.
if len(sys.argv) < 2:
    # Fail with a readable message instead of an IndexError on sys.argv[1].
    sys.exit("Usage: python <this-script> <stack-file>")

G = process_stack_file(sys.argv[1])
out_filename_without_ext = splitext(sys.argv[1])[0]
write_dot(G, out_filename_without_ext + ".dot")
nx.nx_agraph.to_agraph(G).draw(path=out_filename_without_ext + ".svg", prog='dot',format='svg')

# Collect every simple path between each ordered pair of distinct nodes,
# paired with the sum of the SlotCount edge attribute along that path.
paths = []
for source in G.nodes:
    for target in G.nodes:
        if source != target:
            for path in nx.all_simple_paths(G, source=source, target=target):
                paths.append((path, nx.path_weight(G, path, weight='SlotCount')))

# Sort by total SlotCount, descending, and print the top 2.
paths.sort(key=lambda x: x[1], reverse=True)
print("\nTop 2 longest paths:")
for path, length in paths[:2]:
    print(" -> ".join(path) + f" Length: {length}")
0 commit comments