@@ -4,12 +4,22 @@ Multiprocessing version of memory profiling of Python programs.
44"""
55
66import os
7+ import re
78import time
89import glob
910import argparse
1011import subprocess
1112import memory_profiler as mp
1213
14+ from collections import defaultdict
15+
16+ try :
17+ import numpy as np
18+ import matplotlib .pyplot as plt
19+ except ImportError :
20+ plt = None
21+ np = None
22+
1323
1424# Command Descriptions and Constants
1525DESCRIPTION = "Multiprocessing memory profiling over time."
@@ -82,14 +92,130 @@ def run_action(args):
8292 # Sleep for the given interval
8393 time .sleep (args .interval )
8494
95+ # Return the results of the run action
8596 return "memory profile written to {}" .format (args .output )
8697
8798
def plot_action(args):
    """
    Use matplotlib to draw the memory usage of a mprofile .dat file.

    Reads ``args.profile`` (a list of paths), ``args.title`` and
    ``args.output``. When no profile is given, the most recent
    ``mprofile_??????????????.dat`` file in the current directory is used.
    Each profile is drawn on its own figure, which is saved to
    ``args.output`` when that is set and shown interactively otherwise.

    Returns a short summary string with the number of profiles plotted.

    Raises ImportError when matplotlib is unavailable, and ValueError when
    no input file can be found or a profile contains no memory samples.
    """
    if plt is None:
        raise ImportError(
            "matplotlib is needed for plotting."
        )

    def read_mprofile_file(path):
        """
        Reads the specialized version of the mprofile for multiprocessing.

        Returns a ``(command, series)`` tuple: ``command`` is the text of
        the last CMDLINE line (None when there is none) and ``series`` maps
        a process name ("main" or "child <idx>") to a list of
        ``(mem, ts)`` float tuples in file order.
        """
        # Regular expression line parsers for parsing data
        cmdre = re.compile(r"^CMDLINE\s+(.+)$")
        memre = re.compile(r"^MEM\s+([\d\.e]+)\s+([\d\.e]+)$")
        cldre = re.compile(r"^CHLD(\d+)\s+([\d\.e]+)\s+([\d\.e]+)$")

        # Data structure returned is a series of names (mem, ts) tuples.
        series = defaultdict(list)
        command = None

        with open(path, 'r') as f:
            for line in f:

                # Match children memory usage lines
                match = cldre.match(line)
                if match:
                    idx, mem, ts = match.groups()
                    series["child " + idx].append((float(mem), float(ts)))
                    continue

                # Match main process memory usage lines
                match = memre.match(line)
                if match:
                    series['main'].append(tuple(map(float, match.groups())))
                    continue

                # Match command line(s)
                # NOTE: mprofile files are opened for appending, so there
                # could be multiple command lines; the last one wins.
                match = cmdre.match(line)
                if match:
                    command = match.group(1)

        return command, series

    def plot_mprofile_file(path, title=None):
        """
        Plots an mprofile file that contains specialized child process data.

        The figure title is ``title`` when given, otherwise the command
        line recorded in the file. Returns the matplotlib axes drawn on.
        """
        # Parse the mprofile file to get the data
        command, series = read_mprofile_file(path)
        title = title or command

        if not series:
            raise ValueError(
                "No memory samples found in {}".format(path)
            )

        # Create and configure the figure
        fig = plt.figure(figsize=(14, 6), dpi=90)
        axe = fig.add_axes([0.1, 0.1, 0.6, 0.75])
        axe.set_xlabel("time (in seconds)")
        axe.set_ylabel("memory used (in MiB)")
        axe.set_title(title)

        # Find the start timestamp for the process. The first main-process
        # sample is used (assumes the series were written in order); a file
        # holding only child samples falls back to the earliest timestamp.
        # .get() is used so the defaultdict does not grow an empty series.
        main = series.get('main')
        if main:
            start = main[0][1]
        else:
            start = min(
                item[1] for data in series.values() for item in data
            )

        # Maximal memory point as (relative timestamp, memory).
        mpoint = (0, 0)

        # Plot all of the series, the main process and the children.
        for proc, data in series.items():
            # Create the numpy arrays from the series data
            ts = np.asarray([item[1] for item in data]) - start
            mem = np.asarray([item[0] for item in data])

            # Plot the line to the figure
            axe.plot(ts, mem, "+-", label=proc)

            # Detect the maximal memory point. The timestamp is resolved
            # here, while ``ts`` still belongs to the series holding the
            # maximum, so the marker cannot be looked up in another series.
            max_mem = mem.max()
            if max_mem > mpoint[1]:
                mpoint = (ts[mem.argmax()], max_mem)

        # Add the marker lines for the maximal memory usage
        xmin, xmax = axe.get_xlim()
        axe.hlines(
            mpoint[1], xmin + 0.001, xmax - 0.001,
            colors='r', linestyles='--'
        )
        ymin, ymax = axe.get_ylim()
        axe.vlines(
            mpoint[0], ymin + 0.001, ymax - 0.001,
            colors='r', linestyles='--'
        )

        # Add the legend
        legend = axe.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        legend.get_frame().set_alpha(0.5)
        axe.grid()

        return axe

    # Get the latest profile if no profile files were passed in.
    if not args.profile:

        # Glob profiles of our format and sort them; the names embed a
        # timestamp, so ascending order puts the newest file last.
        profiles = sorted(glob.glob("mprofile_??????????????.dat"))

        if not profiles:
            raise ValueError((
                "No input file found.\n This program looks for mprofile_*.dat "
                "files generated by the `mpmprof run` command."
            ))

        # Assign the latest profile to visualize
        args.profile = profiles[-1:]

    # Filter out any files that do not exist
    args.profile = [path for path in args.profile if os.path.exists(path)]
    if not args.profile:
        raise ValueError("No input files found!")

    # For each passed in file, create a figure from the mprofile.
    # NOTE: with several profiles and a single args.output, each figure is
    # saved to the same path, so only the last one remains on disk.
    for path in args.profile:
        plot_mprofile_file(path, args.title)
        if args.output:
            plt.savefig(args.output)
        else:
            plt.show()

    return "{} memory profiles plotted.".format(len(args.profile))
93219
94220
95221if __name__ == '__main__' :
0 commit comments