Comments on azakai's blog: massdiff - Diff for Massif Snapshots

Sorry I had to make several posts because the full...

2018-12-17T13:48:00.909-08:00

Sorry I had to make several posts because the full script is too large.

# Diff two snapshots print 'diffing...' d...

2018-12-17T13:47:07.075-08:00

# Diff two snapshots

# Progress message; written in call form so it runs on Python 2 and 3 alike.
print('diffing...')
def commify(x):
    """Return integer *x* as a signed string with thousands separators.

    The sign is always present: '+' for zero and positive values, '-' for
    negative ones, e.g. ``commify(1234567) == '+1,234,567'``.

    x -- an integer (typically a difference in bytes).
    """
    sign = '+' if x >= 0 else '-'
    # The ',' format option groups the digits of abs(x) by three.
    return sign + format(abs(x), ',')

def diff_dicts(d1, d2):
    """Print the memory differences between two dicts of snapshot entries.

    d1 -- entries of the first ("before") snapshot, keyed by identifier.
    d2 -- entries of the second ("after") snapshot, keyed the same way.

    Each value must expose ``mem`` (a number) and ``children`` (a dict of the
    same kind).  Entries are reported from the largest increase to the
    largest decrease:

    * only in d1: printed with a '-' prefix;
    * only in d2: printed with a '+' prefix;
    * in both: printed with a '-/+' prefix, followed by the difference and a
      recursive diff of the children, but only when the absolute difference
      exceeds the module-level ``diff_threshold``.
    """
    keys = set(d1) | set(d2)

    data = []
    for key in keys:
        if key not in d2:
            diff = -d1[key].mem
        elif key not in d1:
            diff = d2[key].mem
        else:
            diff = d2[key].mem - d1[key].mem
        data.append((key, diff))

    # Largest growth first; the sort is stable, so ties keep their order.
    data.sort(key=lambda datum: datum[1], reverse=True)

    for key, diff in data:
        if key not in d2:
            print('- %s' % (d1[key],))
        elif key not in d1:
            print('+ %s' % (d2[key],))
        elif abs(diff) > diff_threshold:
            print('-/+ %s' % (d1[key],))
            print('[diff: %s]' % commify(diff))
            diff_dicts(d1[key].children, d2[key].children)

# Report: the top-level (whole-snapshot) line of each file with the total
# memory difference between them, then the diff of the first-level entries.
# print() is used in call form with %-formatting so the output is the same on
# Python 2 and Python 3.
print('- %s %s' % (snapshots[0].file, snapshots[0].root))
print('[diff: %s]' % commify(snapshots[1].root.mem - snapshots[0].root.mem))
print('+ %s %s' % (snapshots[1].file, snapshots[1].root))
print('-------------')
diff_dicts(snapshots[0].roots, snapshots[1].roots)

import os, sys, re # define custom parameters #...

2018-12-17T13:46:46.455-08:00

import os, sys, re

# User-tunable parameters

# Maximum indentation depth of heap-tree lines kept when reading a snapshot;
# 0 keeps only the top-level line holding the snapshot's total memory.
max_depth = 1
print('maximum depth = %s' % max_depth)
# Minimum absolute difference, in bytes, for an entry present in both
# snapshots to be reported (e.g. 1000 = 1 kB).
diff_threshold = 1000000
print('tolerance = %s bytes' % diff_threshold)

# Read files

class Snapshot:
    """Plain attribute container for one parsed snapshot file."""
    pass


class SnapshotLine:
    """One heap-tree line of a snapshot; attributes are assigned by the parser."""

    def __str__(self):
        """Return the line indented by ``indent`` spaces, cut to 130 characters."""
        return ('%s%s - %d - %10s' % (' '*self.indent, self.addr, self.mem, self.text))[:130]


# Every snapshot read so far, in reading order.
snapshots = []

def read_file(file):
    """Parse one massif snapshot file and append the result to ``snapshots``.

    Everything up to the ``snapshot=0`` marker is skipped, as are ``#----``
    separator lines.  The title fields that follow (``time``, ``mem_heap_B``,
    ``mem_heap_extra_B``, ``mem_stacks_B``) are stored as integer attributes
    on the new ``Snapshot``.  Every later ``n<count>: <bytes> ...`` heap-tree
    line indented by at most ``max_depth`` spaces is stored, in file order,
    as a ``SnapshotLine`` in ``snapshot.lines``.

    file -- path of the snapshot file to read.
    """
    print('reading file %s ...' % file)
    snapshot = Snapshot()
    snapshot.file = file
    snapshot.lines = []
    snapshots.append(snapshot)

    # Patterns are compiled once, outside the per-line loop.
    title_re = re.compile(
        r'^(?P<key>time|mem_heap_B|mem_heap_extra_B|mem_stacks_B)'
        r'=(?P<value>[\d]+)')
    # Heap-tree line carrying a call-site address ("n2: 123 0xABC: text").
    addr_line_re = re.compile(
        r'(?P<indent>[ ]*)n(?P<n>[\d]+): (?P<mem>[\d]+) '
        r'(?P<addr>0x[0-9A-F]*): (?P<text>.*)')
    # Heap-tree line without an address (e.g. the top-level total line).
    plain_line_re = re.compile(
        r'(?P<indent>[ ]*)n(?P<n>[\d]+): (?P<mem>[\d]+) (?P<text>.*)')
    # Any line starting with this many spaces is deeper than max_depth.
    too_deep_prefix = ' ' * (max_depth + 1)

    started = False
    title_read = False
    with open(file, 'r') as handle:
        for line in handle:
            line = line.rstrip('\n')
            if 'snapshot=0' in line:
                started = True
                continue
            if not started or '#----' in line:
                continue

            # Snapshot title lines
            if not title_read:
                m = title_re.match(line)
                if m:
                    setattr(snapshot, m.group('key'), int(m.group('value')))
                    continue
                # First non-title line: the title section is over for good.
                title_read = True

            # Snapshot detail line
            if line.startswith(too_deep_prefix):
                continue

            m = addr_line_re.match(line)
            has_addr = m is not None
            if not has_addr:
                m = plain_line_re.match(line)
            if not m:
                continue

            snapshot_line = SnapshotLine()
            snapshot_line.indent = len(m.group('indent'))
            snapshot_line.n = int(m.group('n'))
            snapshot_line.mem = int(m.group('mem'))
            # Lines without an address get 0 as a placeholder.
            snapshot_line.addr = m.group('addr') if has_addr else 0
            snapshot_line.text = m.group('text')
            if snapshot_line.indent <= max_depth:
                snapshot.lines.append(snapshot_line)

read_file(sys.argv[1])
read_file(sys.argv[2])

# Generate tree structure

print('generating tree structure...')
for snapshot in snapshots:
    # Lines at indent 0 become snapshot.root, lines at indent 1 go into
    # snapshot.roots (keyed by address), and deeper lines are attached to the
    # nearest preceding line that is one level shallower (keyed by text).
    snapshot.roots = {}
    for i, line in enumerate(snapshot.lines):
        line.children = {}
        indent = line.indent
        if indent == 0:
            snapshot.root = line
        elif indent == 1:
            snapshot.roots[line.addr] = line
        else:
            # Find parent
            j = i - 1
            while j >= 0 and snapshot.lines[j].indent != indent - 1:
                j -= 1
            if j < 0:
                # Without this guard the search would wrap around to negative
                # indices and attach the line to an unrelated later entry.
                raise ValueError('%s: no parent found for line: %s'
                                 % (snapshot.file, line))
            snapshot.lines[j].children[line.text] = line

#print snapshot.file, snapshot.roots

# Dump tree

def dump_lines(lines):
    """Print every line in *lines* followed, depth-first, by its descendants.

    lines -- iterable of objects exposing a ``children`` dict whose values
             are objects of the same kind.
    """
    def dump_line(line):
        print(line)
        for child in line.children.values():
            dump_line(child)

    for line in lines:
        dump_line(line)

def dump_tree():
    """Debug helper: print each snapshot's file, root line and roots dict,
    followed by a recursive dump of its first-level entries."""
    print('Tree:')
    for snapshot in snapshots:
        print('%s %s %s' % (snapshot.file, snapshot.root, snapshot.roots))
        dump_lines(snapshot.roots.values())

#dump_tree()

''' Parse 2 massif snapshot files ====...

2018-12-17T13:46:30.361-08:00

'''
Parse 2 massif snapshot files
============================

Usage: this_script.py SNAPSHOT_1 SNAPSHOT_2

where

SNAPSHOT_1 and SNAPSHOT_2 are massif snapshot files (not ms_print dumps),
typically generated by calling the macro VALGRIND_MONITOR_COMMAND("detailed_snapshot") from the program's code.
Here is a sample of such a file:

>>>>>>>>>>>>>>
desc: --time-unit=ms --threshold=0.0
cmd: /my_prog.sh
time_unit: ms
#-----------
snapshot=0
#-----------
time=4078176
mem_heap_B=765203759
mem_heap_extra_B=58898945
mem_stacks_B=0
heap_tree=detailed
n29988: 765203759 (heap allocation functions) malloc/new/new[], --alloc-fns, etc.
n187: 230733840 0x1A303B27: ??? (in /usr/lib/x86_64-linux-gnu/libGLX_nvidia.so.390.77)
n3: 121525532 0x1B7CCA75: ??? (in /usr/lib/x86_64-linux-gnu/libnvidia-glcore.so.390.77)
n2: 123451053 0x1B7B7B6B: ??? (in /usr/lib/x86_64-linux-gnu/libnvidia-glcore.so.390.77)
n1: 123451050 0x1B89A702: ??? (in /usr/lib/x86_64-linux-gnu/libnvidia-glcore.so.390.77)
n1: 123451050 0x1B89C079: ??? (in /usr/lib/x86_64-linux-gnu/libnvidia-glcore.so.390.77)
n2: 123451050 0x1B4FFE67: ??? (in /usr/lib/x86_64-linux-gnu/libnvidia-glcore.so.390.77)
n2: 116592620 0x1B503A2B: ??? (in /usr/lib/x86_64-linux-gnu/libnvidia-glcore.so.390.77)
n2: 109734190 0x1B4D387A: ??? (in /usr/lib/x86_64-linux-gnu/libnvidia-glcore.so.390.77)
>>>>>>>>>>>>>>

As you can see, there is only one snapshot for the whole file, some header
lines, followed by n-lines describing the allocations in a tree-like structure
where indentation is relevant to the call stacks. The first n-line is the
allocated memory for the whole snapshot.
'''

Very useful script. My use case is slightly differ...

2018-12-17T13:45:20.668-08:00

Very useful script.
My use case is slightly different: my program loops 100 times over a memory-consuming API and I want the details. For that, I use the macro VALGRIND_MONITOR_COMMAND("detailed_snapshot") at the beginning of the API, which generates 100 snapshot files.
Along the same lines, I want to compare two snapshot files. Inspired by your script, here is mine. Thanks for the idea.

Thanks Oliver!

2011-05-02T10:14:21.355-07:00

Thanks Oliver!

Your script is interesting, but doesn't work w...

2011-03-30T05:59:15.026-07:00

Your script is interesting, but doesn't work when valgrind collects more than 100 snapshots. The problem is on line 42 - when you attempt to match the snapshot title it should have zero or more spaces at the beginning, not one or more.

The script doesn't work when you take 100 or m...

2011-03-22T16:08:49.012-07:00

The script doesn't work when you take 100 or more snapshots, as it matches one or more spaces at the beginning of the snapshot title; instead it should match zero or more spaces.

i.e.

# Snapshot title
- m = re.match(' +([..regex elided...]')
+ m = re.match(' *([..regex elided...]')
if m:
snapshot = Snapshot()
for i in ['n', 'time', 'total', 'useful_heap', 'extra_heap', 'stacks']: