1#!/usr/bin/python
2#
3# top-like utility for displaying kvm statistics
4#
5# Copyright 2006-2008 Qumranet Technologies
6# Copyright 2008-2011 Red Hat, Inc.
7#
8# Authors:
9#  Avi Kivity <avi@redhat.com>
10#
11# This work is licensed under the terms of the GNU GPL, version 2.  See
12# the COPYING file in the top-level directory.
13"""The kvm_stat module outputs statistics about running KVM VMs
14
15Three different ways of output formatting are available:
16- as a top-like text ui
17- in a key -> value format
18- in an all keys, all values format
19
20The data is sampled from the KVM's debugfs entries and its perf events.
21"""
22from __future__ import print_function
23
24import curses
25import sys
26import locale
27import os
28import time
29import optparse
30import ctypes
31import fcntl
32import resource
33import struct
34import re
35import subprocess
36from collections import defaultdict, namedtuple
37
# VMX exit reason names mapped to their numeric codes.
# Arch.get_arch() hands this table to ArchX86 when /proc/cpuinfo lists
# the 'vmx' flag; TracepointProvider then offers one 'kvm_exit(<name>)'
# field per entry, filtered on 'exit_reason==<code>'.
VMX_EXIT_REASONS = {
    'EXCEPTION_NMI':        0,
    'EXTERNAL_INTERRUPT':   1,
    'TRIPLE_FAULT':         2,
    'PENDING_INTERRUPT':    7,
    'NMI_WINDOW':           8,
    'TASK_SWITCH':          9,
    'CPUID':                10,
    'HLT':                  12,
    'INVLPG':               14,
    'RDPMC':                15,
    'RDTSC':                16,
    'VMCALL':               18,
    'VMCLEAR':              19,
    'VMLAUNCH':             20,
    'VMPTRLD':              21,
    'VMPTRST':              22,
    'VMREAD':               23,
    'VMRESUME':             24,
    'VMWRITE':              25,
    'VMOFF':                26,
    'VMON':                 27,
    'CR_ACCESS':            28,
    'DR_ACCESS':            29,
    'IO_INSTRUCTION':       30,
    'MSR_READ':             31,
    'MSR_WRITE':            32,
    'INVALID_STATE':        33,
    'MWAIT_INSTRUCTION':    36,
    'MONITOR_INSTRUCTION':  39,
    'PAUSE_INSTRUCTION':    40,
    'MCE_DURING_VMENTRY':   41,
    'TPR_BELOW_THRESHOLD':  43,
    'APIC_ACCESS':          44,
    'EPT_VIOLATION':        48,
    'EPT_MISCONFIG':        49,
    'WBINVD':               54,
    'XSETBV':               55,
    'APIC_WRITE':           56,
    'INVPCID':              58,
}
79
# SVM exit reason names mapped to their numeric codes.
# Arch.get_arch() hands this table to ArchX86 when /proc/cpuinfo lists
# the 'svm' flag (and not 'vmx'); it is used in the same way as
# VMX_EXIT_REASONS to build 'exit_reason==<code>' filters for kvm_exit.
SVM_EXIT_REASONS = {
    'READ_CR0':       0x000,
    'READ_CR3':       0x003,
    'READ_CR4':       0x004,
    'READ_CR8':       0x008,
    'WRITE_CR0':      0x010,
    'WRITE_CR3':      0x013,
    'WRITE_CR4':      0x014,
    'WRITE_CR8':      0x018,
    'READ_DR0':       0x020,
    'READ_DR1':       0x021,
    'READ_DR2':       0x022,
    'READ_DR3':       0x023,
    'READ_DR4':       0x024,
    'READ_DR5':       0x025,
    'READ_DR6':       0x026,
    'READ_DR7':       0x027,
    'WRITE_DR0':      0x030,
    'WRITE_DR1':      0x031,
    'WRITE_DR2':      0x032,
    'WRITE_DR3':      0x033,
    'WRITE_DR4':      0x034,
    'WRITE_DR5':      0x035,
    'WRITE_DR6':      0x036,
    'WRITE_DR7':      0x037,
    'EXCP_BASE':      0x040,
    'INTR':           0x060,
    'NMI':            0x061,
    'SMI':            0x062,
    'INIT':           0x063,
    'VINTR':          0x064,
    'CR0_SEL_WRITE':  0x065,
    'IDTR_READ':      0x066,
    'GDTR_READ':      0x067,
    'LDTR_READ':      0x068,
    'TR_READ':        0x069,
    'IDTR_WRITE':     0x06a,
    'GDTR_WRITE':     0x06b,
    'LDTR_WRITE':     0x06c,
    'TR_WRITE':       0x06d,
    'RDTSC':          0x06e,
    'RDPMC':          0x06f,
    'PUSHF':          0x070,
    'POPF':           0x071,
    'CPUID':          0x072,
    'RSM':            0x073,
    'IRET':           0x074,
    'SWINT':          0x075,
    'INVD':           0x076,
    'PAUSE':          0x077,
    'HLT':            0x078,
    'INVLPG':         0x079,
    'INVLPGA':        0x07a,
    'IOIO':           0x07b,
    'MSR':            0x07c,
    'TASK_SWITCH':    0x07d,
    'FERR_FREEZE':    0x07e,
    'SHUTDOWN':       0x07f,
    'VMRUN':          0x080,
    'VMMCALL':        0x081,
    'VMLOAD':         0x082,
    'VMSAVE':         0x083,
    'STGI':           0x084,
    'CLGI':           0x085,
    'SKINIT':         0x086,
    'RDTSCP':         0x087,
    'ICEBP':          0x088,
    'WBINVD':         0x089,
    'MONITOR':        0x08a,
    'MWAIT':          0x08b,
    'MWAIT_COND':     0x08c,
    'XSETBV':         0x08d,
    'NPF':            0x400,
}
154
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
# Maps exit reason names to their numeric codes; ArchA64 uses this
# table as its 'exit_reasons', which TracepointProvider turns into
# 'exit_reason==<code>' filters for the kvm_exit tracepoint.
AARCH64_EXIT_REASONS = {
    'UNKNOWN':      0x00,
    'WFI':          0x01,
    'CP15_32':      0x03,
    'CP15_64':      0x04,
    'CP14_MR':      0x05,
    'CP14_LS':      0x06,
    'FP_ASIMD':     0x07,
    'CP10_ID':      0x08,
    'CP14_64':      0x0C,
    'ILL_ISS':      0x0E,
    'SVC32':        0x11,
    'HVC32':        0x12,
    'SMC32':        0x13,
    'SVC64':        0x15,
    'HVC64':        0x16,
    'SMC64':        0x17,
    'SYS64':        0x18,
    'IABT':         0x20,
    'IABT_HYP':     0x21,
    'PC_ALIGN':     0x22,
    'DABT':         0x24,
    'DABT_HYP':     0x25,
    'SP_ALIGN':     0x26,
    'FP_EXC32':     0x28,
    'FP_EXC64':     0x2C,
    'SERROR':       0x2F,
    'BREAKPT':      0x30,
    'BREAKPT_HYP':  0x31,
    'SOFTSTP':      0x32,
    'SOFTSTP_HYP':  0x33,
    'WATCHPT':      0x34,
    'WATCHPT_HYP':  0x35,
    'BKPT32':       0x38,
    'VECTOR32':     0x3A,
    'BRK64':        0x3C,
}
193
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# Maps userspace exit reason names to their numeric codes. The values
# are used for the 'reason' filter of the kvm_userspace_exit tracepoint
# (see TracepointProvider._get_filters()).
USERSPACE_EXIT_REASONS = {
    'UNKNOWN':          0,
    'EXCEPTION':        1,
    'IO':               2,
    'HYPERCALL':        3,
    'DEBUG':            4,
    'HLT':              5,
    'MMIO':             6,
    'IRQ_WINDOW_OPEN':  7,
    'SHUTDOWN':         8,
    'FAIL_ENTRY':       9,
    'INTR':             10,
    'SET_TPR':          11,
    'TPR_ACCESS':       12,
    'S390_SIEIC':       13,
    'S390_RESET':       14,
    'DCR':              15,
    'NMI':              16,
    'INTERNAL_ERROR':   17,
    'OSI':              18,
    'PAPR_HCALL':       19,
    'S390_UCONTROL':    20,
    'WATCHDOG':         21,
    'S390_TSCH':        22,
    'EPR':              23,
    'SYSTEM_EVENT':     24,
}
222
# ioctl request numbers issued on perf event file descriptors (see
# class Event). These are the default values; ArchPPC uses different
# numbers for all four requests.
IOCTL_NUMBERS = {
    'SET_FILTER':  0x40082406,
    'ENABLE':      0x00002400,
    'DISABLE':     0x00002401,
    'RESET':       0x00002403,
}
229
# Preferred text encoding of the user's locale. Passing False asks
# getpreferredencoding() not to call setlocale() itself.
ENCODING = locale.getpreferredencoding(False)
# Matches only strings that contain no '(' at all, i.e. field names
# without a '(<filter value>)' suffix.
TRACE_FILTER = re.compile(r'^[^\(]*$')
232
233
class Arch(object):
    """Encapsulates global architecture specific data.

    Contains the performance event open syscall and ioctl numbers, as
    well as the VM exit reasons for the architecture it runs on.

    """
    @staticmethod
    def get_arch():
        """Returns the Arch subclass instance matching the host machine.

        The machine name reported by os.uname() selects the PPC, AArch64
        or S390 variant by prefix. Every other machine is treated as x86;
        there the first 'flags' line of /proc/cpuinfo decides between the
        VMX and the SVM exit reason table.

        Returns None if that 'flags' line contains neither 'vmx' nor
        'svm', or if /proc/cpuinfo has no 'flags' line at all.

        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        if machine.startswith('aarch64'):
            return ArchA64()
        if machine.startswith('s390'):
            return ArchS390()

        # X86_64
        # The with-statement makes sure the file is closed on every
        # return path instead of relying on garbage collection.
        with open('/proc/cpuinfo') as cpuinfo:
            for line in cpuinfo:
                if not line.startswith('flags'):
                    continue

                flags = line.split()
                if 'vmx' in flags:
                    return ArchX86(VMX_EXIT_REASONS)
                if 'svm' in flags:
                    return ArchX86(SVM_EXIT_REASONS)
                # Only the first 'flags' line is inspected.
                return None
        return None

    def tracepoint_is_child(self, field):
        """Returns name of parent if 'field' is a child, None otherwise.

        A child field has the format 'parent(filter value)', so the
        parent is everything in front of the first '('.

        """
        if '(' not in field:
            return None
        return field.split('(', 1)[0]
268
269
class ArchX86(Arch):
    """x86 variant of Arch; the caller supplies the exit reason table."""

    def __init__(self, exit_reasons):
        # 'exit_reasons' is the name -> code dict to filter kvm_exit on.
        self.exit_reasons = exit_reasons
        self.ioctl_numbers = IOCTL_NUMBERS
        # Number of the perf event open syscall on this architecture.
        self.sc_perf_evt_open = 298

    def debugfs_is_child(self, field):
        """Returns name of parent if 'field' is a child, None otherwise.

        No parent/child relations are defined here, so the result is
        always None.
        """
        return None
279
280
class ArchPPC(Arch):
    """PowerPC variant of Arch with its own ioctl numbers."""

    def __init__(self):
        # Number of the perf event open syscall on this architecture.
        self.sc_perf_evt_open = 319

        # Work on a private copy: the PPC specific values below must not
        # leak into the module-level IOCTL_NUMBERS defaults.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        self.ioctl_numbers['SET_FILTER'] = (0x80002406 |
                                            (char_ptr_size << 16))

        # No exit reason table: TracepointProvider skips the kvm_exit
        # filters when this is empty.
        self.exit_reasons = {}

    def debugfs_is_child(self, field):
        """Returns name of parent if 'field' is a child, None otherwise.

        No parent/child relations are defined here, so the result is
        always None.
        """
        return None
298
299
class ArchA64(Arch):
    """AArch64 variant of Arch, using the AARCH64_EXIT_REASONS table."""

    def __init__(self):
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.ioctl_numbers = IOCTL_NUMBERS
        # Number of the perf event open syscall on this architecture.
        self.sc_perf_evt_open = 241

    def debugfs_is_child(self, field):
        """Returns name of parent if 'field' is a child, None otherwise.

        No parent/child relations are defined here, so the result is
        always None.
        """
        return None
309
310
class ArchS390(Arch):
    """S390 variant of Arch; it has no exit reason table."""

    def __init__(self):
        self.exit_reasons = None
        self.ioctl_numbers = IOCTL_NUMBERS
        # Number of the perf event open syscall on this architecture.
        self.sc_perf_evt_open = 331

    def debugfs_is_child(self, field):
        """Returns name of parent if 'field' is a child, None otherwise.

        Fields whose name starts with 'instruction_' are children of
        'exit_instruction'.
        """
        is_instruction = field.startswith('instruction_')
        return 'exit_instruction' if is_instruction else None
321
322
# Architecture description of the host, resolved once at import time.
# Note that Arch.get_arch() returns None on a machine that is treated as
# x86 but whose /proc/cpuinfo shows neither the 'vmx' nor the 'svm' flag.
ARCH = Arch.get_arch()
324
325
# Values written into every perf_event_attr created by this script:
# the event type (tracepoint) and the format used when reading counters
# (grouped, see Group.read()).
PERF_TYPE_TRACEPOINT = 2
PERF_FORMAT_GROUP = 1 << 3


class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        """Sets type, size and read_format; everything else stays 0."""
        # Name the class explicitly: super(self.__class__, self) would
        # recurse endlessly as soon as this struct is subclassed.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP
357
358
class Group(object):
    """A perf event group: an ordered list of events that is read in
    one go through the first event's file descriptor."""

    def __init__(self):
        self.events = []

    def add_event(self, event):
        """Appends 'event' to the group."""
        self.events.append(event)

    def read(self):
        """Returns a dict with 'event name: value' for all events in the
        group.

        The counters are fetched with a single read on the file
        descriptor of the first event, which is the group leader. See
        perf_event_open(2) for details.

        With the event configuration used here the kernel delivers:
        struct read_format {
            u64 nr; /* The number of events */
            struct {
                u64 value; /* The value of the event */
            } values[nr];
        };

        """
        names = [event.name for event in self.events]
        count = len(self.events)
        leader_fd = self.events[0].fd

        # One u64 for 'nr' followed by one u64 per event.
        raw = os.read(leader_fd, 8 * (1 + count))

        # The eight pad bytes skip 'nr'; only the values are unpacked.
        values = struct.unpack('xxxxxxxx' + 'Q' * count, raw)
        return dict(zip(names, values))
390
391
class Event(object):
    """Represents a performance event and manages its life cycle.

    Creating an instance registers the event in the kernel (see
    _setup_event()). The resulting file descriptor is kept in 'fd' and
    closed again when the object is destroyed.

    """
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        # Set 'fd' first, so that __del__ always finds the attribute,
        # even if one of the following steps raises.
        self.fd = None
        self.libc = ctypes.CDLL('libc.so.6', use_errno=True)
        self.syscall = self.libc.syscall
        self.name = name
        self._setup_event(group, trace_cpu, trace_pid, trace_point,
                          trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        # Compare against None: 0 is a valid descriptor. getattr covers
        # objects whose constructor never got as far as setting 'fd'.
        fd = getattr(self, 'fd', None)
        if fd is not None:
            # Forget the descriptor before closing it, so that it can
            # never be closed twice.
            self.fd = None
            os.close(fd)

    def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
        """Wrapper for the sys_perf_evt_open() syscall.

        Used to set up performance events, returns a file descriptor or -1
        on error.

        Attributes are:
        - syscall number
        - struct perf_event_attr *
        - pid or -1 to monitor all pids
        - cpu number or -1 to monitor all cpus
        - The file descriptor of the group leader or -1 to create a group.
        - flags

        """
        return self.syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
                            ctypes.c_int(pid), ctypes.c_int(cpu),
                            ctypes.c_int(group_fd), ctypes.c_long(flags))

    def _setup_event_attribute(self, trace_set, trace_point):
        """Returns an initialized ctype perf_event_attr struct.

        The struct's 'config' field is set to the tracepoint id, which
        is read from the tracepoint's 'id' file below the tracing
        directory.

        """

        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def _setup_event(self, group, trace_cpu, trace_pid, trace_point,
                     trace_filter, trace_set):
        """Sets up the perf event in Linux.

        Issues the syscall to register the event in the kernel and
        then sets the optional filter.

        Raises OSError if the syscall fails. If setting the filter
        fails, the new descriptor is closed and the error is passed on.

        """

        event_attr = self._setup_event_attribute(trace_set, trace_point)

        # First event will be group leader.
        group_leader = -1

        # All others have to pass the leader's descriptor instead.
        if group.events:
            group_leader = group.events[0].fd

        fd = self._perf_event_open(event_attr, trace_pid,
                                   trace_cpu, group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        if trace_filter:
            try:
                fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                            trace_filter)
            except Exception:
                # self.fd is not set yet, so nobody else would ever
                # close this descriptor.
                os.close(fd)
                raise

        self.fd = fd

    def enable(self):
        """Enables the trace event in the kernel.

        Enabling the group leader makes reading counters from it and the
        events under it possible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Disables the trace event in the kernel.

        Disabling the group leader makes reading all counters under it
        impossible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Resets the count of the trace event in the kernel."""
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
495
496
class Provider(object):
    """Common base class of the data providers."""

    def __init__(self, pid):
        self.child_events = False
        # Subclasses implement 'pid' as a property, so this assignment
        # may do more than store the value.
        self.pid = pid

    @staticmethod
    def is_field_wanted(fields_filter, field):
        """Tell whether 'field' passes 'fields_filter'.

        An empty or missing filter lets every field pass. Otherwise the
        filter is a regular expression that has to match at the start
        of the field name.
        """
        if fields_filter:
            return re.match(fields_filter, field) is not None
        return True

    @staticmethod
    def walkdir(path):
        """Return the os.walk() entry of the given directory itself.

        This is the usual 3-tuple of (dirpath, dirnames, filenames);
        subdirectories are not descended into.
        """
        walker = os.walk(path)
        return next(walker)
518
519
class TracepointProvider(Provider):
    """Data provider for the stats class.

    Owns the perf events and event groups from which the tracepoint
    counters are read.

    """
    def __init__(self, pid, fields_filter):
        # group_leaders and filters have to exist before the field list
        # is set up; the traces themselves are created by the pid
        # setter, which the base class constructor triggers.
        self.group_leaders = []
        self.filters = self._get_filters()
        self.update_fields(fields_filter)
        super(TracepointProvider, self).__init__(pid)

    @staticmethod
    def _get_filters():
        """Returns a dict of trace events, their filter ids and
        the values that can be filtered.

        A trace event can be restricted to special values by setting a
        filter string via an ioctl; the string normally looks like
        identifier==value. Every filter gets an event of its own, so
        that the filtered counts can be told apart.

        """
        filters = {'kvm_userspace_exit': ('reason', USERSPACE_EXIT_REASONS)}
        if ARCH.exit_reasons:
            filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
        return filters

    def _get_available_fields(self):
        """Returns a list of available events of format 'event name(filter
        name)'.

        Every available event has a directory below
        /sys/kernel/debug/tracing/events/ that exports information
        about it, so the directory listing is the list of events.

        Events that can be filtered for specific values, like the vm
        exit reasons, additionally get one special field per value:
        event name(filter name)

        """
        events_dir = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = self.walkdir(events_dir)[1]
        filtered = [field + '(' + value_name + ')'
                    for field in fields if field in self.filters
                    for value_name in self.filters[field][1]]
        fields.extend(filtered)
        return fields

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        wanted = []
        for field in self._get_available_fields():
            # Child fields are kept regardless of the filter.
            if (self.is_field_wanted(fields_filter, field) or
                    ARCH.tracepoint_is_child(field)):
                wanted.append(field)
        self.fields = wanted

    @staticmethod
    def _get_online_cpus():
        """Returns a list of cpu id integers.

        The ids come from /sys/devices/system/cpu/online, a comma
        separated list of single integers and 'first-last' ranges.
        """
        with open('/sys/devices/system/cpu/online') as online:
            spec = online.readline()

        cpus = []
        for chunk in spec.split(','):
            if '-' in chunk:
                bounds = chunk.split('-')
                cpus.extend(range(int(bounds[0]), int(bounds[1]) + 1))
            else:
                cpus.append(int(chunk))
        return cpus

    def _setup_traces(self):
        """Creates all event and group objects needed to be able to retrieve
        data."""
        fields = self._get_available_fields()
        if self._pid > 0:
            # qemu starts a thread for each vcpu, so collect all
            # threads of the monitored pid.
            task_dir = os.path.join('/proc', str(self._pid), 'task')
            groupids = self.walkdir(task_dir)[1]
        else:
            groupids = self._get_online_cpus()

        # One descriptor per event and group, plus a buffer for python
        # libs, std streams and other files that the script opens.
        newlim = len(groupids) * len(fields) + 50
        try:
            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)

            # Raising the soft limit is sufficient unless the hard
            # limit is too low as well; increasing the hard limit needs
            # CAP_SYS_RESOURCE.
            new_hardlim = newlim if hardlim < newlim else hardlim
            resource.setrlimit(resource.RLIMIT_NOFILE,
                               (newlim, new_hardlim))

        except ValueError:
            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))

        for groupid in groupids:
            group = Group()
            for name in fields:
                tracepoint = name
                tracefilter = None
                filtered = re.match(r'(.*)\((.*)\)', name)
                if filtered:
                    tracepoint, sub = filtered.groups()
                    filter_id, filter_values = self.filters[tracepoint]
                    tracefilter = '%s==%d\0' % (filter_id,
                                                filter_values[sub])

                # From perf_event_open(2):
                # pid > 0 and cpu == -1
                # This measures the specified process/thread on any CPU.
                #
                # pid == -1 and cpu >= 0
                # This measures all processes/threads on the specified CPU.
                if self._pid == 0:
                    trace_cpu, trace_pid = groupid, -1
                else:
                    trace_cpu, trace_pid = -1, int(groupid)

                group.add_event(Event(name=name,
                                      group=group,
                                      trace_cpu=trace_cpu,
                                      trace_pid=trace_pid,
                                      trace_point=tracepoint,
                                      trace_filter=tracefilter))

            self.group_leaders.append(group)

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Enables/disables the (un)wanted events"""
        self._fields = fields
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in fields:
                    event.reset()
                    event.enable()
                elif index != 0:
                    # The group leader is never disabled, as that would
                    # disable all of its events.
                    event.disable()

    @property
    def pid(self):
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Changes the monitored pid by setting new traces."""
        self._pid = pid
        # Dropping the references lets the garbage collector get rid of
        # all Event/Group objects and their open files.
        self.group_leaders = []
        self._setup_traces()
        # Run the fields setter again to enable the wanted events.
        self.fields = self._fields

    def read(self, by_guest=0):
        """Returns 'event name: current value' for all enabled events."""
        totals = defaultdict(int)
        for group in self.group_leaders:
            for name, val in group.read().items():
                if name in self._fields:
                    parent = ARCH.tracepoint_is_child(name)
                    key = (name + ' ' + parent) if parent else name
                    totals[key] += val
        return totals

    def reset(self):
        """Reset all field counters"""
        for group in self.group_leaders:
            for event in group.events:
                event.reset()
713
714
class DebugfsProvider(Provider):
    """Provides data from the files that KVM creates in the kvm debugfs
    folder."""
    def __init__(self, pid, fields_filter, include_past):
        """Sets up the provider for 'pid' (0 means all guests).

        If 'include_past' is set, the counts start from 0 instead of
        from the values found at start-up.

        """
        self.update_fields(fields_filter)
        self._baseline = {}
        self.do_read = True
        self.paths = []
        super(DebugfsProvider, self).__init__(pid)
        if include_past:
            self._restore()

    def _get_available_fields(self):
        """Returns a list of available fields.

        The fields are all available KVM debugfs files

        """
        return self.walkdir(PATH_DEBUGFS_KVM)[2]

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        self._fields = [field for field in self._get_available_fields()
                        if self.is_field_wanted(fields_filter, field) or
                        ARCH.debugfs_is_child(field)]

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Sets the field list and restarts counting from now."""
        self._fields = fields
        self.reset()

    @property
    def pid(self):
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Selects the debugfs directories of the guest with 'pid'.

        With pid 0 no directory is preselected; read() then looks the
        directories up on every call.

        """
        self._pid = pid
        if pid != 0:
            vms = self.walkdir(PATH_DEBUGFS_KVM)[1]
            if not vms:
                self.do_read = False

            # The directory names start with '<pid>-' (read() relies on
            # that as well). A plain substring test would also select
            # other guests, e.g. '123-4' for pid 23.
            prefix = "{}-".format(pid)
            self.paths = [vm for vm in vms if vm.startswith(prefix)]

        else:
            self.paths = []
            self.do_read = True

    def _verify_paths(self):
        """Remove invalid paths"""
        # Build the filtered list first and assign it in one go:
        # removing entries from a list while iterating over it skips
        # the entry after each removed one.
        self.paths[:] = [path for path in self.paths
                         if os.path.exists(os.path.join(PATH_DEBUGFS_KVM,
                                                        path))]

    def read(self, reset=0, by_guest=0):
        """Returns a dict with format:'file name / field -> current value'.

        Parameter 'reset':
          0   plain read
          1   reset field counts to 0
          2   restore the original field counts

        If 'by_guest' is set, the values of fields without a parent are
        summed up per guest, using the part of the directory name in
        front of the first '-' as key.

        """
        results = {}

        # If no debugfs filtering support is available, then don't read.
        if not self.do_read:
            return results
        self._verify_paths()

        paths = self.paths
        if self._pid == 0:
            # NOTE(review): os.walk() descends into subdirectories, so
            # names of nested directories are collected too; reading a
            # field below such a name fails and counts as 0.
            paths = []
            for entry in os.walk(PATH_DEBUGFS_KVM):
                paths.extend(entry[1])
        for path in paths:
            for field in self._fields:
                value = self._read_field(field, path)
                key = path + field
                if reset == 1:
                    self._baseline[key] = value
                if reset == 2:
                    self._baseline[key] = 0
                # First time this key is seen: count from here on.
                if self._baseline.get(key, -1) == -1:
                    self._baseline[key] = value

                name = field
                parent = ARCH.debugfs_is_child(field)
                if parent:
                    name = field + ' ' + parent
                elif by_guest:
                    name = key.split('-')[0]    # set 'name' to 'pid'

                increment = value - self._baseline.get(key, 0)
                results[name] = results.get(name, 0) + increment

        return results

    def _read_field(self, field, path):
        """Returns the value of a single field from a specific VM.

        Returns 0 if the file cannot be opened or read.

        """
        try:
            with open(os.path.join(PATH_DEBUGFS_KVM, path, field)) as stat:
                return int(stat.read())
        except IOError:
            return 0

    def reset(self):
        """Reset field counters"""
        self._baseline = {}
        self.read(1)

    def _restore(self):
        """Restore the original field counts by using 0 as baseline"""
        self._baseline = {}
        self.read(2)
840
841
# Record kept per key in Stats.values: the latest value read from the
# providers and its difference to the previously stored value.
EventStat = namedtuple('EventStat', ['value', 'delta'])
843
844
class Stats(object):
    """Manages the data providers and the data they provide.

    Filters are set on the providers through this class, and the data
    of all providers is collected here.

    """
    def __init__(self, options):
        self.providers = self._get_providers(options)
        self.values = {}
        self._child_events = False
        self._fields_filter = options.fields
        self._pid_filter = options.pid

    def _get_providers(self, options):
        """Returns a list of data providers depending on the passed options."""
        providers = []

        if options.debugfs:
            debugfs = DebugfsProvider(options.pid, options.fields,
                                      options.dbgfs_include_past)
            providers.append(debugfs)

        # Tracepoints are also the fallback if nothing else was chosen.
        if options.tracepoints or not providers:
            tracepoints = TracepointProvider(options.pid, options.fields)
            providers.append(tracepoints)

        return providers

    def _update_provider_filters(self):
        """Propagates fields filters to providers."""
        # The counters are reset when the fields are updated, so the
        # cache of old values can be dropped as well.
        self.values = {}
        for provider in self.providers:
            provider.update_fields(self._fields_filter)

    def reset(self):
        """Drops the cached values and resets all providers."""
        self.values = {}
        for provider in self.providers:
            provider.reset()

    @property
    def fields_filter(self):
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        """Stores a changed filter and passes it on to the providers."""
        if fields_filter == self._fields_filter:
            return
        self._fields_filter = fields_filter
        self._update_provider_filters()

    @property
    def pid_filter(self):
        return self._pid_filter

    @pid_filter.setter
    def pid_filter(self, pid):
        """Stores a changed pid and passes it on to the providers."""
        if pid == self._pid_filter:
            return
        self._pid_filter = pid
        self.values = {}
        for provider in self.providers:
            provider.pid = self._pid_filter

    @property
    def child_events(self):
        return self._child_events

    @child_events.setter
    def child_events(self, val):
        """Stores the setting and passes it on to the providers."""
        self._child_events = val
        for provider in self.providers:
            provider.child_events = val

    def get(self, by_guest=0):
        """Returns a dict with field -> (value, delta to last value) of all
        provider data.
        Key formats:
          * plain: 'key' is event name
          * child-parent: 'key' is in format '<child> <parent>'
          * pid: 'key' is the pid of the guest, and the record contains the
               aggregated event data
        These formats are generated by the providers, and handled in class TUI.
        """
        for provider in self.providers:
            new = provider.read(by_guest=by_guest)
            for key, newval in new.items():
                previous = self.values.get(key)
                oldval = previous.value if previous is not None else 0
                self.values[key] = EventStat(newval, newval - oldval)
        return self.values

    def toggle_display_guests(self, to_pid):
        """Toggle between collection of stats by individual event and by
        guest pid

        The events reported by DebugfsProvider differ between the two
        modes, so the keys of the mode that is left are removed from
        self.values.

        Returns 1 without changing anything if a TracepointProvider is
        in use, 0 otherwise.

        """
        if any(isinstance(ins, TracepointProvider) for ins in self.providers):
            return 1
        if to_pid:
            # Leaving event mode: forget the event names.
            for provider in self.providers:
                if isinstance(provider, DebugfsProvider):
                    for key in provider.fields:
                        self.values.pop(key, None)
        else:
            # Leaving guest mode: forget the pid keys.
            for key in list(self.values):
                if key.isdigit():
                    del self.values[key]
        # Update oldval (see get())
        self.get(to_pid)
        return 0
960
961
# Default refresh interval of the text ui, in seconds.
DELAY_DEFAULT = 3.0
# Guest names longer than this are truncated in the header line.
MAX_GUEST_NAME_LEN = 48
# Regex filters longer than this are truncated in the header line.
MAX_REGEX_LEN = 44
# Default sorting mode of the text ui: sort by (delta, overall value).
SORT_DEFAULT = 0
966
967
968class Tui(object):
969    """Instruments curses to draw a nice text ui."""
970    def __init__(self, stats):
971        self.stats = stats
972        self.screen = None
973        self._delay_initial = 0.25
974        self._delay_regular = DELAY_DEFAULT
975        self._sorting = SORT_DEFAULT
976        self._display_guests = 0
977
978    def __enter__(self):
979        """Initialises curses for later use.  Based on curses.wrapper
980           implementation from the Python standard library."""
981        self.screen = curses.initscr()
982        curses.noecho()
983        curses.cbreak()
984
985        # The try/catch works around a minor bit of
986        # over-conscientiousness in the curses module, the error
987        # return from C start_color() is ignorable.
988        try:
989            curses.start_color()
990        except curses.error:
991            pass
992
993        # Hide cursor in extra statement as some monochrome terminals
994        # might support hiding but not colors.
995        try:
996            curses.curs_set(0)
997        except curses.error:
998            pass
999
1000        curses.use_default_colors()
1001        return self
1002
1003    def __exit__(self, *exception):
1004        """Resets the terminal to its normal state.  Based on curses.wrapper
1005           implementation from the Python standard library."""
1006        if self.screen:
1007            self.screen.keypad(0)
1008            curses.echo()
1009            curses.nocbreak()
1010            curses.endwin()
1011
1012    @staticmethod
1013    def get_all_gnames():
1014        """Returns a list of (pid, gname) tuples of all running guests"""
1015        res = []
1016        try:
1017            child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'],
1018                                     stdout=subprocess.PIPE)
1019        except:
1020            raise Exception
1021        for line in child.stdout:
1022            line = line.decode(ENCODING).lstrip().split(' ', 1)
1023            # perform a sanity check before calling the more expensive
1024            # function to possibly extract the guest name
1025            if ' -name ' in line[1]:
1026                res.append((line[0], Tui.get_gname_from_pid(line[0])))
1027        child.stdout.close()
1028
1029        return res
1030
1031    def _print_all_gnames(self, row):
1032        """Print a list of all running guests along with their pids."""
1033        self.screen.addstr(row, 2, '%8s  %-60s' %
1034                           ('Pid', 'Guest Name (fuzzy list, might be '
1035                            'inaccurate!)'),
1036                           curses.A_UNDERLINE)
1037        row += 1
1038        try:
1039            for line in self.get_all_gnames():
1040                self.screen.addstr(row, 2, '%8s  %-60s' % (line[0], line[1]))
1041                row += 1
1042                if row >= self.screen.getmaxyx()[0]:
1043                    break
1044        except Exception:
1045            self.screen.addstr(row + 1, 2, 'Not available')
1046
1047    @staticmethod
1048    def get_pid_from_gname(gname):
1049        """Fuzzy function to convert guest name to QEMU process pid.
1050
1051        Returns a list of potential pids, can be empty if no match found.
1052        Throws an exception on processing errors.
1053
1054        """
1055        pids = []
1056        for line in Tui.get_all_gnames():
1057            if gname == line[1]:
1058                pids.append(int(line[0]))
1059
1060        return pids
1061
1062    @staticmethod
1063    def get_gname_from_pid(pid):
1064        """Returns the guest name for a QEMU process pid.
1065
1066        Extracts the guest name from the QEMU comma line by processing the
1067        '-name' option. Will also handle names specified out of sequence.
1068
1069        """
1070        name = ''
1071        try:
1072            line = open('/proc/{}/cmdline'
1073                        .format(pid), 'r').read().split('\0')
1074            parms = line[line.index('-name') + 1].split(',')
1075            while '' in parms:
1076                # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results
1077                # in # ['foo', '', 'bar'], which we revert here
1078                idx = parms.index('')
1079                parms[idx - 1] += ',' + parms[idx + 1]
1080                del parms[idx:idx+2]
1081            # the '-name' switch allows for two ways to specify the guest name,
1082            # where the plain name overrides the name specified via 'guest='
1083            for arg in parms:
1084                if '=' not in arg:
1085                    name = arg
1086                    break
1087                if arg[:6] == 'guest=':
1088                    name = arg[6:]
1089        except (ValueError, IOError, IndexError):
1090            pass
1091
1092        return name
1093
1094    def _update_pid(self, pid):
1095        """Propagates pid selection to stats object."""
1096        self.screen.addstr(4, 1, 'Updating pid filter...')
1097        self.screen.refresh()
1098        self.stats.pid_filter = pid
1099
1100    def _refresh_header(self, pid=None):
1101        """Refreshes the header."""
1102        if pid is None:
1103            pid = self.stats.pid_filter
1104        self.screen.erase()
1105        gname = self.get_gname_from_pid(pid)
1106        self._gname = gname
1107        if gname:
1108            gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
1109                                   if len(gname) > MAX_GUEST_NAME_LEN
1110                                   else gname))
1111        if pid > 0:
1112            self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname)
1113        else:
1114            self._headline = 'kvm statistics - summary'
1115        self.screen.addstr(0, 0, self._headline, curses.A_BOLD)
1116        if self.stats.fields_filter:
1117            regex = self.stats.fields_filter
1118            if len(regex) > MAX_REGEX_LEN:
1119                regex = regex[:MAX_REGEX_LEN] + '...'
1120            self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
1121        if self._display_guests:
1122            col_name = 'Guest Name'
1123        else:
1124            col_name = 'Event'
1125        self.screen.addstr(2, 1, '%-40s %10s%7s %8s' %
1126                           (col_name, 'Total', '%Total', 'CurAvg/s'),
1127                           curses.A_STANDOUT)
1128        self.screen.addstr(4, 1, 'Collecting data...')
1129        self.screen.refresh()
1130
1131    def _refresh_body(self, sleeptime):
1132        def insert_child(sorted_items, child, values, parent):
1133            num = len(sorted_items)
1134            for i in range(0, num):
1135                # only add child if parent is present
1136                if parent.startswith(sorted_items[i][0]):
1137                    sorted_items.insert(i + 1, ('  ' + child, values))
1138
1139        def get_sorted_events(self, stats):
1140            """ separate parent and child events """
1141            if self._sorting == SORT_DEFAULT:
1142                def sortkey(pair):
1143                    # sort by (delta value, overall value)
1144                    v = pair[1]
1145                    return (v.delta, v.value)
1146            else:
1147                def sortkey(pair):
1148                    # sort by overall value
1149                    v = pair[1]
1150                    return v.value
1151
1152            childs = []
1153            sorted_items = []
1154            # we can't rule out child events to appear prior to parents even
1155            # when sorted - separate out all children first, and add in later
1156            for key, values in sorted(stats.items(), key=sortkey,
1157                                      reverse=True):
1158                if values == (0, 0):
1159                    continue
1160                if key.find(' ') != -1:
1161                    if not self.stats.child_events:
1162                        continue
1163                    childs.insert(0, (key, values))
1164                else:
1165                    sorted_items.append((key, values))
1166            if self.stats.child_events:
1167                for key, values in childs:
1168                    (child, parent) = key.split(' ')
1169                    insert_child(sorted_items, child, values, parent)
1170
1171            return sorted_items
1172
1173        if not self._is_running_guest(self.stats.pid_filter):
1174            if self._gname:
1175                try: # ...to identify the guest by name in case it's back
1176                    pids = self.get_pid_from_gname(self._gname)
1177                    if len(pids) == 1:
1178                        self._refresh_header(pids[0])
1179                        self._update_pid(pids[0])
1180                        return
1181                except:
1182                    pass
1183            self._display_guest_dead()
1184            # leave final data on screen
1185            return
1186        row = 3
1187        self.screen.move(row, 0)
1188        self.screen.clrtobot()
1189        stats = self.stats.get(self._display_guests)
1190        total = 0.
1191        ctotal = 0.
1192        for key, values in stats.items():
1193            if self._display_guests:
1194                if self.get_gname_from_pid(key):
1195                    total += values.value
1196                continue
1197            if not key.find(' ') != -1:
1198                total += values.value
1199            else:
1200                ctotal += values.value
1201        if total == 0.:
1202            # we don't have any fields, or all non-child events are filtered
1203            total = ctotal
1204
1205        # print events
1206        tavg = 0
1207        tcur = 0
1208        guest_removed = False
1209        for key, values in get_sorted_events(self, stats):
1210            if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
1211                break
1212            if self._display_guests:
1213                key = self.get_gname_from_pid(key)
1214                if not key:
1215                    continue
1216            cur = int(round(values.delta / sleeptime)) if values.delta else 0
1217            if cur < 0:
1218                guest_removed = True
1219                continue
1220            if key[0] != ' ':
1221                if values.delta:
1222                    tcur += values.delta
1223                ptotal = values.value
1224                ltotal = total
1225            else:
1226                ltotal = ptotal
1227            self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key,
1228                               values.value,
1229                               values.value * 100 / float(ltotal), cur))
1230            row += 1
1231        if row == 3:
1232            if guest_removed:
1233                self.screen.addstr(4, 1, 'Guest removed, updating...')
1234            else:
1235                self.screen.addstr(4, 1, 'No matching events reported yet')
1236        if row > 4:
1237            tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
1238            self.screen.addstr(row, 1, '%-40s %10d        %8s' %
1239                               ('Total', total, tavg), curses.A_BOLD)
1240        self.screen.refresh()
1241
1242    def _display_guest_dead(self):
1243        marker = '   Guest is DEAD   '
1244        y = min(len(self._headline), 80 - len(marker))
1245        self.screen.addstr(0, y, marker, curses.A_BLINK | curses.A_STANDOUT)
1246
1247    def _show_msg(self, text):
1248        """Display message centered text and exit on key press"""
1249        hint = 'Press any key to continue'
1250        curses.cbreak()
1251        self.screen.erase()
1252        (x, term_width) = self.screen.getmaxyx()
1253        row = 2
1254        for line in text:
1255            start = (term_width - len(line)) // 2
1256            self.screen.addstr(row, start, line)
1257            row += 1
1258        self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint,
1259                           curses.A_STANDOUT)
1260        self.screen.getkey()
1261
1262    def _show_help_interactive(self):
1263        """Display help with list of interactive commands"""
1264        msg = ('   b     toggle events by guests (debugfs only, honors'
1265               ' filters)',
1266               '   c     clear filter',
1267               '   f     filter by regular expression',
1268               '   g     filter by guest name/PID',
1269               '   h     display interactive commands reference',
1270               '   o     toggle sorting order (Total vs CurAvg/s)',
1271               '   p     filter by guest name/PID',
1272               '   q     quit',
1273               '   r     reset stats',
1274               '   s     set update interval',
1275               '   x     toggle reporting of stats for individual child trace'
1276               ' events',
1277               'Any other key refreshes statistics immediately')
1278        curses.cbreak()
1279        self.screen.erase()
1280        self.screen.addstr(0, 0, "Interactive commands reference",
1281                           curses.A_BOLD)
1282        self.screen.addstr(2, 0, "Press any key to exit", curses.A_STANDOUT)
1283        row = 4
1284        for line in msg:
1285            self.screen.addstr(row, 0, line)
1286            row += 1
1287        self.screen.getkey()
1288        self._refresh_header()
1289
1290    def _show_filter_selection(self):
1291        """Draws filter selection mask.
1292
1293        Asks for a valid regex and sets the fields filter accordingly.
1294
1295        """
1296        msg = ''
1297        while True:
1298            self.screen.erase()
1299            self.screen.addstr(0, 0,
1300                               "Show statistics for events matching a regex.",
1301                               curses.A_BOLD)
1302            self.screen.addstr(2, 0,
1303                               "Current regex: {0}"
1304                               .format(self.stats.fields_filter))
1305            self.screen.addstr(5, 0, msg)
1306            self.screen.addstr(3, 0, "New regex: ")
1307            curses.echo()
1308            regex = self.screen.getstr().decode(ENCODING)
1309            curses.noecho()
1310            if len(regex) == 0:
1311                self.stats.fields_filter = ''
1312                self._refresh_header()
1313                return
1314            try:
1315                re.compile(regex)
1316                self.stats.fields_filter = regex
1317                self._refresh_header()
1318                return
1319            except re.error:
1320                msg = '"' + regex + '": Not a valid regular expression'
1321                continue
1322
1323    def _show_set_update_interval(self):
1324        """Draws update interval selection mask."""
1325        msg = ''
1326        while True:
1327            self.screen.erase()
1328            self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' %
1329                               DELAY_DEFAULT, curses.A_BOLD)
1330            self.screen.addstr(4, 0, msg)
1331            self.screen.addstr(2, 0, 'Change delay from %.1fs to ' %
1332                               self._delay_regular)
1333            curses.echo()
1334            val = self.screen.getstr().decode(ENCODING)
1335            curses.noecho()
1336
1337            try:
1338                if len(val) > 0:
1339                    delay = float(val)
1340                    if delay < 0.1:
1341                        msg = '"' + str(val) + '": Value must be >=0.1'
1342                        continue
1343                    if delay > 25.5:
1344                        msg = '"' + str(val) + '": Value must be <=25.5'
1345                        continue
1346                else:
1347                    delay = DELAY_DEFAULT
1348                self._delay_regular = delay
1349                break
1350
1351            except ValueError:
1352                msg = '"' + str(val) + '": Invalid value'
1353        self._refresh_header()
1354
1355    def _is_running_guest(self, pid):
1356        """Check if pid is still a running process."""
1357        if not pid:
1358            return True
1359        return os.path.isdir(os.path.join('/proc/', str(pid)))
1360
1361    def _show_vm_selection_by_guest(self):
1362        """Draws guest selection mask.
1363
1364        Asks for a guest name or pid until a valid guest name or '' is entered.
1365
1366        """
1367        msg = ''
1368        while True:
1369            self.screen.erase()
1370            self.screen.addstr(0, 0,
1371                               'Show statistics for specific guest or pid.',
1372                               curses.A_BOLD)
1373            self.screen.addstr(1, 0,
1374                               'This might limit the shown data to the trace '
1375                               'statistics.')
1376            self.screen.addstr(5, 0, msg)
1377            self._print_all_gnames(7)
1378            curses.echo()
1379            curses.curs_set(1)
1380            self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ")
1381            guest = self.screen.getstr().decode(ENCODING)
1382            curses.noecho()
1383
1384            pid = 0
1385            if not guest or guest == '0':
1386                break
1387            if guest.isdigit():
1388                if not self._is_running_guest(guest):
1389                    msg = '"' + guest + '": Not a running process'
1390                    continue
1391                pid = int(guest)
1392                break
1393            pids = []
1394            try:
1395                pids = self.get_pid_from_gname(guest)
1396            except:
1397                msg = '"' + guest + '": Internal error while searching, ' \
1398                      'use pid filter instead'
1399                continue
1400            if len(pids) == 0:
1401                msg = '"' + guest + '": Not an active guest'
1402                continue
1403            if len(pids) > 1:
1404                msg = '"' + guest + '": Multiple matches found, use pid ' \
1405                      'filter instead'
1406                continue
1407            pid = pids[0]
1408            break
1409        curses.curs_set(0)
1410        self._refresh_header(pid)
1411        self._update_pid(pid)
1412
1413    def show_stats(self):
1414        """Refreshes the screen and processes user input."""
1415        sleeptime = self._delay_initial
1416        self._refresh_header()
1417        start = 0.0  # result based on init value never appears on screen
1418        while True:
1419            self._refresh_body(time.time() - start)
1420            curses.halfdelay(int(sleeptime * 10))
1421            start = time.time()
1422            sleeptime = self._delay_regular
1423            try:
1424                char = self.screen.getkey()
1425                if char == 'b':
1426                    self._display_guests = not self._display_guests
1427                    if self.stats.toggle_display_guests(self._display_guests):
1428                        self._show_msg(['Command not available with '
1429                                        'tracepoints enabled', 'Restart with '
1430                                        'debugfs only (see option \'-d\') and '
1431                                        'try again!'])
1432                        self._display_guests = not self._display_guests
1433                    self._refresh_header()
1434                if char == 'c':
1435                    self.stats.fields_filter = ''
1436                    self._refresh_header(0)
1437                    self._update_pid(0)
1438                if char == 'f':
1439                    curses.curs_set(1)
1440                    self._show_filter_selection()
1441                    curses.curs_set(0)
1442                    sleeptime = self._delay_initial
1443                if char == 'g' or char == 'p':
1444                    self._show_vm_selection_by_guest()
1445                    sleeptime = self._delay_initial
1446                if char == 'h':
1447                    self._show_help_interactive()
1448                if char == 'o':
1449                    self._sorting = not self._sorting
1450                if char == 'q':
1451                    break
1452                if char == 'r':
1453                    self.stats.reset()
1454                if char == 's':
1455                    curses.curs_set(1)
1456                    self._show_set_update_interval()
1457                    curses.curs_set(0)
1458                    sleeptime = self._delay_initial
1459                if char == 'x':
1460                    self.stats.child_events = not self.stats.child_events
1461            except KeyboardInterrupt:
1462                break
1463            except curses.error:
1464                continue
1465
1466
def batch(stats):
    """Prints statistics in a key, value format.

    The stats are read twice, one second apart, so that the printed deltas
    cover that second.
    """
    try:
        stats.get()
        time.sleep(1)
        snapshot = stats.get()
        for key in sorted(snapshot):
            stat = snapshot[key]
            name = key.split(' ')[0]
            print('%-42s%10d%10d' % (name, stat.value, stat.delta))
    except KeyboardInterrupt:
        pass
1478
1479
def log(stats):
    """Prints statistics as reiterating key block, multiple value blocks.

    One line of deltas is printed per second until interrupted; the line
    with the key names is repeated every 20 lines.
    """
    keys = sorted(stats.get().keys())

    def banner():
        print(''.join(key.split(' ')[0] + ' ' for key in keys))

    def statline():
        current = stats.get()
        print(''.join(' %9d ' % current[key].delta for key in keys))

    banner_repeat = 20
    line = 0
    try:
        while True:
            time.sleep(1)
            if line % banner_repeat == 0:
                banner()
            statline()
            line += 1
    except KeyboardInterrupt:
        pass
1505
1506
def get_options():
    """Returns processed program arguments.

    Exits the program on extra arguments, an invalid regex for the fields
    option, or a guest name that does not resolve to exactly one pid.
    """
    description_text = """
This script displays various statistics about VMs running under KVM.
The statistics are gathered from the KVM debugfs entries and / or the
currently available perf traces.

The monitoring takes additional cpu cycles and might affect the VM's
performance.

Requirements:
- Access to:
    %s
    %s/events/*
    /proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
  CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
  the large number of files that are possibly opened.

Interactive Commands:
   b     toggle events by guests (debugfs only, honors filters)
   c     clear filter
   f     filter by regular expression
   g     filter by guest name/PID
   h     display interactive commands reference
   o     toggle sorting order (Total vs CurAvg/s)
   p     filter by guest name/PID
   q     quit
   r     reset stats
   s     set update interval
   x     toggle reporting of stats for individual child trace events
Press any other key to refresh statistics immediately.
""" % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING)

    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
        """Help formatter that prints the description as is."""
        def format_description(self, description):
            if description:
                return description + "\n"
            else:
                return ""

    def cb_guest_to_pid(option, opt, val, parser):
        """Option callback storing the pid of the guest named val."""
        try:
            pids = Tui.get_pid_from_gname(val)
        except Exception:
            sys.exit('Error while searching for guest "{}". Use "-p" to '
                     'specify a pid instead?'.format(val))
        if len(pids) == 0:
            sys.exit('Error: No guest by the name "{}" found'.format(val))
        if len(pids) > 1:
            # the pids are ints and need converting before joining
            sys.exit('Error: Multiple processes found (pids: {}). Use "-p" '
                     'to specify the desired pid'
                     .format(" ".join(map(str, pids))))
        parser.values.pid = pids[0]

    optparser = optparse.OptionParser(description=description_text,
                                      formatter=PlainHelpFormatter())
    optparser.add_option('-1', '--once', '--batch',
                         action='store_true',
                         default=False,
                         dest='once',
                         help='run in batch mode for one second',
                         )
    optparser.add_option('-i', '--debugfs-include-past',
                         action='store_true',
                         default=False,
                         dest='dbgfs_include_past',
                         help='include all available data on past events for '
                              'debugfs',
                         )
    optparser.add_option('-l', '--log',
                         action='store_true',
                         default=False,
                         dest='log',
                         help='run in logging mode (like vmstat)',
                         )
    optparser.add_option('-t', '--tracepoints',
                         action='store_true',
                         default=False,
                         dest='tracepoints',
                         help='retrieve statistics from tracepoints',
                         )
    optparser.add_option('-d', '--debugfs',
                         action='store_true',
                         default=False,
                         dest='debugfs',
                         help='retrieve statistics from debugfs',
                         )
    optparser.add_option('-f', '--fields',
                         action='store',
                         default='',
                         dest='fields',
                         help='''fields to display (regex)
                                 "-f help" for a list of available events''',
                         )
    optparser.add_option('-p', '--pid',
                         action='store',
                         default=0,
                         type='int',
                         dest='pid',
                         help='restrict statistics to pid',
                         )
    optparser.add_option('-g', '--guest',
                         action='callback',
                         type='string',
                         dest='pid',
                         metavar='GUEST',
                         help='restrict statistics to guest by name',
                         callback=cb_guest_to_pid,
                         )
    # sys.argv includes the program name, which is hence always returned
    # as the one expected leftover argument
    options, unkn = optparser.parse_args(sys.argv)
    if len(unkn) != 1:
        sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:]))
    try:
        # verify that we were passed a valid regex up front
        re.compile(options.fields)
    except re.error:
        sys.exit('Error: "' + options.fields + '" is not a valid regular '
                 'expression')

    return options
1628
1629
def check_access(options):
    """Exits if the current user can't access all needed directories.

    If the tracing directory is missing and tracepoints were not requested
    explicitly, falls back to debugfs after a warning instead of exiting.
    Returns the (possibly adjusted) options.
    """
    tracing_wanted = options.tracepoints or not options.debugfs
    if os.path.exists(PATH_DEBUGFS_TRACING) or not tracing_wanted:
        return options

    sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
                     "when using the option -t (default).\n"
                     "If it is enabled, make {0} readable by the "
                     "current user.\n"
                     .format(PATH_DEBUGFS_TRACING))
    if options.tracepoints:
        sys.exit(1)

    sys.stderr.write("Falling back to debugfs statistics!\n")
    options.debugfs = True
    # leave the messages on screen for a moment before continuing
    time.sleep(5)
    return options
1647
1648
def assign_globals():
    """Sets the global debugfs paths from the debugfs mount point.

    Looks up the mount point in /proc/mounts and sets PATH_DEBUGFS_KVM and
    PATH_DEBUGFS_TRACING. Exits if debugfs is not mounted or the kvm
    directory is missing.
    """
    global PATH_DEBUGFS_KVM
    global PATH_DEBUGFS_TRACING

    debugfs = ''
    # context manager, so the file does not stay open
    with open('/proc/mounts') as mounts:
        for line in mounts:
            # fields: device, mount point, filesystem type, ...
            fields = line.split(' ')
            if len(fields) < 3:
                continue
            # debugfs may be mounted under any device name, hence check
            # the filesystem type as well
            if fields[0] == 'debugfs' or fields[2] == 'debugfs':
                debugfs = fields[1]
                break
    if debugfs == '':
        sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in "
                         "your kernel, mounted and\nreadable by the current "
                         "user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n")
        sys.exit(1)

    PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm')
    PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing')

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure that CONFIG_KVM is enabled in "
                         "your kernel and that the modules are loaded.\n")
        sys.exit(1)
1672
1673
def main():
    """Parses the options and runs the selected output mode."""
    assign_globals()
    options = check_access(get_options())

    pid = options.pid
    if pid > 0 and not os.path.isdir(os.path.join('/proc/', str(pid))):
        sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n')
        sys.exit('Specified pid does not exist.')

    stats = Stats(options)

    if options.fields == 'help':
        # list the available event names and quit
        stats.fields_filter = None
        event_names = set(key.split('(', 1)[0] for key in stats.get().keys())
        sys.stdout.write('  ' + '\n  '.join(sorted(event_names)) + '\n')
        sys.exit(0)

    if options.log:
        log(stats)
    elif options.once:
        batch(stats)
    else:
        with Tui(stats) as tui:
            tui.show_stats()
1702
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
1705