From 5641a0feaecd0016cfa479155a7947839499aacd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 18 May 2020 17:22:41 +0200 Subject: [PATCH] Add perf uncore events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- docs/runtime_options.md | 131 +++++++++- info/v1/container.go | 30 +++ info/v1/machine.go | 13 + info/v2/container.go | 6 + info/v2/conversion.go | 6 + info/v2/conversion_test.go | 17 ++ machine/info.go | 4 +- manager/manager.go | 2 +- metrics/prometheus.go | 44 +++- metrics/prometheus_fake.go | 16 ++ metrics/prometheus_test.go | 8 +- metrics/testdata/prometheus_metrics | 32 ++- perf/collector_libpfm.go | 147 +++++++---- perf/collector_libpfm_test.go | 26 +- perf/config.go | 17 +- perf/config_test.go | 32 ++- perf/manager_libpfm.go | 14 +- perf/manager_libpfm_test.go | 17 +- perf/manager_no_libpfm.go | 3 +- perf/testing/grouped.json | 26 +- perf/testing/perf-non-hardware.json | 28 +- perf/testing/perf.json | 44 +++- perf/uncore_libpfm.go | 392 ++++++++++++++++++++++++++++ perf/uncore_libpfm_test.go | 201 ++++++++++++++ utils/sysinfo/sysinfo.go | 11 + utils/sysinfo/sysinfo_test.go | 51 ++++ 26 files changed, 1153 insertions(+), 165 deletions(-) create mode 100644 perf/uncore_libpfm.go create mode 100644 perf/uncore_libpfm_test.go diff --git a/docs/runtime_options.md b/docs/runtime_options.md index 3198ba18..377e817e 100644 --- a/docs/runtime_options.md +++ b/docs/runtime_options.md @@ -158,6 +158,106 @@ automatically. * `grouping` - in scenario when accounted for events are used to calculate derivative metrics, it is reasonable to measure them in transactional manner: all the events in a group must be accounted for in the same period of time. Keep in mind that it is impossible to group more events that there are counters available. +* `uncore events` - events which can be counted by PMUs outside core. 
+* `PMU` - Performance Monitoring Unit + +#### Getting config values +Using perf tools: +* Identify the event in `perf list` output. +* Execute command: `perf stat -I 5000 -vvv -e EVENT_NAME` +* Find the `perf_event_attr` section in the `perf stat` output and copy the config and type fields to the configuration file. + +``` +------------------------------------------------------------ +perf_event_attr: + type 18 + size 112 + config 0x304 + sample_type IDENTIFIER + read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING + disabled 1 + inherit 1 + exclude_guest 1 +------------------------------------------------------------ +``` +* Configuration file should look like: +```json +{ + "core": { + "events": [ + ["EVENT_NAME"] + ], + "custom_events": [ + { + "type": 18, + "config": [ + "0x304" + ], + "name": "EVENT_NAME" + } + ] + }, + "uncore": { + "events": [ + ["EVENT_NAME"] + ], + "custom_events": [ + { + "type": 18, + "config": [ + "0x304" + ], + "name": "EVENT_NAME" + } + ] + } +} +``` + +Config values can also be obtained from: +* [Intel® 64 and IA32 Architectures Performance Monitoring Events](https://software.intel.com/content/www/us/en/develop/download/intel-64-and-ia32-architectures-performance-monitoring-events.html) + + +##### Uncore Events configuration +An uncore event name should be in the form `PMU_PREFIX/event_name`, where **PMU_PREFIX** means +that statistics will be counted on all PMUs with that prefix in their name. + +Let's explain this by example: + +```json +{ + "uncore": { + "events": [ + ["uncore_imc/cas_count_read"], + ["uncore_imc_0/cas_count_write"], + ["cas_count_all"] + ], + "custom_events": [ + { + "config": [ + "0x304" + ], + "name": "uncore_imc_0/cas_count_write" + }, + { + "type": 19, + "config": [ + "0x304" + ], + "name": "cas_count_all" + } + ] + } +} +``` + +- `uncore_imc/cas_count_read` - because of `uncore_imc` type and no entry in custom events, + it would be counted by **all** Integrated Memory Controller PMUs with config provided from libpfm package. 
+ (using this function: https://man7.org/linux/man-pages/man3/pfm_get_os_event_encoding.3.html) + +- `uncore_imc_0/cas_count_write` - because of `uncore_imc_0` type and entry in custom events it would be counted by `uncore_imc_0` PMU with provided config. + +- `cas_count_all` - because of entry in custom events with type field, event would be counted by PMU with **19** type and provided config. ### Further reading @@ -165,16 +265,17 @@ in mind that it is impossible to group more events that there are counters avail * [Kernel Perf Wiki](https://perf.wiki.kernel.org/index.php/Main_Page) * `man perf_event_open` * [perf subsystem](https://github.com/torvalds/linux/tree/v5.6/kernel/events) in Linux kernel +* [Uncore Performance Monitoring Reference Manuals](https://software.intel.com/content/www/us/en/develop/articles/intel-sdm.html#uncore) See example configuration below: ```json { - "events": [ - ["instructions"], - ["instructions_retired"] - ], - "custom_events": [ - [ + "core": { + "events": [ + ["instructions"], + ["instructions_retired"] + ], + "custom_events": [ { "type": 4, "config": [ @@ -183,7 +284,20 @@ See example configuration below: "name": "instructions_retired" } ] - ] + }, + "uncore": { + "events": [ + ["uncore_imc/cas_count_read"] + ], + "custom_events": [ + { + "config": [ + "0xc04" + ], + "name": "uncore_imc/cas_count_read" + } + ] + } } ``` @@ -194,6 +308,9 @@ interface that majority of users will rely on. * `instructions_retired` will be measured as non-grouped event and is specified using an advanced API that allows to specify any perf event available (some of them are not named and can't be specified with plain string). Event name should be a human readable string that will become a metric name. +* `cas_count_read` will be measured as uncore non-grouped event on all Integrated Memory Controllers Performance Monitoring Units because of unset `type` field and +`uncore_imc` prefix. 
+ ## Storage driver specific instructions: diff --git a/info/v1/container.go b/info/v1/container.go index 32d85d54..46ac7050 100644 --- a/info/v1/container.go +++ b/info/v1/container.go @@ -874,6 +874,32 @@ type ResctrlStats struct { Cache []CacheStats `json:"cache,omitempty"` } +// PerfUncoreStat represents value of a single monitored perf uncore event. +type PerfUncoreStat struct { + // Indicates scaling ratio for an event: time_running/time_enabled + // (amount of time that event was being measured divided by + // amount of time that event was enabled for). + // value 1.0 indicates that no multiplexing occurred. Value close + // to 0 indicates that event was measured for short time and event's + // value might be inaccurate. + // See: https://lwn.net/Articles/324756/ + ScalingRatio float64 `json:"scaling_ratio"` + + // Value represents value of perf event retrieved from OS. It is + // normalized against ScalingRatio and takes multiplexing into + // consideration. + Value uint64 `json:"value"` + + // Name is human readable name of an event. + Name string `json:"name"` + + // Socket that perf event was measured on. + Socket int `json:"socket"` + + // PMU is Performance Monitoring Unit which collected these stats. + PMU string `json:"pmu"` +} + type UlimitSpec struct { Name string `json:"name"` SoftLimit int64 `json:"soft_limit"` @@ -926,6 +952,10 @@ type ContainerStats struct { // Statistics originating from perf events PerfStats []PerfStat `json:"perf_stats,omitempty"` + // Statistics originating from perf uncore events. + // Applies only for root container. 
+ PerfUncoreStats []PerfUncoreStat `json:"perf_uncore_stats,omitempty"` + // Referenced memory ReferencedMemory uint64 `json:"referenced_memory,omitempty"` diff --git a/info/v1/machine.go b/info/v1/machine.go index ed5c953d..22c9ff8c 100644 --- a/info/v1/machine.go +++ b/info/v1/machine.go @@ -71,6 +71,19 @@ func (n *Node) FindCore(id int) (bool, int) { return false, -1 } +// FindCoreByThread reports whether a Core containing the provided thread was found, along with its index in the Node's Cores array. +// If it's not found, returns false and -1. +func (n *Node) FindCoreByThread(thread int) (bool, int) { + for i, n := range n.Cores { + for _, t := range n.Threads { + if t == thread { + return true, i + } + } + } + return false, -1 +} + func (n *Node) AddThread(thread int, core int) { var coreIdx int if core == -1 { diff --git a/info/v2/container.go b/info/v2/container.go index da24a1bc..145c8462 100644 --- a/info/v2/container.go +++ b/info/v2/container.go @@ -139,6 +139,9 @@ type DeprecatedContainerStats struct { CustomMetrics map[string][]v1.MetricVal `json:"custom_metrics,omitempty"` // Perf events counters PerfStats []v1.PerfStat `json:"perf_stats,omitempty"` + // Statistics originating from perf uncore events. + // Applies only for root container. + PerfUncoreStats []v1.PerfUncoreStat `json:"perf_uncore_stats,omitempty"` // Referenced memory ReferencedMemory uint64 `json:"referenced_memory,omitempty"` // Resource Control (resctrl) statistics @@ -173,6 +176,9 @@ type ContainerStats struct { CustomMetrics map[string][]v1.MetricVal `json:"custom_metrics,omitempty"` // Perf events counters PerfStats []v1.PerfStat `json:"perf_stats,omitempty"` + // Statistics originating from perf uncore events. + // Applies only for root container. 
+ PerfUncoreStats []v1.PerfUncoreStat `json:"perf_uncore_stats,omitempty"` // Referenced memory ReferencedMemory uint64 `json:"referenced_memory,omitempty"` // Resource Control (resctrl) statistics diff --git a/info/v2/conversion.go b/info/v2/conversion.go index 6e2523c9..f9a95a03 100644 --- a/info/v2/conversion.go +++ b/info/v2/conversion.go @@ -155,6 +155,9 @@ func ContainerStatsFromV1(containerName string, spec *v1.ContainerSpec, stats [] if len(val.PerfStats) > 0 { stat.PerfStats = val.PerfStats } + if len(val.PerfUncoreStats) > 0 { + stat.PerfUncoreStats = val.PerfUncoreStats + } if len(val.Resctrl.MemoryBandwidth) > 0 || len(val.Resctrl.Cache) > 0 { stat.Resctrl = val.Resctrl } @@ -213,6 +216,9 @@ func DeprecatedStatsFromV1(cont *v1.ContainerInfo) []DeprecatedContainerStats { if len(val.PerfStats) > 0 { stat.PerfStats = val.PerfStats } + if len(val.PerfUncoreStats) > 0 { + stat.PerfUncoreStats = val.PerfUncoreStats + } if len(val.Resctrl.MemoryBandwidth) > 0 || len(val.Resctrl.Cache) > 0 { stat.Resctrl = val.Resctrl } diff --git a/info/v2/conversion_test.go b/info/v2/conversion_test.go index 04ee9062..9dcf042b 100644 --- a/info/v2/conversion_test.go +++ b/info/v2/conversion_test.go @@ -208,6 +208,22 @@ func TestContainerStatsFromV1(t *testing.T) { Name: "cycles", }, }, + PerfUncoreStats: []v1.PerfUncoreStat{ + { + ScalingRatio: 1.0, + Value: 123456, + Name: "uncore_imc_0/cas_count_write", + Socket: 0, + PMU: "17", + }, + { + ScalingRatio: 1.0, + Value: 654321, + Name: "uncore_imc_0/cas_count_write", + Socket: 1, + PMU: "17", + }, + }, ReferencedMemory: uint64(1234), Resctrl: v1.ResctrlStats{ MemoryBandwidth: []v1.MemoryBandwidthStats{ @@ -247,6 +263,7 @@ func TestContainerStatsFromV1(t *testing.T) { }, Accelerators: v1Stats.Accelerators, PerfStats: v1Stats.PerfStats, + PerfUncoreStats: v1Stats.PerfUncoreStats, ReferencedMemory: v1Stats.ReferencedMemory, Resctrl: v1Stats.Resctrl, } diff --git a/machine/info.go b/machine/info.go index 72a77130..8db8a7e7 100644 
--- a/machine/info.go +++ b/machine/info.go @@ -22,6 +22,8 @@ import ( "strings" "time" + "golang.org/x/sys/unix" + "github.com/google/cadvisor/fs" info "github.com/google/cadvisor/info/v1" "github.com/google/cadvisor/nvm" @@ -30,8 +32,6 @@ import ( "github.com/google/cadvisor/utils/sysinfo" "k8s.io/klog/v2" - - "golang.org/x/sys/unix" ) const hugepagesDirectory = "/sys/kernel/mm/hugepages/" diff --git a/manager/manager.go b/manager/manager.go index 67a424ad..6d0c344a 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -212,7 +212,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig newManager.machineInfo = *machineInfo klog.V(1).Infof("Machine: %+v", newManager.machineInfo) - newManager.perfManager, err = perf.NewManager(perfEventsFile, machineInfo.NumCores) + newManager.perfManager, err = perf.NewManager(perfEventsFile, machineInfo.NumCores, machineInfo.Topology) if err != nil { return nil, err } diff --git a/metrics/prometheus.go b/metrics/prometheus.go index 1455fd8d..369c2074 100644 --- a/metrics/prometheus.go +++ b/metrics/prometheus.go @@ -1545,11 +1545,11 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri }, }...) 
} - if c.includedMetrics.Has(container.PerfMetrics) { + if includedMetrics.Has(container.PerfMetrics) { c.containerMetrics = append(c.containerMetrics, []containerMetric{ { - name: "container_perf_metric", - help: "Perf event metric", + name: "container_perf_events_total", + help: "Perf event metric.", valueType: prometheus.CounterValue, extraLabels: []string{"cpu", "event"}, getValues: func(s *info.ContainerStats) metricValues { @@ -1565,8 +1565,8 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri }, }, { - name: "container_perf_metric_scaling_ratio", - help: "Perf event metric scaling ratio", + name: "container_perf_events_scaling_ratio", + help: "Perf event metric scaling ratio.", valueType: prometheus.GaugeValue, extraLabels: []string{"cpu", "event"}, getValues: func(s *info.ContainerStats) metricValues { @@ -1581,6 +1581,40 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri return values }, }, + { + name: "container_perf_uncore_events_total", + help: "Perf uncore event metric.", + valueType: prometheus.CounterValue, + extraLabels: []string{"socket", "event", "pmu"}, + getValues: func(s *info.ContainerStats) metricValues { + values := make(metricValues, 0, len(s.PerfUncoreStats)) + for _, metric := range s.PerfUncoreStats { + values = append(values, metricValue{ + value: float64(metric.Value), + labels: []string{strconv.Itoa(metric.Socket), metric.Name, metric.PMU}, + timestamp: s.Timestamp, + }) + } + return values + }, + }, + { + name: "container_perf_uncore_events_scaling_ratio", + help: "Perf uncore event metric scaling ratio.", + valueType: prometheus.GaugeValue, + extraLabels: []string{"socket", "event", "pmu"}, + getValues: func(s *info.ContainerStats) metricValues { + values := make(metricValues, 0, len(s.PerfUncoreStats)) + for _, metric := range s.PerfUncoreStats { + values = append(values, metricValue{ + value: metric.ScalingRatio, + labels: []string{strconv.Itoa(metric.Socket), 
metric.Name, metric.PMU}, + timestamp: s.Timestamp, + }) + } + return values + }, + }, }...) } if includedMetrics.Has(container.ReferencedMemoryMetrics) { diff --git a/metrics/prometheus_fake.go b/metrics/prometheus_fake.go index d0d1c953..535fd5c9 100644 --- a/metrics/prometheus_fake.go +++ b/metrics/prometheus_fake.go @@ -648,6 +648,22 @@ func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.Container Cpu: 1, }, }, + PerfUncoreStats: []info.PerfUncoreStat{ + { + ScalingRatio: 1.0, + Value: 1231231512.0, + Name: "cas_count_read", + Socket: 0, + PMU: "uncore_imc_0", + }, + { + ScalingRatio: 1.0, + Value: 1111231331.0, + Name: "cas_count_read", + Socket: 1, + PMU: "uncore_imc_0", + }, + }, ReferencedMemory: 1234, Resctrl: info.ResctrlStats{ MemoryBandwidth: []info.MemoryBandwidthStats{ diff --git a/metrics/prometheus_test.go b/metrics/prometheus_test.go index a853666f..efc894bd 100644 --- a/metrics/prometheus_test.go +++ b/metrics/prometheus_test.go @@ -77,14 +77,16 @@ func TestPrometheusCollector_scrapeFailure(t *testing.T) { func TestNewPrometheusCollectorWithPerf(t *testing.T) { c := NewPrometheusCollector(mockInfoProvider{}, mockLabelFunc, container.MetricSet{container.PerfMetrics: struct{}{}}, now) - assert.Len(t, c.containerMetrics, 3) + assert.Len(t, c.containerMetrics, 5) names := []string{} for _, m := range c.containerMetrics { names = append(names, m.name) } assert.Contains(t, names, "container_last_seen") - assert.Contains(t, names, "container_perf_metric") - assert.Contains(t, names, "container_perf_metric_scaling_ratio") + assert.Contains(t, names, "container_perf_events_total") + assert.Contains(t, names, "container_perf_events_scaling_ratio") + assert.Contains(t, names, "container_perf_uncore_events_total") + assert.Contains(t, names, "container_perf_uncore_events_scaling_ratio") } type mockInfoProvider struct{} diff --git a/metrics/testdata/prometheus_metrics b/metrics/testdata/prometheus_metrics index 6a831059..be1993c6 100644 --- 
a/metrics/testdata/prometheus_metrics +++ b/metrics/testdata/prometheus_metrics @@ -327,18 +327,26 @@ container_network_udp_usage_total{container_env_foo_env="prod",container_label_f container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="listen",zone_name="hello"} 0 1395066363000 container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="rxqueued",zone_name="hello"} 0 1395066363000 container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="txqueued",zone_name="hello"} 0 1395066363000 -# HELP container_perf_metric Perf event metric -# TYPE container_perf_metric counter -container_perf_metric{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 123 1395066363000 -container_perf_metric{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 321 1395066363000 -container_perf_metric{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 456 1395066363000 -container_perf_metric{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 789 1395066363000 -# HELP container_perf_metric_scaling_ratio Perf event metric scaling ratio -# TYPE container_perf_metric_scaling_ratio gauge 
-container_perf_metric_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1 1395066363000 -container_perf_metric_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.66666666666 1395066363000 -container_perf_metric_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.5 1395066363000 -container_perf_metric_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.33333333333 1395066363000 +# HELP container_perf_events_total Perf event metric. +# TYPE container_perf_events_total counter +container_perf_events_total{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 123 1395066363000 +container_perf_events_total{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 321 1395066363000 +container_perf_events_total{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 456 1395066363000 +container_perf_events_total{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 789 1395066363000 +# HELP container_perf_events_scaling_ratio Perf event metric scaling ratio. 
+# TYPE container_perf_events_scaling_ratio gauge +container_perf_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1 1395066363000 +container_perf_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.66666666666 1395066363000 +container_perf_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.5 1395066363000 +container_perf_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.33333333333 1395066363000 +# HELP container_perf_uncore_events_total Perf uncore event metric. +# TYPE container_perf_uncore_events_total counter +container_perf_uncore_events_total{container_env_foo_env="prod",container_label_foo_label="bar",event="cas_count_read",id="testcontainer",image="test",name="testcontaineralias",pmu="uncore_imc_0",socket="0",zone_name="hello"} 1.231231512e+09 1395066363000 +container_perf_uncore_events_total{container_env_foo_env="prod",container_label_foo_label="bar",event="cas_count_read",id="testcontainer",image="test",name="testcontaineralias",pmu="uncore_imc_0",socket="1",zone_name="hello"} 1.111231331e+09 1395066363000 +# HELP container_perf_uncore_events_scaling_ratio Perf uncore event metric scaling ratio. 
+# TYPE container_perf_uncore_events_scaling_ratio gauge +container_perf_uncore_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",event="cas_count_read",id="testcontainer",image="test",name="testcontaineralias",pmu="uncore_imc_0",socket="0",zone_name="hello"} 1 1395066363000 +container_perf_uncore_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",event="cas_count_read",id="testcontainer",image="test",name="testcontaineralias",pmu="uncore_imc_0",socket="1",zone_name="hello"} 1 1395066363000 # HELP container_processes Number of processes running inside the container. # TYPE container_processes gauge container_processes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1 1395066363000 diff --git a/perf/collector_libpfm.go b/perf/collector_libpfm.go index 1063e7df..01ae2943 100644 --- a/perf/collector_libpfm.go +++ b/perf/collector_libpfm.go @@ -31,18 +31,21 @@ import ( "sync" "unsafe" - info "github.com/google/cadvisor/info/v1" "golang.org/x/sys/unix" "k8s.io/klog/v2" + + info "github.com/google/cadvisor/info/v1" + "github.com/google/cadvisor/stats" ) type collector struct { cgroupPath string - events Events + events PerfEvents cpuFiles map[string]map[int]readerCloser cpuFilesLock sync.Mutex numCores int eventToCustomEvent map[Event]*CustomEvent + uncore stats.Collector } var ( @@ -61,50 +64,68 @@ func init() { isLibpfmInitialized = true } -func newCollector(cgroupPath string, events Events, numCores int) *collector { - collector := &collector{cgroupPath: cgroupPath, events: events, cpuFiles: map[string]map[int]readerCloser{}, numCores: numCores} +func newCollector(cgroupPath string, events PerfEvents, numCores int, topology []info.Node) *collector { + collector := &collector{cgroupPath: cgroupPath, events: events, cpuFiles: map[string]map[int]readerCloser{}, numCores: numCores, uncore: NewUncoreCollector(cgroupPath, 
events, topology)} mapEventsToCustomEvents(collector) + return collector } func (c *collector) UpdateStats(stats *info.ContainerStats) error { + err := c.uncore.UpdateStats(stats) + if err != nil { + klog.Errorf("Failed to get uncore perf event stats: %v", err) + } + c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() stats.PerfStats = []info.PerfStat{} klog.V(5).Infof("Attempting to update perf_event stats from cgroup %q", c.cgroupPath) - for name, files := range c.cpuFiles { - for cpu, file := range files { - buf := make([]byte, 32) - _, err := file.Read(buf) + for name, cpus := range c.cpuFiles { + for cpu, file := range cpus { + stat, err := readPerfStat(file, name, cpu) if err != nil { - klog.Warningf("Unable to read from perf_event file (event: %q, CPU: %d) for %q", name, cpu, c.cgroupPath) + klog.Warningf("Unable to read from perf_event_file (event: %q, CPU: %d) for %q: %q", name, cpu, c.cgroupPath, err.Error()) continue } - perfData := &ReadFormat{} - reader := bytes.NewReader(buf) - err = binary.Read(reader, binary.LittleEndian, perfData) - if err != nil { - klog.Warningf("Unable to decode from binary format read from perf_event file (event: %q, CPU: %d) for %q", name, cpu, c.cgroupPath) - continue - } - klog.V(5).Infof("Read metric for event %q for cpu %d from cgroup %q: %d", name, cpu, c.cgroupPath, perfData.Value) - scalingRatio := 1.0 - if perfData.TimeEnabled != 0 { - scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled) - } - stat := info.PerfStat{ - Value: uint64(float64(perfData.Value) / scalingRatio), - Name: name, - ScalingRatio: scalingRatio, - Cpu: cpu, - } - stats.PerfStats = append(stats.PerfStats, stat) + klog.V(5).Infof("Read perf event (event: %q, CPU: %d) for %q: %d", name, cpu, c.cgroupPath, stat.Value) + + stats.PerfStats = append(stats.PerfStats, *stat) } } + return nil } +func readPerfStat(file readerCloser, name string, cpu int) (*info.PerfStat, error) { + buf := make([]byte, 32) + _, err := file.Read(buf) + if 
err != nil { + return nil, err + } + perfData := &ReadFormat{} + reader := bytes.NewReader(buf) + err = binary.Read(reader, binary.LittleEndian, perfData) + if err != nil { + return nil, err + } + + scalingRatio := 1.0 + if perfData.TimeEnabled != 0 { + scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled) + } + + stat := info.PerfStat{ + Value: uint64(float64(perfData.Value) / scalingRatio), + Name: name, + ScalingRatio: scalingRatio, + Cpu: cpu, + } + + return &stat, nil +} + func (c *collector) setup() error { cgroup, err := os.Open(c.cgroupPath) if err != nil { @@ -115,7 +136,7 @@ func (c *collector) setup() error { c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() cgroupFd := int(cgroup.Fd()) - for _, group := range c.events.Events { + for _, group := range c.events.Core.Events { customEvent, ok := c.eventToCustomEvent[group[0]] var err error if ok { @@ -127,6 +148,7 @@ func (c *collector) setup() error { return err } } + return nil } @@ -141,10 +163,10 @@ func (c *collector) setupRawNonGrouped(event *CustomEvent, cgroup int) error { return nil } -func (c *collector) registerEvent(config *unix.PerfEventAttr, name string, cgroup int) error { +func (c *collector) registerEvent(config *unix.PerfEventAttr, name string, pid int) error { var cpu int for cpu = 0; cpu < c.numCores; cpu++ { - pid, groupFd, flags := cgroup, -1, unix.PERF_FLAG_FD_CLOEXEC|unix.PERF_FLAG_PID_CGROUP + groupFd, flags := -1, unix.PERF_FLAG_FD_CLOEXEC|unix.PERF_FLAG_PID_CGROUP fd, err := unix.PerfEventOpen(config, pid, cpu, groupFd, flags) if err != nil { return fmt.Errorf("setting up perf event %#v failed: %q", config, err) @@ -164,35 +186,18 @@ func (c *collector) addEventFile(name string, cpu int, perfFile *os.File) { if !ok { c.cpuFiles[name] = map[int]readerCloser{} } + c.cpuFiles[name][cpu] = perfFile } func (c *collector) setupNonGrouped(name string, cgroup int) error { - if !isLibpfmInitialized { - return fmt.Errorf("libpfm4 is not initialized, cannot proceed 
with setting perf events up") + perfEventAttr, err := getPerfEventAttr(name) + if err != nil { + return err } + defer C.free(unsafe.Pointer(perfEventAttr)) - klog.V(5).Infof("Setting up non-grouped perf event %s", name) - - perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{}))) - defer C.free(perfEventAttrMemory) - event := pfmPerfEncodeArgT{} - - perfEventAttr := (*unix.PerfEventAttr)(perfEventAttrMemory) - fstr := C.CString("") - event.fstr = unsafe.Pointer(fstr) - event.attr = perfEventAttrMemory - event.size = C.ulong(unsafe.Sizeof(event)) - - cSafeName := C.CString(name) - pErr := C.pfm_get_os_event_encoding(cSafeName, C.PFM_PLM0|C.PFM_PLM3, C.PFM_OS_PERF_EVENT, unsafe.Pointer(&event)) - if pErr != C.PFM_SUCCESS { - return fmt.Errorf("unable to transform event name %s to perf_event_attr: %d", name, int(pErr)) - } - - klog.V(5).Infof("perf_event_attr: %#v", perfEventAttr) - setAttributes(perfEventAttr) - return c.registerEvent(perfEventAttr, string(name), cgroup) + return c.registerEvent(perfEventAttr, name, cgroup) } func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr { @@ -214,6 +219,34 @@ func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr { return config } +func getPerfEventAttr(name string) (*unix.PerfEventAttr, error) { + if !isLibpfmInitialized { + return nil, fmt.Errorf("libpfm4 is not initialized, cannot proceed with setting perf events up") + } + + perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{}))) + event := pfmPerfEncodeArgT{} + + perfEventAttr := (*unix.PerfEventAttr)(perfEventAttrMemory) + fstr := C.CString("") + event.fstr = unsafe.Pointer(fstr) + event.attr = perfEventAttrMemory + event.size = C.ulong(unsafe.Sizeof(event)) + + cSafeName := C.CString(name) + + pErr := C.pfm_get_os_event_encoding(cSafeName, C.PFM_PLM0|C.PFM_PLM3, C.PFM_OS_PERF_EVENT, unsafe.Pointer(&event)) + if pErr != C.PFM_SUCCESS { + return nil, fmt.Errorf("unable to transform event name %s to 
perf_event_attr: %v", name, int(pErr)) + } + + klog.V(5).Infof("perf_event_attr: %#v", perfEventAttr) + + setAttributes(perfEventAttr) + + return perfEventAttr, nil +} + func setAttributes(config *unix.PerfEventAttr) { config.Sample_type = perfSampleIdentifier config.Read_format = unix.PERF_FORMAT_TOTAL_TIME_ENABLED | unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_ID @@ -222,6 +255,7 @@ func setAttributes(config *unix.PerfEventAttr) { } func (c *collector) Destroy() { + c.uncore.Destroy() c.cpuFilesLock.Lock() defer c.cpuFilesLock.Unlock() @@ -233,7 +267,6 @@ func (c *collector) Destroy() { klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu) } } - delete(c.cpuFiles, name) } } @@ -255,7 +288,7 @@ func Finalize() { func mapEventsToCustomEvents(collector *collector) { collector.eventToCustomEvent = map[Event]*CustomEvent{} - for key, event := range collector.events.CustomEvents { - collector.eventToCustomEvent[event.Name] = &collector.events.CustomEvents[key] + for key, event := range collector.events.Core.CustomEvents { + collector.eventToCustomEvent[event.Name] = &collector.events.Core.CustomEvents[key] } } diff --git a/perf/collector_libpfm_test.go b/perf/collector_libpfm_test.go index ef4c45f9..8cc4632f 100644 --- a/perf/collector_libpfm_test.go +++ b/perf/collector_libpfm_test.go @@ -20,10 +20,12 @@ package perf import ( "bytes" "encoding/binary" - "github.com/stretchr/testify/assert" "testing" + "github.com/stretchr/testify/assert" + info "github.com/google/cadvisor/info/v1" + "github.com/google/cadvisor/stats" ) type buffer struct { @@ -35,7 +37,7 @@ func (b buffer) Close() error { } func TestCollector_UpdateStats(t *testing.T) { - collector := collector{} + collector := collector{uncore: &stats.NoopCollector{}} notScaledBuffer := buffer{bytes.NewBuffer([]byte{})} scaledBuffer := buffer{bytes.NewBuffer([]byte{})} err := binary.Write(notScaledBuffer, binary.LittleEndian, ReadFormat{ @@ 
-96,15 +98,17 @@ func TestCreatePerfEventAttr(t *testing.T) { } func TestNewCollector(t *testing.T) { - perfCollector := newCollector("cgroup", Events{ - Events: [][]Event{{"event_1"}, {"event_2"}}, - CustomEvents: []CustomEvent{{ - Type: 0, - Config: []uint64{1, 2, 3}, - Name: "event_2", - }}, - }, 1) + perfCollector := newCollector("cgroup", PerfEvents{ + Core: Events{ + Events: [][]Event{{"event_1"}, {"event_2"}}, + CustomEvents: []CustomEvent{{ + Type: 0, + Config: []uint64{1, 2, 3}, + Name: "event_2", + }}, + }, + }, 1, []info.Node{}) assert.Len(t, perfCollector.eventToCustomEvent, 1) assert.Nil(t, perfCollector.eventToCustomEvent[Event("event_1")]) - assert.Same(t, &perfCollector.events.CustomEvents[0], perfCollector.eventToCustomEvent[Event("event_2")]) + assert.Same(t, &perfCollector.events.Core.CustomEvents[0], perfCollector.eventToCustomEvent[Event("event_2")]) } diff --git a/perf/config.go b/perf/config.go index 20de527c..efdf370a 100644 --- a/perf/config.go +++ b/perf/config.go @@ -24,9 +24,16 @@ import ( "k8s.io/klog/v2" ) +type PerfEvents struct { + // Core perf events to be measured. + Core Events `json:"core,omitempty"` + + // Uncore perf events to be measured. + Uncore Events `json:"uncore,omitempty"` +} + type Events struct { - // List of perf events' names to be measured. Any value found in - // output of perf list can be used. + // List of perf events' names to be measured. Events [][]Event `json:"events"` // List of custom perf events' to be measured. It is impossible to @@ -40,7 +47,7 @@ type Event string type CustomEvent struct { // Type of the event. See perf_event_attr documentation // at man perf_event_open. 
- Type uint32 `json:"type"` + Type uint32 `json:"type,omitempty"` // Symbolically formed event like: // pmu/config=PerfEvent.Config[0],config1=PerfEvent.Config[1],config2=PerfEvent.Config[2] @@ -73,11 +80,11 @@ func (c *Config) UnmarshalJSON(b []byte) error { return nil } -func parseConfig(file *os.File) (events Events, err error) { +func parseConfig(file *os.File) (events PerfEvents, err error) { decoder := json.NewDecoder(file) err = decoder.Decode(&events) if err != nil { - err = fmt.Errorf("unable to load perf events cofiguration from %q: %q", file.Name(), err) + err = fmt.Errorf("unable to load perf events configuration from %q: %q", file.Name(), err) return } return diff --git a/perf/config_test.go b/perf/config_test.go index f2284089..d5ac482c 100644 --- a/perf/config_test.go +++ b/perf/config_test.go @@ -15,9 +15,10 @@ package perf import ( - "github.com/stretchr/testify/assert" "os" "testing" + + "github.com/stretchr/testify/assert" ) func TestConfigParsing(t *testing.T) { @@ -28,14 +29,25 @@ func TestConfigParsing(t *testing.T) { events, err := parseConfig(file) assert.Nil(t, err) - assert.Len(t, events.Events, 2) - assert.Len(t, events.Events[0], 1) - assert.Equal(t, Event("instructions"), events.Events[0][0]) - assert.Len(t, events.Events[1], 1) - assert.Equal(t, Event("instructions_retired"), events.Events[1][0]) + assert.Len(t, events.Core.Events, 2) + assert.Len(t, events.Core.Events[0], 1) + assert.Equal(t, Event("instructions"), events.Core.Events[0][0]) + assert.Len(t, events.Core.Events[1], 1) + assert.Equal(t, Event("instructions_retired"), events.Core.Events[1][0]) + + assert.Len(t, events.Core.CustomEvents, 1) + assert.Equal(t, Config{0x5300c0}, events.Core.CustomEvents[0].Config) + assert.Equal(t, uint32(0x04), events.Core.CustomEvents[0].Type) + assert.Equal(t, Event("instructions_retired"), events.Core.CustomEvents[0].Name) + + assert.Len(t, events.Uncore.Events, 3) + assert.Equal(t, Event("cas_count_write"), events.Uncore.Events[0][0]) + 
assert.Equal(t, Event("uncore_imc_0/UNC_M_CAS_COUNT:RD"), events.Uncore.Events[1][0]) + assert.Equal(t, Event("uncore_ubox/UNC_U_EVENT_MSG"), events.Uncore.Events[2][0]) + + assert.Len(t, events.Uncore.CustomEvents, 1) + assert.Equal(t, Config{0x5300}, events.Uncore.CustomEvents[0].Config) + assert.Equal(t, uint32(0x12), events.Uncore.CustomEvents[0].Type) + assert.Equal(t, Event("cas_count_write"), events.Uncore.CustomEvents[0].Name) - assert.Len(t, events.CustomEvents, 1) - assert.Equal(t, Config{5439680}, events.CustomEvents[0].Config) - assert.Equal(t, uint32(4), events.CustomEvents[0].Type) - assert.Equal(t, Event("instructions_retired"), events.CustomEvents[0].Name) } diff --git a/perf/manager_libpfm.go b/perf/manager_libpfm.go index 06471789..1edbaf93 100644 --- a/perf/manager_libpfm.go +++ b/perf/manager_libpfm.go @@ -21,16 +21,18 @@ import ( "fmt" "os" + info "github.com/google/cadvisor/info/v1" "github.com/google/cadvisor/stats" ) type manager struct { - events Events + events PerfEvents numCores int + topology []info.Node stats.NoopDestroy } -func NewManager(configFile string, numCores int) (stats.Manager, error) { +func NewManager(configFile string, numCores int, topology []info.Node) (stats.Manager, error) { if configFile == "" { return &stats.NoopManager{}, nil } @@ -49,11 +51,11 @@ func NewManager(configFile string, numCores int) (stats.Manager, error) { return nil, fmt.Errorf("event grouping is not supported you must modify config file at %s", configFile) } - return &manager{events: config, numCores: numCores}, nil + return &manager{events: config, numCores: numCores, topology: topology}, nil } -func areGroupedEventsUsed(events Events) bool { - for _, group := range events.Events { +func areGroupedEventsUsed(events PerfEvents) bool { + for _, group := range events.Core.Events { if len(group) > 1 { return true } @@ -62,7 +64,7 @@ func areGroupedEventsUsed(events Events) bool { } func (m *manager) GetCollector(cgroupPath string) (stats.Collector, 
error) { - collector := newCollector(cgroupPath, m.events, m.numCores) + collector := newCollector(cgroupPath, m.events, m.numCores, m.topology) err := collector.setup() if err != nil { collector.Destroy() diff --git a/perf/manager_libpfm_test.go b/perf/manager_libpfm_test.go index d15d208d..eb341b84 100644 --- a/perf/manager_libpfm_test.go +++ b/perf/manager_libpfm_test.go @@ -18,13 +18,16 @@ package perf import ( - "github.com/google/cadvisor/stats" - "github.com/stretchr/testify/assert" "testing" + + info "github.com/google/cadvisor/info/v1" + "github.com/google/cadvisor/stats" + + "github.com/stretchr/testify/assert" ) func TestNoConfigFilePassed(t *testing.T) { - manager, err := NewManager("", 1) + manager, err := NewManager("", 1, []info.Node{}) assert.Nil(t, err) _, ok := manager.(*stats.NoopManager) @@ -32,28 +35,28 @@ func TestNoConfigFilePassed(t *testing.T) { } func TestNonExistentFile(t *testing.T) { - manager, err := NewManager("this-file-is-so-non-existent", 1) + manager, err := NewManager("this-file-is-so-non-existent", 1, []info.Node{}) assert.NotNil(t, err) assert.Nil(t, manager) } func TestMalformedJsonFile(t *testing.T) { - manager, err := NewManager("testing/this-is-some-random.json", 1) + manager, err := NewManager("testing/this-is-some-random.json", 1, []info.Node{}) assert.NotNil(t, err) assert.Nil(t, manager) } func TestGroupedEvents(t *testing.T) { - manager, err := NewManager("testing/grouped.json", 1) + manager, err := NewManager("testing/grouped.json", 1, []info.Node{}) assert.NotNil(t, err) assert.Nil(t, manager) } func TestNewManager(t *testing.T) { - managerInstance, err := NewManager("testing/perf.json", 1) + managerInstance, err := NewManager("testing/perf.json", 1, []info.Node{}) assert.Nil(t, err) _, ok := managerInstance.(*manager) diff --git a/perf/manager_no_libpfm.go b/perf/manager_no_libpfm.go index 2f409a2b..b0d7b9ae 100644 --- a/perf/manager_no_libpfm.go +++ b/perf/manager_no_libpfm.go @@ -18,12 +18,13 @@ package perf 
import ( + info "github.com/google/cadvisor/info/v1" "github.com/google/cadvisor/stats" "k8s.io/klog/v2" ) -func NewManager(configFile string, numCores int) (stats.Manager, error) { +func NewManager(configFile string, numCores int, topology []info.Node) (stats.Manager, error) { klog.V(1).Info("cAdvisor is build without cgo and/or libpfm support. Perf event counters are not available.") return &stats.NoopManager{}, nil } diff --git a/perf/testing/grouped.json b/perf/testing/grouped.json index bd2a402a..635322e9 100644 --- a/perf/testing/grouped.json +++ b/perf/testing/grouped.json @@ -1,14 +1,16 @@ { - "events": [ - ["instructions", "instructions_retired"] - ], - "custom_events": [ - { - "type": 4, - "config": [ - "0x5300c0" - ], - "name": "instructions_retired" - } - ] + "core": { + "events": [ + ["instructions", "instructions_retired"] + ], + "custom_events": [ + { + "type": 4, + "config": [ + "0x5300c0" + ], + "name": "instructions_retired" + } + ] + } } diff --git a/perf/testing/perf-non-hardware.json b/perf/testing/perf-non-hardware.json index 04d26990..dc410874 100644 --- a/perf/testing/perf-non-hardware.json +++ b/perf/testing/perf-non-hardware.json @@ -1,15 +1,17 @@ { - "events": [ - ["context-switches"], - ["cpu-migrations-custom"] - ], - "custom_events": [ - { - "type": 1, - "config": [ - "0x4" - ], - "name": "cpu-migrations-custom" - } - ] + "core": { + "events": [ + ["context-switches"], + ["cpu-migrations-custom"] + ], + "custom_events": [ + { + "type": 1, + "config": [ + "0x4" + ], + "name": "cpu-migrations-custom" + } + ] + } } diff --git a/perf/testing/perf.json b/perf/testing/perf.json index 916be88b..3207901f 100644 --- a/perf/testing/perf.json +++ b/perf/testing/perf.json @@ -1,15 +1,33 @@ { - "events": [ - ["instructions"], - ["instructions_retired"] - ], - "custom_events": [ - { - "type": 4, - "config": [ - "0x5300c0" - ], - "name": "instructions_retired" - } - ] + "core": { + "events": [ + ["instructions"], + ["instructions_retired"] + ], + 
"custom_events": [ + { + "type": 4, + "config": [ + "0x5300c0" + ], + "name": "instructions_retired" + } + ] + }, + "uncore": { + "events": [ + ["cas_count_write"], + ["uncore_imc_0/UNC_M_CAS_COUNT:RD"], + ["uncore_ubox/UNC_U_EVENT_MSG"] + ], + "custom_events": [ + { + "type": 18, + "config": [ + "0x5300" + ], + "name": "cas_count_write" + } + ] + } } diff --git a/perf/uncore_libpfm.go b/perf/uncore_libpfm.go new file mode 100644 index 00000000..cb2919fe --- /dev/null +++ b/perf/uncore_libpfm.go @@ -0,0 +1,392 @@ +// +build libpfm,cgo + +// Copyright 2020 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Uncore perf events logic. 
+package perf + +// #cgo CFLAGS: -I/usr/include +// #cgo LDFLAGS: -lpfm +// #include +// #include +import "C" +import ( + "bytes" + "encoding/binary" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "sync" + "unsafe" + + "golang.org/x/sys/unix" + "k8s.io/klog/v2" + + info "github.com/google/cadvisor/info/v1" + "github.com/google/cadvisor/stats" + "github.com/google/cadvisor/utils/sysinfo" +) + +type pmu struct { + name string + typeOf uint32 + cpus []uint32 +} + +const ( + uncorePMUPrefix = "uncore" + pmuTypeFilename = "type" + pmuCpumaskFilename = "cpumask" + systemDevicesPath = "/sys/devices" + rootPerfEventPath = "/sys/fs/cgroup/perf_event" +) + +func getPMU(pmus []pmu, gotType uint32) (*pmu, error) { + for _, pmu := range pmus { + if pmu.typeOf == gotType { + return &pmu, nil + } + } + + return nil, fmt.Errorf("there is no pmu with event type: %#v", gotType) +} + +type uncorePMUs map[string]pmu + +func readUncorePMU(path string, name string, cpumaskRegexp *regexp.Regexp) (*pmu, error) { + buf, err := ioutil.ReadFile(filepath.Join(path, pmuTypeFilename)) + if err != nil { + return nil, err + } + typeString := strings.TrimSpace(string(buf)) + eventType, err := strconv.ParseUint(typeString, 0, 32) + if err != nil { + return nil, err + } + + buf, err = ioutil.ReadFile(filepath.Join(path, pmuCpumaskFilename)) + if err != nil { + return nil, err + } + var cpus []uint32 + cpumask := strings.TrimSpace(string(buf)) + for _, cpu := range cpumaskRegexp.Split(cpumask, -1) { + parsedCPU, err := strconv.ParseUint(cpu, 0, 32) + if err != nil { + return nil, err + } + cpus = append(cpus, uint32(parsedCPU)) + } + + return &pmu{name: name, typeOf: uint32(eventType), cpus: cpus}, nil +} + +func getUncorePMUs(devicesPath string) (uncorePMUs, error) { + pmus := make(uncorePMUs, 0) + + // Depends on platform, cpu mask could be for example in form "0-1" or "0,1". 
+ cpumaskRegexp := regexp.MustCompile("[-,\n]") + err := filepath.Walk(devicesPath, func(path string, info os.FileInfo, err error) error { + // Skip root path. + if path == devicesPath { + return nil + } + if info.IsDir() { + if strings.HasPrefix(info.Name(), uncorePMUPrefix) { + pmu, err := readUncorePMU(path, info.Name(), cpumaskRegexp) + if err != nil { + return err + } + pmus[info.Name()] = *pmu + } + } + return nil + }) + if err != nil { + return nil, err + } + + return pmus, nil +} + +type uncoreCollector struct { + cpuFiles map[string]map[string]map[int]readerCloser + cpuFilesLock sync.Mutex + events [][]Event + eventToCustomEvent map[Event]*CustomEvent + topology []info.Node + + // Handle for mocking purposes. + perfEventOpen func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) +} + +func NewUncoreCollector(cgroupPath string, events PerfEvents, topology []info.Node) stats.Collector { + + if cgroupPath != rootPerfEventPath { + // Uncore metric doesn't exists for cgroups, only for entire platform. + return &stats.NoopCollector{} + } + + collector := &uncoreCollector{topology: topology} + + // Default implementation of Linux perf_event_open function. + collector.perfEventOpen = unix.PerfEventOpen + + err := collector.setup(events, systemDevicesPath) + if err != nil { + formatedError := fmt.Errorf("unable to setup uncore perf event collector: %v", err) + klog.V(5).Infof("Perf uncore metrics will not be available: %s", formatedError) + return &stats.NoopCollector{} + } + + return collector +} + +func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error { + var err error + readUncorePMUs, err := getUncorePMUs(devicesPath) + if err != nil { + return err + } + + // Maping from event name, pmu type, cpu. 
+ c.cpuFiles = make(map[string]map[string]map[int]readerCloser) + c.events = events.Uncore.Events + c.eventToCustomEvent = parseUncoreEvents(events.Uncore) + c.cpuFilesLock.Lock() + defer c.cpuFilesLock.Unlock() + + for _, group := range c.events { + if len(group) > 1 { + klog.Warning("grouping uncore perf events is not supported!") + continue + } + + eventName, pmuPrefix := parseEventName(string(group[0])) + + var err error + customEvent, ok := c.eventToCustomEvent[group[0]] + if ok { + if customEvent.Type != 0 { + pmus := obtainPMUs("uncore", readUncorePMUs) + err = c.setupRawNonGroupedUncore(customEvent, pmus) + } else { + pmus := obtainPMUs(pmuPrefix, readUncorePMUs) + err = c.setupRawNonGroupedUncore(customEvent, pmus) + } + } else { + pmus := obtainPMUs(pmuPrefix, readUncorePMUs) + err = c.setupNonGroupedUncore(eventName, pmus) + } + if err != nil { + return err + } + } + + return nil +} + +func parseEventName(eventName string) (string, string) { + // First "/" separate pmu prefix and event name + // ex. "uncore_imc_0/cas_count_read" -> uncore_imc_0 and cas_count_read. 
+ splittedEvent := strings.SplitN(eventName, "/", 2) + var pmuPrefix = "" + if len(splittedEvent) == 2 { + pmuPrefix = splittedEvent[0] + eventName = splittedEvent[1] + } + return eventName, pmuPrefix +} + +func obtainPMUs(want string, gotPMUs uncorePMUs) []pmu { + var pmus []pmu + if want == "" { + return pmus + } + for _, pmu := range gotPMUs { + if strings.HasPrefix(pmu.name, want) { + pmus = append(pmus, pmu) + } + } + + return pmus +} + +func parseUncoreEvents(events Events) map[Event]*CustomEvent { + eventToCustomEvent := map[Event]*CustomEvent{} + for _, uncoreEvent := range events.Events { + for _, customEvent := range events.CustomEvents { + if uncoreEvent[0] == customEvent.Name { + eventToCustomEvent[customEvent.Name] = &customEvent + break + } + } + } + + return eventToCustomEvent +} + +func (c *uncoreCollector) Destroy() { + c.cpuFilesLock.Lock() + defer c.cpuFilesLock.Unlock() + + for name, pmus := range c.cpuFiles { + for pmu, cpus := range pmus { + for cpu, file := range cpus { + klog.V(5).Infof("Closing uncore perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) + err := file.Close() + if err != nil { + klog.Warningf("Unable to close perf_event file descriptor for event %q, PMU %s and CPU %d", name, pmu, cpu) + } + } + delete(pmus, pmu) + } + delete(c.cpuFiles, name) + } +} + +func (c *uncoreCollector) UpdateStats(stats *info.ContainerStats) error { + klog.V(5).Info("Attempting to update uncore perf_event stats") + + for name, pmus := range c.cpuFiles { + for pmu, cpus := range pmus { + for cpu, file := range cpus { + stat, err := readPerfUncoreStat(file, name, cpu, pmu, c.topology) + if err != nil { + return fmt.Errorf("unable to read from uncore perf_event_file (event: %q, CPU: %d, PMU: %s): %q", name, cpu, pmu, err.Error()) + } + klog.V(5).Infof("Read uncore perf event (event: %q, CPU: %d, PMU: %s): %d", name, cpu, pmu, stat.Value) + + stats.PerfUncoreStats = append(stats.PerfUncoreStats, *stat) + } + } + } + + return nil 
+} + +func (c *uncoreCollector) setupRawNonGroupedUncore(event *CustomEvent, pmus []pmu) error { + klog.V(5).Infof("Setting up non-grouped raw perf uncore event %#v", event) + + if event.Type == 0 { + // PMU isn't set. Register event for all PMUs. + for _, pmu := range pmus { + newEvent := CustomEvent{ + Type: pmu.typeOf, + Config: event.Config, + Name: event.Name, + } + config := createPerfEventAttr(newEvent) + err := c.registerUncoreEvent(config, string(newEvent.Name), pmu.cpus, pmu.name) + if err != nil { + return err + } + } + return nil + } else { + // Register event for the PMU. + config := createPerfEventAttr(*event) + pmu, err := getPMU(pmus, event.Type) + if err != nil { + return err + } + return c.registerUncoreEvent(config, string(event.Name), pmu.cpus, pmu.name) + } +} + +func (c *uncoreCollector) setupNonGroupedUncore(name string, pmus []pmu) error { + perfEventAttr, err := getPerfEventAttr(name) + if err != nil { + return err + } + defer C.free(unsafe.Pointer(perfEventAttr)) + + klog.V(5).Infof("Setting up non-grouped uncore perf event %s", name) + + // Register event for all memory controllers. 
+ for _, pmu := range pmus { + perfEventAttr.Type = pmu.typeOf + err = c.registerUncoreEvent(perfEventAttr, name, pmu.cpus, pmu.name) + if err != nil { + return err + } + } + return nil +} + +func (c *uncoreCollector) registerUncoreEvent(config *unix.PerfEventAttr, name string, cpus []uint32, pmu string) error { + for _, cpu := range cpus { + groupFd, pid, flags := -1, -1, 0 + fd, err := c.perfEventOpen(config, pid, int(cpu), groupFd, flags) + if err != nil { + return fmt.Errorf("setting up perf event %#v failed: %q", config, err) + } + perfFile := os.NewFile(uintptr(fd), name) + if perfFile == nil { + return fmt.Errorf("unable to create os.File from file descriptor %#v", fd) + } + + c.addEventFile(name, pmu, int(cpu), perfFile) + } + + return nil +} + +func (c *uncoreCollector) addEventFile(name string, pmu string, cpu int, perfFile *os.File) { + _, ok := c.cpuFiles[name] + if !ok { + c.cpuFiles[name] = map[string]map[int]readerCloser{} + } + + _, ok = c.cpuFiles[name][pmu] + if !ok { + c.cpuFiles[name][pmu] = map[int]readerCloser{} + } + + c.cpuFiles[name][pmu][cpu] = perfFile +} + +func readPerfUncoreStat(file readerCloser, name string, cpu int, pmu string, topology []info.Node) (*info.PerfUncoreStat, error) { + buf := make([]byte, 32) + _, err := file.Read(buf) + if err != nil { + return nil, err + } + perfData := &ReadFormat{} + reader := bytes.NewReader(buf) + err = binary.Read(reader, binary.LittleEndian, perfData) + if err != nil { + return nil, err + } + + scalingRatio := 1.0 + if perfData.TimeEnabled != 0 { + scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled) + } + + stat := info.PerfUncoreStat{ + Value: uint64(float64(perfData.Value) / scalingRatio), + Name: name, + ScalingRatio: scalingRatio, + Socket: sysinfo.GetSocketFromCPU(topology, cpu), + PMU: pmu, + } + + return &stat, nil +} diff --git a/perf/uncore_libpfm_test.go b/perf/uncore_libpfm_test.go new file mode 100644 index 00000000..7592a027 --- /dev/null +++ 
b/perf/uncore_libpfm_test.go @@ -0,0 +1,201 @@ +// +build libpfm,cgo + +// Copyright 2020 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Uncore perf events logic tests. +package perf + +import ( + "io/ioutil" + "os" + "path/filepath" + "testing" + + "golang.org/x/sys/unix" + + "github.com/stretchr/testify/assert" +) + +func mockSystemDevices() (string, error) { + testDir, err := ioutil.TempDir("", "uncore_imc_test") + if err != nil { + return "", err + } + + // First Uncore IMC PMU. + firstPMUPath := filepath.Join(testDir, "uncore_imc_0") + err = os.MkdirAll(firstPMUPath, os.ModePerm) + if err != nil { + return "", err + } + err = ioutil.WriteFile(filepath.Join(firstPMUPath, "cpumask"), []byte("0-1"), 777) + if err != nil { + return "", err + } + err = ioutil.WriteFile(filepath.Join(firstPMUPath, "type"), []byte("18"), 777) + if err != nil { + return "", err + } + + // Second Uncore IMC PMU. 
+ secondPMUPath := filepath.Join(testDir, "uncore_imc_1") + err = os.MkdirAll(secondPMUPath, os.ModePerm) + if err != nil { + return "", err + } + err = ioutil.WriteFile(filepath.Join(secondPMUPath, "cpumask"), []byte("0,1"), 777) + if err != nil { + return "", err + } + err = ioutil.WriteFile(filepath.Join(secondPMUPath, "type"), []byte("19"), 777) + if err != nil { + return "", err + } + + return testDir, nil +} + +func TestUncore(t *testing.T) { + path, err := mockSystemDevices() + assert.Nil(t, err) + defer func() { + err := os.RemoveAll(path) + assert.Nil(t, err) + }() + + actual, err := getUncorePMUs(path) + assert.Nil(t, err) + expected := uncorePMUs{ + "uncore_imc_0": {name: "uncore_imc_0", typeOf: 18, cpus: []uint32{0, 1}}, + "uncore_imc_1": {name: "uncore_imc_1", typeOf: 19, cpus: []uint32{0, 1}}, + } + assert.Equal(t, expected, actual) + + pmuSet := []pmu{ + actual["uncore_imc_0"], + actual["uncore_imc_1"], + } + actualPMU, err := getPMU(pmuSet, expected["uncore_imc_0"].typeOf) + assert.Nil(t, err) + assert.Equal(t, expected["uncore_imc_0"], *actualPMU) +} + +func TestUncoreCollectorSetup(t *testing.T) { + path, err := mockSystemDevices() + assert.Nil(t, err) + defer func() { + err := os.RemoveAll(path) + assert.Nil(t, err) + }() + + events := PerfEvents{ + Core: Events{ + Events: [][]Event{ + {"cache-misses"}, + }, + }, + Uncore: Events{ + Events: [][]Event{ + {"uncore_imc_0/cas_count_read"}, + {"uncore_imc/cas_count_write"}, + }, + CustomEvents: []CustomEvent{ + {18, Config{0x01, 0x02}, "uncore_imc_0/cas_count_read"}, + {0, Config{0x01, 0x03}, "uncore_imc/cas_count_write"}, + }, + }, + } + + collector := &uncoreCollector{} + collector.perfEventOpen = func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { + return 0, nil + } + + err = collector.setup(events, path) + // There are no errors. 
+ assert.Nil(t, err) + + // For "cas_count_write", collector has two registered PMUs, + // `uncore_imc_0 (of 18 type) and `uncore_imc_1` (of 19 type). + // Both of them has two cpus which corresponds to sockets. + assert.Equal(t, len(collector.cpuFiles["uncore_imc/cas_count_write"]["uncore_imc_0"]), 2) + assert.Equal(t, len(collector.cpuFiles["uncore_imc/cas_count_write"]["uncore_imc_1"]), 2) + + // For "cas_count_read", has only one registered PMU and it's `uncore_imc_0` (of 18 type) with two cpus which + // correspond to two sockets. + assert.Equal(t, len(collector.cpuFiles["uncore_imc_0/cas_count_read"]), 1) + assert.Equal(t, len(collector.cpuFiles["uncore_imc_0/cas_count_read"]["uncore_imc_0"]), 2) + + // For "cache-misses" it shouldn't register any PMU. + assert.Nil(t, collector.cpuFiles["cache-misses"]) +} + +func TestParseUncoreEvents(t *testing.T) { + events := PerfEvents{ + Uncore: Events{ + Events: [][]Event{ + {"cas_count_read"}, + {"cas_count_write"}, + }, + CustomEvents: []CustomEvent{ + { + Type: 17, + Config: Config{0x50, 0x60}, + Name: "cas_count_read", + }, + }, + }, + } + eventToCustomEvent := parseUncoreEvents(events.Uncore) + assert.Len(t, eventToCustomEvent, 1) + assert.Equal(t, eventToCustomEvent["cas_count_read"].Name, Event("cas_count_read")) + assert.Equal(t, eventToCustomEvent["cas_count_read"].Type, uint32(17)) + assert.Equal(t, eventToCustomEvent["cas_count_read"].Config, Config{0x50, 0x60}) +} + +func TestObtainPMUs(t *testing.T) { + got := uncorePMUs{ + "uncore_imc_0": {name: "uncore_imc_0", typeOf: 18, cpus: []uint32{0, 1}}, + "uncore_imc_1": {name: "uncore_imc_1", typeOf: 19, cpus: []uint32{0, 1}}, + } + + expected := []pmu{ + {name: "uncore_imc_0", typeOf: 18, cpus: []uint32{0, 1}}, + {name: "uncore_imc_1", typeOf: 19, cpus: []uint32{0, 1}}, + } + + actual := obtainPMUs("uncore_imc_0", got) + assert.Equal(t, []pmu{expected[0]}, actual) + + actual = obtainPMUs("uncore_imc_1", got) + assert.Equal(t, []pmu{expected[1]}, actual) + + 
actual = obtainPMUs("", got) + assert.Equal(t, []pmu(nil), actual) +} + +func TestUncoreParseEventName(t *testing.T) { + eventName, pmuPrefix := parseEventName("some_event") + assert.Equal(t, "some_event", eventName) + assert.Empty(t, pmuPrefix) + + eventName, pmuPrefix = parseEventName("some_pmu/some_event") + assert.Equal(t, "some_pmu", pmuPrefix) + assert.Equal(t, "some_event", eventName) + + eventName, pmuPrefix = parseEventName("some_pmu/some_event/first_slash/second_slash") + assert.Equal(t, "some_pmu", pmuPrefix) + assert.Equal(t, "some_event/first_slash/second_slash", eventName) +} diff --git a/utils/sysinfo/sysinfo.go b/utils/sysinfo/sysinfo.go index d1153417..9ef62b85 100644 --- a/utils/sysinfo/sysinfo.go +++ b/utils/sysinfo/sysinfo.go @@ -512,3 +512,14 @@ func getMatchedInt(rgx *regexp.Regexp, str string) (int, error) { } return valInt, nil } + +// GetSocketFromCPU returns Socket ID of passed CPU. If is not present, returns -1. +func GetSocketFromCPU(topology []info.Node, cpu int) int { + for _, node := range topology { + found, coreID := node.FindCoreByThread(cpu) + if found { + return node.Cores[coreID].SocketID + } + } + return -1 +} diff --git a/utils/sysinfo/sysinfo_test.go b/utils/sysinfo/sysinfo_test.go index 532afab5..25d18bc7 100644 --- a/utils/sysinfo/sysinfo_test.go +++ b/utils/sysinfo/sysinfo_test.go @@ -1235,3 +1235,54 @@ func TestGetNetworkStats(t *testing.T) { t.Errorf("expected to get stats %+v, got %+v", expectedStats, netStats) } } + +func TestGetSocketFromCPU(t *testing.T) { + topology := []info.Node{ + { + Id: 0, + Memory: 0, + HugePages: nil, + Cores: []info.Core{ + { + Id: 0, + Threads: []int{0, 1}, + Caches: nil, + SocketID: 0, + }, + { + Id: 1, + Threads: []int{2, 3}, + Caches: nil, + SocketID: 0, + }, + }, + Caches: nil, + }, + { + Id: 1, + Memory: 0, + HugePages: nil, + Cores: []info.Core{ + { + Id: 0, + Threads: []int{4, 5}, + Caches: nil, + SocketID: 1, + }, + { + Id: 1, + Threads: []int{6, 7}, + Caches: nil, + SocketID: 1, + 
}, + }, + Caches: nil, + }, + } + socket := GetSocketFromCPU(topology, 6) + assert.Equal(t, socket, 1) + + // Check if return "-1" when there is no data about passed CPU. + socket = GetSocketFromCPU(topology, 8) + assert.Equal(t, socket, -1) +}