Add perf uncore events

Signed-off-by: Paweł Szulik <pawel.szulik@intel.com>
This commit is contained in:
Paweł Szulik 2020-05-18 17:22:41 +02:00
parent 0ac6b77bee
commit 5641a0feae
26 changed files with 1153 additions and 165 deletions

View File

@ -158,6 +158,106 @@ automatically.
* `grouping` - in scenario when accounted for events are used to calculate derivative metrics, it is reasonable to
measure them in transactional manner: all the events in a group must be accounted for in the same period of time. Keep
in mind that it is impossible to group more events than there are counters available.
* `uncore events` - events which can be counted by PMUs outside core.
* `PMU` - Performance Monitoring Unit
#### Getting config values
Using perf tools:
* Identify the event in `perf list` output.
* Execute command: `perf stat -I 5000 -vvv -e EVENT_NAME`
* Find `perf_event_attr` section on `perf stat` output, copy config and type field to configuration file.
```
------------------------------------------------------------
perf_event_attr:
type 18
size 112
config 0x304
sample_type IDENTIFIER
read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING
disabled 1
inherit 1
exclude_guest 1
------------------------------------------------------------
```
* Configuration file should look like:
```json
{
"core": {
"events": [
["EVENT_NAME"]
],
"custom_events": [
{
"type": 18,
"config": [
"0x304"
],
"name": "EVENT_NAME"
}
]
},
"uncore": {
"events": [
["EVENT_NAME"]
],
"custom_events": [
{
"type": 18,
"config": [
"0x304"
],
"name": "EVENT_NAME"
}
]
}
}
```
Config values can also be obtained from:
* [Intel® 64 and IA32 Architectures Performance Monitoring Events](https://software.intel.com/content/www/us/en/develop/download/intel-64-and-ia32-architectures-performance-monitoring-events.html)
##### Uncore Events configuration
Uncore Event name should be in form `PMU_PREFIX/event_name` where **PMU_PREFIX** means
that statistics would be counted on all PMUs with that prefix in name.
Let's explain this by example:
```json
{
"uncore": {
"events": [
["uncore_imc/cas_count_read"],
["uncore_imc_0/cas_count_write"],
["cas_count_all"]
],
"custom_events": [
{
"config": [
"0x304"
],
"name": "uncore_imc_0/cas_count_write"
},
{
"type": 19,
"config": [
"0x304"
],
"name": "cas_count_all"
}
]
}
}
```
- `uncore_imc/cas_count_read` - because of `uncore_imc` type and no entry in custom events,
it would be counted by **all** Integrated Memory Controller PMUs with config provided from libpfm package.
(using this function: https://man7.org/linux/man-pages/man3/pfm_get_os_event_encoding.3.html)
- `uncore_imc_0/cas_count_write` - because of `uncore_imc_0` type and entry in custom events it would be counted by `uncore_imc_0` PMU with provided config.
- `cas_count_all` - because of entry in custom events with type field, event would be counted by PMU with **19** type and provided config.
### Further reading
@ -165,16 +265,17 @@ in mind that it is impossible to group more events that there are counters avail
* [Kernel Perf Wiki](https://perf.wiki.kernel.org/index.php/Main_Page)
* `man perf_event_open`
* [perf subsystem](https://github.com/torvalds/linux/tree/v5.6/kernel/events) in Linux kernel
* [Uncore Performance Monitoring Reference Manuals](https://software.intel.com/content/www/us/en/develop/articles/intel-sdm.html#uncore)
See example configuration below:
```json
{
"core": {
"events": [
["instructions"],
["instructions_retired"]
],
"custom_events": [
[
{
"type": 4,
"config": [
@ -183,8 +284,21 @@ See example configuration below:
"name": "instructions_retired"
}
]
},
"uncore": {
"events": [
["uncore_imc/cas_count_read"]
],
"custom_events": [
{
"config": [
"0xc04"
],
"name": "uncore_imc/cas_count_read"
}
]
}
}
```
In the example above:
@ -194,6 +308,9 @@ interface that majority of users will rely on.
* `instructions_retired` will be measured as non-grouped event and is specified using an advanced API that allows
to specify any perf event available (some of them are not named and can't be specified with plain string). Event name
should be a human readable string that will become a metric name.
* `cas_count_read` will be measured as uncore non-grouped event on all Integrated Memory Controllers Performance Monitoring Units because of unset `type` field and
`uncore_imc` prefix.
## Storage driver specific instructions:

View File

@ -874,6 +874,32 @@ type ResctrlStats struct {
Cache []CacheStats `json:"cache,omitempty"`
}
// PerfUncoreStat represents value of a single monitored perf uncore event.
type PerfUncoreStat struct {
	// ScalingRatio indicates the scaling ratio for an event:
	// time_running/time_enabled (amount of time that the event was being
	// measured divided by the amount of time that the event was enabled for).
	// A value of 1.0 indicates that no multiplexing occurred. A value close
	// to 0 indicates that the event was measured for a short time and the
	// event's value might be inaccurate.
	// See: https://lwn.net/Articles/324756/
	ScalingRatio float64 `json:"scaling_ratio"`

	// Value represents value of perf event retrieved from OS. It is
	// normalized against ScalingRatio and takes multiplexing into
	// consideration.
	Value uint64 `json:"value"`

	// Name is human readable name of an event.
	Name string `json:"name"`

	// Socket is the socket that the perf event was measured on.
	Socket int `json:"socket"`

	// PMU is the Performance Monitoring Unit which collected these stats.
	PMU string `json:"pmu"`
}
type UlimitSpec struct {
Name string `json:"name"`
SoftLimit int64 `json:"soft_limit"`
@ -926,6 +952,10 @@ type ContainerStats struct {
// Statistics originating from perf events
PerfStats []PerfStat `json:"perf_stats,omitempty"`
// Statistics originating from perf uncore events.
// Applies only for root container.
PerfUncoreStats []PerfUncoreStat `json:"perf_uncore_stats,omitempty"`
// Referenced memory
ReferencedMemory uint64 `json:"referenced_memory,omitempty"`

View File

@ -71,6 +71,19 @@ func (n *Node) FindCore(id int) (bool, int) {
return false, -1
}
// FindCoreByThread returns true and the index in the Node's Cores slice of
// the core containing the given thread ID.
// If no core contains the thread, it returns false and -1.
func (n *Node) FindCoreByThread(thread int) (bool, int) {
	// Use a distinct loop variable name: the original code ranged with
	// `for i, n := range n.Cores`, shadowing the receiver `n`, which is
	// confusing and flagged by the shadow analyzer.
	for i, core := range n.Cores {
		for _, t := range core.Threads {
			if t == thread {
				return true, i
			}
		}
	}
	return false, -1
}
func (n *Node) AddThread(thread int, core int) {
var coreIdx int
if core == -1 {

View File

@ -139,6 +139,9 @@ type DeprecatedContainerStats struct {
CustomMetrics map[string][]v1.MetricVal `json:"custom_metrics,omitempty"`
// Perf events counters
PerfStats []v1.PerfStat `json:"perf_stats,omitempty"`
// Statistics originating from perf uncore events.
// Applies only for root container.
PerfUncoreStats []v1.PerfUncoreStat `json:"perf_uncore_stats,omitempty"`
// Referenced memory
ReferencedMemory uint64 `json:"referenced_memory,omitempty"`
// Resource Control (resctrl) statistics
@ -173,6 +176,9 @@ type ContainerStats struct {
CustomMetrics map[string][]v1.MetricVal `json:"custom_metrics,omitempty"`
// Perf events counters
PerfStats []v1.PerfStat `json:"perf_stats,omitempty"`
// Statistics originating from perf uncore events.
// Applies only for root container.
PerfUncoreStats []v1.PerfUncoreStat `json:"perf_uncore_stats,omitempty"`
// Referenced memory
ReferencedMemory uint64 `json:"referenced_memory,omitempty"`
// Resource Control (resctrl) statistics

View File

@ -155,6 +155,9 @@ func ContainerStatsFromV1(containerName string, spec *v1.ContainerSpec, stats []
if len(val.PerfStats) > 0 {
stat.PerfStats = val.PerfStats
}
if len(val.PerfUncoreStats) > 0 {
stat.PerfUncoreStats = val.PerfUncoreStats
}
if len(val.Resctrl.MemoryBandwidth) > 0 || len(val.Resctrl.Cache) > 0 {
stat.Resctrl = val.Resctrl
}
@ -213,6 +216,9 @@ func DeprecatedStatsFromV1(cont *v1.ContainerInfo) []DeprecatedContainerStats {
if len(val.PerfStats) > 0 {
stat.PerfStats = val.PerfStats
}
if len(val.PerfUncoreStats) > 0 {
stat.PerfUncoreStats = val.PerfUncoreStats
}
if len(val.Resctrl.MemoryBandwidth) > 0 || len(val.Resctrl.Cache) > 0 {
stat.Resctrl = val.Resctrl
}

View File

@ -208,6 +208,22 @@ func TestContainerStatsFromV1(t *testing.T) {
Name: "cycles",
},
},
PerfUncoreStats: []v1.PerfUncoreStat{
{
ScalingRatio: 1.0,
Value: 123456,
Name: "uncore_imc_0/cas_count_write",
Socket: 0,
PMU: "17",
},
{
ScalingRatio: 1.0,
Value: 654321,
Name: "uncore_imc_0/cas_count_write",
Socket: 1,
PMU: "17",
},
},
ReferencedMemory: uint64(1234),
Resctrl: v1.ResctrlStats{
MemoryBandwidth: []v1.MemoryBandwidthStats{
@ -247,6 +263,7 @@ func TestContainerStatsFromV1(t *testing.T) {
},
Accelerators: v1Stats.Accelerators,
PerfStats: v1Stats.PerfStats,
PerfUncoreStats: v1Stats.PerfUncoreStats,
ReferencedMemory: v1Stats.ReferencedMemory,
Resctrl: v1Stats.Resctrl,
}

View File

@ -22,6 +22,8 @@ import (
"strings"
"time"
"golang.org/x/sys/unix"
"github.com/google/cadvisor/fs"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/nvm"
@ -30,8 +32,6 @@ import (
"github.com/google/cadvisor/utils/sysinfo"
"k8s.io/klog/v2"
"golang.org/x/sys/unix"
)
const hugepagesDirectory = "/sys/kernel/mm/hugepages/"

View File

@ -212,7 +212,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig
newManager.machineInfo = *machineInfo
klog.V(1).Infof("Machine: %+v", newManager.machineInfo)
newManager.perfManager, err = perf.NewManager(perfEventsFile, machineInfo.NumCores)
newManager.perfManager, err = perf.NewManager(perfEventsFile, machineInfo.NumCores, machineInfo.Topology)
if err != nil {
return nil, err
}

View File

@ -1545,11 +1545,11 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
},
}...)
}
if c.includedMetrics.Has(container.PerfMetrics) {
if includedMetrics.Has(container.PerfMetrics) {
c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
name: "container_perf_metric",
help: "Perf event metric",
name: "container_perf_events_total",
help: "Perf event metric.",
valueType: prometheus.CounterValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
@ -1565,8 +1565,8 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
},
},
{
name: "container_perf_metric_scaling_ratio",
help: "Perf event metric scaling ratio",
name: "container_perf_events_scaling_ratio",
help: "Perf event metric scaling ratio.",
valueType: prometheus.GaugeValue,
extraLabels: []string{"cpu", "event"},
getValues: func(s *info.ContainerStats) metricValues {
@ -1581,6 +1581,40 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
return values
},
},
{
name: "container_perf_uncore_events_total",
help: "Perf uncore event metric.",
valueType: prometheus.CounterValue,
extraLabels: []string{"socket", "event", "pmu"},
getValues: func(s *info.ContainerStats) metricValues {
values := make(metricValues, 0, len(s.PerfUncoreStats))
for _, metric := range s.PerfUncoreStats {
values = append(values, metricValue{
value: float64(metric.Value),
labels: []string{strconv.Itoa(metric.Socket), metric.Name, metric.PMU},
timestamp: s.Timestamp,
})
}
return values
},
},
{
name: "container_perf_uncore_events_scaling_ratio",
help: "Perf uncore event metric scaling ratio.",
valueType: prometheus.GaugeValue,
extraLabels: []string{"socket", "event", "pmu"},
getValues: func(s *info.ContainerStats) metricValues {
values := make(metricValues, 0, len(s.PerfUncoreStats))
for _, metric := range s.PerfUncoreStats {
values = append(values, metricValue{
value: metric.ScalingRatio,
labels: []string{strconv.Itoa(metric.Socket), metric.Name, metric.PMU},
timestamp: s.Timestamp,
})
}
return values
},
},
}...)
}
if includedMetrics.Has(container.ReferencedMemoryMetrics) {

View File

@ -648,6 +648,22 @@ func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.Container
Cpu: 1,
},
},
PerfUncoreStats: []info.PerfUncoreStat{
{
ScalingRatio: 1.0,
Value: 1231231512.0,
Name: "cas_count_read",
Socket: 0,
PMU: "uncore_imc_0",
},
{
ScalingRatio: 1.0,
Value: 1111231331.0,
Name: "cas_count_read",
Socket: 1,
PMU: "uncore_imc_0",
},
},
ReferencedMemory: 1234,
Resctrl: info.ResctrlStats{
MemoryBandwidth: []info.MemoryBandwidthStats{

View File

@ -77,14 +77,16 @@ func TestPrometheusCollector_scrapeFailure(t *testing.T) {
func TestNewPrometheusCollectorWithPerf(t *testing.T) {
c := NewPrometheusCollector(mockInfoProvider{}, mockLabelFunc, container.MetricSet{container.PerfMetrics: struct{}{}}, now)
assert.Len(t, c.containerMetrics, 3)
assert.Len(t, c.containerMetrics, 5)
names := []string{}
for _, m := range c.containerMetrics {
names = append(names, m.name)
}
assert.Contains(t, names, "container_last_seen")
assert.Contains(t, names, "container_perf_metric")
assert.Contains(t, names, "container_perf_metric_scaling_ratio")
assert.Contains(t, names, "container_perf_events_total")
assert.Contains(t, names, "container_perf_events_scaling_ratio")
assert.Contains(t, names, "container_perf_uncore_events_total")
assert.Contains(t, names, "container_perf_uncore_events_scaling_ratio")
}
type mockInfoProvider struct{}

View File

@ -327,18 +327,26 @@ container_network_udp_usage_total{container_env_foo_env="prod",container_label_f
container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="listen",zone_name="hello"} 0 1395066363000
container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="rxqueued",zone_name="hello"} 0 1395066363000
container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="txqueued",zone_name="hello"} 0 1395066363000
# HELP container_perf_metric Perf event metric
# TYPE container_perf_metric counter
container_perf_metric{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 123 1395066363000
container_perf_metric{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 321 1395066363000
container_perf_metric{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 456 1395066363000
container_perf_metric{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 789 1395066363000
# HELP container_perf_metric_scaling_ratio Perf event metric scaling ratio
# TYPE container_perf_metric_scaling_ratio gauge
container_perf_metric_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1 1395066363000
container_perf_metric_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.66666666666 1395066363000
container_perf_metric_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.5 1395066363000
container_perf_metric_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.33333333333 1395066363000
# HELP container_perf_events_total Perf event metric.
# TYPE container_perf_events_total counter
container_perf_events_total{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 123 1395066363000
container_perf_events_total{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 321 1395066363000
container_perf_events_total{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 456 1395066363000
container_perf_events_total{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 789 1395066363000
# HELP container_perf_events_scaling_ratio Perf event metric scaling ratio.
# TYPE container_perf_events_scaling_ratio gauge
container_perf_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1 1395066363000
container_perf_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.66666666666 1395066363000
container_perf_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.5 1395066363000
container_perf_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",event="instructions_retired",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.33333333333 1395066363000
# HELP container_perf_uncore_events_total Perf uncore event metric.
# TYPE container_perf_uncore_events_total counter
container_perf_uncore_events_total{container_env_foo_env="prod",container_label_foo_label="bar",event="cas_count_read",id="testcontainer",image="test",name="testcontaineralias",pmu="uncore_imc_0",socket="0",zone_name="hello"} 1.231231512e+09 1395066363000
container_perf_uncore_events_total{container_env_foo_env="prod",container_label_foo_label="bar",event="cas_count_read",id="testcontainer",image="test",name="testcontaineralias",pmu="uncore_imc_0",socket="1",zone_name="hello"} 1.111231331e+09 1395066363000
# HELP container_perf_uncore_events_scaling_ratio Perf uncore event metric scaling ratio.
# TYPE container_perf_uncore_events_scaling_ratio gauge
container_perf_uncore_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",event="cas_count_read",id="testcontainer",image="test",name="testcontaineralias",pmu="uncore_imc_0",socket="0",zone_name="hello"} 1 1395066363000
container_perf_uncore_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",event="cas_count_read",id="testcontainer",image="test",name="testcontaineralias",pmu="uncore_imc_0",socket="1",zone_name="hello"} 1 1395066363000
# HELP container_processes Number of processes running inside the container.
# TYPE container_processes gauge
container_processes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1 1395066363000

View File

@ -31,18 +31,21 @@ import (
"sync"
"unsafe"
info "github.com/google/cadvisor/info/v1"
"golang.org/x/sys/unix"
"k8s.io/klog/v2"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
)
type collector struct {
cgroupPath string
events Events
events PerfEvents
cpuFiles map[string]map[int]readerCloser
cpuFilesLock sync.Mutex
numCores int
eventToCustomEvent map[Event]*CustomEvent
uncore stats.Collector
}
var (
@ -61,48 +64,66 @@ func init() {
isLibpfmInitialized = true
}
func newCollector(cgroupPath string, events Events, numCores int) *collector {
collector := &collector{cgroupPath: cgroupPath, events: events, cpuFiles: map[string]map[int]readerCloser{}, numCores: numCores}
func newCollector(cgroupPath string, events PerfEvents, numCores int, topology []info.Node) *collector {
collector := &collector{cgroupPath: cgroupPath, events: events, cpuFiles: map[string]map[int]readerCloser{}, numCores: numCores, uncore: NewUncoreCollector(cgroupPath, events, topology)}
mapEventsToCustomEvents(collector)
return collector
}
func (c *collector) UpdateStats(stats *info.ContainerStats) error {
err := c.uncore.UpdateStats(stats)
if err != nil {
klog.Errorf("Failed to get uncore perf event stats: %v", err)
}
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
stats.PerfStats = []info.PerfStat{}
klog.V(5).Infof("Attempting to update perf_event stats from cgroup %q", c.cgroupPath)
for name, files := range c.cpuFiles {
for cpu, file := range files {
for name, cpus := range c.cpuFiles {
for cpu, file := range cpus {
stat, err := readPerfStat(file, name, cpu)
if err != nil {
klog.Warningf("Unable to read from perf_event_file (event: %q, CPU: %d) for %q: %q", name, cpu, c.cgroupPath, err.Error())
continue
}
klog.V(5).Infof("Read perf event (event: %q, CPU: %d) for %q: %d", name, cpu, c.cgroupPath, stat.Value)
stats.PerfStats = append(stats.PerfStats, *stat)
}
}
return nil
}
func readPerfStat(file readerCloser, name string, cpu int) (*info.PerfStat, error) {
buf := make([]byte, 32)
_, err := file.Read(buf)
if err != nil {
klog.Warningf("Unable to read from perf_event file (event: %q, CPU: %d) for %q", name, cpu, c.cgroupPath)
continue
return nil, err
}
perfData := &ReadFormat{}
reader := bytes.NewReader(buf)
err = binary.Read(reader, binary.LittleEndian, perfData)
if err != nil {
klog.Warningf("Unable to decode from binary format read from perf_event file (event: %q, CPU: %d) for %q", name, cpu, c.cgroupPath)
continue
return nil, err
}
klog.V(5).Infof("Read metric for event %q for cpu %d from cgroup %q: %d", name, cpu, c.cgroupPath, perfData.Value)
scalingRatio := 1.0
if perfData.TimeEnabled != 0 {
scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled)
}
stat := info.PerfStat{
Value: uint64(float64(perfData.Value) / scalingRatio),
Name: name,
ScalingRatio: scalingRatio,
Cpu: cpu,
}
stats.PerfStats = append(stats.PerfStats, stat)
}
}
return nil
return &stat, nil
}
func (c *collector) setup() error {
@ -115,7 +136,7 @@ func (c *collector) setup() error {
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
cgroupFd := int(cgroup.Fd())
for _, group := range c.events.Events {
for _, group := range c.events.Core.Events {
customEvent, ok := c.eventToCustomEvent[group[0]]
var err error
if ok {
@ -127,6 +148,7 @@ func (c *collector) setup() error {
return err
}
}
return nil
}
@ -141,10 +163,10 @@ func (c *collector) setupRawNonGrouped(event *CustomEvent, cgroup int) error {
return nil
}
func (c *collector) registerEvent(config *unix.PerfEventAttr, name string, cgroup int) error {
func (c *collector) registerEvent(config *unix.PerfEventAttr, name string, pid int) error {
var cpu int
for cpu = 0; cpu < c.numCores; cpu++ {
pid, groupFd, flags := cgroup, -1, unix.PERF_FLAG_FD_CLOEXEC|unix.PERF_FLAG_PID_CGROUP
groupFd, flags := -1, unix.PERF_FLAG_FD_CLOEXEC|unix.PERF_FLAG_PID_CGROUP
fd, err := unix.PerfEventOpen(config, pid, cpu, groupFd, flags)
if err != nil {
return fmt.Errorf("setting up perf event %#v failed: %q", config, err)
@ -164,35 +186,18 @@ func (c *collector) addEventFile(name string, cpu int, perfFile *os.File) {
if !ok {
c.cpuFiles[name] = map[int]readerCloser{}
}
c.cpuFiles[name][cpu] = perfFile
}
func (c *collector) setupNonGrouped(name string, cgroup int) error {
if !isLibpfmInitialized {
return fmt.Errorf("libpfm4 is not initialized, cannot proceed with setting perf events up")
perfEventAttr, err := getPerfEventAttr(name)
if err != nil {
return err
}
defer C.free(unsafe.Pointer(perfEventAttr))
klog.V(5).Infof("Setting up non-grouped perf event %s", name)
perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{})))
defer C.free(perfEventAttrMemory)
event := pfmPerfEncodeArgT{}
perfEventAttr := (*unix.PerfEventAttr)(perfEventAttrMemory)
fstr := C.CString("")
event.fstr = unsafe.Pointer(fstr)
event.attr = perfEventAttrMemory
event.size = C.ulong(unsafe.Sizeof(event))
cSafeName := C.CString(name)
pErr := C.pfm_get_os_event_encoding(cSafeName, C.PFM_PLM0|C.PFM_PLM3, C.PFM_OS_PERF_EVENT, unsafe.Pointer(&event))
if pErr != C.PFM_SUCCESS {
return fmt.Errorf("unable to transform event name %s to perf_event_attr: %d", name, int(pErr))
}
klog.V(5).Infof("perf_event_attr: %#v", perfEventAttr)
setAttributes(perfEventAttr)
return c.registerEvent(perfEventAttr, string(name), cgroup)
return c.registerEvent(perfEventAttr, name, cgroup)
}
func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr {
@ -214,6 +219,34 @@ func createPerfEventAttr(event CustomEvent) *unix.PerfEventAttr {
return config
}
// getPerfEventAttr translates a libpfm event name into a perf_event_attr
// structure via pfm_get_os_event_encoding, then applies the common sampling
// attributes with setAttributes.
// The returned attribute is allocated with C.malloc; the caller is
// responsible for releasing it with C.free (see the callers' deferred frees).
func getPerfEventAttr(name string) (*unix.PerfEventAttr, error) {
	if !isLibpfmInitialized {
		return nil, fmt.Errorf("libpfm4 is not initialized, cannot proceed with setting perf events up")
	}

	// Allocate the attr in C memory so libpfm can populate it; ownership of
	// this allocation passes to the caller through the returned pointer.
	perfEventAttrMemory := C.malloc(C.ulong(unsafe.Sizeof(unix.PerfEventAttr{})))
	event := pfmPerfEncodeArgT{}
	perfEventAttr := (*unix.PerfEventAttr)(perfEventAttrMemory)
	fstr := C.CString("")
	event.fstr = unsafe.Pointer(fstr)
	event.attr = perfEventAttrMemory
	event.size = C.ulong(unsafe.Sizeof(event))

	// NOTE(review): fstr and cSafeName are C.CString allocations that are not
	// freed in this function — confirm whether the leak per lookup is intended.
	cSafeName := C.CString(name)

	pErr := C.pfm_get_os_event_encoding(cSafeName, C.PFM_PLM0|C.PFM_PLM3, C.PFM_OS_PERF_EVENT, unsafe.Pointer(&event))
	if pErr != C.PFM_SUCCESS {
		return nil, fmt.Errorf("unable to transform event name %s to perf_event_attr: %v", name, int(pErr))
	}

	klog.V(5).Infof("perf_event_attr: %#v", perfEventAttr)

	setAttributes(perfEventAttr)
	return perfEventAttr, nil
}
func setAttributes(config *unix.PerfEventAttr) {
config.Sample_type = perfSampleIdentifier
config.Read_format = unix.PERF_FORMAT_TOTAL_TIME_ENABLED | unix.PERF_FORMAT_TOTAL_TIME_RUNNING | unix.PERF_FORMAT_ID
@ -222,6 +255,7 @@ func setAttributes(config *unix.PerfEventAttr) {
}
func (c *collector) Destroy() {
c.uncore.Destroy()
c.cpuFilesLock.Lock()
defer c.cpuFilesLock.Unlock()
@ -233,7 +267,6 @@ func (c *collector) Destroy() {
klog.Warningf("Unable to close perf_event file descriptor for cgroup %q, event %q and CPU %d", c.cgroupPath, name, cpu)
}
}
delete(c.cpuFiles, name)
}
}
@ -255,7 +288,7 @@ func Finalize() {
func mapEventsToCustomEvents(collector *collector) {
collector.eventToCustomEvent = map[Event]*CustomEvent{}
for key, event := range collector.events.CustomEvents {
collector.eventToCustomEvent[event.Name] = &collector.events.CustomEvents[key]
for key, event := range collector.events.Core.CustomEvents {
collector.eventToCustomEvent[event.Name] = &collector.events.Core.CustomEvents[key]
}
}

View File

@ -20,10 +20,12 @@ package perf
import (
"bytes"
"encoding/binary"
"github.com/stretchr/testify/assert"
"testing"
"github.com/stretchr/testify/assert"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
)
type buffer struct {
@ -35,7 +37,7 @@ func (b buffer) Close() error {
}
func TestCollector_UpdateStats(t *testing.T) {
collector := collector{}
collector := collector{uncore: &stats.NoopCollector{}}
notScaledBuffer := buffer{bytes.NewBuffer([]byte{})}
scaledBuffer := buffer{bytes.NewBuffer([]byte{})}
err := binary.Write(notScaledBuffer, binary.LittleEndian, ReadFormat{
@ -96,15 +98,17 @@ func TestCreatePerfEventAttr(t *testing.T) {
}
func TestNewCollector(t *testing.T) {
perfCollector := newCollector("cgroup", Events{
perfCollector := newCollector("cgroup", PerfEvents{
Core: Events{
Events: [][]Event{{"event_1"}, {"event_2"}},
CustomEvents: []CustomEvent{{
Type: 0,
Config: []uint64{1, 2, 3},
Name: "event_2",
}},
}, 1)
},
}, 1, []info.Node{})
assert.Len(t, perfCollector.eventToCustomEvent, 1)
assert.Nil(t, perfCollector.eventToCustomEvent[Event("event_1")])
assert.Same(t, &perfCollector.events.CustomEvents[0], perfCollector.eventToCustomEvent[Event("event_2")])
assert.Same(t, &perfCollector.events.Core.CustomEvents[0], perfCollector.eventToCustomEvent[Event("event_2")])
}

View File

@ -24,9 +24,16 @@ import (
"k8s.io/klog/v2"
)
// PerfEvents is the top-level perf events configuration, split into core
// and uncore sections (mirroring the JSON configuration file layout).
type PerfEvents struct {
	// Core perf events to be measured.
	Core Events `json:"core,omitempty"`

	// Uncore perf events to be measured (events counted by PMUs outside
	// the core, e.g. integrated memory controllers).
	Uncore Events `json:"uncore,omitempty"`
}
type Events struct {
// List of perf events' names to be measured. Any value found in
// output of perf list can be used.
// List of perf events' names to be measured.
Events [][]Event `json:"events"`
// List of custom perf events' to be measured. It is impossible to
@ -40,7 +47,7 @@ type Event string
type CustomEvent struct {
// Type of the event. See perf_event_attr documentation
// at man perf_event_open.
Type uint32 `json:"type"`
Type uint32 `json:"type,omitempty"`
// Symbolically formed event like:
// pmu/config=PerfEvent.Config[0],config1=PerfEvent.Config[1],config2=PerfEvent.Config[2]
@ -73,11 +80,11 @@ func (c *Config) UnmarshalJSON(b []byte) error {
return nil
}
func parseConfig(file *os.File) (events Events, err error) {
func parseConfig(file *os.File) (events PerfEvents, err error) {
decoder := json.NewDecoder(file)
err = decoder.Decode(&events)
if err != nil {
err = fmt.Errorf("unable to load perf events cofiguration from %q: %q", file.Name(), err)
err = fmt.Errorf("unable to load perf events configuration from %q: %q", file.Name(), err)
return
}
return

View File

@ -15,9 +15,10 @@
package perf
import (
"github.com/stretchr/testify/assert"
"os"
"testing"
"github.com/stretchr/testify/assert"
)
func TestConfigParsing(t *testing.T) {
@ -28,14 +29,25 @@ func TestConfigParsing(t *testing.T) {
events, err := parseConfig(file)
assert.Nil(t, err)
assert.Len(t, events.Events, 2)
assert.Len(t, events.Events[0], 1)
assert.Equal(t, Event("instructions"), events.Events[0][0])
assert.Len(t, events.Events[1], 1)
assert.Equal(t, Event("instructions_retired"), events.Events[1][0])
assert.Len(t, events.Core.Events, 2)
assert.Len(t, events.Core.Events[0], 1)
assert.Equal(t, Event("instructions"), events.Core.Events[0][0])
assert.Len(t, events.Core.Events[1], 1)
assert.Equal(t, Event("instructions_retired"), events.Core.Events[1][0])
assert.Len(t, events.Core.CustomEvents, 1)
assert.Equal(t, Config{0x5300c0}, events.Core.CustomEvents[0].Config)
assert.Equal(t, uint32(0x04), events.Core.CustomEvents[0].Type)
assert.Equal(t, Event("instructions_retired"), events.Core.CustomEvents[0].Name)
assert.Len(t, events.Uncore.Events, 3)
assert.Equal(t, Event("cas_count_write"), events.Uncore.Events[0][0])
assert.Equal(t, Event("uncore_imc_0/UNC_M_CAS_COUNT:RD"), events.Uncore.Events[1][0])
assert.Equal(t, Event("uncore_ubox/UNC_U_EVENT_MSG"), events.Uncore.Events[2][0])
assert.Len(t, events.Uncore.CustomEvents, 1)
assert.Equal(t, Config{0x5300}, events.Uncore.CustomEvents[0].Config)
assert.Equal(t, uint32(0x12), events.Uncore.CustomEvents[0].Type)
assert.Equal(t, Event("cas_count_write"), events.Uncore.CustomEvents[0].Name)
assert.Len(t, events.CustomEvents, 1)
assert.Equal(t, Config{5439680}, events.CustomEvents[0].Config)
assert.Equal(t, uint32(4), events.CustomEvents[0].Type)
assert.Equal(t, Event("instructions_retired"), events.CustomEvents[0].Name)
}

View File

@ -21,16 +21,18 @@ import (
"fmt"
"os"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
)
type manager struct {
events Events
events PerfEvents
numCores int
topology []info.Node
stats.NoopDestroy
}
func NewManager(configFile string, numCores int) (stats.Manager, error) {
func NewManager(configFile string, numCores int, topology []info.Node) (stats.Manager, error) {
if configFile == "" {
return &stats.NoopManager{}, nil
}
@ -49,11 +51,11 @@ func NewManager(configFile string, numCores int) (stats.Manager, error) {
return nil, fmt.Errorf("event grouping is not supported you must modify config file at %s", configFile)
}
return &manager{events: config, numCores: numCores}, nil
return &manager{events: config, numCores: numCores, topology: topology}, nil
}
func areGroupedEventsUsed(events Events) bool {
for _, group := range events.Events {
func areGroupedEventsUsed(events PerfEvents) bool {
for _, group := range events.Core.Events {
if len(group) > 1 {
return true
}
@ -62,7 +64,7 @@ func areGroupedEventsUsed(events Events) bool {
}
func (m *manager) GetCollector(cgroupPath string) (stats.Collector, error) {
collector := newCollector(cgroupPath, m.events, m.numCores)
collector := newCollector(cgroupPath, m.events, m.numCores, m.topology)
err := collector.setup()
if err != nil {
collector.Destroy()

View File

@ -18,13 +18,16 @@
package perf
import (
"github.com/google/cadvisor/stats"
"github.com/stretchr/testify/assert"
"testing"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
"github.com/stretchr/testify/assert"
)
func TestNoConfigFilePassed(t *testing.T) {
manager, err := NewManager("", 1)
manager, err := NewManager("", 1, []info.Node{})
assert.Nil(t, err)
_, ok := manager.(*stats.NoopManager)
@ -32,28 +35,28 @@ func TestNoConfigFilePassed(t *testing.T) {
}
func TestNonExistentFile(t *testing.T) {
manager, err := NewManager("this-file-is-so-non-existent", 1)
manager, err := NewManager("this-file-is-so-non-existent", 1, []info.Node{})
assert.NotNil(t, err)
assert.Nil(t, manager)
}
func TestMalformedJsonFile(t *testing.T) {
manager, err := NewManager("testing/this-is-some-random.json", 1)
manager, err := NewManager("testing/this-is-some-random.json", 1, []info.Node{})
assert.NotNil(t, err)
assert.Nil(t, manager)
}
func TestGroupedEvents(t *testing.T) {
manager, err := NewManager("testing/grouped.json", 1)
manager, err := NewManager("testing/grouped.json", 1, []info.Node{})
assert.NotNil(t, err)
assert.Nil(t, manager)
}
func TestNewManager(t *testing.T) {
managerInstance, err := NewManager("testing/perf.json", 1)
managerInstance, err := NewManager("testing/perf.json", 1, []info.Node{})
assert.Nil(t, err)
_, ok := managerInstance.(*manager)

View File

@ -18,12 +18,13 @@
package perf
import (
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
"k8s.io/klog/v2"
)
func NewManager(configFile string, numCores int) (stats.Manager, error) {
func NewManager(configFile string, numCores int, topology []info.Node) (stats.Manager, error) {
klog.V(1).Info("cAdvisor is build without cgo and/or libpfm support. Perf event counters are not available.")
return &stats.NoopManager{}, nil
}

View File

@ -1,4 +1,5 @@
{
"core": {
"events": [
["instructions", "instructions_retired"]
],
@ -12,3 +13,4 @@
}
]
}
}

View File

@ -1,4 +1,5 @@
{
"core": {
"events": [
["context-switches"],
["cpu-migrations-custom"]
@ -13,3 +14,4 @@
}
]
}
}

View File

@ -1,4 +1,5 @@
{
"core": {
"events": [
["instructions"],
["instructions_retired"]
@ -12,4 +13,21 @@
"name": "instructions_retired"
}
]
},
"uncore": {
"events": [
["cas_count_write"],
["uncore_imc_0/UNC_M_CAS_COUNT:RD"],
["uncore_ubox/UNC_U_EVENT_MSG"]
],
"custom_events": [
{
"type": 18,
"config": [
"0x5300"
],
"name": "cas_count_write"
}
]
}
}

392
perf/uncore_libpfm.go Normal file
View File

@ -0,0 +1,392 @@
// +build libpfm,cgo
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Uncore perf events logic.
package perf
// #cgo CFLAGS: -I/usr/include
// #cgo LDFLAGS: -lpfm
// #include <perfmon/pfmlib.h>
// #include <stdlib.h>
import "C"
import (
"bytes"
"encoding/binary"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"unsafe"
"golang.org/x/sys/unix"
"k8s.io/klog/v2"
info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/stats"
"github.com/google/cadvisor/utils/sysinfo"
)
// pmu describes a single Performance Monitoring Unit as exposed under
// /sys/devices: its sysfs directory name, its perf_event_attr type value
// (read from the "type" file), and the CPUs from its "cpumask" file on
// which its events are counted.
type pmu struct {
	name   string
	typeOf uint32
	cpus   []uint32
}
const (
	// Sysfs directories of uncore PMUs start with this prefix.
	uncorePMUPrefix = "uncore"
	// Sysfs file holding the PMU's perf_event_attr type value.
	pmuTypeFilename = "type"
	// Sysfs file listing the CPUs on which the PMU counts.
	pmuCpumaskFilename = "cpumask"
	// Root of the PMU sysfs hierarchy scanned for uncore PMUs.
	systemDevicesPath = "/sys/devices"
	// Root of the perf_event cgroup hierarchy; uncore events are
	// collected only for this root path (whole platform).
	rootPerfEventPath = "/sys/fs/cgroup/perf_event"
)
// getPMU returns the PMU from pmus whose perf event type equals gotType,
// or an error when no such PMU exists.
func getPMU(pmus []pmu, gotType uint32) (*pmu, error) {
	for _, candidate := range pmus {
		if candidate.typeOf != gotType {
			continue
		}
		// Return a pointer to a copy, not to the loop variable.
		found := candidate
		return &found, nil
	}
	return nil, fmt.Errorf("there is no pmu with event type: %#v", gotType)
}
// uncorePMUs maps a PMU sysfs name (e.g. "uncore_imc_0") to its description.
type uncorePMUs map[string]pmu
// readUncorePMU reads one PMU description (perf event type and the CPUs it
// counts on) from the sysfs PMU directory at path. cpumaskRegexp splits the
// cpumask file contents into individual CPU numbers.
func readUncorePMU(path string, name string, cpumaskRegexp *regexp.Regexp) (*pmu, error) {
	typeBuf, err := ioutil.ReadFile(filepath.Join(path, pmuTypeFilename))
	if err != nil {
		return nil, err
	}
	eventType, err := strconv.ParseUint(strings.TrimSpace(string(typeBuf)), 0, 32)
	if err != nil {
		return nil, err
	}

	maskBuf, err := ioutil.ReadFile(filepath.Join(path, pmuCpumaskFilename))
	if err != nil {
		return nil, err
	}
	var cpus []uint32
	for _, field := range cpumaskRegexp.Split(strings.TrimSpace(string(maskBuf)), -1) {
		parsedCPU, err := strconv.ParseUint(field, 0, 32)
		if err != nil {
			return nil, err
		}
		cpus = append(cpus, uint32(parsedCPU))
	}

	return &pmu{name: name, typeOf: uint32(eventType), cpus: cpus}, nil
}
// getUncorePMUs discovers all uncore PMUs under devicesPath (normally
// /sys/devices) by scanning for directories whose names start with the
// "uncore" prefix, and returns them keyed by PMU name.
func getUncorePMUs(devicesPath string) (uncorePMUs, error) {
	pmus := make(uncorePMUs, 0)

	// Depends on platform, cpu mask could be for example in form "0-1" or "0,1".
	cpumaskRegexp := regexp.MustCompile("[-,\n]")
	// The walk parameter is named fi (not info) to avoid shadowing the
	// imported info package.
	err := filepath.Walk(devicesPath, func(path string, fi os.FileInfo, err error) error {
		// Propagate walk errors; per filepath.Walk's contract, fi may be
		// nil when err is non-nil, so it must not be dereferenced then.
		if err != nil {
			return err
		}
		// Skip root path.
		if path == devicesPath {
			return nil
		}
		if fi.IsDir() && strings.HasPrefix(fi.Name(), uncorePMUPrefix) {
			pmu, err := readUncorePMU(path, fi.Name(), cpumaskRegexp)
			if err != nil {
				return err
			}
			pmus[fi.Name()] = *pmu
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	return pmus, nil
}
// uncoreCollector manages perf_event file descriptors opened for uncore
// events across all uncore PMUs and their CPUs.
type uncoreCollector struct {
	// cpuFiles maps event name -> PMU name -> CPU number -> open perf file.
	cpuFiles           map[string]map[string]map[int]readerCloser
	cpuFilesLock       sync.Mutex
	events             [][]Event
	eventToCustomEvent map[Event]*CustomEvent
	topology           []info.Node

	// Handle for mocking purposes.
	perfEventOpen func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error)
}
func NewUncoreCollector(cgroupPath string, events PerfEvents, topology []info.Node) stats.Collector {
if cgroupPath != rootPerfEventPath {
// Uncore metric doesn't exists for cgroups, only for entire platform.
return &stats.NoopCollector{}
}
collector := &uncoreCollector{topology: topology}
// Default implementation of Linux perf_event_open function.
collector.perfEventOpen = unix.PerfEventOpen
err := collector.setup(events, systemDevicesPath)
if err != nil {
formatedError := fmt.Errorf("unable to setup uncore perf event collector: %v", err)
klog.V(5).Infof("Perf uncore metrics will not be available: %s", formatedError)
return &stats.NoopCollector{}
}
return collector
}
// setup discovers uncore PMUs under devicesPath and opens a perf_event
// file descriptor for every configured uncore event on every CPU listed in
// the owning PMU's cpumask. Grouped uncore events are not supported and
// are skipped with a warning.
func (c *uncoreCollector) setup(events PerfEvents, devicesPath string) error {
	var err error
	readUncorePMUs, err := getUncorePMUs(devicesPath)
	if err != nil {
		return err
	}

	// Mapping from event name, to PMU name, to CPU.
	c.cpuFiles = make(map[string]map[string]map[int]readerCloser)
	c.events = events.Uncore.Events
	c.eventToCustomEvent = parseUncoreEvents(events.Uncore)
	c.cpuFilesLock.Lock()
	defer c.cpuFilesLock.Unlock()

	for _, group := range c.events {
		if len(group) > 1 {
			klog.Warning("grouping uncore perf events is not supported!")
			continue
		}

		eventName, pmuPrefix := parseEventName(string(group[0]))
		var err error
		customEvent, ok := c.eventToCustomEvent[group[0]]
		if ok {
			if customEvent.Type != 0 {
				// Event type given explicitly: offer every uncore PMU;
				// setupRawNonGroupedUncore picks the one matching the type.
				pmus := obtainPMUs("uncore", readUncorePMUs)
				err = c.setupRawNonGroupedUncore(customEvent, pmus)
			} else {
				// No explicit type: register the raw event on every PMU
				// matching the prefix taken from the event name.
				pmus := obtainPMUs(pmuPrefix, readUncorePMUs)
				err = c.setupRawNonGroupedUncore(customEvent, pmus)
			}
		} else {
			// Named (libpfm-resolved) event on every PMU matching the prefix.
			pmus := obtainPMUs(pmuPrefix, readUncorePMUs)
			err = c.setupNonGroupedUncore(eventName, pmus)
		}
		if err != nil {
			return err
		}
	}

	return nil
}
// parseEventName splits an uncore event specification at the first "/"
// into its bare event name and its PMU prefix, e.g.
// "uncore_imc_0/cas_count_read" -> ("cas_count_read", "uncore_imc_0").
// Specifications without a "/" yield an empty PMU prefix.
func parseEventName(eventName string) (string, string) {
	if prefix, name, found := strings.Cut(eventName, "/"); found {
		return name, prefix
	}
	return eventName, ""
}
// obtainPMUs returns all PMUs from gotPMUs whose name starts with want.
// An empty prefix matches nothing and yields a nil slice.
func obtainPMUs(want string, gotPMUs uncorePMUs) []pmu {
	var matched []pmu
	if want == "" {
		return matched
	}
	for _, candidate := range gotPMUs {
		if strings.HasPrefix(candidate.name, want) {
			matched = append(matched, candidate)
		}
	}
	return matched
}
// parseUncoreEvents maps every uncore event that has a matching custom
// event definition (matched by name against the event group's first entry)
// to that custom event. Events without a custom definition are omitted.
func parseUncoreEvents(events Events) map[Event]*CustomEvent {
	eventToCustomEvent := map[Event]*CustomEvent{}
	for _, uncoreEvent := range events.Events {
		for i := range events.CustomEvents {
			if uncoreEvent[0] == events.CustomEvents[i].Name {
				// Copy before taking the address so the stored pointer
				// never aliases a loop variable (pre-Go-1.22 range
				// variables are reused across iterations).
				customEvent := events.CustomEvents[i]
				eventToCustomEvent[customEvent.Name] = &customEvent
				break
			}
		}
	}
	return eventToCustomEvent
}
// Destroy closes every opened uncore perf_event file descriptor and clears
// the collector's bookkeeping maps.
func (c *uncoreCollector) Destroy() {
	c.cpuFilesLock.Lock()
	defer c.cpuFilesLock.Unlock()

	for event, pmuFiles := range c.cpuFiles {
		for pmuName, files := range pmuFiles {
			for cpu, file := range files {
				klog.V(5).Infof("Closing uncore perf_event file descriptor for event %q, PMU %s and CPU %d", event, pmuName, cpu)
				if err := file.Close(); err != nil {
					klog.Warningf("Unable to close perf_event file descriptor for event %q, PMU %s and CPU %d", event, pmuName, cpu)
				}
			}
			// Deleting during range is well-defined in Go.
			delete(pmuFiles, pmuName)
		}
		delete(c.cpuFiles, event)
	}
}
// UpdateStats reads every opened uncore perf_event file descriptor and
// appends the resulting measurements to stats.PerfUncoreStats. The first
// read error aborts the update.
// NOTE(review): c.cpuFiles is read here without taking cpuFilesLock —
// confirm callers serialize UpdateStats with Destroy/setup.
func (c *uncoreCollector) UpdateStats(stats *info.ContainerStats) error {
	klog.V(5).Info("Attempting to update uncore perf_event stats")

	for name, pmus := range c.cpuFiles {
		for pmu, cpus := range pmus {
			for cpu, file := range cpus {
				stat, err := readPerfUncoreStat(file, name, cpu, pmu, c.topology)
				if err != nil {
					return fmt.Errorf("unable to read from uncore perf_event_file (event: %q, CPU: %d, PMU: %s): %q", name, cpu, pmu, err.Error())
				}
				klog.V(5).Infof("Read uncore perf event (event: %q, CPU: %d, PMU: %s): %d", name, cpu, pmu, stat.Value)
				stats.PerfUncoreStats = append(stats.PerfUncoreStats, *stat)
			}
		}
	}

	return nil
}
// setupRawNonGroupedUncore registers a raw (type/config) uncore event.
// When the event carries no PMU type (Type == 0) it is registered on every
// passed PMU with that PMU's type substituted in; otherwise it is
// registered only on the single PMU whose type matches.
func (c *uncoreCollector) setupRawNonGroupedUncore(event *CustomEvent, pmus []pmu) error {
	klog.V(5).Infof("Setting up non-grouped raw perf uncore event %#v", event)

	if event.Type == 0 {
		// PMU isn't set. Register event for all PMUs.
		for _, pmu := range pmus {
			newEvent := CustomEvent{
				Type:   pmu.typeOf,
				Config: event.Config,
				Name:   event.Name,
			}
			config := createPerfEventAttr(newEvent)
			if err := c.registerUncoreEvent(config, string(newEvent.Name), pmu.cpus, pmu.name); err != nil {
				return err
			}
		}
		return nil
	}

	// Register event only on the PMU matching the event's type.
	// (Early return above removes the redundant else branch.)
	config := createPerfEventAttr(*event)
	pmu, err := getPMU(pmus, event.Type)
	if err != nil {
		return err
	}
	return c.registerUncoreEvent(config, string(event.Name), pmu.cpus, pmu.name)
}
// setupNonGroupedUncore registers a named (libpfm-resolved) uncore event
// on every passed PMU, substituting each PMU's perf event type into the
// shared attr before registering.
func (c *uncoreCollector) setupNonGroupedUncore(name string, pmus []pmu) error {
	perfEventAttr, err := getPerfEventAttr(name)
	if err != nil {
		return err
	}
	// The attr is C-allocated by getPerfEventAttr; release it once all
	// registrations are done.
	defer C.free(unsafe.Pointer(perfEventAttr))

	klog.V(5).Infof("Setting up non-grouped uncore perf event %s", name)

	// Register event for all memory controllers.
	for _, pmu := range pmus {
		perfEventAttr.Type = pmu.typeOf
		err = c.registerUncoreEvent(perfEventAttr, name, pmu.cpus, pmu.name)
		if err != nil {
			return err
		}
	}
	return nil
}
func (c *uncoreCollector) registerUncoreEvent(config *unix.PerfEventAttr, name string, cpus []uint32, pmu string) error {
for _, cpu := range cpus {
groupFd, pid, flags := -1, -1, 0
fd, err := c.perfEventOpen(config, pid, int(cpu), groupFd, flags)
if err != nil {
return fmt.Errorf("setting up perf event %#v failed: %q", config, err)
}
perfFile := os.NewFile(uintptr(fd), name)
if perfFile == nil {
return fmt.Errorf("unable to create os.File from file descriptor %#v", fd)
}
c.addEventFile(name, pmu, int(cpu), perfFile)
}
return nil
}
// addEventFile stores an opened perf file under event name, PMU name and
// CPU number, lazily creating the nested maps on first use.
func (c *uncoreCollector) addEventFile(name string, pmu string, cpu int, perfFile *os.File) {
	if _, found := c.cpuFiles[name]; !found {
		c.cpuFiles[name] = map[string]map[int]readerCloser{}
	}
	if _, found := c.cpuFiles[name][pmu]; !found {
		c.cpuFiles[name][pmu] = map[int]readerCloser{}
	}
	c.cpuFiles[name][pmu][cpu] = perfFile
}
// readPerfUncoreStat reads one measurement from an uncore perf_event file
// descriptor and converts it to an info.PerfUncoreStat. The raw counter is
// scaled by TimeRunning/TimeEnabled to compensate for event multiplexing.
func readPerfUncoreStat(file readerCloser, name string, cpu int, pmu string, topology []info.Node) (*info.PerfUncoreStat, error) {
	// buf is sized to the wire size of ReadFormat (32 bytes; presumably
	// four little-endian uint64 fields — confirm against the ReadFormat
	// declaration).
	buf := make([]byte, 32)
	_, err := file.Read(buf)
	if err != nil {
		return nil, err
	}

	perfData := &ReadFormat{}
	reader := bytes.NewReader(buf)
	err = binary.Read(reader, binary.LittleEndian, perfData)
	if err != nil {
		return nil, err
	}

	scalingRatio := 1.0
	if perfData.TimeEnabled != 0 {
		scalingRatio = float64(perfData.TimeRunning) / float64(perfData.TimeEnabled)
	}

	// Guard against scalingRatio == 0 (event enabled but never scheduled):
	// dividing by zero would produce +Inf, and converting +Inf to uint64
	// is undefined in Go. Fall back to the raw counter value in that case.
	value := perfData.Value
	if scalingRatio != 0 {
		value = uint64(float64(perfData.Value) / scalingRatio)
	}

	stat := info.PerfUncoreStat{
		Value:        value,
		Name:         name,
		ScalingRatio: scalingRatio,
		Socket:       sysinfo.GetSocketFromCPU(topology, cpu),
		PMU:          pmu,
	}
	return &stat, nil
}

201
perf/uncore_libpfm_test.go Normal file
View File

@ -0,0 +1,201 @@
// +build libpfm,cgo
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Uncore perf events logic tests.
package perf
import (
"io/ioutil"
"os"
"path/filepath"
"testing"
"golang.org/x/sys/unix"
"github.com/stretchr/testify/assert"
)
func mockSystemDevices() (string, error) {
testDir, err := ioutil.TempDir("", "uncore_imc_test")
if err != nil {
return "", err
}
// First Uncore IMC PMU.
firstPMUPath := filepath.Join(testDir, "uncore_imc_0")
err = os.MkdirAll(firstPMUPath, os.ModePerm)
if err != nil {
return "", err
}
err = ioutil.WriteFile(filepath.Join(firstPMUPath, "cpumask"), []byte("0-1"), 777)
if err != nil {
return "", err
}
err = ioutil.WriteFile(filepath.Join(firstPMUPath, "type"), []byte("18"), 777)
if err != nil {
return "", err
}
// Second Uncore IMC PMU.
secondPMUPath := filepath.Join(testDir, "uncore_imc_1")
err = os.MkdirAll(secondPMUPath, os.ModePerm)
if err != nil {
return "", err
}
err = ioutil.WriteFile(filepath.Join(secondPMUPath, "cpumask"), []byte("0,1"), 777)
if err != nil {
return "", err
}
err = ioutil.WriteFile(filepath.Join(secondPMUPath, "type"), []byte("19"), 777)
if err != nil {
return "", err
}
return testDir, nil
}
// TestUncore verifies that getUncorePMUs discovers both mocked IMC PMUs
// (parsing both cpumask formats) and that getPMU resolves a PMU by its
// perf event type.
func TestUncore(t *testing.T) {
	path, err := mockSystemDevices()
	assert.Nil(t, err)
	defer func() {
		err := os.RemoveAll(path)
		assert.Nil(t, err)
	}()

	actual, err := getUncorePMUs(path)
	assert.Nil(t, err)

	expected := uncorePMUs{
		"uncore_imc_0": {name: "uncore_imc_0", typeOf: 18, cpus: []uint32{0, 1}},
		"uncore_imc_1": {name: "uncore_imc_1", typeOf: 19, cpus: []uint32{0, 1}},
	}
	assert.Equal(t, expected, actual)

	pmuSet := []pmu{
		actual["uncore_imc_0"],
		actual["uncore_imc_1"],
	}
	actualPMU, err := getPMU(pmuSet, expected["uncore_imc_0"].typeOf)
	assert.Nil(t, err)
	assert.Equal(t, expected["uncore_imc_0"], *actualPMU)
}
// TestUncoreCollectorSetup runs uncoreCollector.setup against the mocked
// sysfs hierarchy with perf_event_open stubbed out, and checks which PMUs
// and CPUs each configured event was registered on.
func TestUncoreCollectorSetup(t *testing.T) {
	path, err := mockSystemDevices()
	assert.Nil(t, err)
	defer func() {
		err := os.RemoveAll(path)
		assert.Nil(t, err)
	}()

	events := PerfEvents{
		Core: Events{
			Events: [][]Event{
				{"cache-misses"},
			},
		},
		Uncore: Events{
			Events: [][]Event{
				{"uncore_imc_0/cas_count_read"},
				{"uncore_imc/cas_count_write"},
			},
			CustomEvents: []CustomEvent{
				// Type 18 pins the event to uncore_imc_0; type 0 means
				// "register on all PMUs matching the name's prefix".
				{18, Config{0x01, 0x02}, "uncore_imc_0/cas_count_read"},
				{0, Config{0x01, 0x03}, "uncore_imc/cas_count_write"},
			},
		},
	}

	collector := &uncoreCollector{}
	// Stub perf_event_open so setup never touches the real kernel API.
	collector.perfEventOpen = func(attr *unix.PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) {
		return 0, nil
	}

	err = collector.setup(events, path)
	// There are no errors.
	assert.Nil(t, err)

	// For "cas_count_write", collector has two registered PMUs,
	// `uncore_imc_0 (of 18 type) and `uncore_imc_1` (of 19 type).
	// Both of them has two cpus which corresponds to sockets.
	assert.Equal(t, len(collector.cpuFiles["uncore_imc/cas_count_write"]["uncore_imc_0"]), 2)
	assert.Equal(t, len(collector.cpuFiles["uncore_imc/cas_count_write"]["uncore_imc_1"]), 2)

	// For "cas_count_read", has only one registered PMU and it's `uncore_imc_0` (of 18 type) with two cpus which
	// correspond to two sockets.
	assert.Equal(t, len(collector.cpuFiles["uncore_imc_0/cas_count_read"]), 1)
	assert.Equal(t, len(collector.cpuFiles["uncore_imc_0/cas_count_read"]["uncore_imc_0"]), 2)

	// For "cache-misses" it shouldn't register any PMU.
	assert.Nil(t, collector.cpuFiles["cache-misses"])
}
// TestParseUncoreEvents checks that parseUncoreEvents maps only events
// with a matching custom definition ("cas_count_read") and omits events
// without one ("cas_count_write").
func TestParseUncoreEvents(t *testing.T) {
	events := PerfEvents{
		Uncore: Events{
			Events: [][]Event{
				{"cas_count_read"},
				{"cas_count_write"},
			},
			CustomEvents: []CustomEvent{
				{
					Type:   17,
					Config: Config{0x50, 0x60},
					Name:   "cas_count_read",
				},
			},
		},
	}
	eventToCustomEvent := parseUncoreEvents(events.Uncore)
	assert.Len(t, eventToCustomEvent, 1)
	assert.Equal(t, eventToCustomEvent["cas_count_read"].Name, Event("cas_count_read"))
	assert.Equal(t, eventToCustomEvent["cas_count_read"].Type, uint32(17))
	assert.Equal(t, eventToCustomEvent["cas_count_read"].Config, Config{0x50, 0x60})
}
// TestObtainPMUs checks prefix matching of obtainPMUs, including the
// empty-prefix case which must return a nil slice.
func TestObtainPMUs(t *testing.T) {
	got := uncorePMUs{
		"uncore_imc_0": {name: "uncore_imc_0", typeOf: 18, cpus: []uint32{0, 1}},
		"uncore_imc_1": {name: "uncore_imc_1", typeOf: 19, cpus: []uint32{0, 1}},
	}

	expected := []pmu{
		{name: "uncore_imc_0", typeOf: 18, cpus: []uint32{0, 1}},
		{name: "uncore_imc_1", typeOf: 19, cpus: []uint32{0, 1}},
	}

	actual := obtainPMUs("uncore_imc_0", got)
	assert.Equal(t, []pmu{expected[0]}, actual)

	actual = obtainPMUs("uncore_imc_1", got)
	assert.Equal(t, []pmu{expected[1]}, actual)

	actual = obtainPMUs("", got)
	assert.Equal(t, []pmu(nil), actual)
}
// TestUncoreParseEventName checks splitting of event specifications into
// PMU prefix and event name: no slash, one slash, and multiple slashes
// (only the first slash separates; the rest stays in the event name).
func TestUncoreParseEventName(t *testing.T) {
	eventName, pmuPrefix := parseEventName("some_event")
	assert.Equal(t, "some_event", eventName)
	assert.Empty(t, pmuPrefix)

	eventName, pmuPrefix = parseEventName("some_pmu/some_event")
	assert.Equal(t, "some_pmu", pmuPrefix)
	assert.Equal(t, "some_event", eventName)

	eventName, pmuPrefix = parseEventName("some_pmu/some_event/first_slash/second_slash")
	assert.Equal(t, "some_pmu", pmuPrefix)
	assert.Equal(t, "some_event/first_slash/second_slash", eventName)
}

View File

@ -512,3 +512,14 @@ func getMatchedInt(rgx *regexp.Regexp, str string) (int, error) {
}
return valInt, nil
}
// GetSocketFromCPU returns the socket ID of the passed CPU thread. If the
// CPU is not present in the topology, it returns -1.
func GetSocketFromCPU(topology []info.Node, cpu int) int {
	for _, node := range topology {
		if ok, coreIdx := node.FindCoreByThread(cpu); ok {
			return node.Cores[coreIdx].SocketID
		}
	}
	return -1
}

View File

@ -1235,3 +1235,54 @@ func TestGetNetworkStats(t *testing.T) {
t.Errorf("expected to get stats %+v, got %+v", expectedStats, netStats)
}
}
// TestGetSocketFromCPU builds a two-socket topology (threads 0-3 on
// socket 0, threads 4-7 on socket 1) and checks both a successful lookup
// and the -1 result for an unknown CPU.
func TestGetSocketFromCPU(t *testing.T) {
	topology := []info.Node{
		{
			Id:        0,
			Memory:    0,
			HugePages: nil,
			Cores: []info.Core{
				{
					Id:       0,
					Threads:  []int{0, 1},
					Caches:   nil,
					SocketID: 0,
				},
				{
					Id:       1,
					Threads:  []int{2, 3},
					Caches:   nil,
					SocketID: 0,
				},
			},
			Caches: nil,
		},
		{
			Id:        1,
			Memory:    0,
			HugePages: nil,
			Cores: []info.Core{
				{
					Id:       0,
					Threads:  []int{4, 5},
					Caches:   nil,
					SocketID: 1,
				},
				{
					Id:       1,
					Threads:  []int{6, 7},
					Caches:   nil,
					SocketID: 1,
				},
			},
			Caches: nil,
		},
	}
	socket := GetSocketFromCPU(topology, 6)
	assert.Equal(t, socket, 1)

	// Check if return "-1" when there is no data about passed CPU.
	socket = GetSocketFromCPU(topology, 8)
	assert.Equal(t, socket, -1)
}