Merge pull request #1121 from vishh/opt-out-metrics
Support opt out for metrics.
This commit is contained in:
commit
06ac85ca29
@ -86,6 +86,8 @@ type dockerFactory struct {
|
||||
fsInfo fs.FsInfo
|
||||
|
||||
dockerVersion []int
|
||||
|
||||
ignoreMetrics container.MetricSet
|
||||
}
|
||||
|
||||
func (self *dockerFactory) String() string {
|
||||
@ -111,6 +113,7 @@ func (self *dockerFactory) NewContainerHandler(name string, inHostNamespace bool
|
||||
inHostNamespace,
|
||||
metadataEnvs,
|
||||
self.dockerVersion,
|
||||
self.ignoreMetrics,
|
||||
)
|
||||
return
|
||||
}
|
||||
@ -178,7 +181,7 @@ func parseDockerVersion(full_version_string string) ([]int, error) {
|
||||
}
|
||||
|
||||
// Register root container before running this function!
|
||||
func Register(factory info.MachineInfoFactory, fsInfo fs.FsInfo) error {
|
||||
func Register(factory info.MachineInfoFactory, fsInfo fs.FsInfo, ignoreMetrics container.MetricSet) error {
|
||||
client, err := Client()
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to communicate with docker daemon: %v", err)
|
||||
@ -236,6 +239,7 @@ func Register(factory info.MachineInfoFactory, fsInfo fs.FsInfo) error {
|
||||
machineInfoFactory: factory,
|
||||
storageDriver: storageDriver(sd),
|
||||
storageDir: storageDir,
|
||||
ignoreMetrics: ignoreMetrics,
|
||||
}
|
||||
|
||||
container.RegisterContainerHandlerFactory(f)
|
||||
|
@ -81,6 +81,8 @@ type dockerContainerHandler struct {
|
||||
|
||||
// Filesystem handler.
|
||||
fsHandler fsHandler
|
||||
|
||||
ignoreMetrics container.MetricSet
|
||||
}
|
||||
|
||||
func getRwLayerID(containerID, storageDir string, sd storageDriver, dockerVersion []int) (string, error) {
|
||||
@ -111,6 +113,7 @@ func newDockerContainerHandler(
|
||||
inHostNamespace bool,
|
||||
metadataEnvs []string,
|
||||
dockerVersion []int,
|
||||
ignoreMetrics container.MetricSet,
|
||||
) (container.ContainerHandler, error) {
|
||||
// Create the cgroup paths.
|
||||
cgroupPaths := make(map[string]string, len(cgroupSubsystems.MountPoints))
|
||||
@ -161,8 +164,12 @@ func newDockerContainerHandler(
|
||||
fsInfo: fsInfo,
|
||||
rootFs: rootFs,
|
||||
rootfsStorageDir: rootfsStorageDir,
|
||||
fsHandler: newFsHandler(time.Minute, rootfsStorageDir, otherStorageDir, fsInfo),
|
||||
envs: make(map[string]string),
|
||||
ignoreMetrics: ignoreMetrics,
|
||||
}
|
||||
|
||||
if !ignoreMetrics.Has(container.DiskUsageMetrics) {
|
||||
handler.fsHandler = newFsHandler(time.Minute, rootfsStorageDir, otherStorageDir, fsInfo)
|
||||
}
|
||||
|
||||
// We assume that if Inspect fails then the container is not known to docker.
|
||||
@ -255,8 +262,11 @@ func libcontainerConfigToContainerSpec(config *libcontainerconfigs.Config, mi *i
|
||||
return spec
|
||||
}
|
||||
|
||||
func hasNet(networkMode string) bool {
|
||||
return !strings.HasPrefix(networkMode, "container:")
|
||||
func (self *dockerContainerHandler) needNet() bool {
|
||||
if !self.ignoreMetrics.Has(container.NetworkUsageMetrics) {
|
||||
return !strings.HasPrefix(self.networkMode, "container:")
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (self *dockerContainerHandler) GetSpec() (info.ContainerSpec, error) {
|
||||
@ -272,22 +282,25 @@ func (self *dockerContainerHandler) GetSpec() (info.ContainerSpec, error) {
|
||||
spec := libcontainerConfigToContainerSpec(libcontainerConfig, mi)
|
||||
spec.CreationTime = self.creationTime
|
||||
|
||||
switch self.storageDriver {
|
||||
case aufsStorageDriver, overlayStorageDriver, zfsStorageDriver:
|
||||
spec.HasFilesystem = true
|
||||
default:
|
||||
spec.HasFilesystem = false
|
||||
if !self.ignoreMetrics.Has(container.DiskUsageMetrics) {
|
||||
switch self.storageDriver {
|
||||
case aufsStorageDriver, overlayStorageDriver, zfsStorageDriver:
|
||||
spec.HasFilesystem = true
|
||||
}
|
||||
}
|
||||
|
||||
spec.Labels = self.labels
|
||||
spec.Envs = self.envs
|
||||
spec.Image = self.image
|
||||
spec.HasNetwork = hasNet(self.networkMode)
|
||||
spec.HasNetwork = self.needNet()
|
||||
|
||||
return spec, err
|
||||
}
|
||||
|
||||
func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error {
|
||||
if self.ignoreMetrics.Has(container.DiskUsageMetrics) {
|
||||
return nil
|
||||
}
|
||||
switch self.storageDriver {
|
||||
case aufsStorageDriver, overlayStorageDriver, zfsStorageDriver:
|
||||
default:
|
||||
@ -327,7 +340,7 @@ func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error
|
||||
|
||||
// TODO(vmarmol): Get from libcontainer API instead of cgroup manager when we don't have to support older Dockers.
|
||||
func (self *dockerContainerHandler) GetStats() (*info.ContainerStats, error) {
|
||||
stats, err := containerlibcontainer.GetStats(self.cgroupManager, self.rootFs, self.pid)
|
||||
stats, err := containerlibcontainer.GetStats(self.cgroupManager, self.rootFs, self.pid, self.ignoreMetrics)
|
||||
if err != nil {
|
||||
return stats, err
|
||||
}
|
||||
@ -335,7 +348,7 @@ func (self *dockerContainerHandler) GetStats() (*info.ContainerStats, error) {
|
||||
// includes containers running in Kubernetes pods that use the network of the
|
||||
// infrastructure container. This stops metrics being reported multiple times
|
||||
// for each container in a pod.
|
||||
if !hasNet(self.networkMode) {
|
||||
if !self.needNet() {
|
||||
stats.Network = info.NetworkStats{}
|
||||
}
|
||||
|
||||
|
@ -35,6 +35,35 @@ type ContainerHandlerFactory interface {
|
||||
DebugInfo() map[string][]string
|
||||
}
|
||||
|
||||
// MetricKind identifies one family of metrics that cAdvisor exposes.
type MetricKind string

// The metric families known to cAdvisor.
const (
	CpuUsageMetrics        MetricKind = "cpu"
	MemoryUsageMetrics     MetricKind = "memory"
	CpuLoadMetrics         MetricKind = "cpuLoad"
	DiskIOMetrics          MetricKind = "diskIO"
	DiskUsageMetrics       MetricKind = "disk"
	NetworkUsageMetrics    MetricKind = "network"
	NetworkTcpUsageMetrics MetricKind = "tcp"
	AppMetrics             MetricKind = "app"
)

// String returns the kind's underlying string value.
func (mk MetricKind) String() string {
	name := string(mk)
	return name
}

// MetricSet is a set of metric kinds, stored as map keys with empty values.
type MetricSet map[MetricKind]struct{}

// Has reports whether mk is a member of the set. It is safe to call on a nil
// set, which contains nothing.
func (ms MetricSet) Has(mk MetricKind) bool {
	if _, present := ms[mk]; present {
		return true
	}
	return false
}

// Add inserts mk into the set. The set must already be a non-nil map, because
// assigning into a nil map panics.
func (ms MetricSet) Add(mk MetricKind) {
	var member struct{}
	ms[mk] = member
}
|
||||
|
||||
// TODO(vmarmol): Consider not making this global.
|
||||
// Global list of factories.
|
||||
var (
|
||||
|
@ -17,12 +17,14 @@ package libcontainer
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/google/cadvisor/container"
|
||||
info "github.com/google/cadvisor/info/v1"
|
||||
|
||||
"github.com/golang/glog"
|
||||
@ -79,7 +81,7 @@ var supportedSubsystems map[string]struct{} = map[string]struct{}{
|
||||
}
|
||||
|
||||
// Get cgroup and networking stats of the specified container
|
||||
func GetStats(cgroupManager cgroups.Manager, rootFs string, pid int) (*info.ContainerStats, error) {
|
||||
func GetStats(cgroupManager cgroups.Manager, rootFs string, pid int, ignoreMetrics container.MetricSet) (*info.ContainerStats, error) {
|
||||
cgroupStats, err := cgroupManager.GetStats()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -91,27 +93,29 @@ func GetStats(cgroupManager cgroups.Manager, rootFs string, pid int) (*info.Cont
|
||||
|
||||
// If we know the pid then get network stats from /proc/<pid>/net/dev
|
||||
if pid > 0 {
|
||||
netStats, err := networkStatsFromProc(rootFs, pid)
|
||||
if err != nil {
|
||||
glog.V(2).Infof("Unable to get network stats from pid %d: %v", pid, err)
|
||||
} else {
|
||||
stats.Network.Interfaces = append(stats.Network.Interfaces, netStats...)
|
||||
if !ignoreMetrics.Has(container.NetworkUsageMetrics) {
|
||||
netStats, err := networkStatsFromProc(rootFs, pid)
|
||||
if err != nil {
|
||||
glog.V(2).Infof("Unable to get network stats from pid %d: %v", pid, err)
|
||||
} else {
|
||||
stats.Network.Interfaces = append(stats.Network.Interfaces, netStats...)
|
||||
}
|
||||
}
|
||||
if !ignoreMetrics.Has(container.NetworkTcpUsageMetrics) {
|
||||
t, err := tcpStatsFromProc(rootFs, pid, "net/tcp")
|
||||
if err != nil {
|
||||
glog.V(2).Infof("Unable to get tcp stats from pid %d: %v", pid, err)
|
||||
} else {
|
||||
stats.Network.Tcp = t
|
||||
}
|
||||
|
||||
// Commenting out to disable: too CPU intensive
|
||||
/*t, err := tcpStatsFromProc(rootFs, pid, "net/tcp")
|
||||
if err != nil {
|
||||
glog.V(2).Infof("Unable to get tcp stats from pid %d: %v", pid, err)
|
||||
} else {
|
||||
stats.Network.Tcp = t
|
||||
t6, err := tcpStatsFromProc(rootFs, pid, "net/tcp6")
|
||||
if err != nil {
|
||||
glog.V(2).Infof("Unable to get tcp6 stats from pid %d: %v", pid, err)
|
||||
} else {
|
||||
stats.Network.Tcp6 = t6
|
||||
}
|
||||
}
|
||||
|
||||
t6, err := tcpStatsFromProc(rootFs, pid, "net/tcp6")
|
||||
if err != nil {
|
||||
glog.V(2).Infof("Unable to get tcp6 stats from pid %d: %v", pid, err)
|
||||
} else {
|
||||
stats.Network.Tcp6 = t6
|
||||
}*/
|
||||
}
|
||||
|
||||
// For backwards compatibility.
|
||||
@ -211,7 +215,6 @@ func setInterfaceStatValues(fields []string, pointers []*uint64) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
/*
|
||||
func tcpStatsFromProc(rootFs string, pid int, file string) (info.TcpStat, error) {
|
||||
tcpStatsFile := path.Join(rootFs, "proc", strconv.Itoa(pid), file)
|
||||
|
||||
@ -286,7 +289,6 @@ func scanTcpStats(tcpStatsFile string) (info.TcpStat, error) {
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
*/
|
||||
|
||||
func GetProcesses(cgroupManager cgroups.Manager) ([]int, error) {
|
||||
pids, err := cgroupManager.GetPids()
|
||||
|
@ -40,6 +40,9 @@ type rawFactory struct {
|
||||
|
||||
// Watcher for inotify events.
|
||||
watcher *InotifyWatcher
|
||||
|
||||
// List of metrics to be ignored.
|
||||
ignoreMetrics map[container.MetricKind]struct{}
|
||||
}
|
||||
|
||||
func (self *rawFactory) String() string {
|
||||
@ -51,7 +54,7 @@ func (self *rawFactory) NewContainerHandler(name string, inHostNamespace bool) (
|
||||
if !inHostNamespace {
|
||||
rootFs = "/rootfs"
|
||||
}
|
||||
return newRawContainerHandler(name, self.cgroupSubsystems, self.machineInfoFactory, self.fsInfo, self.watcher, rootFs)
|
||||
return newRawContainerHandler(name, self.cgroupSubsystems, self.machineInfoFactory, self.fsInfo, self.watcher, rootFs, self.ignoreMetrics)
|
||||
}
|
||||
|
||||
// The raw factory can handle any container. If --docker_only is set to false, non-docker containers are ignored.
|
||||
@ -77,7 +80,7 @@ func (self *rawFactory) DebugInfo() map[string][]string {
|
||||
return out
|
||||
}
|
||||
|
||||
func Register(machineInfoFactory info.MachineInfoFactory, fsInfo fs.FsInfo) error {
|
||||
func Register(machineInfoFactory info.MachineInfoFactory, fsInfo fs.FsInfo, ignoreMetrics map[container.MetricKind]struct{}) error {
|
||||
cgroupSubsystems, err := libcontainer.GetCgroupSubsystems()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get cgroup subsystems: %v", err)
|
||||
@ -97,6 +100,7 @@ func Register(machineInfoFactory info.MachineInfoFactory, fsInfo fs.FsInfo) erro
|
||||
fsInfo: fsInfo,
|
||||
cgroupSubsystems: &cgroupSubsystems,
|
||||
watcher: watcher,
|
||||
ignoreMetrics: ignoreMetrics,
|
||||
}
|
||||
container.RegisterContainerHandlerFactory(factory)
|
||||
return nil
|
||||
|
@ -57,16 +57,16 @@ type rawContainerHandler struct {
|
||||
// Manager of this container's cgroups.
|
||||
cgroupManager cgroups.Manager
|
||||
|
||||
// Whether this container has network isolation enabled.
|
||||
hasNetwork bool
|
||||
|
||||
fsInfo fs.FsInfo
|
||||
externalMounts []mount
|
||||
|
||||
rootFs string
|
||||
|
||||
// Metrics to be ignored.
|
||||
ignoreMetrics container.MetricSet
|
||||
}
|
||||
|
||||
func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSubsystems, machineInfoFactory info.MachineInfoFactory, fsInfo fs.FsInfo, watcher *InotifyWatcher, rootFs string) (container.ContainerHandler, error) {
|
||||
func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSubsystems, machineInfoFactory info.MachineInfoFactory, fsInfo fs.FsInfo, watcher *InotifyWatcher, rootFs string, ignoreMetrics container.MetricSet) (container.ContainerHandler, error) {
|
||||
// Create the cgroup paths.
|
||||
cgroupPaths := make(map[string]string, len(cgroupSubsystems.MountPoints))
|
||||
for key, val := range cgroupSubsystems.MountPoints {
|
||||
@ -86,15 +86,9 @@ func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSu
|
||||
Paths: cgroupPaths,
|
||||
}
|
||||
|
||||
hasNetwork := false
|
||||
var externalMounts []mount
|
||||
for _, container := range cHints.AllHosts {
|
||||
if name == container.FullName {
|
||||
/*libcontainerState.NetworkState = network.NetworkState{
|
||||
VethHost: container.NetworkInterface.VethHost,
|
||||
VethChild: container.NetworkInterface.VethChild,
|
||||
}
|
||||
hasNetwork = true*/
|
||||
externalMounts = container.Mounts
|
||||
break
|
||||
}
|
||||
@ -108,10 +102,10 @@ func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSu
|
||||
cgroupPaths: cgroupPaths,
|
||||
cgroupManager: cgroupManager,
|
||||
fsInfo: fsInfo,
|
||||
hasNetwork: hasNetwork,
|
||||
externalMounts: externalMounts,
|
||||
watcher: watcher,
|
||||
rootFs: rootFs,
|
||||
ignoreMetrics: ignoreMetrics,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -266,9 +260,6 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {
|
||||
spec.HasFilesystem = true
|
||||
}
|
||||
|
||||
//Network
|
||||
spec.HasNetwork = self.hasNetwork
|
||||
|
||||
// DiskIo.
|
||||
if blkioRoot, ok := self.cgroupPaths["blkio"]; ok && utils.FileExists(blkioRoot) {
|
||||
spec.HasDiskIo = true
|
||||
@ -350,7 +341,7 @@ func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
|
||||
}
|
||||
|
||||
func (self *rawContainerHandler) GetStats() (*info.ContainerStats, error) {
|
||||
stats, err := libcontainer.GetStats(self.cgroupManager, self.rootFs, os.Getpid())
|
||||
stats, err := libcontainer.GetStats(self.cgroupManager, self.rootFs, os.Getpid(), self.ignoreMetrics)
|
||||
if err != nil {
|
||||
return stats, err
|
||||
}
|
||||
|
@ -332,7 +332,9 @@ func TestDockerFilesystemStats(t *testing.T) {
|
||||
}
|
||||
sanityCheckV2(containerId, info, t)
|
||||
|
||||
require.NotNil(t, info.Stats[0].Filesystem.TotalUsageBytes)
|
||||
require.NotNil(t, info.Stats[0], "got info: %+v", info)
|
||||
require.NotNil(t, info.Stats[0].Filesystem, "got info: %+v", info)
|
||||
require.NotNil(t, info.Stats[0].Filesystem.TotalUsageBytes, "got info: %+v", info.Stats[0].Filesystem)
|
||||
if *info.Stats[0].Filesystem.TotalUsageBytes >= ddUsage {
|
||||
if !needsBaseUsageCheck {
|
||||
pass = true
|
||||
|
@ -50,6 +50,42 @@ var eventStorageAgeLimit = flag.String("event_storage_age_limit", "default=24h",
|
||||
var eventStorageEventLimit = flag.String("event_storage_event_limit", "default=100000", "Max number of events to store (per type). Value is a comma separated list of key values, where the keys are event types (e.g.: creation, oom) or \"default\" and the value is an integer. Default is applied to all non-specified event types")
|
||||
var applicationMetricsCountLimit = flag.Int("application_metrics_count_limit", 100, "Max number of application metrics to store (per container)")
|
||||
|
||||
var (
|
||||
// Metrics to be ignored.
|
||||
ignoreMetrics metricSetValue = metricSetValue{container.MetricSet{}}
|
||||
// List of metrics that can be ignored.
|
||||
ignoreWhitelist = container.MetricSet{
|
||||
container.DiskUsageMetrics: struct{}{},
|
||||
container.NetworkUsageMetrics: struct{}{},
|
||||
container.NetworkTcpUsageMetrics: struct{}{},
|
||||
}
|
||||
)
|
||||
|
||||
func init() {
|
||||
flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of metrics to be disabled. Options are `disk`, `network`, `tcp`. Note: tcp is disabled by default due to high CPU usage.")
|
||||
// Tcp metrics are ignored by default.
|
||||
flag.Set("disable_metrics", "tcp")
|
||||
}
|
||||
|
||||
type metricSetValue struct {
|
||||
container.MetricSet
|
||||
}
|
||||
|
||||
func (ml *metricSetValue) String() string {
|
||||
return fmt.Sprint(*ml)
|
||||
}
|
||||
|
||||
func (ml *metricSetValue) Set(value string) error {
|
||||
for _, metric := range strings.Split(value, ",") {
|
||||
if ignoreWhitelist.Has(container.MetricKind(metric)) {
|
||||
(*ml).Add(container.MetricKind(metric))
|
||||
} else {
|
||||
return fmt.Errorf("unsupported metric %q specified in disable_metrics", metric)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// The Manager interface defines operations for starting a manager and getting
|
||||
// container and machine information.
|
||||
type Manager interface {
|
||||
@ -147,6 +183,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, maxHousekeepingIn
|
||||
if _, err := os.Stat("/rootfs/proc"); os.IsNotExist(err) {
|
||||
inHostNamespace = true
|
||||
}
|
||||
|
||||
newManager := &manager{
|
||||
containers: make(map[namespacedContainerName]*containerData),
|
||||
quitChannels: make([]chan error, 0, 2),
|
||||
@ -157,6 +194,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, maxHousekeepingIn
|
||||
startupTime: time.Now(),
|
||||
maxHousekeepingInterval: maxHousekeepingInterval,
|
||||
allowDynamicHousekeeping: allowDynamicHousekeeping,
|
||||
ignoreMetrics: ignoreMetrics.MetricSet,
|
||||
}
|
||||
|
||||
machineInfo, err := getMachineInfo(sysfs, fsInfo, inHostNamespace)
|
||||
@ -199,18 +237,19 @@ type manager struct {
|
||||
startupTime time.Time
|
||||
maxHousekeepingInterval time.Duration
|
||||
allowDynamicHousekeeping bool
|
||||
ignoreMetrics container.MetricSet
|
||||
}
|
||||
|
||||
// Start the container manager.
|
||||
func (self *manager) Start() error {
|
||||
// Register Docker container factory.
|
||||
err := docker.Register(self, self.fsInfo)
|
||||
err := docker.Register(self, self.fsInfo, self.ignoreMetrics)
|
||||
if err != nil {
|
||||
glog.Errorf("Docker container factory registration failed: %v.", err)
|
||||
}
|
||||
|
||||
// Register the raw driver.
|
||||
err = raw.Register(self, self.fsInfo)
|
||||
err = raw.Register(self, self.fsInfo, self.ignoreMetrics)
|
||||
if err != nil {
|
||||
glog.Errorf("Registration of the raw container factory failed: %v", err)
|
||||
}
|
||||
|
@ -29,6 +29,7 @@ import (
|
||||
info "github.com/google/cadvisor/info/v1"
|
||||
itest "github.com/google/cadvisor/info/v1/test"
|
||||
"github.com/google/cadvisor/utils/sysfs/fakesysfs"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// TODO(vmarmol): Refactor these tests.
|
||||
@ -210,3 +211,7 @@ func TestNewNilManager(t *testing.T) {
|
||||
t.Fatalf("Expected nil manager to return error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestTcpMetricsAreDisabledByDefault checks that the package-level
// ignoreMetrics set already contains container.NetworkTcpUsageMetrics; the
// test itself sets no flags before making the assertion.
func TestTcpMetricsAreDisabledByDefault(t *testing.T) {
|
||||
assert.True(t, ignoreMetrics.Has(container.NetworkTcpUsageMetrics))
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user