Merge pull request #1121 from vishh/opt-out-metrics

Support opt out for metrics.
This commit is contained in:
Vish Kannan 2016-02-25 16:25:02 -08:00
commit 06ac85ca29
9 changed files with 142 additions and 53 deletions

View File

@ -86,6 +86,8 @@ type dockerFactory struct {
fsInfo fs.FsInfo
dockerVersion []int
ignoreMetrics container.MetricSet
}
func (self *dockerFactory) String() string {
@ -111,6 +113,7 @@ func (self *dockerFactory) NewContainerHandler(name string, inHostNamespace bool
inHostNamespace,
metadataEnvs,
self.dockerVersion,
self.ignoreMetrics,
)
return
}
@ -178,7 +181,7 @@ func parseDockerVersion(full_version_string string) ([]int, error) {
}
// Register root container before running this function!
func Register(factory info.MachineInfoFactory, fsInfo fs.FsInfo) error {
func Register(factory info.MachineInfoFactory, fsInfo fs.FsInfo, ignoreMetrics container.MetricSet) error {
client, err := Client()
if err != nil {
return fmt.Errorf("unable to communicate with docker daemon: %v", err)
@ -236,6 +239,7 @@ func Register(factory info.MachineInfoFactory, fsInfo fs.FsInfo) error {
machineInfoFactory: factory,
storageDriver: storageDriver(sd),
storageDir: storageDir,
ignoreMetrics: ignoreMetrics,
}
container.RegisterContainerHandlerFactory(f)

View File

@ -81,6 +81,8 @@ type dockerContainerHandler struct {
// Filesystem handler.
fsHandler fsHandler
ignoreMetrics container.MetricSet
}
func getRwLayerID(containerID, storageDir string, sd storageDriver, dockerVersion []int) (string, error) {
@ -111,6 +113,7 @@ func newDockerContainerHandler(
inHostNamespace bool,
metadataEnvs []string,
dockerVersion []int,
ignoreMetrics container.MetricSet,
) (container.ContainerHandler, error) {
// Create the cgroup paths.
cgroupPaths := make(map[string]string, len(cgroupSubsystems.MountPoints))
@ -161,8 +164,12 @@ func newDockerContainerHandler(
fsInfo: fsInfo,
rootFs: rootFs,
rootfsStorageDir: rootfsStorageDir,
fsHandler: newFsHandler(time.Minute, rootfsStorageDir, otherStorageDir, fsInfo),
envs: make(map[string]string),
ignoreMetrics: ignoreMetrics,
}
if !ignoreMetrics.Has(container.DiskUsageMetrics) {
handler.fsHandler = newFsHandler(time.Minute, rootfsStorageDir, otherStorageDir, fsInfo)
}
// We assume that if Inspect fails then the container is not known to docker.
@ -255,8 +262,11 @@ func libcontainerConfigToContainerSpec(config *libcontainerconfigs.Config, mi *i
return spec
}
func hasNet(networkMode string) bool {
return !strings.HasPrefix(networkMode, "container:")
// needNet reports whether network stats should be reported for this container.
//
// It returns false when network usage metrics are listed in ignoreMetrics, and
// also when the container's network mode starts with "container:", i.e. the
// container reuses another container's network, so reporting here would
// presumably duplicate the numbers already attributed to that other container.
func (self *dockerContainerHandler) needNet() bool {
	// Opted out of network metrics entirely.
	if self.ignoreMetrics.Has(container.NetworkUsageMetrics) {
		return false
	}
	sharesNetwork := strings.HasPrefix(self.networkMode, "container:")
	return !sharesNetwork
}
func (self *dockerContainerHandler) GetSpec() (info.ContainerSpec, error) {
@ -272,22 +282,25 @@ func (self *dockerContainerHandler) GetSpec() (info.ContainerSpec, error) {
spec := libcontainerConfigToContainerSpec(libcontainerConfig, mi)
spec.CreationTime = self.creationTime
switch self.storageDriver {
case aufsStorageDriver, overlayStorageDriver, zfsStorageDriver:
spec.HasFilesystem = true
default:
spec.HasFilesystem = false
if !self.ignoreMetrics.Has(container.DiskUsageMetrics) {
switch self.storageDriver {
case aufsStorageDriver, overlayStorageDriver, zfsStorageDriver:
spec.HasFilesystem = true
}
}
spec.Labels = self.labels
spec.Envs = self.envs
spec.Image = self.image
spec.HasNetwork = hasNet(self.networkMode)
spec.HasNetwork = self.needNet()
return spec, err
}
func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error {
if self.ignoreMetrics.Has(container.DiskUsageMetrics) {
return nil
}
switch self.storageDriver {
case aufsStorageDriver, overlayStorageDriver, zfsStorageDriver:
default:
@ -327,7 +340,7 @@ func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error
// TODO(vmarmol): Get from libcontainer API instead of cgroup manager when we don't have to support older Dockers.
func (self *dockerContainerHandler) GetStats() (*info.ContainerStats, error) {
stats, err := containerlibcontainer.GetStats(self.cgroupManager, self.rootFs, self.pid)
stats, err := containerlibcontainer.GetStats(self.cgroupManager, self.rootFs, self.pid, self.ignoreMetrics)
if err != nil {
return stats, err
}
@ -335,7 +348,7 @@ func (self *dockerContainerHandler) GetStats() (*info.ContainerStats, error) {
// includes containers running in Kubernetes pods that use the network of the
// infrastructure container. This stops metrics being reported multiple times
// for each container in a pod.
if !hasNet(self.networkMode) {
if !self.needNet() {
stats.Network = info.NetworkStats{}
}

View File

@ -35,6 +35,35 @@ type ContainerHandlerFactory interface {
DebugInfo() map[string][]string
}
// MetricKind names a category of metrics that cAdvisor exposes.
type MetricKind string

// Identifiers of the metric categories known to cAdvisor. The string value of
// each constant is the name used to refer to that category in text form.
const (
	CpuUsageMetrics        MetricKind = "cpu"
	MemoryUsageMetrics     MetricKind = "memory"
	CpuLoadMetrics         MetricKind = "cpuLoad"
	DiskIOMetrics          MetricKind = "diskIO"
	DiskUsageMetrics       MetricKind = "disk"
	NetworkUsageMetrics    MetricKind = "network"
	NetworkTcpUsageMetrics MetricKind = "tcp"
	AppMetrics             MetricKind = "app"
)

// String returns the textual name of the metric kind.
func (mk MetricKind) String() string {
	name := string(mk)
	return name
}

// MetricSet is an unordered collection of distinct metric kinds.
type MetricSet map[MetricKind]struct{}

// Has reports whether mk is a member of the set. It is safe to call on a nil
// MetricSet, in which case it always returns false.
func (ms MetricSet) Has(mk MetricKind) bool {
	if _, found := ms[mk]; found {
		return true
	}
	return false
}

// Add inserts mk into the set; adding a kind that is already present is a
// no-op. The set must have been initialized: writing to a nil map panics.
func (ms MetricSet) Add(mk MetricKind) {
	var member struct{}
	ms[mk] = member
}
// TODO(vmarmol): Consider not making this global.
// Global list of factories.
var (

View File

@ -17,12 +17,14 @@ package libcontainer
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path"
"strconv"
"strings"
"time"
"github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
"github.com/golang/glog"
@ -79,7 +81,7 @@ var supportedSubsystems map[string]struct{} = map[string]struct{}{
}
// Get cgroup and networking stats of the specified container
func GetStats(cgroupManager cgroups.Manager, rootFs string, pid int) (*info.ContainerStats, error) {
func GetStats(cgroupManager cgroups.Manager, rootFs string, pid int, ignoreMetrics container.MetricSet) (*info.ContainerStats, error) {
cgroupStats, err := cgroupManager.GetStats()
if err != nil {
return nil, err
@ -91,27 +93,29 @@ func GetStats(cgroupManager cgroups.Manager, rootFs string, pid int) (*info.Cont
// If we know the pid then get network stats from /proc/<pid>/net/dev
if pid > 0 {
netStats, err := networkStatsFromProc(rootFs, pid)
if err != nil {
glog.V(2).Infof("Unable to get network stats from pid %d: %v", pid, err)
} else {
stats.Network.Interfaces = append(stats.Network.Interfaces, netStats...)
if !ignoreMetrics.Has(container.NetworkUsageMetrics) {
netStats, err := networkStatsFromProc(rootFs, pid)
if err != nil {
glog.V(2).Infof("Unable to get network stats from pid %d: %v", pid, err)
} else {
stats.Network.Interfaces = append(stats.Network.Interfaces, netStats...)
}
}
if !ignoreMetrics.Has(container.NetworkTcpUsageMetrics) {
t, err := tcpStatsFromProc(rootFs, pid, "net/tcp")
if err != nil {
glog.V(2).Infof("Unable to get tcp stats from pid %d: %v", pid, err)
} else {
stats.Network.Tcp = t
}
// Commenting out to disable: too CPU intensive
/*t, err := tcpStatsFromProc(rootFs, pid, "net/tcp")
if err != nil {
glog.V(2).Infof("Unable to get tcp stats from pid %d: %v", pid, err)
} else {
stats.Network.Tcp = t
t6, err := tcpStatsFromProc(rootFs, pid, "net/tcp6")
if err != nil {
glog.V(2).Infof("Unable to get tcp6 stats from pid %d: %v", pid, err)
} else {
stats.Network.Tcp6 = t6
}
}
t6, err := tcpStatsFromProc(rootFs, pid, "net/tcp6")
if err != nil {
glog.V(2).Infof("Unable to get tcp6 stats from pid %d: %v", pid, err)
} else {
stats.Network.Tcp6 = t6
}*/
}
// For backwards compatibility.
@ -211,7 +215,6 @@ func setInterfaceStatValues(fields []string, pointers []*uint64) error {
return nil
}
/*
func tcpStatsFromProc(rootFs string, pid int, file string) (info.TcpStat, error) {
tcpStatsFile := path.Join(rootFs, "proc", strconv.Itoa(pid), file)
@ -286,7 +289,6 @@ func scanTcpStats(tcpStatsFile string) (info.TcpStat, error) {
return stats, nil
}
*/
func GetProcesses(cgroupManager cgroups.Manager) ([]int, error) {
pids, err := cgroupManager.GetPids()

View File

@ -40,6 +40,9 @@ type rawFactory struct {
// Watcher for inotify events.
watcher *InotifyWatcher
// List of metrics to be ignored.
ignoreMetrics map[container.MetricKind]struct{}
}
func (self *rawFactory) String() string {
@ -51,7 +54,7 @@ func (self *rawFactory) NewContainerHandler(name string, inHostNamespace bool) (
if !inHostNamespace {
rootFs = "/rootfs"
}
return newRawContainerHandler(name, self.cgroupSubsystems, self.machineInfoFactory, self.fsInfo, self.watcher, rootFs)
return newRawContainerHandler(name, self.cgroupSubsystems, self.machineInfoFactory, self.fsInfo, self.watcher, rootFs, self.ignoreMetrics)
}
// The raw factory can handle any container. If --docker_only is set to false, non-docker containers are ignored.
@ -77,7 +80,7 @@ func (self *rawFactory) DebugInfo() map[string][]string {
return out
}
func Register(machineInfoFactory info.MachineInfoFactory, fsInfo fs.FsInfo) error {
func Register(machineInfoFactory info.MachineInfoFactory, fsInfo fs.FsInfo, ignoreMetrics map[container.MetricKind]struct{}) error {
cgroupSubsystems, err := libcontainer.GetCgroupSubsystems()
if err != nil {
return fmt.Errorf("failed to get cgroup subsystems: %v", err)
@ -97,6 +100,7 @@ func Register(machineInfoFactory info.MachineInfoFactory, fsInfo fs.FsInfo) erro
fsInfo: fsInfo,
cgroupSubsystems: &cgroupSubsystems,
watcher: watcher,
ignoreMetrics: ignoreMetrics,
}
container.RegisterContainerHandlerFactory(factory)
return nil

View File

@ -57,16 +57,16 @@ type rawContainerHandler struct {
// Manager of this container's cgroups.
cgroupManager cgroups.Manager
// Whether this container has network isolation enabled.
hasNetwork bool
fsInfo fs.FsInfo
externalMounts []mount
rootFs string
// Metrics to be ignored.
ignoreMetrics container.MetricSet
}
func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSubsystems, machineInfoFactory info.MachineInfoFactory, fsInfo fs.FsInfo, watcher *InotifyWatcher, rootFs string) (container.ContainerHandler, error) {
func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSubsystems, machineInfoFactory info.MachineInfoFactory, fsInfo fs.FsInfo, watcher *InotifyWatcher, rootFs string, ignoreMetrics container.MetricSet) (container.ContainerHandler, error) {
// Create the cgroup paths.
cgroupPaths := make(map[string]string, len(cgroupSubsystems.MountPoints))
for key, val := range cgroupSubsystems.MountPoints {
@ -86,15 +86,9 @@ func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSu
Paths: cgroupPaths,
}
hasNetwork := false
var externalMounts []mount
for _, container := range cHints.AllHosts {
if name == container.FullName {
/*libcontainerState.NetworkState = network.NetworkState{
VethHost: container.NetworkInterface.VethHost,
VethChild: container.NetworkInterface.VethChild,
}
hasNetwork = true*/
externalMounts = container.Mounts
break
}
@ -108,10 +102,10 @@ func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSu
cgroupPaths: cgroupPaths,
cgroupManager: cgroupManager,
fsInfo: fsInfo,
hasNetwork: hasNetwork,
externalMounts: externalMounts,
watcher: watcher,
rootFs: rootFs,
ignoreMetrics: ignoreMetrics,
}, nil
}
@ -266,9 +260,6 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {
spec.HasFilesystem = true
}
//Network
spec.HasNetwork = self.hasNetwork
// DiskIo.
if blkioRoot, ok := self.cgroupPaths["blkio"]; ok && utils.FileExists(blkioRoot) {
spec.HasDiskIo = true
@ -350,7 +341,7 @@ func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
}
func (self *rawContainerHandler) GetStats() (*info.ContainerStats, error) {
stats, err := libcontainer.GetStats(self.cgroupManager, self.rootFs, os.Getpid())
stats, err := libcontainer.GetStats(self.cgroupManager, self.rootFs, os.Getpid(), self.ignoreMetrics)
if err != nil {
return stats, err
}

View File

@ -332,7 +332,9 @@ func TestDockerFilesystemStats(t *testing.T) {
}
sanityCheckV2(containerId, info, t)
require.NotNil(t, info.Stats[0].Filesystem.TotalUsageBytes)
require.NotNil(t, info.Stats[0], "got info: %+v", info)
require.NotNil(t, info.Stats[0].Filesystem, "got info: %+v", info)
require.NotNil(t, info.Stats[0].Filesystem.TotalUsageBytes, "got info: %+v", info.Stats[0].Filesystem)
if *info.Stats[0].Filesystem.TotalUsageBytes >= ddUsage {
if !needsBaseUsageCheck {
pass = true

View File

@ -50,6 +50,42 @@ var eventStorageAgeLimit = flag.String("event_storage_age_limit", "default=24h",
var eventStorageEventLimit = flag.String("event_storage_event_limit", "default=100000", "Max number of events to store (per type). Value is a comma separated list of key values, where the keys are event types (e.g.: creation, oom) or \"default\" and the value is an integer. Default is applied to all non-specified event types")
var applicationMetricsCountLimit = flag.Int("application_metrics_count_limit", 100, "Max number of application metrics to store (per container)")
var (
	// ignoreMetrics is the set of metric kinds that should not be collected.
	// It is exposed on the command line as the disable_metrics flag.
	ignoreMetrics = metricSetValue{MetricSet: container.MetricSet{}}

	// ignoreWhitelist lists the only metric kinds that are allowed to appear
	// in ignoreMetrics; any other kind is rejected when the flag is parsed.
	ignoreWhitelist = container.MetricSet{
		container.DiskUsageMetrics:       {},
		container.NetworkUsageMetrics:    {},
		container.NetworkTcpUsageMetrics: {},
	}
)
// init registers the disable_metrics flag and installs its default value.
//
// The default is applied through flag.Set rather than in the variable's
// initializer, so it goes through the same validation as a user-supplied
// value. A failure here can only be caused by a programming error (the flag
// name or the whitelist got out of sync), so it is reported with a panic
// instead of being silently dropped.
func init() {
	flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of metrics to be disabled. Options are `disk`, `network`, `tcp`. Note: tcp is disabled by default due to high CPU usage.")

	// Tcp metrics are ignored by default.
	if err := flag.Set("disable_metrics", "tcp"); err != nil {
		panic(fmt.Sprintf("unable to disable tcp metrics by default: %v", err))
	}
}
// metricSetValue wraps a container.MetricSet so that it can be registered as
// a command-line flag: together, String and Set satisfy flag.Value.
type metricSetValue struct {
	container.MetricSet
}

// String renders the wrapped set using fmt's default formatting.
func (ml *metricSetValue) String() string {
	return fmt.Sprint(*ml)
}

// Set parses value as a comma-separated list of metric kinds and makes it the
// new content of the set.
//
// The previous content is replaced, not extended. This is what allows a value
// given on the command line to override the default installed at start-up
// (for example to turn tcp metrics back on). An empty value clears the set,
// i.e. no metric is disabled. Whitespace around individual entries is ignored.
//
// Every entry must be present in ignoreWhitelist; otherwise an error is
// returned and the set is left exactly as it was before the call.
func (ml *metricSetValue) Set(value string) error {
	// Build into a fresh set so that a rejected value has no partial effect,
	// and so that a zero-value receiver (nil map) does not panic on Add.
	selected := container.MetricSet{}
	if value != "" {
		for _, entry := range strings.Split(value, ",") {
			metric := strings.TrimSpace(entry)
			kind := container.MetricKind(metric)
			if !ignoreWhitelist.Has(kind) {
				return fmt.Errorf("unsupported metric %q specified in disable_metrics", metric)
			}
			selected.Add(kind)
		}
	}
	ml.MetricSet = selected
	return nil
}
// The Manager interface defines operations for starting a manager and getting
// container and machine information.
type Manager interface {
@ -147,6 +183,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, maxHousekeepingIn
if _, err := os.Stat("/rootfs/proc"); os.IsNotExist(err) {
inHostNamespace = true
}
newManager := &manager{
containers: make(map[namespacedContainerName]*containerData),
quitChannels: make([]chan error, 0, 2),
@ -157,6 +194,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, maxHousekeepingIn
startupTime: time.Now(),
maxHousekeepingInterval: maxHousekeepingInterval,
allowDynamicHousekeeping: allowDynamicHousekeeping,
ignoreMetrics: ignoreMetrics.MetricSet,
}
machineInfo, err := getMachineInfo(sysfs, fsInfo, inHostNamespace)
@ -199,18 +237,19 @@ type manager struct {
startupTime time.Time
maxHousekeepingInterval time.Duration
allowDynamicHousekeeping bool
ignoreMetrics container.MetricSet
}
// Start the container manager.
func (self *manager) Start() error {
// Register Docker container factory.
err := docker.Register(self, self.fsInfo)
err := docker.Register(self, self.fsInfo, self.ignoreMetrics)
if err != nil {
glog.Errorf("Docker container factory registration failed: %v.", err)
}
// Register the raw driver.
err = raw.Register(self, self.fsInfo)
err = raw.Register(self, self.fsInfo, self.ignoreMetrics)
if err != nil {
glog.Errorf("Registration of the raw container factory failed: %v", err)
}

View File

@ -29,6 +29,7 @@ import (
info "github.com/google/cadvisor/info/v1"
itest "github.com/google/cadvisor/info/v1/test"
"github.com/google/cadvisor/utils/sysfs/fakesysfs"
"github.com/stretchr/testify/assert"
)
// TODO(vmarmol): Refactor these tests.
@ -210,3 +211,7 @@ func TestNewNilManager(t *testing.T) {
t.Fatalf("Expected nil manager to return error")
}
}
func TestTcpMetricsAreDisabledByDefault(t *testing.T) {
assert.True(t, ignoreMetrics.Has(container.NetworkTcpUsageMetrics))
}