diff --git a/container/libcontainer/handler.go b/container/libcontainer/handler.go
index 18c465f2..c06ceb3b 100644
--- a/container/libcontainer/handler.go
+++ b/container/libcontainer/handler.go
@@ -123,6 +123,18 @@ func (h *Handler) GetStats() (*info.ContainerStats, error) {
 			stats.Network.Udp6 = u6
 		}
 	}
+	if h.includedMetrics.Has(container.ProcessMetrics) {
+		paths := h.cgroupManager.GetPaths()
+		cpuPath, ok := paths["cpu"]
+		if !ok {
+			glog.V(4).Infof("Could not find cgroups CPU for container %d", h.pid)
+		} else {
+			stats.Processes, err = processStatsFromProcs(h.rootFs, cpuPath)
+			if err != nil {
+				glog.V(4).Infof("Unable to get Process Stats: %v", err)
+			}
+		}
+	}
 
 	// For backwards compatibility.
 	if len(stats.Network.Interfaces) > 0 {
@@ -132,6 +144,42 @@ func (h *Handler) GetStats() (*info.ContainerStats, error) {
 	return stats, nil
 }
 
+// processStatsFromProcs counts the processes listed in the cgroup.procs file
+// under cgroupPath and sums the entries found in each one's /proc/<pid>/fd
+// directory (resolved relative to rootFs).
+//
+// It returns an error only when cgroup.procs cannot be read. A PID whose fd
+// directory cannot be listed is logged and skipped for the fd total, but is
+// still included in ProcessCount.
+func processStatsFromProcs(rootFs string, cgroupPath string) (info.ProcessStats, error) {
+	filePath := path.Join(cgroupPath, "cgroup.procs")
+	out, err := ioutil.ReadFile(filePath)
+	if err != nil {
+		return info.ProcessStats{}, fmt.Errorf("couldn't open cpu cgroup procs file %v : %v", filePath, err)
+	}
+
+	// strings.Fields splits on any run of whitespace and never yields empty
+	// strings, so the trailing newline of cgroup.procs (or an empty file)
+	// cannot add a phantom "" entry to the PID list.
+	pids := strings.Fields(string(out))
+
+	var fdCount uint64
+	for _, pid := range pids {
+		dirPath := path.Join(rootFs, "/proc", pid, "fd")
+		fds, err := ioutil.ReadDir(dirPath)
+		if err != nil {
+			glog.V(4).Infof("error while listing directory %q to measure fd count: %v", dirPath, err)
+			continue
+		}
+		fdCount += uint64(len(fds))
+	}
+
+	return info.ProcessStats{
+		ProcessCount: uint64(len(pids)),
+		FdCount:      fdCount,
+	}, nil
+}
+
 func schedulerStatsFromProcs(rootFs string, pids []int, pidMetricsCache map[int]*info.CpuSchedstat) (info.CpuSchedstat, error) {
 	for _, pid := range pids {
 		f, err := os.Open(path.Join(rootFs, "proc", strconv.Itoa(pid), "schedstat"))
diff --git a/info/v1/container.go b/info/v1/container.go
index b3a18528..b54233d5 100644
--- a/info/v1/container.go
+++ b/info/v1/container.go
@@ -557,6 +557,15 @@ type AcceleratorStats struct {
 	DutyCycle uint64 `json:"duty_cycle"`
 }
 
+// ProcessStats holds the process and open file descriptor counts of a container.
+type ProcessStats struct {
+	// Number of processes
+	ProcessCount uint64 `json:"process_count"`
+
+	// Number of open file descriptors
+	FdCount uint64 `json:"fd_count"`
+}
+
 type ContainerStats struct {
 	// The time of this stat point.
 	Timestamp time.Time `json:"timestamp"`
@@ -574,6 +583,9 @@ type ContainerStats struct {
 	// Metrics for Accelerators. Each Accelerator corresponds to one element in the array.
 	Accelerators []AcceleratorStats `json:"accelerators,omitempty"`
 
+	// ProcessStats for Containers
+	Processes ProcessStats `json:"processes,omitempty"`
+
 	// Custom metrics from all collectors
 	CustomMetrics map[string][]MetricVal `json:"custom_metrics,omitempty"`
 }
diff --git a/metrics/prometheus.go b/metrics/prometheus.go
index 6f01d606..c88c67c4 100644
--- a/metrics/prometheus.go
+++ b/metrics/prometheus.go
@@ -840,6 +840,26 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
 			},
 		}...)
 	}
+	if includedMetrics.Has(container.ProcessMetrics) {
+		c.containerMetrics = append(c.containerMetrics, []containerMetric{
+			{
+				name:      "container_processes",
+				help:      "Number of processes running inside the container.",
+				valueType: prometheus.GaugeValue,
+				getValues: func(s *info.ContainerStats) metricValues {
+					return metricValues{{value: float64(s.Processes.ProcessCount)}}
+				},
+			},
+			{
+				name:      "container_file_descriptors",
+				help:      "Number of open file descriptors for the container.",
+				valueType: prometheus.GaugeValue,
+				getValues: func(s *info.ContainerStats) metricValues {
+					return metricValues{{value: float64(s.Processes.FdCount)}}
+				},
+			},
+		}...)
+	}
 	return c
 }
 
@@ -932,10 +952,6 @@ func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric)
 		}
 	}
 
-	psReqOpt := v2.RequestOptions{
-		IdType: v2.TypeName,
-	}
-
 	for _, cont := range containers {
 		values := make([]string, 0, len(rawLabels))
 		labels := make([]string, 0, len(rawLabels))
@@ -969,21 +985,6 @@ func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric)
 			ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(cont.Spec.Memory.Reservation), values...)
 		}
 
-		if c.includedMetrics.Has(container.ProcessMetrics) {
-			psList, err := c.infoProvider.GetProcessList(cont.Name, psReqOpt)
-			if err == nil {
-				desc = prometheus.NewDesc("container_processes", "Number of processes running inside the container.", labels, nil)
-				ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(len(psList)), values...)
-
-				var fd int
-				for _, ps := range psList {
-					fd += ps.FdCount
-				}
-				desc = prometheus.NewDesc("container_file_descriptors", "Number of open file descriptors for the container.", labels, nil)
-				ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(fd), values...)
-			}
-		}
-
 		// Now for the actual metrics
 		if len(cont.Stats) == 0 {
 			continue
diff --git a/metrics/prometheus_test.go b/metrics/prometheus_test.go
index ddaab830..38b5df0f 100644
--- a/metrics/prometheus_test.go
+++ b/metrics/prometheus_test.go
@@ -252,6 +252,10 @@ func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.Container
 							DutyCycle: 6,
 						},
 					},
+					Processes: info.ProcessStats{
+						ProcessCount: 1,
+						FdCount:      5,
+					},
 					TaskStats: info.LoadStats{
 						NrSleeping: 50,
 						NrRunning: 51,