From 1edb798de5ee35745a97592e97ccd2d3a7b89f17 Mon Sep 17 00:00:00 2001 From: Vishnu Kannan Date: Sat, 19 Jul 2014 00:34:55 +0000 Subject: [PATCH] Features: Added Network Stats to Container Info. It still not exposed via the HTTP UI. Bug fixes: 1. Modified docker handler to use libcontainer.GetStats instead of quering the fs package. 2. cAdvisor will not stall if any of its core operations fail. 3. cAdvisor will safely ignore any inactive or leaked docker containers. When containers are leaked cgroup state exists but docker is not aware of them. --- cadvisor.go | 8 +++- container/container.go | 8 +++- container/docker/handler.go | 72 +++++++++++++++++++++++-------- container/filter_test.go | 12 +++--- container/libcontainer/helpers.go | 23 +++------- container/raw/handler.go | 2 +- info/container.go | 26 +++++++++-- manager/container.go | 2 +- manager/manager.go | 28 +++++++----- 9 files changed, 123 insertions(+), 58 deletions(-) diff --git a/cadvisor.go b/cadvisor.go index 31b18f71..4d60300d 100644 --- a/cadvisor.go +++ b/cadvisor.go @@ -83,11 +83,15 @@ func main() { } }) - go containerManager.Start() + errChan := make(chan error) + go containerManager.Start(errChan) log.Printf("Starting cAdvisor version: %q", info.VERSION) log.Print("About to serve on port ", *argPort) addr := fmt.Sprintf(":%v", *argPort) - log.Fatal(http.ListenAndServe(addr, nil)) + go func() { + errChan <- http.ListenAndServe(addr, nil) + }() + log.Fatal(<-errChan) } diff --git a/container/container.go b/container/container.go index de478fd0..efe92397 100644 --- a/container/container.go +++ b/container/container.go @@ -14,7 +14,11 @@ package container -import "github.com/google/cadvisor/info" +import ( + "errors" + + "github.com/google/cadvisor/info" +) // Listing types. const ( @@ -22,6 +26,8 @@ const ( LIST_RECURSIVE ) +var NotActive = errors.New("Container is not active") + type ListType int // Interface for container operation handlers. diff --git a/container/docker/handler.go b/container/docker/handler.go index b689e3f8..77eb035f 100644 --- a/container/docker/handler.go +++ b/container/docker/handler.go @@ -26,13 +26,16 @@ import ( "time" "github.com/docker/libcontainer" - "github.com/docker/libcontainer/cgroups" "github.com/fsouza/go-dockerclient" "github.com/google/cadvisor/container" containerLibcontainer "github.com/google/cadvisor/container/libcontainer" "github.com/google/cadvisor/info" + "github.com/google/cadvisor/utils" ) +// Basepath to all container specific information that libcontainer stores. +const dockerRootDir = "/var/lib/docker/execdriver/native" + type dockerContainerHandler struct { client *docker.Client name string @@ -61,8 +64,9 @@ func newDockerContainerHandler( return nil, fmt.Errorf("invalid docker container %v: %v", name, err) } ctnr, err := client.InspectContainer(id) - if err != nil { - return nil, fmt.Errorf("unable to inspect container %v: %v", name, err) + // We assume that if Inspect fails then the container is not known to docker. + if err != nil || !ctnr.State.Running { + return nil, container.NotActive } handler.aliases = append(handler.aliases, path.Join("/docker", ctnr.Name)) return handler, nil @@ -125,21 +129,50 @@ func (self *dockerContainerHandler) isDockerContainer() bool { } // TODO(vmarmol): Switch to getting this from libcontainer once we have a solid API. -func readLibcontainerSpec(id string) (spec *libcontainer.Config, err error) { - dir := "/var/lib/docker/execdriver/native" - configPath := path.Join(dir, id, "container.json") +func readLibcontainerConfig(id string) (config *libcontainer.Config, err error) { + configPath := path.Join(dockerRootDir, id, "container.json") + if !utils.FileExists(configPath) { + err = container.NotActive + return + } f, err := os.Open(configPath) if err != nil { return } defer f.Close() d := json.NewDecoder(f) - ret := new(libcontainer.Config) - err = d.Decode(ret) + retConfig := new(libcontainer.Config) + err = d.Decode(retConfig) if err != nil { return } - spec = ret + config = retConfig + + return +} + +func readLibcontainerState(id string) (state *libcontainer.State, err error) { + statePath := path.Join(dockerRootDir, id, "state.json") + if !utils.FileExists(statePath) { + err = container.NotActive + return + } + f, err := os.Open(statePath) + if err != nil { + if os.IsNotExist(err) { + err = container.NotActive + } + return + } + defer f.Close() + d := json.NewDecoder(f) + retState := new(libcontainer.State) + err = d.Decode(retState) + if err != nil { + return + } + state = retState + return } @@ -183,12 +216,12 @@ func (self *dockerContainerHandler) GetSpec() (spec *info.ContainerSpec, err err if err != nil { return } - libcontainerSpec, err := readLibcontainerSpec(id) + libcontainerConfig, err := readLibcontainerConfig(id) if err != nil { return } - spec = libcontainerConfigToContainerSpec(libcontainerSpec, mi) + spec = libcontainerConfigToContainerSpec(libcontainerConfig, mi) return } @@ -199,15 +232,20 @@ func (self *dockerContainerHandler) GetStats() (stats *info.ContainerStats, err stats.Timestamp = time.Now() return } - parent, id, err := self.splitName() + _, id, err := self.splitName() if err != nil { return } - cg := &cgroups.Cgroup{ - Parent: parent, - Name: id, + config, err := readLibcontainerConfig(id) + if err != nil { + return } - return containerLibcontainer.GetStats(cg, self.useSystemd) + state, err := readLibcontainerState(id) + if err != nil { + return + } + + return containerLibcontainer.GetStats(config, state) } func (self *dockerContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) { @@ -215,7 +253,7 @@ func (self *dockerContainerHandler) ListContainers(listType container.ListType) return nil, nil } if self.isRootContainer() && listType == container.LIST_SELF { - return []info.ContainerReference{info.ContainerReference{Name: "/docker"}}, nil + return []info.ContainerReference{{Name: "/docker"}}, nil } opt := docker.ListContainersOptions{ All: true, diff --git a/container/filter_test.go b/container/filter_test.go index 5e8ef538..0c6e9efa 100644 --- a/container/filter_test.go +++ b/container/filter_test.go @@ -60,9 +60,9 @@ func TestWhiteListContainerFilter(t *testing.T) { mockc := &mockContainerHandler{} mockc.On("ListContainers", LIST_RECURSIVE).Return( []info.ContainerReference{ - info.ContainerReference{Name: "/docker/ee0103"}, - info.ContainerReference{Name: "/container/created/by/lmctfy"}, - info.ContainerReference{Name: "/user/something"}, + {Name: "/docker/ee0103"}, + {Name: "/container/created/by/lmctfy"}, + {Name: "/user/something"}, }, nil, ) @@ -95,9 +95,9 @@ func TestBlackListContainerFilter(t *testing.T) { mockc := &mockContainerHandler{} mockc.On("ListContainers", LIST_RECURSIVE).Return( []info.ContainerReference{ - info.ContainerReference{Name: "/docker/ee0103"}, - info.ContainerReference{Name: "/container/created/by/lmctfy"}, - info.ContainerReference{Name: "/user/something"}, + {Name: "/docker/ee0103"}, + {Name: "/container/created/by/lmctfy"}, + {Name: "/user/something"}, }, nil, ) diff --git a/container/libcontainer/helpers.go b/container/libcontainer/helpers.go index 957be091..99b43994 100644 --- a/container/libcontainer/helpers.go +++ b/container/libcontainer/helpers.go @@ -3,33 +3,23 @@ package libcontainer import ( "time" - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/cgroups/fs" - "github.com/docker/libcontainer/cgroups/systemd" + "github.com/docker/libcontainer" "github.com/google/cadvisor/info" ) // Get stats of the specified cgroup -func GetStats(cgroup *cgroups.Cgroup, useSystemd bool) (*info.ContainerStats, error) { +func GetStats(config *libcontainer.Config, state *libcontainer.State) (*info.ContainerStats, error) { // TODO(vmarmol): Use libcontainer's Stats() in the new API when that is ready. - // Use systemd paths if systemd is being used. - var ( - s *cgroups.Stats - err error - ) - if useSystemd { - s, err = systemd.GetStats(cgroup) - } else { - s, err = fs.GetStats(cgroup) - } + libcontainerStats, err := libcontainer.GetStats(config, state) if err != nil { return nil, err } - return toContainerStats(s), nil + return toContainerStats(libcontainerStats), nil } // Convert libcontainer stats to info.ContainerStats. -func toContainerStats(s *cgroups.Stats) *info.ContainerStats { +func toContainerStats(libcontainerStats *libcontainer.ContainerStats) *info.ContainerStats { + s := libcontainerStats.CgroupStats ret := new(info.ContainerStats) ret.Timestamp = time.Now() ret.Cpu = new(info.CpuStats) @@ -59,5 +49,6 @@ func toContainerStats(s *cgroups.Stats) *info.ContainerStats { ret.Memory.WorkingSet -= v } } + ret.Network = (*info.NetworkStats)(&libcontainerStats.NetworkStats) return ret } diff --git a/container/raw/handler.go b/container/raw/handler.go index 47251276..f7948946 100644 --- a/container/raw/handler.go +++ b/container/raw/handler.go @@ -96,7 +96,7 @@ func (self *rawContainerHandler) ListContainers(listType container.ListType) ([] // Make into container references. ret := make([]info.ContainerReference, 0, len(containers)) - for cont, _ := range containers { + for cont := range containers { ret = append(ret, info.ContainerReference{ Name: cont, }) diff --git a/info/container.go b/info/container.go index 57ba8141..194f77a3 100644 --- a/info/container.go +++ b/info/container.go @@ -239,11 +239,31 @@ type MemoryStatsMemoryData struct { Pgmajfault uint64 `json:"pgmajfault,omitempty"` } +type NetworkStats struct { + // Cumulative count of bytes received. + RxBytes uint64 `json:"rx_bytes,omitempty"` + // Cumulative count of packets received. + RxPackets uint64 `json:"rx_packets,omitempty"` + // Cumulative count of receive errors encountered. + RxErrors uint64 `json:"rx_errors,omitempty"` + // Cumulative count of packets dropped while receiving. + RxDropped uint64 `json:"rx_dropped,omitempty"` + // Cumulative count of bytes transmitted. + TxBytes uint64 `json:"tx_bytes,omitempty"` + // Cumulative count of packets transmitted. + TxPackets uint64 `json:"tx_packets,omitempty"` + // Cumulative count of transmit errors encountered. + TxErrors uint64 `json:"tx_errors,omitempty"` + // Cumulative count of packets dropped while transmitting. + TxDropped uint64 `json:"tx_dropped,omitempty"` +} + type ContainerStats struct { // The time of this stat point. - Timestamp time.Time `json:"timestamp"` - Cpu *CpuStats `json:"cpu,omitempty"` - Memory *MemoryStats `json:"memory,omitempty"` + Timestamp time.Time `json:"timestamp"` + Cpu *CpuStats `json:"cpu,omitempty"` + Memory *MemoryStats `json:"memory,omitempty"` + Network *NetworkStats `json:"network,omitempty"` } // Makes a deep copy of the ContainerStats and returns a pointer to the new diff --git a/manager/container.go b/manager/container.go index b366c9d6..60f641fc 100644 --- a/manager/container.go +++ b/manager/container.go @@ -127,7 +127,7 @@ func (c *containerData) housekeeping() { func (c *containerData) housekeepingTick() { err := c.updateStats() - if err != nil { + if err != nil && err != container.NotActive { log.Printf("Failed to update stats for container \"%s\": %s", c.info.Name, err) } } diff --git a/manager/manager.go b/manager/manager.go index 696e0bad..041599a4 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -27,7 +27,7 @@ import ( type Manager interface { // Start the manager, blocks forever. - Start() error + Start(chanErr chan error) // Get information about a container. GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) @@ -73,16 +73,18 @@ type manager struct { } // Start the container manager. -func (m *manager) Start() error { +func (m *manager) Start(errChan chan error) { // Create root and then recover all containers. _, err := m.createContainer("/") if err != nil { - return err + errChan <- err + return } log.Printf("Starting recovery of all containers") err = m.detectContainers() if err != nil { - return err + errChan <- err + return } log.Printf("Recovery completed") @@ -102,7 +104,7 @@ func (m *manager) Start() error { log.Printf("Global Housekeeping(%d) took %s", t.Unix(), duration) } } - return nil + errChan <- nil } // Get a container by name. @@ -281,18 +283,22 @@ func (m *manager) detectContainers() error { } // Add the new containers. - for _, container := range added { - _, err = m.createContainer(container.Name) + for _, cont := range added { + _, err = m.createContainer(cont.Name) if err != nil { - return fmt.Errorf("Failed to create existing container: %s: %s", container.Name, err) + if err != container.NotActive { + log.Printf("failed to create existing container: %s: %s", cont.Name, err) + } } } // Remove the old containers. - for _, container := range removed { - err = m.destroyContainer(container.Name) + for _, cont := range removed { + err = m.destroyContainer(cont.Name) if err != nil { - return fmt.Errorf("Failed to destroy existing container: %s: %s", container.Name, err) + if err != container.NotActive { + log.Printf("failed to destroy existing container: %s: %s", cont.Name, err) + } } }