Features:

Added Network Stats to Container Info. It still not exposed via the HTTP UI.
Bug fixes:
1. Modified docker handler to use libcontainer.GetStats instead of quering the fs package.
2. cAdvisor will not stall if any of its core operations fail.
3. cAdvisor will safely ignore any inactive or leaked docker containers. When containers are leaked cgroup state exists but docker is not aware of them.
This commit is contained in:
Vishnu Kannan 2014-07-19 00:34:55 +00:00
parent bc23a92932
commit 1edb798de5
9 changed files with 123 additions and 58 deletions

View File

@ -83,11 +83,15 @@ func main() {
} }
}) })
go containerManager.Start() errChan := make(chan error)
go containerManager.Start(errChan)
log.Printf("Starting cAdvisor version: %q", info.VERSION) log.Printf("Starting cAdvisor version: %q", info.VERSION)
log.Print("About to serve on port ", *argPort) log.Print("About to serve on port ", *argPort)
addr := fmt.Sprintf(":%v", *argPort) addr := fmt.Sprintf(":%v", *argPort)
log.Fatal(http.ListenAndServe(addr, nil)) go func() {
errChan <- http.ListenAndServe(addr, nil)
}()
log.Fatal(<-errChan)
} }

View File

@ -14,7 +14,11 @@
package container package container
import "github.com/google/cadvisor/info" import (
"errors"
"github.com/google/cadvisor/info"
)
// Listing types. // Listing types.
const ( const (
@ -22,6 +26,8 @@ const (
LIST_RECURSIVE LIST_RECURSIVE
) )
var NotActive = errors.New("Container is not active")
type ListType int type ListType int
// Interface for container operation handlers. // Interface for container operation handlers.

View File

@ -26,13 +26,16 @@ import (
"time" "time"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/cgroups"
"github.com/fsouza/go-dockerclient" "github.com/fsouza/go-dockerclient"
"github.com/google/cadvisor/container" "github.com/google/cadvisor/container"
containerLibcontainer "github.com/google/cadvisor/container/libcontainer" containerLibcontainer "github.com/google/cadvisor/container/libcontainer"
"github.com/google/cadvisor/info" "github.com/google/cadvisor/info"
"github.com/google/cadvisor/utils"
) )
// Basepath to all container specific information that libcontainer stores.
const dockerRootDir = "/var/lib/docker/execdriver/native"
type dockerContainerHandler struct { type dockerContainerHandler struct {
client *docker.Client client *docker.Client
name string name string
@ -61,8 +64,9 @@ func newDockerContainerHandler(
return nil, fmt.Errorf("invalid docker container %v: %v", name, err) return nil, fmt.Errorf("invalid docker container %v: %v", name, err)
} }
ctnr, err := client.InspectContainer(id) ctnr, err := client.InspectContainer(id)
if err != nil { // We assume that if Inspect fails then the container is not known to docker.
return nil, fmt.Errorf("unable to inspect container %v: %v", name, err) if err != nil || !ctnr.State.Running {
return nil, container.NotActive
} }
handler.aliases = append(handler.aliases, path.Join("/docker", ctnr.Name)) handler.aliases = append(handler.aliases, path.Join("/docker", ctnr.Name))
return handler, nil return handler, nil
@ -125,21 +129,50 @@ func (self *dockerContainerHandler) isDockerContainer() bool {
} }
// TODO(vmarmol): Switch to getting this from libcontainer once we have a solid API. // TODO(vmarmol): Switch to getting this from libcontainer once we have a solid API.
func readLibcontainerSpec(id string) (spec *libcontainer.Config, err error) { func readLibcontainerConfig(id string) (config *libcontainer.Config, err error) {
dir := "/var/lib/docker/execdriver/native" configPath := path.Join(dockerRootDir, id, "container.json")
configPath := path.Join(dir, id, "container.json") if !utils.FileExists(configPath) {
err = container.NotActive
return
}
f, err := os.Open(configPath) f, err := os.Open(configPath)
if err != nil { if err != nil {
return return
} }
defer f.Close() defer f.Close()
d := json.NewDecoder(f) d := json.NewDecoder(f)
ret := new(libcontainer.Config) retConfig := new(libcontainer.Config)
err = d.Decode(ret) err = d.Decode(retConfig)
if err != nil { if err != nil {
return return
} }
spec = ret config = retConfig
return
}
func readLibcontainerState(id string) (state *libcontainer.State, err error) {
statePath := path.Join(dockerRootDir, id, "state.json")
if !utils.FileExists(statePath) {
err = container.NotActive
return
}
f, err := os.Open(statePath)
if err != nil {
if os.IsNotExist(err) {
err = container.NotActive
}
return
}
defer f.Close()
d := json.NewDecoder(f)
retState := new(libcontainer.State)
err = d.Decode(retState)
if err != nil {
return
}
state = retState
return return
} }
@ -183,12 +216,12 @@ func (self *dockerContainerHandler) GetSpec() (spec *info.ContainerSpec, err err
if err != nil { if err != nil {
return return
} }
libcontainerSpec, err := readLibcontainerSpec(id) libcontainerConfig, err := readLibcontainerConfig(id)
if err != nil { if err != nil {
return return
} }
spec = libcontainerConfigToContainerSpec(libcontainerSpec, mi) spec = libcontainerConfigToContainerSpec(libcontainerConfig, mi)
return return
} }
@ -199,15 +232,20 @@ func (self *dockerContainerHandler) GetStats() (stats *info.ContainerStats, err
stats.Timestamp = time.Now() stats.Timestamp = time.Now()
return return
} }
parent, id, err := self.splitName() _, id, err := self.splitName()
if err != nil { if err != nil {
return return
} }
cg := &cgroups.Cgroup{ config, err := readLibcontainerConfig(id)
Parent: parent, if err != nil {
Name: id, return
} }
return containerLibcontainer.GetStats(cg, self.useSystemd) state, err := readLibcontainerState(id)
if err != nil {
return
}
return containerLibcontainer.GetStats(config, state)
} }
func (self *dockerContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) { func (self *dockerContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
@ -215,7 +253,7 @@ func (self *dockerContainerHandler) ListContainers(listType container.ListType)
return nil, nil return nil, nil
} }
if self.isRootContainer() && listType == container.LIST_SELF { if self.isRootContainer() && listType == container.LIST_SELF {
return []info.ContainerReference{info.ContainerReference{Name: "/docker"}}, nil return []info.ContainerReference{{Name: "/docker"}}, nil
} }
opt := docker.ListContainersOptions{ opt := docker.ListContainersOptions{
All: true, All: true,

View File

@ -60,9 +60,9 @@ func TestWhiteListContainerFilter(t *testing.T) {
mockc := &mockContainerHandler{} mockc := &mockContainerHandler{}
mockc.On("ListContainers", LIST_RECURSIVE).Return( mockc.On("ListContainers", LIST_RECURSIVE).Return(
[]info.ContainerReference{ []info.ContainerReference{
info.ContainerReference{Name: "/docker/ee0103"}, {Name: "/docker/ee0103"},
info.ContainerReference{Name: "/container/created/by/lmctfy"}, {Name: "/container/created/by/lmctfy"},
info.ContainerReference{Name: "/user/something"}, {Name: "/user/something"},
}, },
nil, nil,
) )
@ -95,9 +95,9 @@ func TestBlackListContainerFilter(t *testing.T) {
mockc := &mockContainerHandler{} mockc := &mockContainerHandler{}
mockc.On("ListContainers", LIST_RECURSIVE).Return( mockc.On("ListContainers", LIST_RECURSIVE).Return(
[]info.ContainerReference{ []info.ContainerReference{
info.ContainerReference{Name: "/docker/ee0103"}, {Name: "/docker/ee0103"},
info.ContainerReference{Name: "/container/created/by/lmctfy"}, {Name: "/container/created/by/lmctfy"},
info.ContainerReference{Name: "/user/something"}, {Name: "/user/something"},
}, },
nil, nil,
) )

View File

@ -3,33 +3,23 @@ package libcontainer
import ( import (
"time" "time"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/cgroups/systemd"
"github.com/google/cadvisor/info" "github.com/google/cadvisor/info"
) )
// Get stats of the specified cgroup // Get stats of the specified cgroup
func GetStats(cgroup *cgroups.Cgroup, useSystemd bool) (*info.ContainerStats, error) { func GetStats(config *libcontainer.Config, state *libcontainer.State) (*info.ContainerStats, error) {
// TODO(vmarmol): Use libcontainer's Stats() in the new API when that is ready. // TODO(vmarmol): Use libcontainer's Stats() in the new API when that is ready.
// Use systemd paths if systemd is being used. libcontainerStats, err := libcontainer.GetStats(config, state)
var (
s *cgroups.Stats
err error
)
if useSystemd {
s, err = systemd.GetStats(cgroup)
} else {
s, err = fs.GetStats(cgroup)
}
if err != nil { if err != nil {
return nil, err return nil, err
} }
return toContainerStats(s), nil return toContainerStats(libcontainerStats), nil
} }
// Convert libcontainer stats to info.ContainerStats. // Convert libcontainer stats to info.ContainerStats.
func toContainerStats(s *cgroups.Stats) *info.ContainerStats { func toContainerStats(libcontainerStats *libcontainer.ContainerStats) *info.ContainerStats {
s := libcontainerStats.CgroupStats
ret := new(info.ContainerStats) ret := new(info.ContainerStats)
ret.Timestamp = time.Now() ret.Timestamp = time.Now()
ret.Cpu = new(info.CpuStats) ret.Cpu = new(info.CpuStats)
@ -59,5 +49,6 @@ func toContainerStats(s *cgroups.Stats) *info.ContainerStats {
ret.Memory.WorkingSet -= v ret.Memory.WorkingSet -= v
} }
} }
ret.Network = (*info.NetworkStats)(&libcontainerStats.NetworkStats)
return ret return ret
} }

View File

@ -96,7 +96,7 @@ func (self *rawContainerHandler) ListContainers(listType container.ListType) ([]
// Make into container references. // Make into container references.
ret := make([]info.ContainerReference, 0, len(containers)) ret := make([]info.ContainerReference, 0, len(containers))
for cont, _ := range containers { for cont := range containers {
ret = append(ret, info.ContainerReference{ ret = append(ret, info.ContainerReference{
Name: cont, Name: cont,
}) })

View File

@ -239,11 +239,31 @@ type MemoryStatsMemoryData struct {
Pgmajfault uint64 `json:"pgmajfault,omitempty"` Pgmajfault uint64 `json:"pgmajfault,omitempty"`
} }
type NetworkStats struct {
// Cumulative count of bytes received.
RxBytes uint64 `json:"rx_bytes,omitempty"`
// Cumulative count of packets received.
RxPackets uint64 `json:"rx_packets,omitempty"`
// Cumulative count of receive errors encountered.
RxErrors uint64 `json:"rx_errors,omitempty"`
// Cumulative count of packets dropped while receiving.
RxDropped uint64 `json:"rx_dropped,omitempty"`
// Cumulative count of bytes transmitted.
TxBytes uint64 `json:"tx_bytes,omitempty"`
// Cumulative count of packets transmitted.
TxPackets uint64 `json:"tx_packets,omitempty"`
// Cumulative count of transmit errors encountered.
TxErrors uint64 `json:"tx_errors,omitempty"`
// Cumulative count of packets dropped while transmitting.
TxDropped uint64 `json:"tx_dropped,omitempty"`
}
type ContainerStats struct { type ContainerStats struct {
// The time of this stat point. // The time of this stat point.
Timestamp time.Time `json:"timestamp"` Timestamp time.Time `json:"timestamp"`
Cpu *CpuStats `json:"cpu,omitempty"` Cpu *CpuStats `json:"cpu,omitempty"`
Memory *MemoryStats `json:"memory,omitempty"` Memory *MemoryStats `json:"memory,omitempty"`
Network *NetworkStats `json:"network,omitempty"`
} }
// Makes a deep copy of the ContainerStats and returns a pointer to the new // Makes a deep copy of the ContainerStats and returns a pointer to the new

View File

@ -127,7 +127,7 @@ func (c *containerData) housekeeping() {
func (c *containerData) housekeepingTick() { func (c *containerData) housekeepingTick() {
err := c.updateStats() err := c.updateStats()
if err != nil { if err != nil && err != container.NotActive {
log.Printf("Failed to update stats for container \"%s\": %s", c.info.Name, err) log.Printf("Failed to update stats for container \"%s\": %s", c.info.Name, err)
} }
} }

View File

@ -27,7 +27,7 @@ import (
type Manager interface { type Manager interface {
// Start the manager, blocks forever. // Start the manager, blocks forever.
Start() error Start(chanErr chan error)
// Get information about a container. // Get information about a container.
GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error)
@ -73,16 +73,18 @@ type manager struct {
} }
// Start the container manager. // Start the container manager.
func (m *manager) Start() error { func (m *manager) Start(errChan chan error) {
// Create root and then recover all containers. // Create root and then recover all containers.
_, err := m.createContainer("/") _, err := m.createContainer("/")
if err != nil { if err != nil {
return err errChan <- err
return
} }
log.Printf("Starting recovery of all containers") log.Printf("Starting recovery of all containers")
err = m.detectContainers() err = m.detectContainers()
if err != nil { if err != nil {
return err errChan <- err
return
} }
log.Printf("Recovery completed") log.Printf("Recovery completed")
@ -102,7 +104,7 @@ func (m *manager) Start() error {
log.Printf("Global Housekeeping(%d) took %s", t.Unix(), duration) log.Printf("Global Housekeeping(%d) took %s", t.Unix(), duration)
} }
} }
return nil errChan <- nil
} }
// Get a container by name. // Get a container by name.
@ -281,18 +283,22 @@ func (m *manager) detectContainers() error {
} }
// Add the new containers. // Add the new containers.
for _, container := range added { for _, cont := range added {
_, err = m.createContainer(container.Name) _, err = m.createContainer(cont.Name)
if err != nil { if err != nil {
return fmt.Errorf("Failed to create existing container: %s: %s", container.Name, err) if err != container.NotActive {
log.Printf("failed to create existing container: %s: %s", cont.Name, err)
}
} }
} }
// Remove the old containers. // Remove the old containers.
for _, container := range removed { for _, cont := range removed {
err = m.destroyContainer(container.Name) err = m.destroyContainer(cont.Name)
if err != nil { if err != nil {
return fmt.Errorf("Failed to destroy existing container: %s: %s", container.Name, err) if err != container.NotActive {
log.Printf("failed to destroy existing container: %s: %s", cont.Name, err)
}
} }
} }