Features:

Added Network Stats to Container Info. It still not exposed via the HTTP UI.
Bug fixes:
1. Modified docker handler to use libcontainer.GetStats instead of quering the fs package.
2. cAdvisor will not stall if any of its core operations fail.
3. cAdvisor will safely ignore any inactive or leaked docker containers. When containers are leaked cgroup state exists but docker is not aware of them.
This commit is contained in:
Vishnu Kannan 2014-07-19 00:34:55 +00:00
parent bc23a92932
commit 1edb798de5
9 changed files with 123 additions and 58 deletions

View File

@ -83,11 +83,15 @@ func main() {
}
})
go containerManager.Start()
errChan := make(chan error)
go containerManager.Start(errChan)
log.Printf("Starting cAdvisor version: %q", info.VERSION)
log.Print("About to serve on port ", *argPort)
addr := fmt.Sprintf(":%v", *argPort)
log.Fatal(http.ListenAndServe(addr, nil))
go func() {
errChan <- http.ListenAndServe(addr, nil)
}()
log.Fatal(<-errChan)
}

View File

@ -14,7 +14,11 @@
package container
import "github.com/google/cadvisor/info"
import (
"errors"
"github.com/google/cadvisor/info"
)
// Listing types.
const (
@ -22,6 +26,8 @@ const (
LIST_RECURSIVE
)
var NotActive = errors.New("Container is not active")
type ListType int
// Interface for container operation handlers.

View File

@ -26,13 +26,16 @@ import (
"time"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/cgroups"
"github.com/fsouza/go-dockerclient"
"github.com/google/cadvisor/container"
containerLibcontainer "github.com/google/cadvisor/container/libcontainer"
"github.com/google/cadvisor/info"
"github.com/google/cadvisor/utils"
)
// Basepath to all container specific information that libcontainer stores.
const dockerRootDir = "/var/lib/docker/execdriver/native"
type dockerContainerHandler struct {
client *docker.Client
name string
@ -61,8 +64,9 @@ func newDockerContainerHandler(
return nil, fmt.Errorf("invalid docker container %v: %v", name, err)
}
ctnr, err := client.InspectContainer(id)
if err != nil {
return nil, fmt.Errorf("unable to inspect container %v: %v", name, err)
// We assume that if Inspect fails then the container is not known to docker.
if err != nil || !ctnr.State.Running {
return nil, container.NotActive
}
handler.aliases = append(handler.aliases, path.Join("/docker", ctnr.Name))
return handler, nil
@ -125,21 +129,50 @@ func (self *dockerContainerHandler) isDockerContainer() bool {
}
// TODO(vmarmol): Switch to getting this from libcontainer once we have a solid API.
func readLibcontainerSpec(id string) (spec *libcontainer.Config, err error) {
dir := "/var/lib/docker/execdriver/native"
configPath := path.Join(dir, id, "container.json")
func readLibcontainerConfig(id string) (config *libcontainer.Config, err error) {
configPath := path.Join(dockerRootDir, id, "container.json")
if !utils.FileExists(configPath) {
err = container.NotActive
return
}
f, err := os.Open(configPath)
if err != nil {
return
}
defer f.Close()
d := json.NewDecoder(f)
ret := new(libcontainer.Config)
err = d.Decode(ret)
retConfig := new(libcontainer.Config)
err = d.Decode(retConfig)
if err != nil {
return
}
spec = ret
config = retConfig
return
}
func readLibcontainerState(id string) (state *libcontainer.State, err error) {
statePath := path.Join(dockerRootDir, id, "state.json")
if !utils.FileExists(statePath) {
err = container.NotActive
return
}
f, err := os.Open(statePath)
if err != nil {
if os.IsNotExist(err) {
err = container.NotActive
}
return
}
defer f.Close()
d := json.NewDecoder(f)
retState := new(libcontainer.State)
err = d.Decode(retState)
if err != nil {
return
}
state = retState
return
}
@ -183,12 +216,12 @@ func (self *dockerContainerHandler) GetSpec() (spec *info.ContainerSpec, err err
if err != nil {
return
}
libcontainerSpec, err := readLibcontainerSpec(id)
libcontainerConfig, err := readLibcontainerConfig(id)
if err != nil {
return
}
spec = libcontainerConfigToContainerSpec(libcontainerSpec, mi)
spec = libcontainerConfigToContainerSpec(libcontainerConfig, mi)
return
}
@ -199,15 +232,20 @@ func (self *dockerContainerHandler) GetStats() (stats *info.ContainerStats, err
stats.Timestamp = time.Now()
return
}
parent, id, err := self.splitName()
_, id, err := self.splitName()
if err != nil {
return
}
cg := &cgroups.Cgroup{
Parent: parent,
Name: id,
config, err := readLibcontainerConfig(id)
if err != nil {
return
}
return containerLibcontainer.GetStats(cg, self.useSystemd)
state, err := readLibcontainerState(id)
if err != nil {
return
}
return containerLibcontainer.GetStats(config, state)
}
func (self *dockerContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
@ -215,7 +253,7 @@ func (self *dockerContainerHandler) ListContainers(listType container.ListType)
return nil, nil
}
if self.isRootContainer() && listType == container.LIST_SELF {
return []info.ContainerReference{info.ContainerReference{Name: "/docker"}}, nil
return []info.ContainerReference{{Name: "/docker"}}, nil
}
opt := docker.ListContainersOptions{
All: true,

View File

@ -60,9 +60,9 @@ func TestWhiteListContainerFilter(t *testing.T) {
mockc := &mockContainerHandler{}
mockc.On("ListContainers", LIST_RECURSIVE).Return(
[]info.ContainerReference{
info.ContainerReference{Name: "/docker/ee0103"},
info.ContainerReference{Name: "/container/created/by/lmctfy"},
info.ContainerReference{Name: "/user/something"},
{Name: "/docker/ee0103"},
{Name: "/container/created/by/lmctfy"},
{Name: "/user/something"},
},
nil,
)
@ -95,9 +95,9 @@ func TestBlackListContainerFilter(t *testing.T) {
mockc := &mockContainerHandler{}
mockc.On("ListContainers", LIST_RECURSIVE).Return(
[]info.ContainerReference{
info.ContainerReference{Name: "/docker/ee0103"},
info.ContainerReference{Name: "/container/created/by/lmctfy"},
info.ContainerReference{Name: "/user/something"},
{Name: "/docker/ee0103"},
{Name: "/container/created/by/lmctfy"},
{Name: "/user/something"},
},
nil,
)

View File

@ -3,33 +3,23 @@ package libcontainer
import (
"time"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/cgroups/systemd"
"github.com/docker/libcontainer"
"github.com/google/cadvisor/info"
)
// Get stats of the specified cgroup
func GetStats(cgroup *cgroups.Cgroup, useSystemd bool) (*info.ContainerStats, error) {
func GetStats(config *libcontainer.Config, state *libcontainer.State) (*info.ContainerStats, error) {
// TODO(vmarmol): Use libcontainer's Stats() in the new API when that is ready.
// Use systemd paths if systemd is being used.
var (
s *cgroups.Stats
err error
)
if useSystemd {
s, err = systemd.GetStats(cgroup)
} else {
s, err = fs.GetStats(cgroup)
}
libcontainerStats, err := libcontainer.GetStats(config, state)
if err != nil {
return nil, err
}
return toContainerStats(s), nil
return toContainerStats(libcontainerStats), nil
}
// Convert libcontainer stats to info.ContainerStats.
func toContainerStats(s *cgroups.Stats) *info.ContainerStats {
func toContainerStats(libcontainerStats *libcontainer.ContainerStats) *info.ContainerStats {
s := libcontainerStats.CgroupStats
ret := new(info.ContainerStats)
ret.Timestamp = time.Now()
ret.Cpu = new(info.CpuStats)
@ -59,5 +49,6 @@ func toContainerStats(s *cgroups.Stats) *info.ContainerStats {
ret.Memory.WorkingSet -= v
}
}
ret.Network = (*info.NetworkStats)(&libcontainerStats.NetworkStats)
return ret
}

View File

@ -96,7 +96,7 @@ func (self *rawContainerHandler) ListContainers(listType container.ListType) ([]
// Make into container references.
ret := make([]info.ContainerReference, 0, len(containers))
for cont, _ := range containers {
for cont := range containers {
ret = append(ret, info.ContainerReference{
Name: cont,
})

View File

@ -239,11 +239,31 @@ type MemoryStatsMemoryData struct {
Pgmajfault uint64 `json:"pgmajfault,omitempty"`
}
type NetworkStats struct {
// Cumulative count of bytes received.
RxBytes uint64 `json:"rx_bytes,omitempty"`
// Cumulative count of packets received.
RxPackets uint64 `json:"rx_packets,omitempty"`
// Cumulative count of receive errors encountered.
RxErrors uint64 `json:"rx_errors,omitempty"`
// Cumulative count of packets dropped while receiving.
RxDropped uint64 `json:"rx_dropped,omitempty"`
// Cumulative count of bytes transmitted.
TxBytes uint64 `json:"tx_bytes,omitempty"`
// Cumulative count of packets transmitted.
TxPackets uint64 `json:"tx_packets,omitempty"`
// Cumulative count of transmit errors encountered.
TxErrors uint64 `json:"tx_errors,omitempty"`
// Cumulative count of packets dropped while transmitting.
TxDropped uint64 `json:"tx_dropped,omitempty"`
}
type ContainerStats struct {
// The time of this stat point.
Timestamp time.Time `json:"timestamp"`
Cpu *CpuStats `json:"cpu,omitempty"`
Memory *MemoryStats `json:"memory,omitempty"`
Timestamp time.Time `json:"timestamp"`
Cpu *CpuStats `json:"cpu,omitempty"`
Memory *MemoryStats `json:"memory,omitempty"`
Network *NetworkStats `json:"network,omitempty"`
}
// Makes a deep copy of the ContainerStats and returns a pointer to the new

View File

@ -127,7 +127,7 @@ func (c *containerData) housekeeping() {
func (c *containerData) housekeepingTick() {
err := c.updateStats()
if err != nil {
if err != nil && err != container.NotActive {
log.Printf("Failed to update stats for container \"%s\": %s", c.info.Name, err)
}
}

View File

@ -27,7 +27,7 @@ import (
type Manager interface {
// Start the manager, blocks forever.
Start() error
Start(chanErr chan error)
// Get information about a container.
GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error)
@ -73,16 +73,18 @@ type manager struct {
}
// Start the container manager.
func (m *manager) Start() error {
func (m *manager) Start(errChan chan error) {
// Create root and then recover all containers.
_, err := m.createContainer("/")
if err != nil {
return err
errChan <- err
return
}
log.Printf("Starting recovery of all containers")
err = m.detectContainers()
if err != nil {
return err
errChan <- err
return
}
log.Printf("Recovery completed")
@ -102,7 +104,7 @@ func (m *manager) Start() error {
log.Printf("Global Housekeeping(%d) took %s", t.Unix(), duration)
}
}
return nil
errChan <- nil
}
// Get a container by name.
@ -281,18 +283,22 @@ func (m *manager) detectContainers() error {
}
// Add the new containers.
for _, container := range added {
_, err = m.createContainer(container.Name)
for _, cont := range added {
_, err = m.createContainer(cont.Name)
if err != nil {
return fmt.Errorf("Failed to create existing container: %s: %s", container.Name, err)
if err != container.NotActive {
log.Printf("failed to create existing container: %s: %s", cont.Name, err)
}
}
}
// Remove the old containers.
for _, container := range removed {
err = m.destroyContainer(container.Name)
for _, cont := range removed {
err = m.destroyContainer(cont.Name)
if err != nil {
return fmt.Errorf("Failed to destroy existing container: %s: %s", container.Name, err)
if err != container.NotActive {
log.Printf("failed to destroy existing container: %s: %s", cont.Name, err)
}
}
}