Features:
Added Network Stats to Container Info. It is still not exposed via the HTTP UI. Bug fixes: 1. Modified docker handler to use libcontainer.GetStats instead of querying the fs package. 2. cAdvisor will not stall if any of its core operations fail. 3. cAdvisor will safely ignore any inactive or leaked docker containers. When containers are leaked, cgroup state exists but docker is not aware of them.
This commit is contained in:
parent
bc23a92932
commit
1edb798de5
@ -83,11 +83,15 @@ func main() {
|
||||
}
|
||||
})
|
||||
|
||||
go containerManager.Start()
|
||||
errChan := make(chan error)
|
||||
go containerManager.Start(errChan)
|
||||
|
||||
log.Printf("Starting cAdvisor version: %q", info.VERSION)
|
||||
log.Print("About to serve on port ", *argPort)
|
||||
|
||||
addr := fmt.Sprintf(":%v", *argPort)
|
||||
log.Fatal(http.ListenAndServe(addr, nil))
|
||||
go func() {
|
||||
errChan <- http.ListenAndServe(addr, nil)
|
||||
}()
|
||||
log.Fatal(<-errChan)
|
||||
}
|
||||
|
@ -14,7 +14,11 @@
|
||||
|
||||
package container
|
||||
|
||||
import "github.com/google/cadvisor/info"
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/google/cadvisor/info"
|
||||
)
|
||||
|
||||
// Listing types.
|
||||
const (
|
||||
@ -22,6 +26,8 @@ const (
|
||||
LIST_RECURSIVE
|
||||
)
|
||||
|
||||
var NotActive = errors.New("Container is not active")
|
||||
|
||||
type ListType int
|
||||
|
||||
// Interface for container operation handlers.
|
||||
|
@ -26,13 +26,16 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/docker/libcontainer"
|
||||
"github.com/docker/libcontainer/cgroups"
|
||||
"github.com/fsouza/go-dockerclient"
|
||||
"github.com/google/cadvisor/container"
|
||||
containerLibcontainer "github.com/google/cadvisor/container/libcontainer"
|
||||
"github.com/google/cadvisor/info"
|
||||
"github.com/google/cadvisor/utils"
|
||||
)
|
||||
|
||||
// Basepath to all container specific information that libcontainer stores.
|
||||
const dockerRootDir = "/var/lib/docker/execdriver/native"
|
||||
|
||||
type dockerContainerHandler struct {
|
||||
client *docker.Client
|
||||
name string
|
||||
@ -61,8 +64,9 @@ func newDockerContainerHandler(
|
||||
return nil, fmt.Errorf("invalid docker container %v: %v", name, err)
|
||||
}
|
||||
ctnr, err := client.InspectContainer(id)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to inspect container %v: %v", name, err)
|
||||
// We assume that if Inspect fails then the container is not known to docker.
|
||||
if err != nil || !ctnr.State.Running {
|
||||
return nil, container.NotActive
|
||||
}
|
||||
handler.aliases = append(handler.aliases, path.Join("/docker", ctnr.Name))
|
||||
return handler, nil
|
||||
@ -125,21 +129,50 @@ func (self *dockerContainerHandler) isDockerContainer() bool {
|
||||
}
|
||||
|
||||
// TODO(vmarmol): Switch to getting this from libcontainer once we have a solid API.
|
||||
func readLibcontainerSpec(id string) (spec *libcontainer.Config, err error) {
|
||||
dir := "/var/lib/docker/execdriver/native"
|
||||
configPath := path.Join(dir, id, "container.json")
|
||||
func readLibcontainerConfig(id string) (config *libcontainer.Config, err error) {
|
||||
configPath := path.Join(dockerRootDir, id, "container.json")
|
||||
if !utils.FileExists(configPath) {
|
||||
err = container.NotActive
|
||||
return
|
||||
}
|
||||
f, err := os.Open(configPath)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
d := json.NewDecoder(f)
|
||||
ret := new(libcontainer.Config)
|
||||
err = d.Decode(ret)
|
||||
retConfig := new(libcontainer.Config)
|
||||
err = d.Decode(retConfig)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
spec = ret
|
||||
config = retConfig
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func readLibcontainerState(id string) (state *libcontainer.State, err error) {
|
||||
statePath := path.Join(dockerRootDir, id, "state.json")
|
||||
if !utils.FileExists(statePath) {
|
||||
err = container.NotActive
|
||||
return
|
||||
}
|
||||
f, err := os.Open(statePath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
err = container.NotActive
|
||||
}
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
d := json.NewDecoder(f)
|
||||
retState := new(libcontainer.State)
|
||||
err = d.Decode(retState)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
state = retState
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@ -183,12 +216,12 @@ func (self *dockerContainerHandler) GetSpec() (spec *info.ContainerSpec, err err
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
libcontainerSpec, err := readLibcontainerSpec(id)
|
||||
libcontainerConfig, err := readLibcontainerConfig(id)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
spec = libcontainerConfigToContainerSpec(libcontainerSpec, mi)
|
||||
spec = libcontainerConfigToContainerSpec(libcontainerConfig, mi)
|
||||
return
|
||||
}
|
||||
|
||||
@ -199,15 +232,20 @@ func (self *dockerContainerHandler) GetStats() (stats *info.ContainerStats, err
|
||||
stats.Timestamp = time.Now()
|
||||
return
|
||||
}
|
||||
parent, id, err := self.splitName()
|
||||
_, id, err := self.splitName()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
cg := &cgroups.Cgroup{
|
||||
Parent: parent,
|
||||
Name: id,
|
||||
config, err := readLibcontainerConfig(id)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
return containerLibcontainer.GetStats(cg, self.useSystemd)
|
||||
state, err := readLibcontainerState(id)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
return containerLibcontainer.GetStats(config, state)
|
||||
}
|
||||
|
||||
func (self *dockerContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
|
||||
@ -215,7 +253,7 @@ func (self *dockerContainerHandler) ListContainers(listType container.ListType)
|
||||
return nil, nil
|
||||
}
|
||||
if self.isRootContainer() && listType == container.LIST_SELF {
|
||||
return []info.ContainerReference{info.ContainerReference{Name: "/docker"}}, nil
|
||||
return []info.ContainerReference{{Name: "/docker"}}, nil
|
||||
}
|
||||
opt := docker.ListContainersOptions{
|
||||
All: true,
|
||||
|
@ -60,9 +60,9 @@ func TestWhiteListContainerFilter(t *testing.T) {
|
||||
mockc := &mockContainerHandler{}
|
||||
mockc.On("ListContainers", LIST_RECURSIVE).Return(
|
||||
[]info.ContainerReference{
|
||||
info.ContainerReference{Name: "/docker/ee0103"},
|
||||
info.ContainerReference{Name: "/container/created/by/lmctfy"},
|
||||
info.ContainerReference{Name: "/user/something"},
|
||||
{Name: "/docker/ee0103"},
|
||||
{Name: "/container/created/by/lmctfy"},
|
||||
{Name: "/user/something"},
|
||||
},
|
||||
nil,
|
||||
)
|
||||
@ -95,9 +95,9 @@ func TestBlackListContainerFilter(t *testing.T) {
|
||||
mockc := &mockContainerHandler{}
|
||||
mockc.On("ListContainers", LIST_RECURSIVE).Return(
|
||||
[]info.ContainerReference{
|
||||
info.ContainerReference{Name: "/docker/ee0103"},
|
||||
info.ContainerReference{Name: "/container/created/by/lmctfy"},
|
||||
info.ContainerReference{Name: "/user/something"},
|
||||
{Name: "/docker/ee0103"},
|
||||
{Name: "/container/created/by/lmctfy"},
|
||||
{Name: "/user/something"},
|
||||
},
|
||||
nil,
|
||||
)
|
||||
|
@ -3,33 +3,23 @@ package libcontainer
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/docker/libcontainer/cgroups"
|
||||
"github.com/docker/libcontainer/cgroups/fs"
|
||||
"github.com/docker/libcontainer/cgroups/systemd"
|
||||
"github.com/docker/libcontainer"
|
||||
"github.com/google/cadvisor/info"
|
||||
)
|
||||
|
||||
// Get stats of the specified cgroup
|
||||
func GetStats(cgroup *cgroups.Cgroup, useSystemd bool) (*info.ContainerStats, error) {
|
||||
func GetStats(config *libcontainer.Config, state *libcontainer.State) (*info.ContainerStats, error) {
|
||||
// TODO(vmarmol): Use libcontainer's Stats() in the new API when that is ready.
|
||||
// Use systemd paths if systemd is being used.
|
||||
var (
|
||||
s *cgroups.Stats
|
||||
err error
|
||||
)
|
||||
if useSystemd {
|
||||
s, err = systemd.GetStats(cgroup)
|
||||
} else {
|
||||
s, err = fs.GetStats(cgroup)
|
||||
}
|
||||
libcontainerStats, err := libcontainer.GetStats(config, state)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return toContainerStats(s), nil
|
||||
return toContainerStats(libcontainerStats), nil
|
||||
}
|
||||
|
||||
// Convert libcontainer stats to info.ContainerStats.
|
||||
func toContainerStats(s *cgroups.Stats) *info.ContainerStats {
|
||||
func toContainerStats(libcontainerStats *libcontainer.ContainerStats) *info.ContainerStats {
|
||||
s := libcontainerStats.CgroupStats
|
||||
ret := new(info.ContainerStats)
|
||||
ret.Timestamp = time.Now()
|
||||
ret.Cpu = new(info.CpuStats)
|
||||
@ -59,5 +49,6 @@ func toContainerStats(s *cgroups.Stats) *info.ContainerStats {
|
||||
ret.Memory.WorkingSet -= v
|
||||
}
|
||||
}
|
||||
ret.Network = (*info.NetworkStats)(&libcontainerStats.NetworkStats)
|
||||
return ret
|
||||
}
|
||||
|
@ -96,7 +96,7 @@ func (self *rawContainerHandler) ListContainers(listType container.ListType) ([]
|
||||
|
||||
// Make into container references.
|
||||
ret := make([]info.ContainerReference, 0, len(containers))
|
||||
for cont, _ := range containers {
|
||||
for cont := range containers {
|
||||
ret = append(ret, info.ContainerReference{
|
||||
Name: cont,
|
||||
})
|
||||
|
@ -239,11 +239,31 @@ type MemoryStatsMemoryData struct {
|
||||
Pgmajfault uint64 `json:"pgmajfault,omitempty"`
|
||||
}
|
||||
|
||||
type NetworkStats struct {
|
||||
// Cumulative count of bytes received.
|
||||
RxBytes uint64 `json:"rx_bytes,omitempty"`
|
||||
// Cumulative count of packets received.
|
||||
RxPackets uint64 `json:"rx_packets,omitempty"`
|
||||
// Cumulative count of receive errors encountered.
|
||||
RxErrors uint64 `json:"rx_errors,omitempty"`
|
||||
// Cumulative count of packets dropped while receiving.
|
||||
RxDropped uint64 `json:"rx_dropped,omitempty"`
|
||||
// Cumulative count of bytes transmitted.
|
||||
TxBytes uint64 `json:"tx_bytes,omitempty"`
|
||||
// Cumulative count of packets transmitted.
|
||||
TxPackets uint64 `json:"tx_packets,omitempty"`
|
||||
// Cumulative count of transmit errors encountered.
|
||||
TxErrors uint64 `json:"tx_errors,omitempty"`
|
||||
// Cumulative count of packets dropped while transmitting.
|
||||
TxDropped uint64 `json:"tx_dropped,omitempty"`
|
||||
}
|
||||
|
||||
type ContainerStats struct {
|
||||
// The time of this stat point.
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Cpu *CpuStats `json:"cpu,omitempty"`
|
||||
Memory *MemoryStats `json:"memory,omitempty"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Cpu *CpuStats `json:"cpu,omitempty"`
|
||||
Memory *MemoryStats `json:"memory,omitempty"`
|
||||
Network *NetworkStats `json:"network,omitempty"`
|
||||
}
|
||||
|
||||
// Makes a deep copy of the ContainerStats and returns a pointer to the new
|
||||
|
@ -127,7 +127,7 @@ func (c *containerData) housekeeping() {
|
||||
|
||||
func (c *containerData) housekeepingTick() {
|
||||
err := c.updateStats()
|
||||
if err != nil {
|
||||
if err != nil && err != container.NotActive {
|
||||
log.Printf("Failed to update stats for container \"%s\": %s", c.info.Name, err)
|
||||
}
|
||||
}
|
||||
|
@ -27,7 +27,7 @@ import (
|
||||
|
||||
type Manager interface {
|
||||
// Start the manager, blocks forever.
|
||||
Start() error
|
||||
Start(chanErr chan error)
|
||||
|
||||
// Get information about a container.
|
||||
GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error)
|
||||
@ -73,16 +73,18 @@ type manager struct {
|
||||
}
|
||||
|
||||
// Start the container manager.
|
||||
func (m *manager) Start() error {
|
||||
func (m *manager) Start(errChan chan error) {
|
||||
// Create root and then recover all containers.
|
||||
_, err := m.createContainer("/")
|
||||
if err != nil {
|
||||
return err
|
||||
errChan <- err
|
||||
return
|
||||
}
|
||||
log.Printf("Starting recovery of all containers")
|
||||
err = m.detectContainers()
|
||||
if err != nil {
|
||||
return err
|
||||
errChan <- err
|
||||
return
|
||||
}
|
||||
log.Printf("Recovery completed")
|
||||
|
||||
@ -102,7 +104,7 @@ func (m *manager) Start() error {
|
||||
log.Printf("Global Housekeeping(%d) took %s", t.Unix(), duration)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
errChan <- nil
|
||||
}
|
||||
|
||||
// Get a container by name.
|
||||
@ -281,18 +283,22 @@ func (m *manager) detectContainers() error {
|
||||
}
|
||||
|
||||
// Add the new containers.
|
||||
for _, container := range added {
|
||||
_, err = m.createContainer(container.Name)
|
||||
for _, cont := range added {
|
||||
_, err = m.createContainer(cont.Name)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to create existing container: %s: %s", container.Name, err)
|
||||
if err != container.NotActive {
|
||||
log.Printf("failed to create existing container: %s: %s", cont.Name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the old containers.
|
||||
for _, container := range removed {
|
||||
err = m.destroyContainer(container.Name)
|
||||
for _, cont := range removed {
|
||||
err = m.destroyContainer(cont.Name)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to destroy existing container: %s: %s", container.Name, err)
|
||||
if err != container.NotActive {
|
||||
log.Printf("failed to destroy existing container: %s: %s", cont.Name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user