Features:
Added Network Stats to Container Info. It is still not exposed via the HTTP UI. Bug fixes: 1. Modified the Docker handler to use libcontainer.GetStats instead of querying the fs package. 2. cAdvisor will not stall if any of its core operations fail. 3. cAdvisor will safely ignore any inactive or leaked Docker containers. When containers are leaked, cgroup state exists but Docker is not aware of them.
This commit is contained in:
parent
bc23a92932
commit
1edb798de5
@ -83,11 +83,15 @@ func main() {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
go containerManager.Start()
|
errChan := make(chan error)
|
||||||
|
go containerManager.Start(errChan)
|
||||||
|
|
||||||
log.Printf("Starting cAdvisor version: %q", info.VERSION)
|
log.Printf("Starting cAdvisor version: %q", info.VERSION)
|
||||||
log.Print("About to serve on port ", *argPort)
|
log.Print("About to serve on port ", *argPort)
|
||||||
|
|
||||||
addr := fmt.Sprintf(":%v", *argPort)
|
addr := fmt.Sprintf(":%v", *argPort)
|
||||||
log.Fatal(http.ListenAndServe(addr, nil))
|
go func() {
|
||||||
|
errChan <- http.ListenAndServe(addr, nil)
|
||||||
|
}()
|
||||||
|
log.Fatal(<-errChan)
|
||||||
}
|
}
|
||||||
|
@ -14,7 +14,11 @@
|
|||||||
|
|
||||||
package container
|
package container
|
||||||
|
|
||||||
import "github.com/google/cadvisor/info"
|
import (
|
||||||
|
"errors"
|
||||||
|
|
||||||
|
"github.com/google/cadvisor/info"
|
||||||
|
)
|
||||||
|
|
||||||
// Listing types.
|
// Listing types.
|
||||||
const (
|
const (
|
||||||
@ -22,6 +26,8 @@ const (
|
|||||||
LIST_RECURSIVE
|
LIST_RECURSIVE
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var NotActive = errors.New("Container is not active")
|
||||||
|
|
||||||
type ListType int
|
type ListType int
|
||||||
|
|
||||||
// Interface for container operation handlers.
|
// Interface for container operation handlers.
|
||||||
|
@ -26,13 +26,16 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/docker/libcontainer"
|
"github.com/docker/libcontainer"
|
||||||
"github.com/docker/libcontainer/cgroups"
|
|
||||||
"github.com/fsouza/go-dockerclient"
|
"github.com/fsouza/go-dockerclient"
|
||||||
"github.com/google/cadvisor/container"
|
"github.com/google/cadvisor/container"
|
||||||
containerLibcontainer "github.com/google/cadvisor/container/libcontainer"
|
containerLibcontainer "github.com/google/cadvisor/container/libcontainer"
|
||||||
"github.com/google/cadvisor/info"
|
"github.com/google/cadvisor/info"
|
||||||
|
"github.com/google/cadvisor/utils"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Basepath to all container specific information that libcontainer stores.
|
||||||
|
const dockerRootDir = "/var/lib/docker/execdriver/native"
|
||||||
|
|
||||||
type dockerContainerHandler struct {
|
type dockerContainerHandler struct {
|
||||||
client *docker.Client
|
client *docker.Client
|
||||||
name string
|
name string
|
||||||
@ -61,8 +64,9 @@ func newDockerContainerHandler(
|
|||||||
return nil, fmt.Errorf("invalid docker container %v: %v", name, err)
|
return nil, fmt.Errorf("invalid docker container %v: %v", name, err)
|
||||||
}
|
}
|
||||||
ctnr, err := client.InspectContainer(id)
|
ctnr, err := client.InspectContainer(id)
|
||||||
if err != nil {
|
// We assume that if Inspect fails then the container is not known to docker.
|
||||||
return nil, fmt.Errorf("unable to inspect container %v: %v", name, err)
|
if err != nil || !ctnr.State.Running {
|
||||||
|
return nil, container.NotActive
|
||||||
}
|
}
|
||||||
handler.aliases = append(handler.aliases, path.Join("/docker", ctnr.Name))
|
handler.aliases = append(handler.aliases, path.Join("/docker", ctnr.Name))
|
||||||
return handler, nil
|
return handler, nil
|
||||||
@ -125,21 +129,50 @@ func (self *dockerContainerHandler) isDockerContainer() bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO(vmarmol): Switch to getting this from libcontainer once we have a solid API.
|
// TODO(vmarmol): Switch to getting this from libcontainer once we have a solid API.
|
||||||
func readLibcontainerSpec(id string) (spec *libcontainer.Config, err error) {
|
func readLibcontainerConfig(id string) (config *libcontainer.Config, err error) {
|
||||||
dir := "/var/lib/docker/execdriver/native"
|
configPath := path.Join(dockerRootDir, id, "container.json")
|
||||||
configPath := path.Join(dir, id, "container.json")
|
if !utils.FileExists(configPath) {
|
||||||
|
err = container.NotActive
|
||||||
|
return
|
||||||
|
}
|
||||||
f, err := os.Open(configPath)
|
f, err := os.Open(configPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
d := json.NewDecoder(f)
|
d := json.NewDecoder(f)
|
||||||
ret := new(libcontainer.Config)
|
retConfig := new(libcontainer.Config)
|
||||||
err = d.Decode(ret)
|
err = d.Decode(retConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
spec = ret
|
config = retConfig
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func readLibcontainerState(id string) (state *libcontainer.State, err error) {
|
||||||
|
statePath := path.Join(dockerRootDir, id, "state.json")
|
||||||
|
if !utils.FileExists(statePath) {
|
||||||
|
err = container.NotActive
|
||||||
|
return
|
||||||
|
}
|
||||||
|
f, err := os.Open(statePath)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
err = container.NotActive
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
d := json.NewDecoder(f)
|
||||||
|
retState := new(libcontainer.State)
|
||||||
|
err = d.Decode(retState)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
state = retState
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -183,12 +216,12 @@ func (self *dockerContainerHandler) GetSpec() (spec *info.ContainerSpec, err err
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
libcontainerSpec, err := readLibcontainerSpec(id)
|
libcontainerConfig, err := readLibcontainerConfig(id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
spec = libcontainerConfigToContainerSpec(libcontainerSpec, mi)
|
spec = libcontainerConfigToContainerSpec(libcontainerConfig, mi)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -199,15 +232,20 @@ func (self *dockerContainerHandler) GetStats() (stats *info.ContainerStats, err
|
|||||||
stats.Timestamp = time.Now()
|
stats.Timestamp = time.Now()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
parent, id, err := self.splitName()
|
_, id, err := self.splitName()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
cg := &cgroups.Cgroup{
|
config, err := readLibcontainerConfig(id)
|
||||||
Parent: parent,
|
if err != nil {
|
||||||
Name: id,
|
return
|
||||||
}
|
}
|
||||||
return containerLibcontainer.GetStats(cg, self.useSystemd)
|
state, err := readLibcontainerState(id)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
return containerLibcontainer.GetStats(config, state)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (self *dockerContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
|
func (self *dockerContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
|
||||||
@ -215,7 +253,7 @@ func (self *dockerContainerHandler) ListContainers(listType container.ListType)
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
if self.isRootContainer() && listType == container.LIST_SELF {
|
if self.isRootContainer() && listType == container.LIST_SELF {
|
||||||
return []info.ContainerReference{info.ContainerReference{Name: "/docker"}}, nil
|
return []info.ContainerReference{{Name: "/docker"}}, nil
|
||||||
}
|
}
|
||||||
opt := docker.ListContainersOptions{
|
opt := docker.ListContainersOptions{
|
||||||
All: true,
|
All: true,
|
||||||
|
@ -60,9 +60,9 @@ func TestWhiteListContainerFilter(t *testing.T) {
|
|||||||
mockc := &mockContainerHandler{}
|
mockc := &mockContainerHandler{}
|
||||||
mockc.On("ListContainers", LIST_RECURSIVE).Return(
|
mockc.On("ListContainers", LIST_RECURSIVE).Return(
|
||||||
[]info.ContainerReference{
|
[]info.ContainerReference{
|
||||||
info.ContainerReference{Name: "/docker/ee0103"},
|
{Name: "/docker/ee0103"},
|
||||||
info.ContainerReference{Name: "/container/created/by/lmctfy"},
|
{Name: "/container/created/by/lmctfy"},
|
||||||
info.ContainerReference{Name: "/user/something"},
|
{Name: "/user/something"},
|
||||||
},
|
},
|
||||||
nil,
|
nil,
|
||||||
)
|
)
|
||||||
@ -95,9 +95,9 @@ func TestBlackListContainerFilter(t *testing.T) {
|
|||||||
mockc := &mockContainerHandler{}
|
mockc := &mockContainerHandler{}
|
||||||
mockc.On("ListContainers", LIST_RECURSIVE).Return(
|
mockc.On("ListContainers", LIST_RECURSIVE).Return(
|
||||||
[]info.ContainerReference{
|
[]info.ContainerReference{
|
||||||
info.ContainerReference{Name: "/docker/ee0103"},
|
{Name: "/docker/ee0103"},
|
||||||
info.ContainerReference{Name: "/container/created/by/lmctfy"},
|
{Name: "/container/created/by/lmctfy"},
|
||||||
info.ContainerReference{Name: "/user/something"},
|
{Name: "/user/something"},
|
||||||
},
|
},
|
||||||
nil,
|
nil,
|
||||||
)
|
)
|
||||||
|
@ -3,33 +3,23 @@ package libcontainer
|
|||||||
import (
|
import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/docker/libcontainer/cgroups"
|
"github.com/docker/libcontainer"
|
||||||
"github.com/docker/libcontainer/cgroups/fs"
|
|
||||||
"github.com/docker/libcontainer/cgroups/systemd"
|
|
||||||
"github.com/google/cadvisor/info"
|
"github.com/google/cadvisor/info"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Get stats of the specified cgroup
|
// Get stats of the specified cgroup
|
||||||
func GetStats(cgroup *cgroups.Cgroup, useSystemd bool) (*info.ContainerStats, error) {
|
func GetStats(config *libcontainer.Config, state *libcontainer.State) (*info.ContainerStats, error) {
|
||||||
// TODO(vmarmol): Use libcontainer's Stats() in the new API when that is ready.
|
// TODO(vmarmol): Use libcontainer's Stats() in the new API when that is ready.
|
||||||
// Use systemd paths if systemd is being used.
|
libcontainerStats, err := libcontainer.GetStats(config, state)
|
||||||
var (
|
|
||||||
s *cgroups.Stats
|
|
||||||
err error
|
|
||||||
)
|
|
||||||
if useSystemd {
|
|
||||||
s, err = systemd.GetStats(cgroup)
|
|
||||||
} else {
|
|
||||||
s, err = fs.GetStats(cgroup)
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return toContainerStats(s), nil
|
return toContainerStats(libcontainerStats), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert libcontainer stats to info.ContainerStats.
|
// Convert libcontainer stats to info.ContainerStats.
|
||||||
func toContainerStats(s *cgroups.Stats) *info.ContainerStats {
|
func toContainerStats(libcontainerStats *libcontainer.ContainerStats) *info.ContainerStats {
|
||||||
|
s := libcontainerStats.CgroupStats
|
||||||
ret := new(info.ContainerStats)
|
ret := new(info.ContainerStats)
|
||||||
ret.Timestamp = time.Now()
|
ret.Timestamp = time.Now()
|
||||||
ret.Cpu = new(info.CpuStats)
|
ret.Cpu = new(info.CpuStats)
|
||||||
@ -59,5 +49,6 @@ func toContainerStats(s *cgroups.Stats) *info.ContainerStats {
|
|||||||
ret.Memory.WorkingSet -= v
|
ret.Memory.WorkingSet -= v
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
ret.Network = (*info.NetworkStats)(&libcontainerStats.NetworkStats)
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
@ -96,7 +96,7 @@ func (self *rawContainerHandler) ListContainers(listType container.ListType) ([]
|
|||||||
|
|
||||||
// Make into container references.
|
// Make into container references.
|
||||||
ret := make([]info.ContainerReference, 0, len(containers))
|
ret := make([]info.ContainerReference, 0, len(containers))
|
||||||
for cont, _ := range containers {
|
for cont := range containers {
|
||||||
ret = append(ret, info.ContainerReference{
|
ret = append(ret, info.ContainerReference{
|
||||||
Name: cont,
|
Name: cont,
|
||||||
})
|
})
|
||||||
|
@ -239,11 +239,31 @@ type MemoryStatsMemoryData struct {
|
|||||||
Pgmajfault uint64 `json:"pgmajfault,omitempty"`
|
Pgmajfault uint64 `json:"pgmajfault,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type NetworkStats struct {
|
||||||
|
// Cumulative count of bytes received.
|
||||||
|
RxBytes uint64 `json:"rx_bytes,omitempty"`
|
||||||
|
// Cumulative count of packets received.
|
||||||
|
RxPackets uint64 `json:"rx_packets,omitempty"`
|
||||||
|
// Cumulative count of receive errors encountered.
|
||||||
|
RxErrors uint64 `json:"rx_errors,omitempty"`
|
||||||
|
// Cumulative count of packets dropped while receiving.
|
||||||
|
RxDropped uint64 `json:"rx_dropped,omitempty"`
|
||||||
|
// Cumulative count of bytes transmitted.
|
||||||
|
TxBytes uint64 `json:"tx_bytes,omitempty"`
|
||||||
|
// Cumulative count of packets transmitted.
|
||||||
|
TxPackets uint64 `json:"tx_packets,omitempty"`
|
||||||
|
// Cumulative count of transmit errors encountered.
|
||||||
|
TxErrors uint64 `json:"tx_errors,omitempty"`
|
||||||
|
// Cumulative count of packets dropped while transmitting.
|
||||||
|
TxDropped uint64 `json:"tx_dropped,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type ContainerStats struct {
|
type ContainerStats struct {
|
||||||
// The time of this stat point.
|
// The time of this stat point.
|
||||||
Timestamp time.Time `json:"timestamp"`
|
Timestamp time.Time `json:"timestamp"`
|
||||||
Cpu *CpuStats `json:"cpu,omitempty"`
|
Cpu *CpuStats `json:"cpu,omitempty"`
|
||||||
Memory *MemoryStats `json:"memory,omitempty"`
|
Memory *MemoryStats `json:"memory,omitempty"`
|
||||||
|
Network *NetworkStats `json:"network,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Makes a deep copy of the ContainerStats and returns a pointer to the new
|
// Makes a deep copy of the ContainerStats and returns a pointer to the new
|
||||||
|
@ -127,7 +127,7 @@ func (c *containerData) housekeeping() {
|
|||||||
|
|
||||||
func (c *containerData) housekeepingTick() {
|
func (c *containerData) housekeepingTick() {
|
||||||
err := c.updateStats()
|
err := c.updateStats()
|
||||||
if err != nil {
|
if err != nil && err != container.NotActive {
|
||||||
log.Printf("Failed to update stats for container \"%s\": %s", c.info.Name, err)
|
log.Printf("Failed to update stats for container \"%s\": %s", c.info.Name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -27,7 +27,7 @@ import (
|
|||||||
|
|
||||||
type Manager interface {
|
type Manager interface {
|
||||||
// Start the manager, blocks forever.
|
// Start the manager, blocks forever.
|
||||||
Start() error
|
Start(chanErr chan error)
|
||||||
|
|
||||||
// Get information about a container.
|
// Get information about a container.
|
||||||
GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error)
|
GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error)
|
||||||
@ -73,16 +73,18 @@ type manager struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Start the container manager.
|
// Start the container manager.
|
||||||
func (m *manager) Start() error {
|
func (m *manager) Start(errChan chan error) {
|
||||||
// Create root and then recover all containers.
|
// Create root and then recover all containers.
|
||||||
_, err := m.createContainer("/")
|
_, err := m.createContainer("/")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
errChan <- err
|
||||||
|
return
|
||||||
}
|
}
|
||||||
log.Printf("Starting recovery of all containers")
|
log.Printf("Starting recovery of all containers")
|
||||||
err = m.detectContainers()
|
err = m.detectContainers()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
errChan <- err
|
||||||
|
return
|
||||||
}
|
}
|
||||||
log.Printf("Recovery completed")
|
log.Printf("Recovery completed")
|
||||||
|
|
||||||
@ -102,7 +104,7 @@ func (m *manager) Start() error {
|
|||||||
log.Printf("Global Housekeeping(%d) took %s", t.Unix(), duration)
|
log.Printf("Global Housekeeping(%d) took %s", t.Unix(), duration)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
errChan <- nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get a container by name.
|
// Get a container by name.
|
||||||
@ -281,18 +283,22 @@ func (m *manager) detectContainers() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add the new containers.
|
// Add the new containers.
|
||||||
for _, container := range added {
|
for _, cont := range added {
|
||||||
_, err = m.createContainer(container.Name)
|
_, err = m.createContainer(cont.Name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Failed to create existing container: %s: %s", container.Name, err)
|
if err != container.NotActive {
|
||||||
|
log.Printf("failed to create existing container: %s: %s", cont.Name, err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove the old containers.
|
// Remove the old containers.
|
||||||
for _, container := range removed {
|
for _, cont := range removed {
|
||||||
err = m.destroyContainer(container.Name)
|
err = m.destroyContainer(cont.Name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("Failed to destroy existing container: %s: %s", container.Name, err)
|
if err != container.NotActive {
|
||||||
|
log.Printf("failed to destroy existing container: %s: %s", cont.Name, err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user