Ignore update errors for dead containers.
This adds an Exists() interface to detect when the container is dead. Before reporting an update error we check is Exists() is true. Some documentation was added as well.
This commit is contained in:
parent
c005bc1722
commit
e3ab15417c
@ -60,6 +60,7 @@ func RegisterHandlers(m manager.Manager) error {
|
|||||||
|
|
||||||
func handleRequest(m manager.Manager, w http.ResponseWriter, r *http.Request) error {
|
func handleRequest(m manager.Manager, w http.ResponseWriter, r *http.Request) error {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
defer glog.V(2).Infof("Request took %s", time.Since(start))
|
||||||
|
|
||||||
request := r.URL.Path
|
request := r.URL.Path
|
||||||
requestElements := strings.Split(r.URL.Path, "/")
|
requestElements := strings.Split(r.URL.Path, "/")
|
||||||
@ -195,7 +196,6 @@ func handleRequest(m manager.Manager, w http.ResponseWriter, r *http.Request) er
|
|||||||
return fmt.Errorf("unknown API request type %q", requestType)
|
return fmt.Errorf("unknown API request type %q", requestType)
|
||||||
}
|
}
|
||||||
|
|
||||||
glog.V(2).Infof("Request took %s", time.Since(start))
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -154,7 +154,7 @@ func (self *Client) httpGetJsonData(data, postData interface{}, url, infoName st
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if err = json.Unmarshal(body, data); err != nil {
|
if err = json.Unmarshal(body, data); err != nil {
|
||||||
err = fmt.Errorf("unable to unmarshal %q (%v): %v", infoName, string(body), err)
|
err = fmt.Errorf("unable to unmarshal %q (Body: %q) with error: %v", infoName, string(body), err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
@ -46,12 +46,30 @@ type SubcontainerEvent struct {
|
|||||||
|
|
||||||
// Interface for container operation handlers.
|
// Interface for container operation handlers.
|
||||||
type ContainerHandler interface {
|
type ContainerHandler interface {
|
||||||
|
// Returns the ContainerReference
|
||||||
ContainerReference() (info.ContainerReference, error)
|
ContainerReference() (info.ContainerReference, error)
|
||||||
|
|
||||||
|
// Returns container's isolation spec.
|
||||||
GetSpec() (info.ContainerSpec, error)
|
GetSpec() (info.ContainerSpec, error)
|
||||||
|
|
||||||
|
// Returns the current stats values of the container.
|
||||||
GetStats() (*info.ContainerStats, error)
|
GetStats() (*info.ContainerStats, error)
|
||||||
|
|
||||||
|
// Returns the subcontainers of this container.
|
||||||
ListContainers(listType ListType) ([]info.ContainerReference, error)
|
ListContainers(listType ListType) ([]info.ContainerReference, error)
|
||||||
|
|
||||||
|
// Returns the threads inside this container.
|
||||||
ListThreads(listType ListType) ([]int, error)
|
ListThreads(listType ListType) ([]int, error)
|
||||||
|
|
||||||
|
// Returns the processes inside this container.
|
||||||
ListProcesses(listType ListType) ([]int, error)
|
ListProcesses(listType ListType) ([]int, error)
|
||||||
|
|
||||||
|
// Registers a channel to listen for events affecting subcontainers (recursively).
|
||||||
WatchSubcontainers(events chan SubcontainerEvent) error
|
WatchSubcontainers(events chan SubcontainerEvent) error
|
||||||
|
|
||||||
|
// Stops watching for subcontainer changes.
|
||||||
StopWatchingSubcontainers() error
|
StopWatchingSubcontainers() error
|
||||||
|
|
||||||
|
// Returns whether the container still exists.
|
||||||
|
Exists() bool
|
||||||
}
|
}
|
||||||
|
@ -12,11 +12,11 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Handler for Docker containers.
|
||||||
package docker
|
package docker
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
"os"
|
"os"
|
||||||
@ -27,7 +27,6 @@ import (
|
|||||||
"github.com/docker/libcontainer/cgroups"
|
"github.com/docker/libcontainer/cgroups"
|
||||||
cgroup_fs "github.com/docker/libcontainer/cgroups/fs"
|
cgroup_fs "github.com/docker/libcontainer/cgroups/fs"
|
||||||
"github.com/fsouza/go-dockerclient"
|
"github.com/fsouza/go-dockerclient"
|
||||||
"github.com/golang/glog"
|
|
||||||
"github.com/google/cadvisor/container"
|
"github.com/google/cadvisor/container"
|
||||||
containerLibcontainer "github.com/google/cadvisor/container/libcontainer"
|
containerLibcontainer "github.com/google/cadvisor/container/libcontainer"
|
||||||
"github.com/google/cadvisor/fs"
|
"github.com/google/cadvisor/fs"
|
||||||
@ -43,19 +42,27 @@ const pathToLibcontainerState = "execdriver/native"
|
|||||||
// aufs/mnt contains the mount points used to compose the rootfs. Hence it is also ignored.
|
// aufs/mnt contains the mount points used to compose the rootfs. Hence it is also ignored.
|
||||||
var pathToAufsDir = "aufs/diff"
|
var pathToAufsDir = "aufs/diff"
|
||||||
|
|
||||||
var fileNotFound = errors.New("file not found")
|
|
||||||
|
|
||||||
type dockerContainerHandler struct {
|
type dockerContainerHandler struct {
|
||||||
client *docker.Client
|
client *docker.Client
|
||||||
name string
|
name string
|
||||||
id string
|
id string
|
||||||
aliases []string
|
aliases []string
|
||||||
machineInfoFactory info.MachineInfoFactory
|
machineInfoFactory info.MachineInfoFactory
|
||||||
libcontainerStateDir string
|
|
||||||
cgroup cgroups.Cgroup
|
// Path to the libcontainer config file.
|
||||||
usesAufsDriver bool
|
libcontainerConfigPath string
|
||||||
fsInfo fs.FsInfo
|
|
||||||
storageDirs []string
|
// Path to the libcontainer state file.
|
||||||
|
libcontainerStatePath string
|
||||||
|
|
||||||
|
// TODO(vmarmol): Remove when we depend on a newer Docker.
|
||||||
|
// Path to the libcontainer pid file.
|
||||||
|
libcontainerPidPath string
|
||||||
|
|
||||||
|
cgroup cgroups.Cgroup
|
||||||
|
usesAufsDriver bool
|
||||||
|
fsInfo fs.FsInfo
|
||||||
|
storageDirs []string
|
||||||
}
|
}
|
||||||
|
|
||||||
func newDockerContainerHandler(
|
func newDockerContainerHandler(
|
||||||
@ -69,11 +76,15 @@ func newDockerContainerHandler(
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
id := ContainerNameToDockerId(name)
|
||||||
handler := &dockerContainerHandler{
|
handler := &dockerContainerHandler{
|
||||||
client: client,
|
id: id,
|
||||||
name: name,
|
client: client,
|
||||||
machineInfoFactory: machineInfoFactory,
|
name: name,
|
||||||
libcontainerStateDir: path.Join(dockerRootDir, pathToLibcontainerState),
|
machineInfoFactory: machineInfoFactory,
|
||||||
|
libcontainerConfigPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "container.json"),
|
||||||
|
libcontainerStatePath: path.Join(dockerRootDir, pathToLibcontainerState, id, "state.json"),
|
||||||
|
libcontainerPidPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "pid"),
|
||||||
cgroup: cgroups.Cgroup{
|
cgroup: cgroups.Cgroup{
|
||||||
Parent: "/",
|
Parent: "/",
|
||||||
Name: name,
|
Name: name,
|
||||||
@ -82,12 +93,11 @@ func newDockerContainerHandler(
|
|||||||
fsInfo: fsInfo,
|
fsInfo: fsInfo,
|
||||||
}
|
}
|
||||||
handler.storageDirs = append(handler.storageDirs, path.Join(dockerRootDir, pathToAufsDir, path.Base(name)))
|
handler.storageDirs = append(handler.storageDirs, path.Join(dockerRootDir, pathToAufsDir, path.Base(name)))
|
||||||
id := ContainerNameToDockerId(name)
|
|
||||||
handler.id = id
|
|
||||||
ctnr, err := client.InspectContainer(id)
|
|
||||||
// We assume that if Inspect fails then the container is not known to docker.
|
// We assume that if Inspect fails then the container is not known to docker.
|
||||||
|
ctnr, err := client.InspectContainer(id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to inspect container %s - %s\n", id, err)
|
return nil, fmt.Errorf("failed to inspect container %q: %v", id, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add the name and bare ID as aliases of the container.
|
// Add the name and bare ID as aliases of the container.
|
||||||
@ -107,15 +117,9 @@ func (self *dockerContainerHandler) ContainerReference() (info.ContainerReferenc
|
|||||||
|
|
||||||
// TODO(vmarmol): Switch to getting this from libcontainer once we have a solid API.
|
// TODO(vmarmol): Switch to getting this from libcontainer once we have a solid API.
|
||||||
func (self *dockerContainerHandler) readLibcontainerConfig() (config *libcontainer.Config, err error) {
|
func (self *dockerContainerHandler) readLibcontainerConfig() (config *libcontainer.Config, err error) {
|
||||||
configPath := path.Join(self.libcontainerStateDir, self.id, "container.json")
|
f, err := os.Open(self.libcontainerConfigPath)
|
||||||
if !utils.FileExists(configPath) {
|
|
||||||
// TODO(vishh): Return file name as well once we have a better error interface.
|
|
||||||
err = fileNotFound
|
|
||||||
return
|
|
||||||
}
|
|
||||||
f, err := os.Open(configPath)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to open %s - %s\n", configPath, err)
|
return nil, fmt.Errorf("failed to open %s - %s\n", self.libcontainerConfigPath, err)
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
d := json.NewDecoder(f)
|
d := json.NewDecoder(f)
|
||||||
@ -134,23 +138,17 @@ func (self *dockerContainerHandler) readLibcontainerConfig() (config *libcontain
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (self *dockerContainerHandler) readLibcontainerState() (state *libcontainer.State, err error) {
|
func (self *dockerContainerHandler) readLibcontainerState() (state *libcontainer.State, err error) {
|
||||||
statePath := path.Join(self.libcontainerStateDir, self.id, "state.json")
|
// TODO(vmarmol): Remove this once we can depend on a newer Docker.
|
||||||
if !utils.FileExists(statePath) {
|
// Libcontainer changed how its state was stored, try the old way of a "pid" file
|
||||||
// TODO(vmarmol): Remove this once we can depend on a newer Docker.
|
if !utils.FileExists(self.libcontainerStatePath) {
|
||||||
// Libcontainer changed how its state was stored, try the old way of a "pid" file
|
if utils.FileExists(self.libcontainerPidPath) {
|
||||||
if utils.FileExists(path.Join(self.libcontainerStateDir, self.id, "pid")) {
|
|
||||||
// We don't need the old state, return an empty state and we'll gracefully degrade.
|
// We don't need the old state, return an empty state and we'll gracefully degrade.
|
||||||
state = new(libcontainer.State)
|
return &libcontainer.State{}, nil
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(vishh): Return file name as well once we have a better error interface.
|
|
||||||
err = fileNotFound
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
f, err := os.Open(statePath)
|
f, err := os.Open(self.libcontainerStatePath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to open %s - %s\n", statePath, err)
|
return nil, fmt.Errorf("failed to open %s - %s\n", self.libcontainerStatePath, err)
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
d := json.NewDecoder(f)
|
d := json.NewDecoder(f)
|
||||||
@ -258,10 +256,6 @@ func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error
|
|||||||
func (self *dockerContainerHandler) GetStats() (stats *info.ContainerStats, err error) {
|
func (self *dockerContainerHandler) GetStats() (stats *info.ContainerStats, err error) {
|
||||||
state, err := self.readLibcontainerState()
|
state, err := self.readLibcontainerState()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if err == fileNotFound {
|
|
||||||
glog.Errorf("Libcontainer state not found for container %q", self.name)
|
|
||||||
return &info.ContainerStats{}, nil
|
|
||||||
}
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -322,3 +316,8 @@ func (self *dockerContainerHandler) StopWatchingSubcontainers() error {
|
|||||||
// No-op for Docker driver.
|
// No-op for Docker driver.
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (self *dockerContainerHandler) Exists() bool {
|
||||||
|
// We consider the container existing if both libcontainer config and state files exist.
|
||||||
|
return utils.FileExists(self.libcontainerConfigPath) && utils.FileExists(self.libcontainerStatePath)
|
||||||
|
}
|
||||||
|
@ -85,6 +85,11 @@ func (self *MockContainerHandler) StopWatchingSubcontainers() error {
|
|||||||
return args.Error(0)
|
return args.Error(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (self *MockContainerHandler) Exists() bool {
|
||||||
|
args := self.Called()
|
||||||
|
return args.Get(0).(bool)
|
||||||
|
}
|
||||||
|
|
||||||
type FactoryForMockContainerHandler struct {
|
type FactoryForMockContainerHandler struct {
|
||||||
Name string
|
Name string
|
||||||
PrepareContainerHandlerFunc func(name string, handler *MockContainerHandler)
|
PrepareContainerHandlerFunc func(name string, handler *MockContainerHandler)
|
||||||
|
@ -25,9 +25,11 @@ import (
|
|||||||
|
|
||||||
type cgroupSubsystems struct {
|
type cgroupSubsystems struct {
|
||||||
// Cgroup subsystem mounts.
|
// Cgroup subsystem mounts.
|
||||||
|
// e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"]
|
||||||
mounts []cgroups.Mount
|
mounts []cgroups.Mount
|
||||||
|
|
||||||
// Cgroup subsystem to their mount location.
|
// Cgroup subsystem to their mount location.
|
||||||
|
// e.g.: "cpu" -> "/sys/fs/cgroup/cpu"
|
||||||
mountPoints map[string]string
|
mountPoints map[string]string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
// TODO(cAdvisor): Package comment.
|
// Handler for "raw" containers.
|
||||||
package raw
|
package raw
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@ -478,3 +478,13 @@ func (self *rawContainerHandler) StopWatchingSubcontainers() error {
|
|||||||
self.stopWatcher <- nil
|
self.stopWatcher <- nil
|
||||||
return <-self.stopWatcher
|
return <-self.stopWatcher
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (self *rawContainerHandler) Exists() bool {
|
||||||
|
// If any cgroup exists, the container is still alive.
|
||||||
|
for _, subsystem := range self.cgroupSubsystems.mounts {
|
||||||
|
if utils.FileExists(path.Join(subsystem.Mountpoint, self.name)) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
@ -118,11 +118,11 @@ func (self *containerData) nextHousekeeping(lastHousekeeping time.Time) time.Tim
|
|||||||
if self.housekeepingInterval > *maxHousekeepingInterval {
|
if self.housekeepingInterval > *maxHousekeepingInterval {
|
||||||
self.housekeepingInterval = *maxHousekeepingInterval
|
self.housekeepingInterval = *maxHousekeepingInterval
|
||||||
}
|
}
|
||||||
glog.V(2).Infof("Raising housekeeping interval for %q to %v", self.info.Name, self.housekeepingInterval)
|
glog.V(3).Infof("Raising housekeeping interval for %q to %v", self.info.Name, self.housekeepingInterval)
|
||||||
} else if self.housekeepingInterval != *HousekeepingInterval {
|
} else if self.housekeepingInterval != *HousekeepingInterval {
|
||||||
// Lower interval back to the baseline.
|
// Lower interval back to the baseline.
|
||||||
self.housekeepingInterval = *HousekeepingInterval
|
self.housekeepingInterval = *HousekeepingInterval
|
||||||
glog.V(1).Infof("Lowering housekeeping interval for %q to %v", self.info.Name, self.housekeepingInterval)
|
glog.V(3).Infof("Lowering housekeeping interval for %q to %v", self.info.Name, self.housekeepingInterval)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -193,6 +193,10 @@ func (c *containerData) housekeepingTick() {
|
|||||||
func (c *containerData) updateSpec() error {
|
func (c *containerData) updateSpec() error {
|
||||||
spec, err := c.handler.GetSpec()
|
spec, err := c.handler.GetSpec()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// Ignore errors if the container is dead.
|
||||||
|
if !c.handler.Exists() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
c.lock.Lock()
|
c.lock.Lock()
|
||||||
@ -204,6 +208,10 @@ func (c *containerData) updateSpec() error {
|
|||||||
func (c *containerData) updateStats() error {
|
func (c *containerData) updateStats() error {
|
||||||
stats, err := c.handler.GetStats()
|
stats, err := c.handler.GetStats()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// Ignore errors if the container is dead.
|
||||||
|
if !c.handler.Exists() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if stats == nil {
|
if stats == nil {
|
||||||
@ -211,6 +219,10 @@ func (c *containerData) updateStats() error {
|
|||||||
}
|
}
|
||||||
ref, err := c.handler.ContainerReference()
|
ref, err := c.handler.ContainerReference()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// Ignore errors if the container is dead.
|
||||||
|
if !c.handler.Exists() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
err = c.storageDriver.AddStats(ref, stats)
|
err = c.storageDriver.AddStats(ref, stats)
|
||||||
@ -223,6 +235,10 @@ func (c *containerData) updateStats() error {
|
|||||||
func (c *containerData) updateSubcontainers() error {
|
func (c *containerData) updateSubcontainers() error {
|
||||||
subcontainers, err := c.handler.ListContainers(container.ListSelf)
|
subcontainers, err := c.handler.ListContainers(container.ListSelf)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
// Ignore errors if the container is dead.
|
||||||
|
if !c.handler.Exists() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
c.lock.Lock()
|
c.lock.Lock()
|
||||||
|
@ -26,6 +26,7 @@ import (
|
|||||||
"github.com/google/cadvisor/info"
|
"github.com/google/cadvisor/info"
|
||||||
itest "github.com/google/cadvisor/info/test"
|
itest "github.com/google/cadvisor/info/test"
|
||||||
stest "github.com/google/cadvisor/storage/test"
|
stest "github.com/google/cadvisor/storage/test"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
const containerName = "/container"
|
const containerName = "/container"
|
||||||
@ -83,15 +84,22 @@ func TestUpdateSubcontainersWithError(t *testing.T) {
|
|||||||
[]info.ContainerReference{},
|
[]info.ContainerReference{},
|
||||||
fmt.Errorf("some error"),
|
fmt.Errorf("some error"),
|
||||||
)
|
)
|
||||||
|
mockHandler.On("Exists").Return(true)
|
||||||
|
|
||||||
err := cd.updateSubcontainers()
|
assert.NotNil(t, cd.updateSubcontainers())
|
||||||
if err == nil {
|
assert.Empty(t, cd.info.Subcontainers, "subcontainers should not be populated on failure")
|
||||||
t.Fatal("updateSubcontainers should return error")
|
mockHandler.AssertExpectations(t)
|
||||||
}
|
}
|
||||||
if len(cd.info.Subcontainers) != 0 {
|
|
||||||
t.Errorf("Received %v subcontainers, should be 0", len(cd.info.Subcontainers))
|
|
||||||
}
|
|
||||||
|
|
||||||
|
func TestUpdateSubcontainersWithErrorOnDeadContainer(t *testing.T) {
|
||||||
|
cd, mockHandler, _ := newTestContainerData(t)
|
||||||
|
mockHandler.On("ListContainers", container.ListSelf).Return(
|
||||||
|
[]info.ContainerReference{},
|
||||||
|
fmt.Errorf("some error"),
|
||||||
|
)
|
||||||
|
mockHandler.On("Exists").Return(false)
|
||||||
|
|
||||||
|
assert.Nil(t, cd.updateSubcontainers())
|
||||||
mockHandler.AssertExpectations(t)
|
mockHandler.AssertExpectations(t)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user