Register inotify watches on all cgroup hierarchies.

We used to register them only on the first hierarchy that was created (I
think this was unintentional). This caused some weird edge cases where
we'd try to remove a watch we didn't create. It is an error we
ignore today (since we fix it in < 60s), but it delays our destruction of
the container.
This commit is contained in:
Victor Marmol 2014-11-21 20:05:50 +08:00
parent f6a90d7bac
commit fa00344601

View File

@ -36,16 +36,27 @@ import (
) )
type rawContainerHandler struct { type rawContainerHandler struct {
// Name of the container for this handler.
name string name string
cgroup *cgroups.Cgroup cgroup *cgroups.Cgroup
cgroupSubsystems *cgroupSubsystems cgroupSubsystems *cgroupSubsystems
machineInfoFactory info.MachineInfoFactory machineInfoFactory info.MachineInfoFactory
watcher *inotify.Watcher
stopWatcher chan error // Inotify event watcher.
watches map[string]struct{} watcher *inotify.Watcher
fsInfo fs.FsInfo
networkInterface *networkInterface // Signal for watcher thread to stop.
externalMounts []mount stopWatcher chan error
// Containers being watched for new subcontainers.
watches map[string]struct{}
// Cgroup paths being watched for new subcontainers.
cgroupWatches map[string]struct{}
fsInfo fs.FsInfo
networkInterface *networkInterface
externalMounts []mount
} }
func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, machineInfoFactory info.MachineInfoFactory) (container.ContainerHandler, error) { func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, machineInfoFactory info.MachineInfoFactory) (container.ContainerHandler, error) {
@ -76,6 +87,7 @@ func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, mac
machineInfoFactory: machineInfoFactory, machineInfoFactory: machineInfoFactory,
stopWatcher: make(chan error), stopWatcher: make(chan error),
watches: make(map[string]struct{}), watches: make(map[string]struct{}),
cgroupWatches: make(map[string]struct{}),
fsInfo: fsInfo, fsInfo: fsInfo,
networkInterface: networkInterface, networkInterface: networkInterface,
externalMounts: externalMounts, externalMounts: externalMounts,
@ -322,7 +334,9 @@ func (self *rawContainerHandler) watchDirectory(dir string, containerName string
return err return err
} }
self.watches[containerName] = struct{}{} self.watches[containerName] = struct{}{}
self.cgroupWatches[dir] = struct{}{}
// TODO(vmarmol): We should re-do this once we're done to ensure directories were not added in the meantime.
// Watch subdirectories as well. // Watch subdirectories as well.
entries, err := ioutil.ReadDir(dir) entries, err := ioutil.ReadDir(dir)
if err != nil { if err != nil {
@ -372,28 +386,33 @@ func (self *rawContainerHandler) processEvent(event *inotify.Event, events chan
// Maintain the watch for the new or deleted container. // Maintain the watch for the new or deleted container.
switch { switch {
case eventType == container.SubcontainerAdd: case eventType == container.SubcontainerAdd:
// If we've already seen this event, return. _, alreadyWatched := self.watches[containerName]
if _, ok := self.watches[containerName]; ok {
return nil
}
// New container was created, watch it. // New container was created, watch it.
err := self.watchDirectory(event.Name, containerName) err := self.watchDirectory(event.Name, containerName)
if err != nil { if err != nil {
return err return err
} }
// Only report container creation once.
if alreadyWatched {
return nil
}
case eventType == container.SubcontainerDelete: case eventType == container.SubcontainerDelete:
// If we've already seen this event, return. // Container was deleted, stop watching for it. Only delete the event if we registered it.
if _, ok := self.cgroupWatches[event.Name]; ok {
err := self.watcher.RemoveWatch(event.Name)
if err != nil {
return err
}
delete(self.cgroupWatches, event.Name)
}
// Only report container deletion once.
if _, ok := self.watches[containerName]; !ok { if _, ok := self.watches[containerName]; !ok {
return nil return nil
} }
delete(self.watches, containerName) delete(self.watches, containerName)
// Container was deleted, stop watching for it.
err := self.watcher.RemoveWatch(event.Name)
if err != nil {
return err
}
default: default:
return fmt.Errorf("unknown event type %v", eventType) return fmt.Errorf("unknown event type %v", eventType)
} }