diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index 97686a06..15d6e736 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -55,8 +55,8 @@ }, { "ImportPath": "github.com/docker/libcontainer", - "Comment": "v1.2.0-99-gfe3801c", - "Rev": "fe3801ccd2f5d0cc3ec5d063067fc4a1c312fa81" + "Comment": "v1.2.0-173-g58fc931", + "Rev": "58fc93160e03387a4f41dcf4aed2e376c4a92db4" }, { "ImportPath": "github.com/fsouza/go-dockerclient", diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/.drone.yml b/Godeps/_workspace/src/github.com/docker/libcontainer/.drone.yml new file mode 100644 index 00000000..80d298f2 --- /dev/null +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/.drone.yml @@ -0,0 +1,9 @@ +image: dockercore/libcontainer +script: +# Setup the DockerInDocker environment. + - /dind + - sed -i 's!docker/docker!docker/libcontainer!' /go/src/github.com/docker/docker/hack/make/.validate + - bash /go/src/github.com/docker/docker/hack/make/validate-dco + - bash /go/src/github.com/docker/docker/hack/make/validate-gofmt + - export GOPATH="$GOPATH:/go:$(pwd)/vendor" # Drone mucks with our GOPATH + - make direct-test diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/.travis.yml b/Godeps/_workspace/src/github.com/docker/libcontainer/.travis.yml deleted file mode 100644 index 3ce0e27e..00000000 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/.travis.yml +++ /dev/null @@ -1,36 +0,0 @@ -language: go -go: 1.3 - -# let us have pretty experimental Docker-based Travis workers -sudo: false - -env: - - TRAVIS_GLOBAL_WTF=1 - - _GOOS=linux _GOARCH=amd64 CGO_ENABLED=1 - - _GOOS=linux _GOARCH=amd64 CGO_ENABLED=0 -# - _GOOS=linux _GOARCH=386 CGO_ENABLED=1 # TODO add this once Travis can handle it (https://github.com/travis-ci/travis-ci/issues/2207#issuecomment-49625061) - - _GOOS=linux _GOARCH=386 CGO_ENABLED=0 - - _GOOS=linux _GOARCH=arm CGO_ENABLED=0 - -install: - - go get code.google.com/p/go.tools/cmd/cover - - mkdir -pv 
"${GOPATH%%:*}/src/github.com/docker" && [ -d "${GOPATH%%:*}/src/github.com/docker/libcontainer" ] || ln -sv "$(readlink -f .)" "${GOPATH%%:*}/src/github.com/docker/libcontainer" - - if [ -z "$TRAVIS_GLOBAL_WTF" ]; then - gvm cross "$_GOOS" "$_GOARCH"; - export GOOS="$_GOOS" GOARCH="$_GOARCH"; - fi - - export GOPATH="$GOPATH:$(pwd)/vendor" - - if [ -z "$TRAVIS_GLOBAL_WTF" ]; then go env; fi - - go get -d -v ./... # TODO remove this if /docker/docker gets purged from our includes - - if [ "$TRAVIS_GLOBAL_WTF" ]; then - export DOCKER_PATH="${GOPATH%%:*}/src/github.com/docker/docker"; - mkdir -p "$DOCKER_PATH/hack/make"; - ( cd "$DOCKER_PATH/hack/make" && wget -c 'https://raw.githubusercontent.com/docker/docker/master/hack/make/'{.validate,validate-dco,validate-gofmt} ); - sed -i 's!docker/docker!docker/libcontainer!' "$DOCKER_PATH/hack/make/.validate"; - fi - -script: - - if [ "$TRAVIS_GLOBAL_WTF" ]; then bash "$DOCKER_PATH/hack/make/validate-dco"; fi - - if [ "$TRAVIS_GLOBAL_WTF" ]; then bash "$DOCKER_PATH/hack/make/validate-gofmt"; fi - - if [ -z "$TRAVIS_GLOBAL_WTF" ]; then make direct-build; fi - - if [ -z "$TRAVIS_GLOBAL_WTF" -a "$GOARCH" != 'arm' ]; then make direct-test-short; fi diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/Dockerfile b/Godeps/_workspace/src/github.com/docker/libcontainer/Dockerfile index 96d8f352..614e5979 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/Dockerfile +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/Dockerfile @@ -1,7 +1,7 @@ FROM crosbymichael/golang RUN apt-get update && apt-get install -y gcc make -RUN go get code.google.com/p/go.tools/cmd/cover +RUN go get golang.org/x/tools/cmd/cover ENV GOPATH $GOPATH:/go/src/github.com/docker/libcontainer/vendor RUN go get github.com/docker/docker/pkg/term @@ -10,7 +10,7 @@ RUN go get github.com/docker/docker/pkg/term RUN mkdir /busybox && \ curl -sSL 'https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.02/rootfs.tar' | tar -xC 
/busybox -RUN curl -sSL https://raw.githubusercontent.com/docker/docker/master/hack/dind -o /dind && \ +RUN curl -sSL https://raw.githubusercontent.com/docker/docker/master/project/dind -o /dind && \ chmod +x /dind COPY . /go/src/github.com/docker/libcontainer diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/MAINTAINERS b/Godeps/_workspace/src/github.com/docker/libcontainer/MAINTAINERS index 24011b05..7295c603 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/MAINTAINERS +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/MAINTAINERS @@ -2,5 +2,4 @@ Michael Crosby (@crosbymichael) Rohit Jnagal (@rjnagal) Victor Marmol (@vmarmol) Mrunal Patel (@mrunalp) -.travis.yml: Tianon Gravi (@tianon) update-vendor.sh: Tianon Gravi (@tianon) diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/Makefile b/Godeps/_workspace/src/github.com/docker/libcontainer/Makefile index 0ec995fc..0c4dda7c 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/Makefile +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/Makefile @@ -12,10 +12,10 @@ sh: GO_PACKAGES = $(shell find . 
-not \( -wholename ./vendor -prune -o -wholename ./.git -prune \) -name '*.go' -print0 | xargs -0n1 dirname | sort -u) direct-test: - go test -cover -v $(GO_PACKAGES) + go test $(TEST_TAGS) -cover -v $(GO_PACKAGES) direct-test-short: - go test -cover -test.short -v $(GO_PACKAGES) + go test $(TEST_TAGS) -cover -test.short -v $(GO_PACKAGES) direct-build: go build -v $(GO_PACKAGES) diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/README.md b/Godeps/_workspace/src/github.com/docker/libcontainer/README.md index 3201df9b..37047e68 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/README.md +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/README.md @@ -1,4 +1,4 @@ -## libcontainer - reference implementation for containers [![Build Status](https://travis-ci.org/docker/libcontainer.png?branch=master)](https://travis-ci.org/docker/libcontainer) +## libcontainer - reference implementation for containers [![Build Status](https://ci.dockerproject.com/github.com/docker/libcontainer/status.svg?branch=master)](https://ci.dockerproject.com/github.com/docker/libcontainer) ### Note on API changes: diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/SPEC.md b/Godeps/_workspace/src/github.com/docker/libcontainer/SPEC.md new file mode 100644 index 00000000..f5afaadc --- /dev/null +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/SPEC.md @@ -0,0 +1,321 @@ +## Container Specification - v1 + +This is the standard configuration for version 1 containers. It includes +namespaces, standard filesystem setup, a default Linux capability set, and +information about resource reservations. It also has information about any +populated environment settings for the processes running inside a container. + +Along with the configuration of how a container is created the standard also +discusses actions that can be performed on a container to manage and inspect +information about the processes running inside. 
+ +The v1 profile is meant to be able to accommodate the majority of applications +with a strong security configuration. + +### System Requirements and Compatibility + +Minimum requirements: +* Kernel version - 3.8 recommended, 2.6.2x minimum (with backported patches) +* Mounted cgroups with each subsystem in its own hierarchy + + +### Namespaces + +| Flag | Enabled | +| ------------ | ------- | +| CLONE_NEWPID | 1 | +| CLONE_NEWUTS | 1 | +| CLONE_NEWIPC | 1 | +| CLONE_NEWNET | 1 | +| CLONE_NEWNS | 1 | +| CLONE_NEWUSER | 0 | + +In v1 the user namespace is not enabled by default for support of older kernels +where the user namespace feature is not fully implemented. Namespaces are +created for the container via the `clone` syscall. + + +### Filesystem + +A root filesystem must be provided to a container for execution. The container +will use this root filesystem (rootfs) to jail and spawn processes inside where +the binaries and system libraries are local to that directory. Any binaries +to be executed must be contained within this rootfs. + +Mounts that happen inside the container are automatically cleaned up when the +container exits as the mount namespace is destroyed and the kernel will +unmount all the mounts that were setup within that namespace. + +For a container to execute properly there are certain filesystems that +are required to be mounted within the rootfs that the runtime will setup.
+ +| Path | Type | Flags | Data | +| ----------- | ------ | -------------------------------------- | --------------------------------------- | +| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | | +| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 | +| /dev/shm | shm | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k | +| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | | +| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 | +| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | | + + +After a container's filesystems are mounted within the newly created +mount namespace `/dev` will need to be populated with a set of device nodes. +It is expected that a rootfs does not need to have any device nodes specified +for `/dev` within the rootfs as the container will setup the correct devices +that are required for executing a container's process. + +| Path | Mode | Access | +| ------------ | ---- | ---------- | +| /dev/null | 0666 | rwm | +| /dev/zero | 0666 | rwm | +| /dev/full | 0666 | rwm | +| /dev/tty | 0666 | rwm | +| /dev/random | 0666 | rwm | +| /dev/urandom | 0666 | rwm | +| /dev/fuse | 0666 | rwm | + + +**ptmx** +`/dev/ptmx` will need to be a symlink to the host's `/dev/ptmx` within +the container. + +The use of a pseudo TTY is optional within a container and it should support both. +If a pseudo TTY is provided to the container `/dev/console` will need to be +setup by binding the console in `/dev/` after it has been populated and mounted +in tmpfs. + +| Source | Destination | UID GID | Mode | Type | +| --------------- | ------------ | ------- | ---- | ---- | +| *pty host path* | /dev/console | 0 0 | 0600 | bind | + + +After `/dev/null` has been setup we check for any external links between +the container's io, STDIN, STDOUT, STDERR. If the container's io is pointing +to `/dev/null` outside the container we close and `dup2` the `/dev/null` +that is local to the container's rootfs.
+ + +After the container has `/proc` mounted a few standard symlinks are setup +within `/dev/` for the io. + +| Source | Destination | +| ------------ | ----------- | +| /proc/1/fd | /dev/fd | +| /proc/1/fd/0 | /dev/stdin | +| /proc/1/fd/1 | /dev/stdout | +| /proc/1/fd/2 | /dev/stderr | + +A `pivot_root` is used to change the root for the process, effectively +jailing the process inside the rootfs. + +```c +put_old = mkdir(...); +pivot_root(rootfs, put_old); +chdir("/"); +unmount(put_old, MS_DETACH); +rmdir(put_old); +``` + +For containers running with a rootfs inside `ramfs` a `MS_MOVE` combined +with a `chroot` is required as `pivot_root` is not supported in `ramfs`. + +```c +mount(rootfs, "/", NULL, MS_MOVE, NULL); +chroot("."); +chdir("/"); +``` + +The `umask` is set back to `0022` after the filesystem setup has been completed. + +### Resources + +Cgroups are used to handle resource allocation for containers. This includes +system resources like cpu, memory, and device access. + +| Subsystem | Enabled | +| ---------- | ------- | +| devices | 1 | +| memory | 1 | +| cpu | 1 | +| cpuacct | 1 | +| cpuset | 1 | +| blkio | 1 | +| perf_event | 1 | +| freezer | 1 | + + +All cgroup subsystems are joined so that statistics can be collected from +each of the subsystems. Freezer does not expose any stats but is joined +so that containers can be paused and resumed. + +The parent process of the container's init must place the init pid inside +the correct cgroups before the initialization begins. This is done so +that no processes or threads escape the cgroups. This sync is +done via a pipe ( specified in the runtime section below ) that the container's +init process will block waiting for the parent to finish setup. + +### Security + +The standard set of Linux capabilities that are set in a container +provide a good default for security and flexibility for the applications.
+ + +| Capability | Enabled | +| -------------------- | ------- | +| CAP_NET_RAW | 1 | +| CAP_NET_BIND_SERVICE | 1 | +| CAP_AUDIT_WRITE | 1 | +| CAP_DAC_OVERRIDE | 1 | +| CAP_SETFCAP | 1 | +| CAP_SETPCAP | 1 | +| CAP_SETGID | 1 | +| CAP_SETUID | 1 | +| CAP_MKNOD | 1 | +| CAP_CHOWN | 1 | +| CAP_FOWNER | 1 | +| CAP_FSETID | 1 | +| CAP_KILL | 1 | +| CAP_SYS_CHROOT | 1 | +| CAP_NET_BROADCAST | 0 | +| CAP_SYS_MODULE | 0 | +| CAP_SYS_RAWIO | 0 | +| CAP_SYS_PACCT | 0 | +| CAP_SYS_ADMIN | 0 | +| CAP_SYS_NICE | 0 | +| CAP_SYS_RESOURCE | 0 | +| CAP_SYS_TIME | 0 | +| CAP_SYS_TTY_CONFIG | 0 | +| CAP_AUDIT_CONTROL | 0 | +| CAP_MAC_OVERRIDE | 0 | +| CAP_MAC_ADMIN | 0 | +| CAP_NET_ADMIN | 0 | +| CAP_SYSLOG | 0 | +| CAP_DAC_READ_SEARCH | 0 | +| CAP_LINUX_IMMUTABLE | 0 | +| CAP_IPC_LOCK | 0 | +| CAP_IPC_OWNER | 0 | +| CAP_SYS_PTRACE | 0 | +| CAP_SYS_BOOT | 0 | +| CAP_LEASE | 0 | +| CAP_WAKE_ALARM | 0 | +| CAP_BLOCK_SUSPEND | 0 | + + +Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor) +and [selinux](http://selinuxproject.org/page/Main_Page) can be used with +the containers. A container should support setting an apparmor profile or +selinux process and mount labels if provided in the configuration.
+ +Standard apparmor profile: +```c +#include <tunables/global> +profile <profile_name> flags=(attach_disconnected,mediate_deleted) { + #include <abstractions/base> + network, + capability, + file, + umount, + + mount fstype=tmpfs, + mount fstype=mqueue, + mount fstype=fuse.*, + mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/, + mount fstype=efivarfs -> /sys/firmware/efi/efivars/, + mount fstype=fusectl -> /sys/fs/fuse/connections/, + mount fstype=securityfs -> /sys/kernel/security/, + mount fstype=debugfs -> /sys/kernel/debug/, + mount fstype=proc -> /proc/, + mount fstype=sysfs -> /sys/, + + deny @{PROC}/sys/fs/** wklx, + deny @{PROC}/sysrq-trigger rwklx, + deny @{PROC}/mem rwklx, + deny @{PROC}/kmem rwklx, + deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx, + deny @{PROC}/sys/kernel/*/** wklx, + + deny mount options=(ro, remount) -> /, + deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/, + deny mount fstype=devpts, + + deny /sys/[^f]*/** wklx, + deny /sys/f[^s]*/** wklx, + deny /sys/fs/[^c]*/** wklx, + deny /sys/fs/c[^g]*/** wklx, + deny /sys/fs/cg[^r]*/** wklx, + deny /sys/firmware/efi/efivars/** rwklx, + deny /sys/kernel/security/** rwklx, +} +``` + +*TODO: seccomp work is being done to find a good default config* + +### Runtime and Init Process + +During container creation the parent process needs to talk to the container's init +process and have a form of synchronization. This is accomplished by creating +a pipe that is passed to the container's init. When the init process first spawns +it will block on its side of the pipe until the parent closes its side. This +allows the parent to have time to set the new process inside a cgroup hierarchy +and/or write any uid/gid mappings required for user namespaces. +The pipe is passed to the init process via FD 3. + +The application consuming libcontainer should be compiled statically. libcontainer +does not define any init process and the arguments provided are used to `exec` the +process inside the application.
There should be no long running init within the +container spec. + +If a pseudo tty is provided to a container it will open and `dup2` the console +as the container's STDIN, STDOUT, STDERR as well as mounting the console +as `/dev/console`. + +An extra set of mounts are provided to a container and setup for use. A container's +rootfs can contain some non portable files inside that can cause side effects during +execution of a process. These files are usually created and populated with the container +specific information via the runtime. + +**Extra runtime files:** +* /etc/hosts +* /etc/resolv.conf +* /etc/hostname +* /etc/localtime + + +#### Defaults + +There are a few defaults that can be overridden by users, but in their omission +these apply to processes within a container. + +| Type | Value | +| ------------------- | ------------------------------ | +| Parent Death Signal | SIGKILL | +| UID | 0 | +| GID | 0 | +| GROUPS | 0, NULL | +| CWD | "/" | +| $HOME | Current user's home dir or "/" | +| Readonly rootfs | false | +| Pseudo TTY | false | + + +## Actions + +After a container is created there is a standard set of actions that can +be done to the container. These actions are part of the public API for +a container. 
+ +| Action | Description | +| -------------- | ------------------------------------------------------------------ | +| Get processes | Return all the pids for processes running inside a container | +| Get Stats | Return resource statistics for the container as a whole | +| Wait | Wait waits on the container's init process ( pid 1 ) | +| Wait Process | Wait on any of the container's processes returning the exit status | +| Destroy | Kill the container's init process and remove any filesystem state | +| Signal | Send a signal to the container's init process | +| Signal Process | Send a signal to any of the container's processes | +| Pause | Pause all processes inside the container | +| Resume | Resume all processes inside the container if paused | +| Exec | Execute a new process inside of the container ( requires setns ) | + + diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/api_temp.go b/Godeps/_workspace/src/github.com/docker/libcontainer/api_temp.go index 9b2c5207..5c682ee3 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/api_temp.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/api_temp.go @@ -5,30 +5,17 @@ package libcontainer import ( "github.com/docker/libcontainer/cgroups/fs" - "github.com/docker/libcontainer/cgroups/systemd" "github.com/docker/libcontainer/network" ) // TODO(vmarmol): Complete Stats() in final libcontainer API and move users to that. // DEPRECATED: The below portions are only to be used during the transition to the official API. // Returns all available stats for the given container. 
-func GetStats(container *Config, state *State) (*ContainerStats, error) { - var ( - err error - stats = &ContainerStats{} - ) - - if systemd.UseSystemd() { - stats.CgroupStats, err = systemd.GetStats(container.Cgroups) - } else { - stats.CgroupStats, err = fs.GetStats(container.Cgroups) - } - - if err != nil { +func GetStats(container *Config, state *State) (stats *ContainerStats, err error) { + stats = &ContainerStats{} + if stats.CgroupStats, err = fs.GetStats(state.CgroupPaths); err != nil { return stats, err } - stats.NetworkStats, err = network.GetStats(&state.NetworkState) - return stats, err } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgroups.go b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgroups.go index 567e9a6c..106698d1 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgroups.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgroups.go @@ -50,11 +50,7 @@ type Cgroup struct { CpuQuota int64 `json:"cpu_quota,omitempty"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period. CpuPeriod int64 `json:"cpu_period,omitempty"` // CPU period to be used for hardcapping (in usecs). 0 to use system default. 
CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use + CpusetMems string `json:"cpuset_mems,omitempty"` // MEM to use Freezer FreezerState `json:"freezer,omitempty"` // set the freeze value for the process Slice string `json:"slice,omitempty"` // Parent slice to use for systemd } - -type ActiveCgroup interface { - Cleanup() error - Paths() (map[string]string, error) -} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgutil/cgutil.go b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgutil/cgutil.go deleted file mode 100644 index d1a66117..00000000 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgutil/cgutil.go +++ /dev/null @@ -1,264 +0,0 @@ -package main - -import ( - "encoding/json" - "fmt" - "log" - "os" - "syscall" - "time" - - "github.com/codegangsta/cli" - "github.com/docker/libcontainer/cgroups" - "github.com/docker/libcontainer/cgroups/fs" - "github.com/docker/libcontainer/cgroups/systemd" -) - -var createCommand = cli.Command{ - Name: "create", - Usage: "Create a cgroup container using the supplied configuration and initial process.", - Flags: []cli.Flag{ - cli.StringFlag{Name: "config, c", Value: "cgroup.json", Usage: "path to container configuration (cgroups.Cgroup object)"}, - cli.IntFlag{Name: "pid, p", Value: 0, Usage: "pid of the initial process in the container"}, - }, - Action: createAction, -} - -var destroyCommand = cli.Command{ - Name: "destroy", - Usage: "Destroy an existing cgroup container.", - Flags: []cli.Flag{ - cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"}, - cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"}, - }, - Action: destroyAction, -} - -var statsCommand = cli.Command{ - Name: "stats", - Usage: "Get stats for cgroup", - Flags: []cli.Flag{ - cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"}, - cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"}, - }, - Action: statsAction, -} - -var 
pauseCommand = cli.Command{ - Name: "pause", - Usage: "Pause cgroup", - Flags: []cli.Flag{ - cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"}, - cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"}, - }, - Action: pauseAction, -} - -var resumeCommand = cli.Command{ - Name: "resume", - Usage: "Resume a paused cgroup", - Flags: []cli.Flag{ - cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"}, - cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"}, - }, - Action: resumeAction, -} - -var psCommand = cli.Command{ - Name: "ps", - Usage: "Get list of pids for a cgroup", - Flags: []cli.Flag{ - cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"}, - cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"}, - }, - Action: psAction, -} - -func getConfigFromFile(c *cli.Context) (*cgroups.Cgroup, error) { - f, err := os.Open(c.String("config")) - if err != nil { - return nil, err - } - defer f.Close() - - var config *cgroups.Cgroup - if err := json.NewDecoder(f).Decode(&config); err != nil { - log.Fatal(err) - } - return config, nil -} - -func openLog(name string) error { - f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0755) - if err != nil { - return err - } - - log.SetOutput(f) - return nil -} - -func getConfig(context *cli.Context) (*cgroups.Cgroup, error) { - name := context.String("name") - if name == "" { - log.Fatal(fmt.Errorf("Missing container name")) - } - parent := context.String("parent") - return &cgroups.Cgroup{ - Name: name, - Parent: parent, - }, nil -} - -func killAll(config *cgroups.Cgroup) { - // We could use freezer here to prevent process spawning while we are trying - // to kill everything. But going with more portable solution of retrying for - // now. 
- pids := getPids(config) - retry := 10 - for len(pids) != 0 || retry > 0 { - killPids(pids) - time.Sleep(100 * time.Millisecond) - retry-- - pids = getPids(config) - } - if len(pids) != 0 { - log.Fatal(fmt.Errorf("Could not kill existing processes in the container.")) - } -} - -func getPids(config *cgroups.Cgroup) []int { - pids, err := fs.GetPids(config) - if err != nil { - log.Fatal(err) - } - return pids -} - -func killPids(pids []int) { - for _, pid := range pids { - // pids might go away on their own. Ignore errors. - syscall.Kill(pid, syscall.SIGKILL) - } -} - -func setFreezerState(context *cli.Context, state cgroups.FreezerState) { - config, err := getConfig(context) - if err != nil { - log.Fatal(err) - } - - if systemd.UseSystemd() { - err = systemd.Freeze(config, state) - } else { - err = fs.Freeze(config, state) - } - if err != nil { - log.Fatal(err) - } -} - -func createAction(context *cli.Context) { - config, err := getConfigFromFile(context) - if err != nil { - log.Fatal(err) - } - pid := context.Int("pid") - if pid <= 0 { - log.Fatal(fmt.Errorf("Invalid pid : %d", pid)) - } - if systemd.UseSystemd() { - _, err := systemd.Apply(config, pid) - if err != nil { - log.Fatal(err) - } - } else { - _, err := fs.Apply(config, pid) - if err != nil { - log.Fatal(err) - } - } -} - -func destroyAction(context *cli.Context) { - config, err := getConfig(context) - if err != nil { - log.Fatal(err) - } - - killAll(config) - // Systemd will clean up cgroup state for empty container. 
- if !systemd.UseSystemd() { - err := fs.Cleanup(config) - if err != nil { - log.Fatal(err) - } - } -} - -func statsAction(context *cli.Context) { - config, err := getConfig(context) - if err != nil { - log.Fatal(err) - } - stats, err := fs.GetStats(config) - if err != nil { - log.Fatal(err) - } - - out, err := json.MarshalIndent(stats, "", "\t") - if err != nil { - log.Fatal(err) - } - fmt.Printf("Usage stats for '%s':\n %v\n", config.Name, string(out)) -} - -func pauseAction(context *cli.Context) { - setFreezerState(context, cgroups.Frozen) -} - -func resumeAction(context *cli.Context) { - setFreezerState(context, cgroups.Thawed) -} - -func psAction(context *cli.Context) { - config, err := getConfig(context) - if err != nil { - log.Fatal(err) - } - - pids, err := fs.GetPids(config) - if err != nil { - log.Fatal(err) - } - - fmt.Printf("Pids in '%s':\n", config.Name) - fmt.Println(pids) -} - -func main() { - logPath := os.Getenv("log") - if logPath != "" { - if err := openLog(logPath); err != nil { - log.Fatal(err) - } - } - - app := cli.NewApp() - app.Name = "cgutil" - app.Usage = "Test utility for libcontainer cgroups package" - app.Version = "0.1" - - app.Commands = []cli.Command{ - createCommand, - destroyCommand, - statsCommand, - pauseCommand, - resumeCommand, - psCommand, - } - - if err := app.Run(os.Args); err != nil { - log.Fatal(err) - } -} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgutil/sample_cgroup.json b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgutil/sample_cgroup.json deleted file mode 100644 index 2d297849..00000000 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgutil/sample_cgroup.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "name": "luke", - "parent": "darth", - "allow_all_devices": true, - "memory": 1073741824, - "memory_swap": -1, - "cpu_shares": 2048, - "cpu_quota": 500000, - "cpu_period": 250000 -} diff --git 
a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go index 599ab572..6f85793d 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go @@ -57,20 +57,35 @@ type data struct { pid int } -func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { +func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { d, err := getCgroupData(c, pid) if err != nil { return nil, err } - for _, sys := range subsystems { + paths := make(map[string]string) + defer func() { + if err != nil { + cgroups.RemovePaths(paths) + } + }() + for name, sys := range subsystems { if err := sys.Set(d); err != nil { - d.Cleanup() return nil, err } + // FIXME: Apply should, ideally, be reentrant or be broken up into a separate + // create and join phase so that the cgroup hierarchy for a container can be + // created then join consists of writing the process pids to cgroup.procs + p, err := d.path(name) + if err != nil { + if cgroups.IsNotFound(err) { + continue + } + return nil, err + } + paths[name] = p } - - return d, nil + return paths, nil } // Symmetrical public function to update device based cgroups. 
Also available @@ -86,33 +101,13 @@ func ApplyDevices(c *cgroups.Cgroup, pid int) error { return devices.Set(d) } -func Cleanup(c *cgroups.Cgroup) error { - d, err := getCgroupData(c, 0) - if err != nil { - return fmt.Errorf("Could not get Cgroup data %s", err) - } - return d.Cleanup() -} - -func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) { +func GetStats(systemPaths map[string]string) (*cgroups.Stats, error) { stats := cgroups.NewStats() - - d, err := getCgroupData(c, 0) - if err != nil { - return nil, fmt.Errorf("getting CgroupData %s", err) - } - - for sysname, sys := range subsystems { - path, err := d.path(sysname) - if err != nil { - // Don't fail if a cgroup hierarchy was not found, just skip this subsystem - if cgroups.IsNotFound(err) { - continue - } - - return nil, err + for name, path := range systemPaths { + sys, ok := subsystems[name] + if !ok { + continue } - if err := sys.GetStats(path, stats); err != nil { return nil, err } @@ -176,26 +171,6 @@ func (raw *data) parent(subsystem string) (string, error) { return filepath.Join(raw.root, subsystem, initPath), nil } -func (raw *data) Paths() (map[string]string, error) { - paths := make(map[string]string) - - for sysname := range subsystems { - path, err := raw.path(sysname) - if err != nil { - // Don't fail if a cgroup hierarchy was not found, just skip this subsystem - if cgroups.IsNotFound(err) { - continue - } - - return nil, err - } - - paths[sysname] = path - } - - return paths, nil -} - func (raw *data) path(subsystem string) (string, error) { // If the cgroup name/path is absolute do not look relative to the cgroup of the init process. 
if filepath.IsAbs(raw.cgroup) { @@ -234,13 +209,6 @@ func (raw *data) join(subsystem string) (string, error) { return path, nil } -func (raw *data) Cleanup() error { - for _, sys := range subsystems { - sys.Remove(raw) - } - return nil -} - func writeFile(dir, file, data string) error { return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/cpuset.go b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/cpuset.go index 88477394..ff67a53e 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/cpuset.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/cpuset.go @@ -14,17 +14,11 @@ type CpusetGroup struct { } func (s *CpusetGroup) Set(d *data) error { - // we don't want to join this cgroup unless it is specified - if d.c.CpusetCpus != "" { - dir, err := d.path("cpuset") - if err != nil { - return err - } - - return s.SetDir(dir, d.c.CpusetCpus, d.pid) + dir, err := d.path("cpuset") + if err != nil { + return err } - - return nil + return s.SetDir(dir, d.c.CpusetCpus, d.c.CpusetMems, d.pid) } func (s *CpusetGroup) Remove(d *data) error { @@ -35,7 +29,7 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { return nil } -func (s *CpusetGroup) SetDir(dir, value string, pid int) error { +func (s *CpusetGroup) SetDir(dir, cpus string, mems string, pid int) error { if err := s.ensureParent(dir); err != nil { return err } @@ -46,8 +40,17 @@ func (s *CpusetGroup) SetDir(dir, value string, pid int) error { return err } - if err := writeFile(dir, "cpuset.cpus", value); err != nil { - return err + // If we don't use --cpuset-xxx, the default value inherit from parent cgroup + // is set in s.ensureParent, otherwise, use the value we set + if cpus != "" { + if err := writeFile(dir, "cpuset.cpus", cpus); err != nil { + return err + } + } + if mems != "" { + if err := writeFile(dir, "cpuset.mems", mems); err != nil 
{ + return err + } } return nil diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/utils_test.go b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/utils_test.go index f1afd494..8b19a84b 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/utils_test.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/utils_test.go @@ -57,7 +57,7 @@ func TestGetCgroupParamsInt(t *testing.T) { if err != nil { t.Fatal(err) } else if value != 0 { - t.Fatalf("Expected %d to equal %f", value, 0) + t.Fatalf("Expected %d to equal %d", value, 0) } // Success with negative values lesser than min int64 @@ -70,7 +70,7 @@ func TestGetCgroupParamsInt(t *testing.T) { if err != nil { t.Fatal(err) } else if value != 0 { - t.Fatalf("Expected %d to equal %f", value, 0) + t.Fatalf("Expected %d to equal %d", value, 0) } // Not a float. diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/stats.go b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/stats.go index 857fc1dc..dc5dbb3c 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/stats.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/stats.go @@ -27,7 +27,7 @@ type CpuUsage struct { type CpuStats struct { CpuUsage CpuUsage `json:"cpu_usage,omitempty"` - ThrottlingData ThrottlingData `json:"throlling_data,omitempty"` + ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` } type MemoryStats struct { diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/systemd/apply_nosystemd.go b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/systemd/apply_nosystemd.go index 42a09e3f..4b9a2f5b 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/systemd/apply_nosystemd.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/systemd/apply_nosystemd.go @@ -12,7 +12,7 @@ func UseSystemd() bool { return false } -func Apply(c 
*cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { +func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { return nil, fmt.Errorf("Systemd not supported") } @@ -27,7 +27,3 @@ func ApplyDevices(c *cgroups.Cgroup, pid int) error { func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { return fmt.Errorf("Systemd not supported") } - -func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) { - return nil, fmt.Errorf("Systemd not supported") -} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go index 1f84a9c6..41dce311 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go @@ -31,18 +31,15 @@ var ( connLock sync.Mutex theConn *systemd.Conn hasStartTransientUnit bool - subsystems = map[string]subsystem{ - "devices": &fs.DevicesGroup{}, - "memory": &fs.MemoryGroup{}, - "cpu": &fs.CpuGroup{}, - "cpuset": &fs.CpusetGroup{}, - "cpuacct": &fs.CpuacctGroup{}, - "blkio": &fs.BlkioGroup{}, - "perf_event": &fs.PerfEventGroup{}, - "freezer": &fs.FreezerGroup{}, - } ) +func newProp(name string, units interface{}) systemd.Property { + return systemd.Property{ + Name: name, + Value: dbus.MakeVariant(units), + } +} + func UseSystemd() bool { s, err := os.Stat("/run/systemd/system") if err != nil || !s.IsDir() { @@ -84,7 +81,7 @@ func getIfaceForUnit(unitName string) string { return "Unit" } -func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { +func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) { var ( unitName = getUnitName(c) slice = "system.slice" @@ -99,27 +96,27 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { } properties = append(properties, - systemd.Property{"Slice", dbus.MakeVariant(slice)}, - 
systemd.Property{"Description", dbus.MakeVariant("docker container " + c.Name)}, - systemd.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})}, + systemd.PropSlice(slice), + systemd.PropDescription("docker container "+c.Name), + newProp("PIDs", []uint32{uint32(pid)}), ) // Always enable accounting, this gets us the same behaviour as the fs implementation, // plus the kernel has some problems with joining the memory cgroup at a later time. properties = append(properties, - systemd.Property{"MemoryAccounting", dbus.MakeVariant(true)}, - systemd.Property{"CPUAccounting", dbus.MakeVariant(true)}, - systemd.Property{"BlockIOAccounting", dbus.MakeVariant(true)}) + newProp("MemoryAccounting", true), + newProp("CPUAccounting", true), + newProp("BlockIOAccounting", true)) if c.Memory != 0 { properties = append(properties, - systemd.Property{"MemoryLimit", dbus.MakeVariant(uint64(c.Memory))}) + newProp("MemoryLimit", uint64(c.Memory))) } // TODO: MemoryReservation and MemorySwap not available in systemd if c.CpuShares != 0 { properties = append(properties, - systemd.Property{"CPUShares", dbus.MakeVariant(uint64(c.CpuShares))}) + newProp("CPUShares", uint64(c.CpuShares))) } if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil { @@ -140,57 +137,42 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { } - // we need to manually join the freezer cgroup in systemd because it does not currently support it - // via the dbus api + // we need to manually join the freezer and cpuset cgroup in systemd + // because it does not currently support it via the dbus api. 
if err := joinFreezer(c, pid); err != nil { return nil, err } - if c.CpusetCpus != "" { - if err := joinCpuset(c, pid); err != nil { - return nil, err - } + if err := joinCpuset(c, pid); err != nil { + return nil, err } - return res, nil -} - -func writeFile(dir, file, data string) error { - return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) -} - -func (c *systemdCgroup) Paths() (map[string]string, error) { paths := make(map[string]string) - - for sysname := range subsystems { - subsystemPath, err := getSubsystemPath(c.cgroup, sysname) + for _, sysname := range []string{ + "devices", + "memory", + "cpu", + "cpuset", + "cpuacct", + "blkio", + "perf_event", + "freezer", + } { + subsystemPath, err := getSubsystemPath(res.cgroup, sysname) if err != nil { // Don't fail if a cgroup hierarchy was not found, just skip this subsystem if cgroups.IsNotFound(err) { continue } - return nil, err } - paths[sysname] = subsystemPath } - return paths, nil } -func (c *systemdCgroup) Cleanup() error { - // systemd cleans up, we don't need to do much - paths, err := c.Paths() - if err != nil { - return err - } - - for _, path := range paths { - os.RemoveAll(path) - } - - return nil +func writeFile(dir, file, data string) error { + return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) } func joinFreezer(c *cgroups.Cgroup, pid int) error { @@ -260,35 +242,6 @@ func getUnitName(c *cgroups.Cgroup) string { return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name) } -/* - * This would be nicer to get from the systemd API when accounting - * is enabled, but sadly there is no way to do that yet. - * The lack of this functionality in the API & the approach taken - * is guided by - * http://www.freedesktop.org/wiki/Software/systemd/ControlGroupInterface/#readingaccountinginformation. 
- */ -func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) { - stats := cgroups.NewStats() - - for sysname, sys := range subsystems { - subsystemPath, err := getSubsystemPath(c, sysname) - if err != nil { - // Don't fail if a cgroup hierarchy was not found, just skip this subsystem - if cgroups.IsNotFound(err) { - continue - } - - return nil, err - } - - if err := sys.GetStats(subsystemPath, stats); err != nil { - return nil, err - } - } - - return stats, nil -} - // Atm we can't use the systemd device support because of two missing things: // * Support for wildcards to allow mknod on any device // * Support for wildcards to allow /dev/pts support @@ -360,5 +313,5 @@ func joinCpuset(c *cgroups.Cgroup, pid int) error { s := &fs.CpusetGroup{} - return s.SetDir(path, c.CpusetCpus, pid) + return s.SetDir(path, c.CpusetCpus, c.CpusetMems, pid) } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/utils.go b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/utils.go index 77a3c0d7..224a20b9 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/utils.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/utils.go @@ -189,6 +189,17 @@ func EnterPid(cgroupPaths map[string]string, pid int) error { } } } - return nil } + +// RemovePaths iterates over the provided paths removing them. +// If an error is encountered the removal proceeds and the first error is +// returned to ensure a partial removal is not possible. 
+func RemovePaths(paths map[string]string) (err error) { + for _, path := range paths { + if rerr := os.RemoveAll(path); err == nil { + err = rerr + } + } + return err +} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/config.go b/Godeps/_workspace/src/github.com/docker/libcontainer/config.go index 1fb377dc..94c2bd98 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/config.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/config.go @@ -10,6 +10,13 @@ type MountConfig mount.MountConfig type Network network.Network +// Namespace defines configuration for each namespace. It specifies an +// alternate path that is able to be joined via setns. +type Namespace struct { + Name string `json:"name"` + Path string `json:"path,omitempty"` +} + // Config defines configuration options for executing a process inside a contained environment. type Config struct { // Mount specific options. @@ -38,7 +45,7 @@ type Config struct { // Namespaces specifies the container's namespaces that it should setup when cloning the init process // If a namespace is not provided that namespace is shared from the container's parent process - Namespaces map[string]bool `json:"namespaces,omitempty"` + Namespaces []Namespace `json:"namespaces,omitempty"` // Capabilities specify the capabilities to keep when executing the process inside the container // All capbilities not specified will be dropped from the processes capability mask @@ -65,6 +72,10 @@ type Config struct { // RestrictSys will remount /proc/sys, /sys, and mask over sysrq-trigger as well as /proc/irq and // /proc/bus RestrictSys bool `json:"restrict_sys,omitempty"` + + // Rlimits specifies the resource limits, such as max open files, to set in the container + // If Rlimits are not set, the container will inherit rlimits from the parent process + Rlimits []Rlimit `json:"rlimits,omitempty"` } // Routes can be specified to create entries in the route table as the container is started @@ -87,3 
+98,9 @@ type Route struct { // The device to set this route up for, for example: eth0 InterfaceName string `json:"interface_name,omitempty"` } + +type Rlimit struct { + Type int `json:"type,omitempty"` + Hard uint64 `json:"hard,omitempty"` + Soft uint64 `json:"soft,omitempty"` +} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/config_test.go b/Godeps/_workspace/src/github.com/docker/libcontainer/config_test.go index 59812811..b4e16bf0 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/config_test.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/config_test.go @@ -64,12 +64,12 @@ func TestConfigJsonFormat(t *testing.T) { t.Fail() } - if !container.Namespaces["NEWNET"] { + if getNamespaceIndex(container, "NEWNET") == -1 { t.Log("namespaces should contain NEWNET") t.Fail() } - if container.Namespaces["NEWUSER"] { + if getNamespaceIndex(container, "NEWUSER") != -1 { t.Log("namespaces should not contain NEWUSER") t.Fail() } @@ -158,3 +158,12 @@ func TestSelinuxLabels(t *testing.T) { t.Fatalf("expected mount label %q but received %q", label, container.MountConfig.MountLabel) } } + +func getNamespaceIndex(config *Config, name string) int { + for i, v := range config.Namespaces { + if v.Name == name { + return i + } + } + return -1 +} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/devices/devices.go b/Godeps/_workspace/src/github.com/docker/libcontainer/devices/devices.go index 5bf80e8c..8e86d952 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/devices/devices.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/devices/devices.go @@ -103,7 +103,7 @@ func getDeviceNodes(path string) ([]*Device, error) { switch { case f.IsDir(): switch f.Name() { - case "pts", "shm", "fd": + case "pts", "shm", "fd", "mqueue": continue default: sub, err := getDeviceNodes(filepath.Join(path, f.Name())) diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/integration/exec_test.go 
b/Godeps/_workspace/src/github.com/docker/libcontainer/integration/exec_test.go index 96099189..cf749efb 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/integration/exec_test.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/integration/exec_test.go @@ -1,8 +1,11 @@ package integration import ( + "os" "strings" "testing" + + "github.com/docker/libcontainer" ) func TestExecPS(t *testing.T) { @@ -36,3 +39,152 @@ func TestExecPS(t *testing.T) { t.Fatalf("expected output %q but received %q", expected, actual) } } + +func TestIPCPrivate(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootFs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + l, err := os.Readlink("/proc/1/ns/ipc") + if err != nil { + t.Fatal(err) + } + + config := newTemplateConfig(rootfs) + buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") + if err != nil { + t.Fatal(err) + } + + if exitCode != 0 { + t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) + } + + if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l { + t.Fatalf("ipc link should be private to the conatiner but equals host %q %q", actual, l) + } +} + +func TestIPCHost(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootFs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + l, err := os.Readlink("/proc/1/ns/ipc") + if err != nil { + t.Fatal(err) + } + + config := newTemplateConfig(rootfs) + i := getNamespaceIndex(config, "NEWIPC") + config.Namespaces = append(config.Namespaces[:i], config.Namespaces[i+1:]...) + buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") + if err != nil { + t.Fatal(err) + } + + if exitCode != 0 { + t.Fatalf("exit code not 0. 
code %d stderr %q", exitCode, buffers.Stderr) + } + + if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l { + t.Fatalf("ipc link not equal to host link %q %q", actual, l) + } +} + +func TestIPCJoinPath(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootFs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + l, err := os.Readlink("/proc/1/ns/ipc") + if err != nil { + t.Fatal(err) + } + + config := newTemplateConfig(rootfs) + i := getNamespaceIndex(config, "NEWIPC") + config.Namespaces[i].Path = "/proc/1/ns/ipc" + + buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc") + if err != nil { + t.Fatal(err) + } + + if exitCode != 0 { + t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr) + } + + if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l { + t.Fatalf("ipc link not equal to host link %q %q", actual, l) + } +} + +func TestIPCBadPath(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootFs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + i := getNamespaceIndex(config, "NEWIPC") + config.Namespaces[i].Path = "/proc/1/ns/ipcc" + + _, _, err = runContainer(config, "", "true") + if err == nil { + t.Fatal("container succeded with bad ipc path") + } +} + +func TestRlimit(t *testing.T) { + if testing.Short() { + return + } + + rootfs, err := newRootFs() + if err != nil { + t.Fatal(err) + } + defer remove(rootfs) + + config := newTemplateConfig(rootfs) + out, _, err := runContainer(config, "", "/bin/sh", "-c", "ulimit -n") + if err != nil { + t.Fatal(err) + } + if limit := strings.TrimSpace(out.Stdout.String()); limit != "1024" { + t.Fatalf("expected rlimit to be 1024, got %s", limit) + } +} + +func getNamespaceIndex(config *libcontainer.Config, name string) int { + for i, v := range config.Namespaces { + if v.Name == name { + return i + } + } + return -1 +} diff --git 
a/Godeps/_workspace/src/github.com/docker/libcontainer/integration/init_test.go b/Godeps/_workspace/src/github.com/docker/libcontainer/integration/init_test.go index a0570f32..9954c0f8 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/integration/init_test.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/integration/init_test.go @@ -6,7 +6,6 @@ import ( "runtime" "github.com/docker/libcontainer/namespaces" - "github.com/docker/libcontainer/syncpipe" ) // init runs the libcontainer initialization code because of the busybox style needs @@ -27,12 +26,7 @@ func init() { log.Fatal(err) } - syncPipe, err := syncpipe.NewSyncPipeFromFd(0, 3) - if err != nil { - log.Fatalf("unable to create sync pipe: %s", err) - } - - if err := namespaces.Init(container, rootfs, "", syncPipe, os.Args[3:]); err != nil { + if err := namespaces.Init(container, rootfs, "", os.NewFile(3, "pipe"), os.Args[3:]); err != nil { log.Fatalf("unable to initialize for container: %s", err) } os.Exit(1) diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/integration/template_test.go b/Godeps/_workspace/src/github.com/docker/libcontainer/integration/template_test.go index 1805eba9..f37070ff 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/integration/template_test.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/integration/template_test.go @@ -1,6 +1,8 @@ package integration import ( + "syscall" + "github.com/docker/libcontainer" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/devices" @@ -30,12 +32,12 @@ func newTemplateConfig(rootfs string) *libcontainer.Config { "KILL", "AUDIT_WRITE", }, - Namespaces: map[string]bool{ - "NEWNS": true, - "NEWUTS": true, - "NEWIPC": true, - "NEWPID": true, - "NEWNET": true, + Namespaces: []libcontainer.Namespace{ + {Name: "NEWNS"}, + {Name: "NEWUTS"}, + {Name: "NEWIPC"}, + {Name: "NEWPID"}, + {Name: "NEWNET"}, }, Cgroups: &cgroups.Cgroup{ Parent: "integration", @@ -60,5 
+62,12 @@ func newTemplateConfig(rootfs string) *libcontainer.Config { Gateway: "localhost", }, }, + Rlimits: []libcontainer.Rlimit{ + { + Type: syscall.RLIMIT_NOFILE, + Hard: uint64(1024), + Soft: uint64(1024), + }, + }, } } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/label/label.go b/Godeps/_workspace/src/github.com/docker/libcontainer/label/label.go index ce60296e..5a540fd5 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/label/label.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/label/label.go @@ -25,6 +25,10 @@ func SetFileLabel(path string, fileLabel string) error { return nil } +func SetFileCreateLabel(fileLabel string) error { + return nil +} + func Relabel(path string, fileLabel string, relabel string) error { return nil } @@ -43,3 +47,15 @@ func ReserveLabel(label string) error { func UnreserveLabel(label string) error { return nil } + +// DupSecOpt takes an process label and returns security options that +// can be used to set duplicate labels on future container processes +func DupSecOpt(src string) []string { + return nil +} + +// DisableSecOpt returns a security opt that can disable labeling +// support for future container processes +func DisableSecOpt() []string { + return nil +} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/label/label_selinux.go b/Godeps/_workspace/src/github.com/docker/libcontainer/label/label_selinux.go index 65b84797..5983031a 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/label/label_selinux.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/label/label_selinux.go @@ -17,7 +17,6 @@ func InitLabels(options []string) (string, string, error) { if !selinux.SelinuxEnabled() { return "", "", nil } - var err error processLabel, mountLabel := selinux.GetLxcContexts() if processLabel != "" { pcon := selinux.NewContext(processLabel) @@ -38,7 +37,7 @@ func InitLabels(options []string) (string, string, error) { processLabel = pcon.Get() 
mountLabel = mcon.Get() } - return processLabel, mountLabel, err + return processLabel, mountLabel, nil } // DEPRECATED: The GenLabels function is only to be used during the transition to the official API. @@ -88,6 +87,14 @@ func SetFileLabel(path string, fileLabel string) error { return nil } +// Tell the kernel the label for all files to be created +func SetFileCreateLabel(fileLabel string) error { + if selinux.SelinuxEnabled() { + return selinux.Setfscreatecon(fileLabel) + } + return nil +} + // Change the label of path to the filelabel string. If the relabel string // is "z", relabel will change the MCS label to s0. This will allow all // containers to share the content. If the relabel string is a "Z" then @@ -130,3 +137,15 @@ func UnreserveLabel(label string) error { selinux.FreeLxcContexts(label) return nil } + +// DupSecOpt takes an process label and returns security options that +// can be used to set duplicate labels on future container processes +func DupSecOpt(src string) []string { + return selinux.DupSecOpt(src) +} + +// DisableSecOpt returns a security opt that can disable labeling +// support for future container processes +func DisableSecOpt() []string { + return selinux.DisableSecOpt() +} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/label/label_selinux_test.go b/Godeps/_workspace/src/github.com/docker/libcontainer/label/label_selinux_test.go index c83654f6..8629353f 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/label/label_selinux_test.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/label/label_selinux_test.go @@ -3,6 +3,7 @@ package label import ( + "strings" "testing" "github.com/docker/libcontainer/selinux" @@ -33,7 +34,7 @@ func TestInit(t *testing.T) { t.Fatal(err) } if plabel != "user_u:user_r:user_t:s0:c1,c15" || mlabel != "user_u:object_r:svirt_sandbox_file_t:s0:c1,c15" { - t.Log("InitLabels User Failed") + t.Log("InitLabels User Match Failed") t.Log(plabel, mlabel) t.Fatal(err) } @@ 
-46,3 +47,43 @@ func TestInit(t *testing.T) { } } } +func TestDuplicateLabel(t *testing.T) { + secopt := DupSecOpt("system_u:system_r:svirt_lxc_net_t:s0:c1,c2") + t.Log(secopt) + for _, opt := range secopt { + con := strings.SplitN(opt, ":", 3) + if len(con) != 3 || con[0] != "label" { + t.Errorf("Invalid DupSecOpt return value") + continue + } + if con[1] == "user" { + if con[2] != "system_u" { + t.Errorf("DupSecOpt Failed user incorrect") + } + continue + } + if con[1] == "role" { + if con[2] != "system_r" { + t.Errorf("DupSecOpt Failed role incorrect") + } + continue + } + if con[1] == "type" { + if con[2] != "svirt_lxc_net_t" { + t.Errorf("DupSecOpt Failed type incorrect") + } + continue + } + if con[1] == "level" { + if con[2] != "s0:c1,c2" { + t.Errorf("DupSecOpt Failed level incorrect") + } + continue + } + t.Errorf("DupSecOpt Failed invalid field %q", con[1]) + } + secopt = DisableSecOpt() + if secopt[0] != "label:disable" { + t.Errorf("DisableSecOpt Failed level incorrect") + } +} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/mount/init.go b/Godeps/_workspace/src/github.com/docker/libcontainer/mount/init.go index ea2b7327..a2c3d520 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/mount/init.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/mount/init.go @@ -97,7 +97,7 @@ func InitializeMountNamespace(rootfs, console string, sysReadonly bool, mountCon return nil } -// mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts +// mountSystem sets up linux specific system mounts like mqueue, sys, proc, shm, and devpts // inside the mount namespace func mountSystem(rootfs string, sysReadonly bool, mountConfig *MountConfig) error { for _, m := range newSystemMounts(rootfs, mountConfig.MountLabel, sysReadonly) { @@ -168,6 +168,7 @@ func newSystemMounts(rootfs, mountLabel string, sysReadonly bool) []mount { {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: 
defaultMountFlags}, {source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)}, {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)}, + {source: "mqueue", path: filepath.Join(rootfs, "dev", "mqueue"), device: "mqueue", flags: defaultMountFlags}, {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)}, } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/exec.go b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/exec.go index 4440ccd0..b7873edd 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/exec.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/exec.go @@ -3,6 +3,7 @@ package namespaces import ( + "encoding/json" "io" "os" "os/exec" @@ -13,7 +14,6 @@ import ( "github.com/docker/libcontainer/cgroups/fs" "github.com/docker/libcontainer/cgroups/systemd" "github.com/docker/libcontainer/network" - "github.com/docker/libcontainer/syncpipe" "github.com/docker/libcontainer/system" ) @@ -22,19 +22,17 @@ import ( // Exec performs setup outside of a namespace so that a container can be // executed. Exec is a high level function for working with container namespaces. 
func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Writer, console, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) { - var ( - err error - ) + var err error // create a pipe so that we can syncronize with the namespaced process and - // pass the veth name to the child - syncPipe, err := syncpipe.NewSyncPipe() + // pass the state and configuration to the child process + parent, child, err := newInitPipe() if err != nil { return -1, err } - defer syncPipe.Close() + defer parent.Close() - command := createCommand(container, console, dataPath, os.Args[0], syncPipe.Child(), args) + command := createCommand(container, console, dataPath, os.Args[0], child, args) // Note: these are only used in non-tty mode // if there is a tty for the container it will be opened within the namespace and the // fds will be duped to stdin, stdiout, and stderr @@ -43,39 +41,42 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri command.Stderr = stderr if err := command.Start(); err != nil { + child.Close() return -1, err } + child.Close() - // Now we passed the pipe to the child, close our side - syncPipe.CloseChild() + terminate := func(terr error) (int, error) { + // TODO: log the errors for kill and wait + command.Process.Kill() + command.Wait() + return -1, terr + } started, err := system.GetProcessStartTime(command.Process.Pid) if err != nil { - return -1, err + return terminate(err) } // Do this before syncing with child so that no children // can escape the cgroup - cgroupRef, err := SetupCgroups(container, command.Process.Pid) + cgroupPaths, err := SetupCgroups(container, command.Process.Pid) if err != nil { - command.Process.Kill() - command.Wait() - return -1, err - } - defer cgroupRef.Cleanup() - - cgroupPaths, err := cgroupRef.Paths() - if err != nil { - command.Process.Kill() - command.Wait() - return -1, err + return terminate(err) } + defer 
cgroups.RemovePaths(cgroupPaths) var networkState network.NetworkState - if err := InitializeNetworking(container, command.Process.Pid, syncPipe, &networkState); err != nil { - command.Process.Kill() - command.Wait() - return -1, err + if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil { + return terminate(err) + } + // send the state to the container's init process then shutdown writes for the parent + if err := json.NewEncoder(parent).Encode(networkState); err != nil { + return terminate(err) + } + // shutdown writes for the parent side of the pipe + if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil { + return terminate(err) } state := &libcontainer.State{ @@ -86,17 +87,18 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri } if err := libcontainer.SaveState(dataPath, state); err != nil { - command.Process.Kill() - command.Wait() - return -1, err + return terminate(err) } defer libcontainer.DeleteState(dataPath) - // Sync with child - if err := syncPipe.ReadFromChild(); err != nil { - command.Process.Kill() - command.Wait() - return -1, err + // wait for the child process to fully complete and receive an error message + // if one was encoutered + var ierr *initError + if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF { + return terminate(err) + } + if ierr != nil { + return terminate(ierr) } if startCallback != nil { @@ -108,7 +110,6 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri return -1, err } } - return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } @@ -129,16 +130,6 @@ func DefaultCreateCommand(container *libcontainer.Config, console, dataPath, ini "data_path=" + dataPath, } - /* - TODO: move user and wd into env - if user != "" { - env = append(env, "user="+user) - } - if workingDir != "" { - env = append(env, "wd="+workingDir) - } - */ - command := exec.Command(init, 
append([]string{"init", "--"}, args...)...) // make sure the process is executed inside the context of the rootfs command.Dir = container.RootFs @@ -157,23 +148,20 @@ func DefaultCreateCommand(container *libcontainer.Config, console, dataPath, ini // SetupCgroups applies the cgroup restrictions to the process running in the container based // on the container's configuration -func SetupCgroups(container *libcontainer.Config, nspid int) (cgroups.ActiveCgroup, error) { +func SetupCgroups(container *libcontainer.Config, nspid int) (map[string]string, error) { if container.Cgroups != nil { c := container.Cgroups - if systemd.UseSystemd() { return systemd.Apply(c, nspid) } - return fs.Apply(c, nspid) } - - return nil, nil + return map[string]string{}, nil } // InitializeNetworking creates the container's network stack outside of the namespace and moves // interfaces into the container's net namespaces if necessary -func InitializeNetworking(container *libcontainer.Config, nspid int, pipe *syncpipe.SyncPipe, networkState *network.NetworkState) error { +func InitializeNetworking(container *libcontainer.Config, nspid int, networkState *network.NetworkState) error { for _, config := range container.Networks { strategy, err := network.GetStrategy(config.Type) if err != nil { @@ -183,18 +171,5 @@ func InitializeNetworking(container *libcontainer.Config, nspid int, pipe *syncp return err } } - return pipe.SendToChild(networkState) -} - -// GetNamespaceFlags parses the container's Namespaces options to set the correct -// flags on clone, unshare, and setns -func GetNamespaceFlags(namespaces map[string]bool) (flag int) { - for key, enabled := range namespaces { - if enabled { - if ns := GetNamespace(key); ns != nil { - flag |= ns.Value - } - } - } - return flag + return nil } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/execin.go b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/execin.go index 53e676ac..430dc72f 100644 --- 
a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/execin.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/execin.go @@ -3,6 +3,7 @@ package namespaces import ( + "encoding/json" "fmt" "io" "os" @@ -15,7 +16,6 @@ import ( "github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/label" - "github.com/docker/libcontainer/syncpipe" "github.com/docker/libcontainer/system" ) @@ -41,11 +41,11 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs } } - pipe, err := syncpipe.NewSyncPipe() + parent, child, err := newInitPipe() if err != nil { return -1, err } - defer pipe.Close() + defer parent.Close() // Note: these are only used in non-tty mode // if there is a tty for the container it will be opened within the namespace and the @@ -53,23 +53,28 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs cmd.Stdin = stdin cmd.Stdout = stdout cmd.Stderr = stderr - - cmd.ExtraFiles = []*os.File{pipe.Child()} + cmd.ExtraFiles = []*os.File{child} if err := cmd.Start(); err != nil { + child.Close() return -1, err } - pipe.CloseChild() + child.Close() + + terminate := func(terr error) (int, error) { + // TODO: log the errors for kill and wait + cmd.Process.Kill() + cmd.Wait() + return -1, terr + } // Enter cgroups. 
if err := EnterCgroups(state, cmd.Process.Pid); err != nil { - return -1, err + return terminate(err) } - if err := pipe.SendToChild(container); err != nil { - cmd.Process.Kill() - cmd.Wait() - return -1, err + if err := json.NewEncoder(parent).Encode(container); err != nil { + return terminate(err) } if startCallback != nil { @@ -81,7 +86,6 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs return -1, err } } - return cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil } @@ -107,7 +111,7 @@ func FinalizeSetns(container *libcontainer.Config, args []string) error { } } - if err := system.Execv(args[0], args[0:], container.Env); err != nil { + if err := system.Execv(args[0], args[0:], os.Environ()); err != nil { return err } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/init.go b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/init.go index 4c2b3327..5c7e1a71 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/init.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/init.go @@ -3,7 +3,9 @@ package namespaces import ( + "encoding/json" "fmt" + "io/ioutil" "os" "strings" "syscall" @@ -17,7 +19,6 @@ import ( "github.com/docker/libcontainer/network" "github.com/docker/libcontainer/security/capabilities" "github.com/docker/libcontainer/security/restrict" - "github.com/docker/libcontainer/syncpipe" "github.com/docker/libcontainer/system" "github.com/docker/libcontainer/user" "github.com/docker/libcontainer/utils" @@ -29,11 +30,22 @@ import ( // and other options required for the new container. // The caller of Init function has to ensure that the go runtime is locked to an OS thread // (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended. 
-func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syncPipe *syncpipe.SyncPipe, args []string) (err error) { +func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pipe *os.File, args []string) (err error) { defer func() { + // if we have an error during the initialization of the container's init then send it back to the + // parent process in the form of an initError. if err != nil { - syncPipe.ReportChildError(err) + // ensure that any data sent from the parent is consumed so it doesn't + // receive ECONNRESET when the child writes to the pipe. + ioutil.ReadAll(pipe) + if err := json.NewEncoder(pipe).Encode(initError{ + Message: err.Error(), + }); err != nil { + panic(err) + } } + // ensure that this pipe is always closed + pipe.Close() }() rootfs, err := utils.ResolveRootfs(uncleanRootfs) @@ -49,10 +61,13 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn // We always read this as it is a way to sync with the parent as well var networkState *network.NetworkState - if err := syncPipe.ReadFromParent(&networkState); err != nil { + if err := json.NewDecoder(pipe).Decode(&networkState); err != nil { + return err + } + // join any namespaces via a path to the namespace fd if provided + if err := joinExistingNamespaces(container.Namespaces); err != nil { return err } - if consolePath != "" { if err := console.OpenAndDup(consolePath); err != nil { return err @@ -66,6 +81,7 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn return fmt.Errorf("setctty %s", err) } } + if err := setupNetwork(container, networkState); err != nil { return fmt.Errorf("setup networking %s", err) } @@ -73,6 +89,10 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn return fmt.Errorf("setup route %s", err) } + if err := setupRlimits(container); err != nil { + return fmt.Errorf("setup rlimits %s", err) + } + label.Init() if err := 
mount.InitializeMountNamespace(rootfs, @@ -84,7 +104,7 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn if container.Hostname != "" { if err := syscall.Sethostname([]byte(container.Hostname)); err != nil { - return fmt.Errorf("sethostname %s", err) + return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err) } } @@ -151,26 +171,43 @@ func RestoreParentDeathSignal(old int) error { // SetupUser changes the groups, gid, and uid for the user inside the container func SetupUser(u string) error { - uid, gid, suppGids, home, err := user.GetUserGroupSupplementaryHome(u, syscall.Getuid(), syscall.Getgid(), "/") + // Set up defaults. + defaultExecUser := user.ExecUser{ + Uid: syscall.Getuid(), + Gid: syscall.Getgid(), + Home: "/", + } + + passwdFile, err := user.GetPasswdFile() + if err != nil { + return err + } + + groupFile, err := user.GetGroupFile() + if err != nil { + return err + } + + execUser, err := user.GetExecUserFile(u, &defaultExecUser, passwdFile, groupFile) if err != nil { return fmt.Errorf("get supplementary groups %s", err) } - if err := syscall.Setgroups(suppGids); err != nil { + if err := syscall.Setgroups(execUser.Sgids); err != nil { return fmt.Errorf("setgroups %s", err) } - if err := syscall.Setgid(gid); err != nil { + if err := system.Setgid(execUser.Gid); err != nil { return fmt.Errorf("setgid %s", err) } - if err := syscall.Setuid(uid); err != nil { + if err := system.Setuid(execUser.Uid); err != nil { return fmt.Errorf("setuid %s", err) } // if we didn't get HOME already, set it based on the user's HOME if envHome := os.Getenv("HOME"); envHome == "" { - if err := os.Setenv("HOME", home); err != nil { + if err := os.Setenv("HOME", execUser.Home); err != nil { return fmt.Errorf("set HOME %s", err) } } @@ -205,6 +242,16 @@ func setupRoute(container *libcontainer.Config) error { return nil } +func setupRlimits(container *libcontainer.Config) error { + for _, rlimit := range container.Rlimits { + l := 
&syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft} + if err := syscall.Setrlimit(rlimit.Type, l); err != nil { + return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) + } + } + return nil +} + // FinalizeNamespace drops the caps, sets the correct user // and working dir, and closes any leaky file descriptors // before execing the command inside the namespace @@ -261,3 +308,22 @@ func LoadContainerEnvironment(container *libcontainer.Config) error { } return nil } + +// joinExistingNamespaces gets all the namespace paths specified for the container and +// does a setns on the namespace fd so that the current process joins the namespace. +func joinExistingNamespaces(namespaces []libcontainer.Namespace) error { + for _, ns := range namespaces { + if ns.Path != "" { + f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0) + if err != nil { + return err + } + err = system.Setns(f.Fd(), uintptr(namespaceInfo[ns.Name])) + f.Close() + if err != nil { + return err + } + } + } + return nil +} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/nsenter/nsenter.c b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/nsenter/nsenter.c index 2869dd14..f060f63b 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/nsenter/nsenter.c +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/nsenter/nsenter.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +89,11 @@ void nsenter() return; } + if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) == -1) { + fprintf(stderr, "nsenter: failed to set child subreaper: %s", strerror(errno)); + exit(1); + } + static const struct option longopts[] = { {"nspid", required_argument, NULL, 'n'}, {"console", required_argument, NULL, 't'}, diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types.go b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types.go deleted file mode 100644 index 
16ce981e..00000000 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types.go +++ /dev/null @@ -1,50 +0,0 @@ -package namespaces - -import "errors" - -type ( - Namespace struct { - Key string `json:"key,omitempty"` - Value int `json:"value,omitempty"` - File string `json:"file,omitempty"` - } - Namespaces []*Namespace -) - -// namespaceList is used to convert the libcontainer types -// into the names of the files located in /proc//ns/* for -// each namespace -var ( - namespaceList = Namespaces{} - ErrUnkownNamespace = errors.New("Unknown namespace") - ErrUnsupported = errors.New("Unsupported method") -) - -func (ns *Namespace) String() string { - return ns.Key -} - -func GetNamespace(key string) *Namespace { - for _, ns := range namespaceList { - if ns.Key == key { - cpy := *ns - return &cpy - } - } - return nil -} - -// Contains returns true if the specified Namespace is -// in the slice -func (n Namespaces) Contains(ns string) bool { - return n.Get(ns) != nil -} - -func (n Namespaces) Get(ns string) *Namespace { - for _, nsp := range n { - if nsp != nil && nsp.Key == ns { - return nsp - } - } - return nil -} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types_linux.go b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types_linux.go deleted file mode 100644 index d3079944..00000000 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types_linux.go +++ /dev/null @@ -1,16 +0,0 @@ -package namespaces - -import ( - "syscall" -) - -func init() { - namespaceList = Namespaces{ - {Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt"}, - {Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts"}, - {Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc"}, - {Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user"}, - {Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid"}, - {Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net"}, - } -} diff --git 
a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types_test.go b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types_test.go deleted file mode 100644 index 4d0a72c9..00000000 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types_test.go +++ /dev/null @@ -1,30 +0,0 @@ -package namespaces - -import ( - "testing" -) - -func TestNamespacesContains(t *testing.T) { - ns := Namespaces{ - GetNamespace("NEWPID"), - GetNamespace("NEWNS"), - GetNamespace("NEWUTS"), - } - - if ns.Contains("NEWNET") { - t.Fatal("namespaces should not contain NEWNET") - } - - if !ns.Contains("NEWPID") { - t.Fatal("namespaces should contain NEWPID but does not") - } - - withNil := Namespaces{ - GetNamespace("UNDEFINED"), // this element will be nil - GetNamespace("NEWPID"), - } - - if !withNil.Contains("NEWPID") { - t.Fatal("namespaces should contain NEWPID but does not") - } -} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/utils.go b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/utils.go new file mode 100644 index 00000000..556ea669 --- /dev/null +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/utils.go @@ -0,0 +1,45 @@ +// +build linux + +package namespaces + +import ( + "os" + "syscall" + + "github.com/docker/libcontainer" +) + +type initError struct { + Message string `json:"message,omitempty"` +} + +func (i initError) Error() string { + return i.Message +} + +var namespaceInfo = map[string]int{ + "NEWNET": syscall.CLONE_NEWNET, + "NEWNS": syscall.CLONE_NEWNS, + "NEWUSER": syscall.CLONE_NEWUSER, + "NEWIPC": syscall.CLONE_NEWIPC, + "NEWUTS": syscall.CLONE_NEWUTS, + "NEWPID": syscall.CLONE_NEWPID, +} + +// New returns a newly initialized Pipe for communication between processes +func newInitPipe() (parent *os.File, child *os.File, err error) { + fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) + if err != nil { + return nil, 
nil, err + } + return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil +} + +// GetNamespaceFlags parses the container's Namespaces options to set the correct +// flags on clone, unshare, and setns +func GetNamespaceFlags(namespaces []libcontainer.Namespace) (flag int) { + for _, v := range namespaces { + flag |= namespaceInfo[v.Name] + } + return flag +} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_linux.go b/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_linux.go index c858b112..1bf70430 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_linux.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_linux.go @@ -7,6 +7,7 @@ import ( "math/rand" "net" "os" + "path/filepath" "sync/atomic" "syscall" "unsafe" @@ -575,6 +576,31 @@ func NetworkSetMTU(iface *net.Interface, mtu int) error { return s.HandleAck(wb.Seq) } +// Set link queue length +// This is identical to running: ip link set dev $name txqueuelen $QLEN +func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error { + s, err := getNetlinkSocket() + if err != nil { + return err + } + defer s.Close() + + wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK) + + msg := newIfInfomsg(syscall.AF_UNSPEC) + msg.Type = syscall.RTM_SETLINK + msg.Flags = syscall.NLM_F_REQUEST + msg.Index = int32(iface.Index) + msg.Change = DEFAULT_CHANGE + wb.AddData(msg) + wb.AddData(uint32Attr(syscall.IFLA_TXQLEN, uint32(txQueueLen))) + + if err := s.Send(wb); err != nil { + return err + } + return s.HandleAck(wb.Seq) +} + func networkMasterAction(iface *net.Interface, rtattr *RtAttr) error { s, err := getNetlinkSocket() if err != nil { @@ -768,26 +794,38 @@ func NetworkLinkAddVlan(masterDev, vlanDev string, vlanId uint16) error { return s.HandleAck(wb.Seq) } -// Add MAC VLAN network interface with masterDev as its upper device -// This is identical to running: -// ip link 
add name $name link $masterdev type macvlan mode $mode -func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error { - s, err := getNetlinkSocket() - if err != nil { - return err - } - defer s.Close() +// MacVlan link has LowerDev, UpperDev and operates in Mode mode +// This simplifies the code when creating MacVlan or MacVtap interface +type MacVlanLink struct { + MasterDev string + SlaveDev string + mode string +} - macVlan := map[string]uint32{ +func (m MacVlanLink) Mode() uint32 { + modeMap := map[string]uint32{ "private": MACVLAN_MODE_PRIVATE, "vepa": MACVLAN_MODE_VEPA, "bridge": MACVLAN_MODE_BRIDGE, "passthru": MACVLAN_MODE_PASSTHRU, } + return modeMap[m.mode] +} + +// Add MAC VLAN network interface with masterDev as its upper device +// This is identical to running: +// ip link add name $name link $masterdev type macvlan mode $mode +func networkLinkMacVlan(dev_type string, mcvln *MacVlanLink) error { + s, err := getNetlinkSocket() + if err != nil { + return err + } + defer s.Close() + wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) - masterDevIfc, err := net.InterfaceByName(masterDev) + masterDevIfc, err := net.InterfaceByName(mcvln.MasterDev) if err != nil { return err } @@ -796,16 +834,16 @@ func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error { wb.AddData(msg) nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil) - newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated("macvlan")) + newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated(dev_type)) nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil) macVlanData := make([]byte, 4) - native.PutUint32(macVlanData, macVlan[mode]) + native.PutUint32(macVlanData, mcvln.Mode()) newRtAttrChild(nest2, IFLA_MACVLAN_MODE, macVlanData) wb.AddData(nest1) wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index))) - wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(macVlanDev))) + 
wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(mcvln.SlaveDev))) if err := s.Send(wb); err != nil { return err @@ -813,6 +851,22 @@ func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error { return s.HandleAck(wb.Seq) } +func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error { + return networkLinkMacVlan("macvlan", &MacVlanLink{ + MasterDev: masterDev, + SlaveDev: macVlanDev, + mode: mode, + }) +} + +func NetworkLinkAddMacVtap(masterDev, macVlanDev string, mode string) error { + return networkLinkMacVlan("macvtap", &MacVlanLink{ + MasterDev: masterDev, + SlaveDev: macVlanDev, + mode: mode, + }) +} + func networkLinkIpAction(action, flags int, ifa IfAddr) error { s, err := getNetlinkSocket() if err != nil { @@ -1002,28 +1056,23 @@ func AddRoute(destination, source, gateway, device string) error { } if source != "" { - srcIP, srcNet, err := net.ParseCIDR(source) - if err != nil { - return fmt.Errorf("source CIDR %s couldn't be parsed", source) + srcIP := net.ParseIP(source) + if srcIP == nil { + return fmt.Errorf("source IP %s couldn't be parsed", source) } srcFamily := getIpFamily(srcIP) if currentFamily != -1 && currentFamily != srcFamily { return fmt.Errorf("source and destination ip were not the same IP family") } currentFamily = srcFamily - srcLen, bits := srcNet.Mask.Size() - if srcLen == 0 && bits == 0 { - return fmt.Errorf("source CIDR %s generated a non-canonical Mask", source) - } msg.Family = uint8(srcFamily) - msg.Src_len = uint8(srcLen) var srcData []byte if srcFamily == syscall.AF_INET { srcData = srcIP.To4() } else { srcData = srcIP.To16() } - rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_SRC, srcData)) + rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_PREFSRC, srcData)) } if gateway != "" { @@ -1204,6 +1253,28 @@ func SetMacAddress(name, addr string) error { return nil } +func SetHairpinMode(iface *net.Interface, enabled bool) error { + sysPath := filepath.Join("/sys/class/net", iface.Name, 
"brport/hairpin_mode") + + sysFile, err := os.OpenFile(sysPath, os.O_WRONLY, 0) + if err != nil { + return err + } + defer sysFile.Close() + + var writeVal []byte + if enabled { + writeVal = []byte("1") + } else { + writeVal = []byte("0") + } + if _, err := sysFile.Write(writeVal); err != nil { + return err + } + + return nil +} + func ChangeName(iface *net.Interface, newName string) error { if len(newName) >= IFNAMSIZ { return fmt.Errorf("Interface name %s too long", newName) @@ -1224,5 +1295,6 @@ func ChangeName(iface *net.Interface, newName string) error { if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 { return errno } + return nil } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_linux_test.go b/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_linux_test.go index 0320c472..3f6511ab 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_linux_test.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_linux_test.go @@ -116,7 +116,7 @@ func TestNetworkSetMacAddress(t *testing.T) { ifcBeforeSet := readLink(t, tl.name) if err := NetworkSetMacAddress(ifcBeforeSet, macaddr); err != nil { - t.Fatalf("Could not set %s MAC address on %#v interface: err", macaddr, tl, err) + t.Fatalf("Could not set %s MAC address on %#v interface: %s", macaddr, tl, err) } ifcAfterSet := readLink(t, tl.name) @@ -140,7 +140,7 @@ func TestNetworkSetMTU(t *testing.T) { ifcBeforeSet := readLink(t, tl.name) if err := NetworkSetMTU(ifcBeforeSet, mtu); err != nil { - t.Fatalf("Could not set %d MTU on %#v interface: err", mtu, tl, err) + t.Fatalf("Could not set %d MTU on %#v interface: %s", mtu, tl, err) } ifcAfterSet := readLink(t, tl.name) @@ -248,6 +248,30 @@ func TestNetworkLinkAddMacVlan(t *testing.T) { readLink(t, tl.name) } +func TestNetworkLinkAddMacVtap(t *testing.T) { + if testing.Short() { + return 
+ } + + tl := struct { + name string + mode string + }{ + name: "tstVtap", + mode: "private", + } + masterLink := testLink{"tstEth", "dummy"} + + addLink(t, masterLink.name, masterLink.linkType) + defer deleteLink(t, masterLink.name) + + if err := NetworkLinkAddMacVtap(masterLink.name, tl.name, tl.mode); err != nil { + t.Fatalf("Unable to create %#v MAC VTAP interface: %s", tl, err) + } + + readLink(t, tl.name) +} + func TestAddDelNetworkIp(t *testing.T) { if testing.Short() { return @@ -280,6 +304,34 @@ func TestAddDelNetworkIp(t *testing.T) { } } +func TestAddRouteSourceSelection(t *testing.T) { + tstIp := "127.1.1.1" + tl := testLink{name: "tstEth", linkType: "dummy"} + + addLink(t, tl.name, tl.linkType) + defer deleteLink(t, tl.name) + + ip := net.ParseIP(tstIp) + mask := net.IPv4Mask(255, 255, 255, 255) + ipNet := &net.IPNet{IP: ip, Mask: mask} + + iface, err := net.InterfaceByName(tl.name) + if err != nil { + t.Fatalf("Lost created link %#v", tl) + } + + if err := NetworkLinkAddIp(iface, ip, ipNet); err != nil { + t.Fatalf("Could not add IP address %s to interface %#v: %s", ip.String(), iface, err) + } + + upLink(t, tl.name) + defer downLink(t, tl.name) + + if err := AddRoute("127.0.0.0/8", tstIp, "", tl.name); err != nil { + t.Fatalf("Failed to add route with source address") + } +} + func TestCreateVethPair(t *testing.T) { if testing.Short() { return diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_unsupported.go b/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_unsupported.go index 747cd1d8..4b11bf8b 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_unsupported.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_unsupported.go @@ -47,6 +47,10 @@ func NetworkSetMTU(iface *net.Interface, mtu int) error { return ErrNotImplemented } +func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error { + return ErrNotImplemented +} + func 
NetworkCreateVethPair(name1, name2 string, txQueueLen int) error { return ErrNotImplemented } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/network/netns.go b/Godeps/_workspace/src/github.com/docker/libcontainer/network/netns.go deleted file mode 100644 index 73cd8de5..00000000 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/network/netns.go +++ /dev/null @@ -1,39 +0,0 @@ -// +build linux - -package network - -import ( - "fmt" - "os" - "syscall" - - "github.com/docker/libcontainer/system" -) - -// crosbymichael: could make a network strategy that instead of returning veth pair names it returns a pid to an existing network namespace -type NetNS struct { -} - -func (v *NetNS) Create(n *Network, nspid int, networkState *NetworkState) error { - networkState.NsPath = n.NsPath - return nil -} - -func (v *NetNS) Initialize(config *Network, networkState *NetworkState) error { - if networkState.NsPath == "" { - return fmt.Errorf("nspath does is not specified in NetworkState") - } - - f, err := os.OpenFile(networkState.NsPath, os.O_RDONLY, 0) - if err != nil { - return fmt.Errorf("failed get network namespace fd: %v", err) - } - - if err := system.Setns(f.Fd(), syscall.CLONE_NEWNET); err != nil { - f.Close() - return fmt.Errorf("failed to setns current network namespace: %v", err) - } - - f.Close() - return nil -} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/network/network.go b/Godeps/_workspace/src/github.com/docker/libcontainer/network/network.go index 2c3499b6..ba8f6f74 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/network/network.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/network/network.go @@ -95,3 +95,11 @@ func SetMtu(name string, mtu int) error { } return netlink.NetworkSetMTU(iface, mtu) } + +func SetHairpinMode(name string, enabled bool) error { + iface, err := net.InterfaceByName(name) + if err != nil { + return err + } + return netlink.SetHairpinMode(iface, enabled) +} 
diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/network/strategy.go b/Godeps/_workspace/src/github.com/docker/libcontainer/network/strategy.go index be5ec93b..019fe62f 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/network/strategy.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/network/strategy.go @@ -13,7 +13,6 @@ var ( var strategies = map[string]NetworkStrategy{ "veth": &Veth{}, "loopback": &Loopback{}, - "netns": &NetNS{}, } // NetworkStrategy represents a specific network configuration for diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/network/types.go b/Godeps/_workspace/src/github.com/docker/libcontainer/network/types.go index ea0741be..dcf00420 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/network/types.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/network/types.go @@ -8,9 +8,6 @@ type Network struct { // Type sets the networks type, commonly veth and loopback Type string `json:"type,omitempty"` - // Path to network namespace - NsPath string `json:"ns_path,omitempty"` - // The bridge to use. Bridge string `json:"bridge,omitempty"` @@ -50,6 +47,4 @@ type NetworkState struct { VethHost string `json:"veth_host,omitempty"` // The name of the veth interface created inside the container for the child. VethChild string `json:"veth_child,omitempty"` - // Net namespace path. 
- NsPath string `json:"ns_path,omitempty"` } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/nsinit/init.go b/Godeps/_workspace/src/github.com/docker/libcontainer/nsinit/init.go index c091ee10..6df9b1d8 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/nsinit/init.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/nsinit/init.go @@ -8,7 +8,6 @@ import ( "github.com/codegangsta/cli" "github.com/docker/libcontainer/namespaces" - "github.com/docker/libcontainer/syncpipe" ) var ( @@ -41,12 +40,8 @@ func initAction(context *cli.Context) { log.Fatal(err) } - syncPipe, err := syncpipe.NewSyncPipeFromFd(0, uintptr(pipeFd)) - if err != nil { - log.Fatalf("unable to create sync pipe: %s", err) - } - - if err := namespaces.Init(container, rootfs, console, syncPipe, []string(context.Args())); err != nil { + pipe := os.NewFile(uintptr(pipeFd), "pipe") + if err := namespaces.Init(container, rootfs, console, pipe, []string(context.Args())); err != nil { log.Fatalf("unable to initialize for container: %s", err) } } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/nsinit/utils.go b/Godeps/_workspace/src/github.com/docker/libcontainer/nsinit/utils.go index 7f515594..6a8aafbf 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/nsinit/utils.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/nsinit/utils.go @@ -8,7 +8,6 @@ import ( "github.com/codegangsta/cli" "github.com/docker/libcontainer" - "github.com/docker/libcontainer/syncpipe" ) // rFunc is a function registration for calling after an execin @@ -59,16 +58,13 @@ func findUserArgs() []string { // loadConfigFromFd loads a container's config from the sync pipe that is provided by // fd 3 when running a process func loadConfigFromFd() (*libcontainer.Config, error) { - syncPipe, err := syncpipe.NewSyncPipeFromFd(0, 3) - if err != nil { - return nil, err - } + pipe := os.NewFile(3, "pipe") + defer pipe.Close() var config *libcontainer.Config - if 
err := syncPipe.ReadFromParent(&config); err != nil { + if err := json.NewDecoder(pipe).Decode(&config); err != nil { return nil, err } - return config, nil } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/apparmor.json b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/apparmor.json index f739df10..50421ec8 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/apparmor.json +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/apparmor.json @@ -176,13 +176,13 @@ "TERM=xterm" ], "hostname": "koye", - "namespaces": { - "NEWIPC": true, - "NEWNET": true, - "NEWNS": true, - "NEWPID": true, - "NEWUTS": true - }, + "namespaces": [ + {"name":"NEWIPC"}, + {"name": "NEWNET"}, + {"name": "NEWNS"}, + {"name": "NEWPID"}, + {"name": "NEWUTS"} + ], "networks": [ { "address": "127.0.0.1/0", diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/attach_to_bridge.json b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/attach_to_bridge.json index 0795e6c1..9b190293 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/attach_to_bridge.json +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/attach_to_bridge.json @@ -175,13 +175,13 @@ "TERM=xterm" ], "hostname": "koye", - "namespaces": { - "NEWIPC": true, - "NEWNET": true, - "NEWNS": true, - "NEWPID": true, - "NEWUTS": true - }, + "namespaces": [ + {"name": "NEWIPC"}, + {"name": "NEWNET"}, + {"name": "NEWNS"}, + {"name": "NEWPID"}, + {"name": "NEWUTS"} + ], "networks": [ { "address": "127.0.0.1/0", diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/minimal.json b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/minimal.json index 8d85ddf7..720be64f 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/minimal.json +++ 
b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/minimal.json @@ -181,13 +181,13 @@ "TERM=xterm" ], "hostname": "koye", - "namespaces": { - "NEWIPC": true, - "NEWNET": true, - "NEWNS": true, - "NEWPID": true, - "NEWUTS": true - }, + "namespaces": [ + {"name": "NEWIPC"}, + {"name": "NEWNET"}, + {"name": "NEWNS"}, + {"name": "NEWPID"}, + {"name": "NEWUTS"} + ], "networks": [ { "address": "127.0.0.1/0", diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/route_source_address_selection.json b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/route_source_address_selection.json new file mode 100644 index 00000000..f403996d --- /dev/null +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/route_source_address_selection.json @@ -0,0 +1,209 @@ +{ + "capabilities": [ + "CHOWN", + "DAC_OVERRIDE", + "FOWNER", + "MKNOD", + "NET_RAW", + "SETGID", + "SETUID", + "SETFCAP", + "SETPCAP", + "NET_BIND_SERVICE", + "SYS_CHROOT", + "KILL" + ], + "cgroups": { + "allowed_devices": [ + { + "cgroup_permissions": "m", + "major_number": -1, + "minor_number": -1, + "type": 99 + }, + { + "cgroup_permissions": "m", + "major_number": -1, + "minor_number": -1, + "type": 98 + }, + { + "cgroup_permissions": "rwm", + "major_number": 5, + "minor_number": 1, + "path": "/dev/console", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 4, + "path": "/dev/tty0", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 4, + "minor_number": 1, + "path": "/dev/tty1", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 136, + "minor_number": -1, + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 5, + "minor_number": 2, + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "major_number": 10, + "minor_number": 200, + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 3, + "path": 
"/dev/null", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 5, + "path": "/dev/zero", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 7, + "path": "/dev/full", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 5, + "path": "/dev/tty", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 9, + "path": "/dev/urandom", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 8, + "path": "/dev/random", + "type": 99 + } + ], + "name": "docker-koye", + "parent": "docker" + }, + "restrict_sys": true, + "mount_config": { + "device_nodes": [ + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 3, + "path": "/dev/null", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 5, + "path": "/dev/zero", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 7, + "path": "/dev/full", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 5, + "path": "/dev/tty", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 9, + "path": "/dev/urandom", + "type": 99 + }, + { + "cgroup_permissions": "rwm", + "file_mode": 438, + "major_number": 1, + "minor_number": 8, + "path": "/dev/random", + "type": 99 + } + ] + }, + "environment": [ + "HOME=/", + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "HOSTNAME=koye", + "TERM=xterm" + ], + "hostname": "koye", + "namespaces": [ + {"name": "NEWIPC"}, + {"name": "NEWNET"}, + {"name": "NEWNS"}, + {"name": "NEWPID"}, + {"name": "NEWUTS"} + ], + "networks": [ + { + "address": "127.0.0.1/0", + "gateway": 
"localhost", + "mtu": 1500, + "type": "loopback" + }, + { + "address": "172.17.0.101/16", + "bridge": "docker0", + "veth_prefix": "veth", + "mtu": 1500, + "type": "veth" + } + ], + "routes": [ + { + "destination": "0.0.0.0/0", + "source": "172.17.0.101", + "gateway": "172.17.42.1", + "interface_name": "eth0" + } + ], + "tty": true +} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/selinux.json b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/selinux.json index ce383e2c..cfb83e09 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/selinux.json +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/selinux.json @@ -177,13 +177,13 @@ "TERM=xterm" ], "hostname": "koye", - "namespaces": { - "NEWIPC": true, - "NEWNET": true, - "NEWNS": true, - "NEWPID": true, - "NEWUTS": true - }, + "namespaces": [ + {"name": "NEWIPC"}, + {"name": "NEWNET"}, + {"name": "NEWNS"}, + {"name": "NEWPID"}, + {"name": "NEWUTS"} + ], "networks": [ { "address": "127.0.0.1/0", diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/selinux/selinux.go b/Godeps/_workspace/src/github.com/docker/libcontainer/selinux/selinux.go index e0c90ee5..e5bd8209 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/selinux/selinux.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/selinux/selinux.go @@ -434,3 +434,28 @@ func Chcon(fpath string, scon string, recurse bool) error { return Setfilecon(fpath, scon) } + +// DupSecOpt takes an SELinux process label and returns security options that +// can will set the SELinux Type and Level for future container processes +func DupSecOpt(src string) []string { + if src == "" { + return nil + } + con := NewContext(src) + if con["user"] == "" || + con["role"] == "" || + con["type"] == "" || + con["level"] == "" { + return nil + } + return []string{"label:user:" + con["user"], + "label:role:" + con["role"], + "label:type:" + con["type"], + 
"label:level:" + con["level"]} +} + +// DisableSecOpt returns a security opt that can be used to disabling SELinux +// labeling support for future container processes +func DisableSecOpt() []string { + return []string{"label:disable"} +} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/selinux/selinux_test.go b/Godeps/_workspace/src/github.com/docker/libcontainer/selinux/selinux_test.go index 34c34974..228ad836 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/selinux/selinux_test.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/selinux/selinux_test.go @@ -42,7 +42,7 @@ func TestSELinux(t *testing.T) { t.Log("getenforce ", selinux.SelinuxGetEnforce()) t.Log("getenforcemode ", selinux.SelinuxGetEnforceMode()) pid := os.Getpid() - t.Log("PID:%d MCS:%s\n", pid, selinux.IntToMcs(pid, 1023)) + t.Logf("PID:%d MCS:%s\n", pid, selinux.IntToMcs(pid, 1023)) err = selinux.Setfscreatecon("unconfined_u:unconfined_r:unconfined_t:s0") if err == nil { t.Log(selinux.Getfscreatecon()) diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe.go b/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe.go deleted file mode 100644 index f73c354d..00000000 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe.go +++ /dev/null @@ -1,105 +0,0 @@ -package syncpipe - -import ( - "encoding/json" - "fmt" - "io/ioutil" - "os" - "syscall" -) - -// SyncPipe allows communication to and from the child processes -// to it's parent and allows the two independent processes to -// syncronize their state. 
-type SyncPipe struct { - parent, child *os.File -} - -func NewSyncPipeFromFd(parentFd, childFd uintptr) (*SyncPipe, error) { - s := &SyncPipe{} - - if parentFd > 0 { - s.parent = os.NewFile(parentFd, "parentPipe") - } else if childFd > 0 { - s.child = os.NewFile(childFd, "childPipe") - } else { - return nil, fmt.Errorf("no valid sync pipe fd specified") - } - - return s, nil -} - -func (s *SyncPipe) Child() *os.File { - return s.child -} - -func (s *SyncPipe) Parent() *os.File { - return s.parent -} - -func (s *SyncPipe) SendToChild(v interface{}) error { - data, err := json.Marshal(v) - if err != nil { - return err - } - - s.parent.Write(data) - - return syscall.Shutdown(int(s.parent.Fd()), syscall.SHUT_WR) -} - -func (s *SyncPipe) ReadFromChild() error { - data, err := ioutil.ReadAll(s.parent) - if err != nil { - return err - } - - if len(data) > 0 { - return fmt.Errorf("%s", data) - } - - return nil -} - -func (s *SyncPipe) ReadFromParent(v interface{}) error { - data, err := ioutil.ReadAll(s.child) - if err != nil { - return fmt.Errorf("error reading from sync pipe %s", err) - } - - if len(data) > 0 { - if err := json.Unmarshal(data, v); err != nil { - return err - } - } - - return nil -} - -func (s *SyncPipe) ReportChildError(err error) { - // ensure that any data sent from the parent is consumed so it doesn't - // receive ECONNRESET when the child writes to the pipe. 
- ioutil.ReadAll(s.child) - - s.child.Write([]byte(err.Error())) - s.CloseChild() -} - -func (s *SyncPipe) Close() error { - if s.parent != nil { - s.parent.Close() - } - - if s.child != nil { - s.child.Close() - } - - return nil -} - -func (s *SyncPipe) CloseChild() { - if s.child != nil { - s.child.Close() - s.child = nil - } -} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe_linux.go b/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe_linux.go deleted file mode 100644 index bea4b52f..00000000 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe_linux.go +++ /dev/null @@ -1,20 +0,0 @@ -package syncpipe - -import ( - "os" - "syscall" -) - -func NewSyncPipe() (s *SyncPipe, err error) { - s = &SyncPipe{} - - fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0) - if err != nil { - return nil, err - } - - s.child = os.NewFile(uintptr(fds[0]), "child syncpipe") - s.parent = os.NewFile(uintptr(fds[1]), "parent syncpipe") - - return s, nil -} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe_test.go b/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe_test.go deleted file mode 100644 index 906e6ed2..00000000 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe_test.go +++ /dev/null @@ -1,72 +0,0 @@ -package syncpipe - -import ( - "fmt" - "syscall" - "testing" -) - -type testStruct struct { - Name string -} - -func TestSendErrorFromChild(t *testing.T) { - pipe, err := NewSyncPipe() - if err != nil { - t.Fatal(err) - } - defer func() { - if err := pipe.Close(); err != nil { - t.Fatal(err) - } - }() - - childfd, err := syscall.Dup(int(pipe.Child().Fd())) - if err != nil { - t.Fatal(err) - } - childPipe, _ := NewSyncPipeFromFd(0, uintptr(childfd)) - - pipe.CloseChild() - pipe.SendToChild(nil) - - expected := "something bad happened" - 
childPipe.ReportChildError(fmt.Errorf(expected)) - - childError := pipe.ReadFromChild() - if childError == nil { - t.Fatal("expected an error to be returned but did not receive anything") - } - - if childError.Error() != expected { - t.Fatalf("expected %q but received error message %q", expected, childError.Error()) - } -} - -func TestSendPayloadToChild(t *testing.T) { - pipe, err := NewSyncPipe() - if err != nil { - t.Fatal(err) - } - - defer func() { - if err := pipe.Close(); err != nil { - t.Fatal(err) - } - }() - - expected := "libcontainer" - - if err := pipe.SendToChild(testStruct{Name: expected}); err != nil { - t.Fatal(err) - } - - var s *testStruct - if err := pipe.ReadFromParent(&s); err != nil { - t.Fatal(err) - } - - if s.Name != expected { - t.Fatalf("expected name %q but received %q", expected, s.Name) - } -} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/system/setns_linux.go b/Godeps/_workspace/src/github.com/docker/libcontainer/system/setns_linux.go index 32821ee2..228e6ccd 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/system/setns_linux.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/system/setns_linux.go @@ -11,9 +11,12 @@ import ( // We need different setns values for the different platforms and arch // We are declaring the macro here because the SETNS syscall does not exist in th stdlib var setNsMap = map[string]uintptr{ - "linux/386": 346, - "linux/amd64": 308, - "linux/arm": 374, + "linux/386": 346, + "linux/amd64": 308, + "linux/arm": 374, + "linux/ppc64": 350, + "linux/ppc64le": 350, + "linux/s390x": 339, } func Setns(fd uintptr, flags uintptr) error { diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_amd64.go b/Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_64.go similarity index 88% rename from Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_amd64.go rename to 
Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_64.go index 0a346c3b..6840c377 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_amd64.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_64.go @@ -1,4 +1,5 @@ -// +build linux,amd64 +// +build linux,amd64 linux,ppc64 linux,ppc64le linux,s390x + package system import ( diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_arm.go b/Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_arm.go index faf17995..7d8cda9d 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_arm.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_arm.go @@ -7,7 +7,7 @@ import ( // Setuid sets the uid of the calling thread to the specified uid. func Setuid(uid int) (err error) { - _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0) + _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0) if e1 != 0 { err = e1 } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup.go b/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup.go new file mode 100644 index 00000000..6f8a982f --- /dev/null +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup.go @@ -0,0 +1,108 @@ +package user + +import ( + "errors" + "fmt" + "syscall" +) + +var ( + // The current operating system does not provide the required data for user lookups. + ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data") +) + +func lookupUser(filter func(u User) bool) (User, error) { + // Get operating system-specific passwd reader-closer. + passwd, err := GetPasswd() + if err != nil { + return User{}, err + } + defer passwd.Close() + + // Get the users. 
+ users, err := ParsePasswdFilter(passwd, filter) + if err != nil { + return User{}, err + } + + // No user entries found. + if len(users) == 0 { + return User{}, fmt.Errorf("no matching entries in passwd file") + } + + // Assume the first entry is the "correct" one. + return users[0], nil +} + +// CurrentUser looks up the current user by their user id in /etc/passwd. If the +// user cannot be found (or there is no /etc/passwd file on the filesystem), +// then CurrentUser returns an error. +func CurrentUser() (User, error) { + return LookupUid(syscall.Getuid()) +} + +// LookupUser looks up a user by their username in /etc/passwd. If the user +// cannot be found (or there is no /etc/passwd file on the filesystem), then +// LookupUser returns an error. +func LookupUser(username string) (User, error) { + return lookupUser(func(u User) bool { + return u.Name == username + }) +} + +// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot +// be found (or there is no /etc/passwd file on the filesystem), then LookupUid +// returns an error. +func LookupUid(uid int) (User, error) { + return lookupUser(func(u User) bool { + return u.Uid == uid + }) +} + +func lookupGroup(filter func(g Group) bool) (Group, error) { + // Get operating system-specific group reader-closer. + group, err := GetGroup() + if err != nil { + return Group{}, err + } + defer group.Close() + + // Get the groups. + groups, err := ParseGroupFilter(group, filter) + if err != nil { + return Group{}, err + } + + // No group entries found. + if len(groups) == 0 { + return Group{}, fmt.Errorf("no matching entries in group file") + } + + // Assume the first entry is the "correct" one. + return groups[0], nil +} + +// CurrentGroup looks up the current user's group by their primary group id's +// entry in /etc/group. If the group cannot be found (or there is no +// /etc/group file on the filesystem), then CurrentGroup returns an error.
+func CurrentGroup() (Group, error) { + return LookupGid(syscall.Getgid()) +} + +// LookupGroup looks up a group by its name in /etc/group. If the group cannot +// be found (or there is no /etc/group file on the filesystem), then LookupGroup +// returns an error. +func LookupGroup(groupname string) (Group, error) { + return lookupGroup(func(g Group) bool { + return g.Name == groupname + }) +} + +// LookupGid looks up a group by its group id in /etc/group. If the group cannot +// be found (or there is no /etc/group file on the filesystem), then LookupGid +// returns an error. +func LookupGid(gid int) (Group, error) { + return lookupGroup(func(g Group) bool { + return g.Gid == gid + }) +} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup_unix.go b/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup_unix.go new file mode 100644 index 00000000..409c114e --- /dev/null +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup_unix.go @@ -0,0 +1,30 @@ +// +build darwin dragonfly freebsd linux netbsd openbsd solaris + +package user + +import ( + "io" + "os" +) + +// Unix-specific path to the passwd and group formatted files. 
+const ( + unixPasswdFile = "/etc/passwd" + unixGroupFile = "/etc/group" +) + +func GetPasswdFile() (string, error) { + return unixPasswdFile, nil +} + +func GetPasswd() (io.ReadCloser, error) { + return os.Open(unixPasswdFile) +} + +func GetGroupFile() (string, error) { + return unixGroupFile, nil +} + +func GetGroup() (io.ReadCloser, error) { + return os.Open(unixGroupFile) +} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup_unsupported.go b/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup_unsupported.go new file mode 100644 index 00000000..0f15c57d --- /dev/null +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup_unsupported.go @@ -0,0 +1,21 @@ +// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris + +package user + +import "io" + +func GetPasswdFile() (string, error) { + return "", ErrUnsupported +} + +func GetPasswd() (io.ReadCloser, error) { + return nil, ErrUnsupported +} + +func GetGroupFile() (string, error) { + return "", ErrUnsupported +} + +func GetGroup() (io.ReadCloser, error) { + return nil, ErrUnsupported +} diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/user/user.go b/Godeps/_workspace/src/github.com/docker/libcontainer/user/user.go index 493dd86f..69387f2e 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/user/user.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/user/user.go @@ -69,23 +69,36 @@ func parseLine(line string, v ...interface{}) { } } -func ParsePasswd() ([]*User, error) { - return ParsePasswdFilter(nil) -} - -func ParsePasswdFilter(filter func(*User) bool) ([]*User, error) { - f, err := os.Open("/etc/passwd") +func ParsePasswdFile(path string) ([]User, error) { + passwd, err := os.Open(path) if err != nil { return nil, err } - defer f.Close() - return parsePasswdFile(f, filter) + defer passwd.Close() + return ParsePasswd(passwd) } -func parsePasswdFile(r io.Reader, filter func(*User) bool) ([]*User, error) { 
+func ParsePasswd(passwd io.Reader) ([]User, error) { + return ParsePasswdFilter(passwd, nil) +} + +func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) { + passwd, err := os.Open(path) + if err != nil { + return nil, err + } + defer passwd.Close() + return ParsePasswdFilter(passwd, filter) +} + +func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) { + if r == nil { + return nil, fmt.Errorf("nil source for passwd-formatted data") + } + var ( s = bufio.NewScanner(r) - out = []*User{} + out = []User{} ) for s.Scan() { @@ -103,7 +116,7 @@ func parsePasswdFile(r io.Reader, filter func(*User) bool) ([]*User, error) { // Name:Pass:Uid:Gid:Gecos:Home:Shell // root:x:0:0:root:/root:/bin/bash // adm:x:3:4:adm:/var/adm:/bin/false - p := &User{} + p := User{} parseLine( text, &p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell, @@ -117,23 +130,36 @@ func parsePasswdFile(r io.Reader, filter func(*User) bool) ([]*User, error) { return out, nil } -func ParseGroup() ([]*Group, error) { - return ParseGroupFilter(nil) -} - -func ParseGroupFilter(filter func(*Group) bool) ([]*Group, error) { - f, err := os.Open("/etc/group") +func ParseGroupFile(path string) ([]Group, error) { + group, err := os.Open(path) if err != nil { return nil, err } - defer f.Close() - return parseGroupFile(f, filter) + defer group.Close() + return ParseGroup(group) } -func parseGroupFile(r io.Reader, filter func(*Group) bool) ([]*Group, error) { +func ParseGroup(group io.Reader) ([]Group, error) { + return ParseGroupFilter(group, nil) +} + +func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) { + group, err := os.Open(path) + if err != nil { + return nil, err + } + defer group.Close() + return ParseGroupFilter(group, filter) +} + +func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) { + if r == nil { + return nil, fmt.Errorf("nil source for group-formatted data") + } + var ( s = bufio.NewScanner(r) 
- out = []*Group{} + out = []Group{} ) for s.Scan() { @@ -151,7 +177,7 @@ func parseGroupFile(r io.Reader, filter func(*Group) bool) ([]*Group, error) { // Name:Pass:Gid:List // root:x:0:root // adm:x:4:root,adm,daemon - p := &Group{} + p := Group{} parseLine( text, &p.Name, &p.Pass, &p.Gid, &p.List, @@ -165,94 +191,160 @@ func parseGroupFile(r io.Reader, filter func(*Group) bool) ([]*Group, error) { return out, nil } -// Given a string like "user", "1000", "user:group", "1000:1000", returns the uid, gid, list of supplementary group IDs, and home directory, if available and/or applicable. -func GetUserGroupSupplementaryHome(userSpec string, defaultUid, defaultGid int, defaultHome string) (int, int, []int, string, error) { - var ( - uid = defaultUid - gid = defaultGid - suppGids = []int{} - home = defaultHome +type ExecUser struct { + Uid, Gid int + Sgids []int + Home string +} +// GetExecUserFile is a wrapper for GetExecUser. It reads data from each of the +// given file paths and uses that data as the arguments to GetExecUser. If the +// files cannot be opened for any reason, the error is ignored and a nil +// io.Reader is passed instead. +func GetExecUserFile(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) { + passwd, err := os.Open(passwdPath) + if err != nil { + passwd = nil + } else { + defer passwd.Close() + } + + group, err := os.Open(groupPath) + if err != nil { + group = nil + } else { + defer group.Close() + } + + return GetExecUser(userSpec, defaults, passwd, group) +} + +// GetExecUser parses a user specification string (using the passwd and group +// readers as sources for /etc/passwd and /etc/group data, respectively). In +// the case of blank fields or missing data from the sources, the values in +// defaults is used. +// +// GetExecUser will return an error if a user or group literal could not be +// found in any entry in passwd and group respectively. 
+// +// Examples of valid user specifications are: +// * "" +// * "user" +// * "uid" +// * "user:group" +// * "uid:gid" +// * "user:gid" +// * "uid:group" +func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) { + var ( userArg, groupArg string + name string ) + if defaults == nil { + defaults = new(ExecUser) + } + + // Copy over defaults. + user := &ExecUser{ + Uid: defaults.Uid, + Gid: defaults.Gid, + Sgids: defaults.Sgids, + Home: defaults.Home, + } + + // Sgids slice *cannot* be nil. + if user.Sgids == nil { + user.Sgids = []int{} + } + // allow for userArg to have either "user" syntax, or optionally "user:group" syntax parseLine(userSpec, &userArg, &groupArg) - users, err := ParsePasswdFilter(func(u *User) bool { + users, err := ParsePasswdFilter(passwd, func(u User) bool { if userArg == "" { - return u.Uid == uid + return u.Uid == user.Uid } return u.Name == userArg || strconv.Itoa(u.Uid) == userArg }) - if err != nil && !os.IsNotExist(err) { + if err != nil && passwd != nil { if userArg == "" { - userArg = strconv.Itoa(uid) + userArg = strconv.Itoa(user.Uid) } - return 0, 0, nil, "", fmt.Errorf("Unable to find user %v: %v", userArg, err) + return nil, fmt.Errorf("Unable to find user %v: %v", userArg, err) } haveUser := users != nil && len(users) > 0 if haveUser { // if we found any user entries that matched our filter, let's take the first one as "correct" - uid = users[0].Uid - gid = users[0].Gid - home = users[0].Home + name = users[0].Name + user.Uid = users[0].Uid + user.Gid = users[0].Gid + user.Home = users[0].Home } else if userArg != "" { // we asked for a user but didn't find them...
let's check to see if we wanted a numeric user - uid, err = strconv.Atoi(userArg) + user.Uid, err = strconv.Atoi(userArg) if err != nil { // not numeric - we have to bail - return 0, 0, nil, "", fmt.Errorf("Unable to find user %v", userArg) + return nil, fmt.Errorf("Unable to find user %v", userArg) } - if uid < minId || uid > maxId { - return 0, 0, nil, "", ErrRange + + // Must be inside valid uid range. + if user.Uid < minId || user.Uid > maxId { + return nil, ErrRange } // if userArg couldn't be found in /etc/passwd but is numeric, just roll with it - this is legit } - if groupArg != "" || (haveUser && users[0].Name != "") { - groups, err := ParseGroupFilter(func(g *Group) bool { + if groupArg != "" || name != "" { + groups, err := ParseGroupFilter(group, func(g Group) bool { + // Explicit group format takes precedence. if groupArg != "" { return g.Name == groupArg || strconv.Itoa(g.Gid) == groupArg } + + // Check if user is a member. for _, u := range g.List { - if u == users[0].Name { + if u == name { return true } } + return false }) - if err != nil && !os.IsNotExist(err) { - return 0, 0, nil, "", fmt.Errorf("Unable to find groups for user %v: %v", users[0].Name, err) + if err != nil && group != nil { + return nil, fmt.Errorf("Unable to find groups for user %v: %v", users[0].Name, err) } haveGroup := groups != nil && len(groups) > 0 if groupArg != "" { if haveGroup { // if we found any group entries that matched our filter, let's take the first one as "correct" - gid = groups[0].Gid + user.Gid = groups[0].Gid } else { // we asked for a group but didn't find id... 
let's check to see if we wanted a numeric group - gid, err = strconv.Atoi(groupArg) + user.Gid, err = strconv.Atoi(groupArg) if err != nil { // not numeric - we have to bail - return 0, 0, nil, "", fmt.Errorf("Unable to find group %v", groupArg) + return nil, fmt.Errorf("Unable to find group %v", groupArg) } - if gid < minId || gid > maxId { - return 0, 0, nil, "", ErrRange + + // Ensure gid is inside gid range. + if user.Gid < minId || user.Gid > maxId { + return nil, ErrRange } // if groupArg couldn't be found in /etc/group but is numeric, just roll with it - this is legit } } else if haveGroup { - suppGids = make([]int, len(groups)) + // If implicit group format, fill supplementary gids. + user.Sgids = make([]int, len(groups)) for i, group := range groups { - suppGids[i] = group.Gid + user.Sgids[i] = group.Gid } } } - return uid, gid, suppGids, home, nil + return user, nil } diff --git a/Godeps/_workspace/src/github.com/docker/libcontainer/user/user_test.go b/Godeps/_workspace/src/github.com/docker/libcontainer/user/user_test.go index 136632c2..4fe008fb 100644 --- a/Godeps/_workspace/src/github.com/docker/libcontainer/user/user_test.go +++ b/Godeps/_workspace/src/github.com/docker/libcontainer/user/user_test.go @@ -1,6 +1,8 @@ package user import ( + "io" + "reflect" "strings" "testing" ) @@ -54,7 +56,7 @@ func TestUserParseLine(t *testing.T) { } func TestUserParsePasswd(t *testing.T) { - users, err := parsePasswdFile(strings.NewReader(` + users, err := ParsePasswdFilter(strings.NewReader(` root:x:0:0:root:/root:/bin/bash adm:x:3:4:adm:/var/adm:/bin/false this is just some garbage data @@ -74,7 +76,7 @@ this is just some garbage data } func TestUserParseGroup(t *testing.T) { - groups, err := parseGroupFile(strings.NewReader(` + groups, err := ParseGroupFilter(strings.NewReader(` root:x:0:root adm:x:4:root,adm,daemon this is just some garbage data @@ -92,3 +94,259 @@ this is just some garbage data t.Fatalf("Expected groups[1] to be 4 - adm - 3 members, got %v - 
%v - %v", groups[1].Gid, groups[1].Name, len(groups[1].List)) } } + +func TestValidGetExecUser(t *testing.T) { + const passwdContent = ` +root:x:0:0:root user:/root:/bin/bash +adm:x:42:43:adm:/var/adm:/bin/false +this is just some garbage data +` + const groupContent = ` +root:x:0:root +adm:x:43: +grp:x:1234:root,adm +this is just some garbage data +` + defaultExecUser := ExecUser{ + Uid: 8888, + Gid: 8888, + Sgids: []int{8888}, + Home: "/8888", + } + + tests := []struct { + ref string + expected ExecUser + }{ + { + ref: "root", + expected: ExecUser{ + Uid: 0, + Gid: 0, + Sgids: []int{0, 1234}, + Home: "/root", + }, + }, + { + ref: "adm", + expected: ExecUser{ + Uid: 42, + Gid: 43, + Sgids: []int{1234}, + Home: "/var/adm", + }, + }, + { + ref: "root:adm", + expected: ExecUser{ + Uid: 0, + Gid: 43, + Sgids: defaultExecUser.Sgids, + Home: "/root", + }, + }, + { + ref: "adm:1234", + expected: ExecUser{ + Uid: 42, + Gid: 1234, + Sgids: defaultExecUser.Sgids, + Home: "/var/adm", + }, + }, + { + ref: "42:1234", + expected: ExecUser{ + Uid: 42, + Gid: 1234, + Sgids: defaultExecUser.Sgids, + Home: "/var/adm", + }, + }, + { + ref: "1337:1234", + expected: ExecUser{ + Uid: 1337, + Gid: 1234, + Sgids: defaultExecUser.Sgids, + Home: defaultExecUser.Home, + }, + }, + { + ref: "1337", + expected: ExecUser{ + Uid: 1337, + Gid: defaultExecUser.Gid, + Sgids: defaultExecUser.Sgids, + Home: defaultExecUser.Home, + }, + }, + { + ref: "", + expected: ExecUser{ + Uid: defaultExecUser.Uid, + Gid: defaultExecUser.Gid, + Sgids: defaultExecUser.Sgids, + Home: defaultExecUser.Home, + }, + }, + } + + for _, test := range tests { + passwd := strings.NewReader(passwdContent) + group := strings.NewReader(groupContent) + + execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group) + if err != nil { + t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error()) + t.Fail() + continue + } + + if !reflect.DeepEqual(test.expected, *execUser) { + t.Logf("got: %#v", execUser) 
+ t.Logf("expected: %#v", test.expected) + t.Fail() + continue + } + } +} + +func TestInvalidGetExecUser(t *testing.T) { + const passwdContent = ` +root:x:0:0:root user:/root:/bin/bash +adm:x:42:43:adm:/var/adm:/bin/false +this is just some garbage data +` + const groupContent = ` +root:x:0:root +adm:x:43: +grp:x:1234:root,adm +this is just some garbage data +` + + tests := []string{ + // No such user/group. + "notuser", + "notuser:notgroup", + "root:notgroup", + "notuser:adm", + "8888:notgroup", + "notuser:8888", + + // Invalid user/group values. + "-1:0", + "0:-3", + "-5:-2", + } + + for _, test := range tests { + passwd := strings.NewReader(passwdContent) + group := strings.NewReader(groupContent) + + execUser, err := GetExecUser(test, nil, passwd, group) + if err == nil { + t.Logf("got unexpected success when parsing '%s': %#v", test, execUser) + t.Fail() + continue + } + } +} + +func TestGetExecUserNilSources(t *testing.T) { + const passwdContent = ` +root:x:0:0:root user:/root:/bin/bash +adm:x:42:43:adm:/var/adm:/bin/false +this is just some garbage data +` + const groupContent = ` +root:x:0:root +adm:x:43: +grp:x:1234:root,adm +this is just some garbage data +` + + defaultExecUser := ExecUser{ + Uid: 8888, + Gid: 8888, + Sgids: []int{8888}, + Home: "/8888", + } + + tests := []struct { + ref string + passwd, group bool + expected ExecUser + }{ + { + ref: "", + passwd: false, + group: false, + expected: ExecUser{ + Uid: 8888, + Gid: 8888, + Sgids: []int{8888}, + Home: "/8888", + }, + }, + { + ref: "root", + passwd: true, + group: false, + expected: ExecUser{ + Uid: 0, + Gid: 0, + Sgids: []int{8888}, + Home: "/root", + }, + }, + { + ref: "0", + passwd: false, + group: false, + expected: ExecUser{ + Uid: 0, + Gid: 8888, + Sgids: []int{8888}, + Home: "/8888", + }, + }, + { + ref: "0:0", + passwd: false, + group: false, + expected: ExecUser{ + Uid: 0, + Gid: 0, + Sgids: []int{8888}, + Home: "/8888", + }, + }, + } + + for _, test := range tests { + var passwd, 
group io.Reader + + if test.passwd { + passwd = strings.NewReader(passwdContent) + } + + if test.group { + group = strings.NewReader(groupContent) + } + + execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group) + if err != nil { + t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error()) + t.Fail() + continue + } + + if !reflect.DeepEqual(test.expected, *execUser) { + t.Logf("got: %#v", execUser) + t.Logf("expected: %#v", test.expected) + t.Fail() + continue + } + } +} diff --git a/container/docker/factory.go b/container/docker/factory.go index 383d32b5..07b414bc 100644 --- a/container/docker/factory.go +++ b/container/docker/factory.go @@ -27,6 +27,7 @@ import ( "github.com/fsouza/go-dockerclient" "github.com/golang/glog" "github.com/google/cadvisor/container" + "github.com/google/cadvisor/container/libcontainer" "github.com/google/cadvisor/info" "github.com/google/cadvisor/utils" ) @@ -68,6 +69,9 @@ type dockerFactory struct { usesAufsDriver bool client *docker.Client + + // Information about the mounted cgroup subsystems. 
+ cgroupSubsystems libcontainer.CgroupSubsystems } func (self *dockerFactory) String() string { @@ -85,6 +89,7 @@ func (self *dockerFactory) NewContainerHandler(name string) (handler container.C self.machineInfoFactory, *dockerRootDir, self.usesAufsDriver, + &self.cgroupSubsystems, ) return } @@ -218,11 +223,17 @@ func Register(factory info.MachineInfoFactory) error { glog.Infof("System is using systemd") } + cgroupSubsystems, err := libcontainer.GetCgroupSubsystems() + if err != nil { + return fmt.Errorf("failed to get cgroup subsystems: %v", err) + } + glog.Infof("Registering Docker factory") f := &dockerFactory{ machineInfoFactory: factory, client: client, usesAufsDriver: usesAufsDriver, + cgroupSubsystems: cgroupSubsystems, } container.RegisterContainerHandlerFactory(f) return nil diff --git a/container/docker/handler.go b/container/docker/handler.go index 1a94be0d..7cc49a68 100644 --- a/container/docker/handler.go +++ b/container/docker/handler.go @@ -59,6 +59,10 @@ type dockerContainerHandler struct { // Path to the libcontainer pid file. libcontainerPidPath string + // Absolute path to the cgroup hierarchies of this container. + // (e.g.: "cpu" -> "/sys/fs/cgroup/cpu/test") + cgroupPaths map[string]string + cgroup cgroups.Cgroup usesAufsDriver bool fsInfo fs.FsInfo @@ -71,11 +75,19 @@ func newDockerContainerHandler( machineInfoFactory info.MachineInfoFactory, dockerRootDir string, usesAufsDriver bool, + cgroupSubsystems *containerLibcontainer.CgroupSubsystems, ) (container.ContainerHandler, error) { fsInfo, err := fs.NewFsInfo() if err != nil { return nil, err } + + // Create the cgroup paths. 
+ cgroupPaths := make(map[string]string, len(cgroupSubsystems.MountPoints)) + for key, val := range cgroupSubsystems.MountPoints { + cgroupPaths[key] = path.Join(val, name) + } + id := ContainerNameToDockerId(name) handler := &dockerContainerHandler{ id: id, @@ -85,6 +97,7 @@ func newDockerContainerHandler( libcontainerConfigPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "container.json"), libcontainerStatePath: path.Join(dockerRootDir, pathToLibcontainerState, id, "state.json"), libcontainerPidPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "pid"), + cgroupPaths: cgroupPaths, cgroup: cgroups.Cgroup{ Parent: "/", Name: name, @@ -159,6 +172,11 @@ func (self *dockerContainerHandler) readLibcontainerState() (state *libcontainer } state = retState + // Create cgroup paths if they don't exist. This is since older Docker clients don't write it. + if len(state.CgroupPaths) == 0 { + state.CgroupPaths = self.cgroupPaths + } + return } @@ -259,7 +277,7 @@ func (self *dockerContainerHandler) GetStats() (stats *info.ContainerStats, err return } - stats, err = containerLibcontainer.GetStats(&self.cgroup, state) + stats, err = containerLibcontainer.GetStats(state) if err != nil { return } diff --git a/container/libcontainer/helpers.go b/container/libcontainer/helpers.go index 77ed030b..742f640f 100644 --- a/container/libcontainer/helpers.go +++ b/container/libcontainer/helpers.go @@ -15,6 +15,7 @@ package libcontainer import ( + "fmt" "time" "github.com/docker/libcontainer" @@ -24,13 +25,60 @@ import ( "github.com/google/cadvisor/info" ) +type CgroupSubsystems struct { + // Cgroup subsystem mounts. + // e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"] + Mounts []cgroups.Mount + + // Cgroup subsystem to their mount location. + // e.g.: "cpu" -> "/sys/fs/cgroup/cpu" + MountPoints map[string]string +} + +// Get information about the cgroup subsystems. +func GetCgroupSubsystems() (CgroupSubsystems, error) { + // Get all cgroup mounts. 
+ allCgroups, err := cgroups.GetCgroupMounts() + if err != nil { + return CgroupSubsystems{}, err + } + if len(allCgroups) == 0 { + return CgroupSubsystems{}, fmt.Errorf("failed to find cgroup mounts") + } + + // Trim the mounts to only the subsystems we care about. + supportedCgroups := make([]cgroups.Mount, 0, len(allCgroups)) + mountPoints := make(map[string]string, len(allCgroups)) + for _, mount := range allCgroups { + for _, subsystem := range mount.Subsystems { + if _, ok := supportedSubsystems[subsystem]; ok { + supportedCgroups = append(supportedCgroups, mount) + mountPoints[subsystem] = mount.Mountpoint + } + } + } + + return CgroupSubsystems{ + Mounts: supportedCgroups, + MountPoints: mountPoints, + }, nil +} + +// Cgroup subsystems we support listing (should be the minimal set we need stats from). +var supportedSubsystems map[string]struct{} = map[string]struct{}{ + "cpu": {}, + "cpuacct": {}, + "memory": {}, + "cpuset": {}, +} + // Get stats of the specified container -func GetStats(cgroup *cgroups.Cgroup, state *libcontainer.State) (*info.ContainerStats, error) { +func GetStats(state *libcontainer.State) (*info.ContainerStats, error) { // TODO(vmarmol): Use libcontainer's Stats() in the new API when that is ready. 
stats := &libcontainer.ContainerStats{} var err error - stats.CgroupStats, err = cgroupfs.GetStats(cgroup) + stats.CgroupStats, err = cgroupfs.GetStats(state.CgroupPaths) if err != nil { return &info.ContainerStats{}, err } @@ -43,14 +91,6 @@ func GetStats(cgroup *cgroups.Cgroup, state *libcontainer.State) (*info.Containe return toContainerStats(stats), nil } -func GetStatsCgroupOnly(cgroup *cgroups.Cgroup) (*info.ContainerStats, error) { - s, err := cgroupfs.GetStats(cgroup) - if err != nil { - return nil, err - } - return toContainerStats(&libcontainer.ContainerStats{CgroupStats: s}), nil -} - func DiskStatsCopy(blkio_stats []cgroups.BlkioStatEntry) (stat []info.PerDiskStats) { if len(blkio_stats) == 0 { return diff --git a/container/raw/factory.go b/container/raw/factory.go index 9afe12b6..f8d072b1 100644 --- a/container/raw/factory.go +++ b/container/raw/factory.go @@ -17,26 +17,18 @@ package raw import ( "fmt" - "github.com/docker/libcontainer/cgroups" "github.com/golang/glog" "github.com/google/cadvisor/container" + "github.com/google/cadvisor/container/libcontainer" "github.com/google/cadvisor/info" ) -type cgroupSubsystems struct { - // Cgroup subsystem mounts. - // e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"] - mounts []cgroups.Mount - - // Cgroup subsystem to their mount location. - // e.g.: "cpu" -> "/sys/fs/cgroup/cpu" - mountPoints map[string]string -} - type rawFactory struct { // Factory for machine information. machineInfoFactory info.MachineInfoFactory - cgroupSubsystems *cgroupSubsystems + + // Information about the cgroup subsystems. + cgroupSubsystems *libcontainer.CgroupSubsystems } func (self *rawFactory) String() string { @@ -53,46 +45,19 @@ func (self *rawFactory) CanHandle(name string) (bool, error) { } func Register(machineInfoFactory info.MachineInfoFactory) error { - // Get all cgroup mounts. 
- allCgroups, err := cgroups.GetCgroupMounts() + cgroupSubsystems, err := libcontainer.GetCgroupSubsystems() if err != nil { - return err + return fmt.Errorf("failed to get cgroup subsystems: %v", err) } - if len(allCgroups) == 0 { - return fmt.Errorf("failed to find cgroup mounts for the raw factory") - } - - // Trim the mounts to only the subsystems we care about. - supportedCgroups := make([]cgroups.Mount, 0, len(allCgroups)) - mountPoints := make(map[string]string, len(allCgroups)) - for _, mount := range allCgroups { - for _, subsystem := range mount.Subsystems { - if _, ok := supportedSubsystems[subsystem]; ok { - supportedCgroups = append(supportedCgroups, mount) - mountPoints[subsystem] = mount.Mountpoint - } - } - } - if len(supportedCgroups) == 0 { + if len(cgroupSubsystems.Mounts) == 0 { return fmt.Errorf("failed to find supported cgroup mounts for the raw factory") } glog.Infof("Registering Raw factory") factory := &rawFactory{ machineInfoFactory: machineInfoFactory, - cgroupSubsystems: &cgroupSubsystems{ - mounts: supportedCgroups, - mountPoints: mountPoints, - }, + cgroupSubsystems: &cgroupSubsystems, } container.RegisterContainerHandlerFactory(factory) return nil } - -// Cgroup subsystems we support listing (should be the minimal set we need stats from). -var supportedSubsystems map[string]struct{} = map[string]struct{}{ - "cpu": {}, - "cpuacct": {}, - "memory": {}, - "cpuset": {}, -} diff --git a/container/raw/handler.go b/container/raw/handler.go index 433a07b4..a49bd51e 100644 --- a/container/raw/handler.go +++ b/container/raw/handler.go @@ -39,7 +39,7 @@ type rawContainerHandler struct { // Name of the container for this handler. name string cgroup *cgroups.Cgroup - cgroupSubsystems *cgroupSubsystems + cgroupSubsystems *libcontainer.CgroupSubsystems machineInfoFactory info.MachineInfoFactory // Inotify event watcher. 
@@ -54,12 +54,16 @@ type rawContainerHandler struct { // Cgroup paths being watchd for new subcontainers cgroupWatches map[string]struct{} + // Absolute path to the cgroup hierarchies of this container. + // (e.g.: "cpu" -> "/sys/fs/cgroup/cpu/test") + cgroupPaths map[string]string + fsInfo fs.FsInfo networkInterface *networkInterface externalMounts []mount } -func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, machineInfoFactory info.MachineInfoFactory) (container.ContainerHandler, error) { +func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSubsystems, machineInfoFactory info.MachineInfoFactory) (container.ContainerHandler, error) { fsInfo, err := fs.NewFsInfo() if err != nil { return nil, err @@ -77,6 +81,13 @@ func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, mac break } } + + // Create the cgroup paths. + cgroupPaths := make(map[string]string, len(cgroupSubsystems.MountPoints)) + for key, val := range cgroupSubsystems.MountPoints { + cgroupPaths[key] = path.Join(val, name) + } + return &rawContainerHandler{ name: name, cgroup: &cgroups.Cgroup{ @@ -88,6 +99,7 @@ func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, mac stopWatcher: make(chan error), watches: make(map[string]struct{}), cgroupWatches: make(map[string]struct{}), + cgroupPaths: cgroupPaths, fsInfo: fsInfo, networkInterface: networkInterface, externalMounts: externalMounts, @@ -145,9 +157,8 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) { } // CPU. - cpuRoot, ok := self.cgroupSubsystems.mountPoints["cpu"] + cpuRoot, ok := self.cgroupPaths["cpu"] if ok { - cpuRoot = path.Join(cpuRoot, self.name) if utils.FileExists(cpuRoot) { spec.HasCpu = true spec.Cpu.Limit = readInt64(cpuRoot, "cpu.shares") @@ -156,9 +167,8 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) { // Cpu Mask. // This will fail for non-unified hierarchies. 
We'll return the whole machine mask in that case. - cpusetRoot, ok := self.cgroupSubsystems.mountPoints["cpuset"] + cpusetRoot, ok := self.cgroupPaths["cpuset"] if ok { - cpusetRoot = path.Join(cpusetRoot, self.name) if utils.FileExists(cpusetRoot) { spec.HasCpu = true spec.Cpu.Mask = readString(cpusetRoot, "cpuset.cpus") @@ -169,9 +179,8 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) { } // Memory. - memoryRoot, ok := self.cgroupSubsystems.mountPoints["memory"] + memoryRoot, ok := self.cgroupPaths["memory"] if ok { - memoryRoot = path.Join(memoryRoot, self.name) if utils.FileExists(memoryRoot) { spec.HasMemory = true spec.Memory.Limit = readInt64(memoryRoot, "memory.limit_in_bytes") @@ -227,7 +236,10 @@ func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error { } for _, fs := range filesystems { stats.Filesystem = append(stats.Filesystem, - info.FsStats{fs.Device, fs.Capacity, fs.Capacity - fs.Free, + info.FsStats{ + fs.Device, + fs.Capacity, + fs.Capacity - fs.Free, fs.DiskStats.ReadsCompleted, fs.DiskStats.ReadsMerged, fs.DiskStats.SectorsRead, @@ -246,18 +258,20 @@ func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error { } func (self *rawContainerHandler) GetStats() (*info.ContainerStats, error) { - state := dockerlibcontainer.State{} + // TODO(vmarmol): Don't re-create this every time. 
+ state := dockerlibcontainer.State{ + CgroupPaths: self.cgroupPaths, + } if self.networkInterface != nil { state = dockerlibcontainer.State{ NetworkState: network.NetworkState{ VethHost: self.networkInterface.VethHost, VethChild: self.networkInterface.VethChild, - NsPath: "unknown", }, } } - stats, err := libcontainer.GetStats(self.cgroup, &state) + stats, err := libcontainer.GetStats(&state) if err != nil { return nil, err } @@ -301,8 +315,8 @@ func listDirectories(dirpath string, parent string, recursive bool, output map[s func (self *rawContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) { containers := make(map[string]struct{}) - for _, subsystem := range self.cgroupSubsystems.mounts { - err := listDirectories(path.Join(subsystem.Mountpoint, self.name), self.name, listType == container.ListRecursive, containers) + for _, cgroupPath := range self.cgroupPaths { + err := listDirectories(cgroupPath, self.name, listType == container.ListRecursive, containers) if err != nil { return nil, err } @@ -372,7 +386,7 @@ func (self *rawContainerHandler) processEvent(event *inotify.Event, events chan // Derive the container name from the path name. var containerName string - for _, mount := range self.cgroupSubsystems.mounts { + for _, mount := range self.cgroupSubsystems.Mounts { mountLocation := path.Clean(mount.Mountpoint) + "/" if strings.HasPrefix(event.Name, mountLocation) { containerName = event.Name[len(mountLocation)-1:] @@ -437,8 +451,8 @@ func (self *rawContainerHandler) WatchSubcontainers(events chan container.Subcon } // Watch this container (all its cgroups) and all subdirectories. 
- for _, mnt := range self.cgroupSubsystems.mounts { - err := self.watchDirectory(path.Join(mnt.Mountpoint, self.name), self.name) + for _, cgroupPath := range self.cgroupPaths { + err := self.watchDirectory(cgroupPath, self.name) if err != nil { return err } @@ -481,8 +495,8 @@ func (self *rawContainerHandler) StopWatchingSubcontainers() error { func (self *rawContainerHandler) Exists() bool { // If any cgroup exists, the container is still alive. - for _, subsystem := range self.cgroupSubsystems.mounts { - if utils.FileExists(path.Join(subsystem.Mountpoint, self.name)) { + for _, cgroupPath := range self.cgroupPaths { + if utils.FileExists(cgroupPath) { return true } }