Merge pull request #358 from vmarmol/update-libcontainer

Update libcontainer dependency
2014-12-15 17:01:44 -08:00 · 2014-12-15 17:01:44 -08:00 · 8eec529106
commit 8eec529106
parent 702dda1995 91c974e913
69 changed files with 2021 additions and 1150 deletions
--- a/Godeps/Godeps.json
+++ b/Godeps/Godeps.json
@ -55,8 +55,8 @@
 		},
 		{
 			"ImportPath": "github.com/docker/libcontainer",
-			"Comment": "v1.2.0-99-gfe3801c",
-			"Rev": "fe3801ccd2f5d0cc3ec5d063067fc4a1c312fa81"
+			"Comment": "v1.2.0-173-g58fc931",
+			"Rev": "58fc93160e03387a4f41dcf4aed2e376c4a92db4"
 		},
 		{
 			"ImportPath": "github.com/fsouza/go-dockerclient",
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/.drone.yml
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/.drone.yml
@ -0,0 +1,9 @@
+image: dockercore/libcontainer
+script:
+# Setup the DockerInDocker environment.
+  - /dind
+  - sed -i 's!docker/docker!docker/libcontainer!' /go/src/github.com/docker/docker/hack/make/.validate
+  - bash /go/src/github.com/docker/docker/hack/make/validate-dco
+  - bash /go/src/github.com/docker/docker/hack/make/validate-gofmt
+  - export GOPATH="$GOPATH:/go:$(pwd)/vendor" # Drone mucks with our GOPATH
+  - make direct-test
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/.travis.yml
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/.travis.yml
@ -1,36 +0,0 @@
-language: go
-go: 1.3
-
-# let us have pretty experimental Docker-based Travis workers
-sudo: false
-
-env:
-    - TRAVIS_GLOBAL_WTF=1
-    - _GOOS=linux _GOARCH=amd64 CGO_ENABLED=1
-    - _GOOS=linux _GOARCH=amd64 CGO_ENABLED=0
-#    - _GOOS=linux _GOARCH=386 CGO_ENABLED=1 # TODO add this once Travis can handle it (https://github.com/travis-ci/travis-ci/issues/2207#issuecomment-49625061)
-    - _GOOS=linux _GOARCH=386 CGO_ENABLED=0
-    - _GOOS=linux _GOARCH=arm CGO_ENABLED=0
-
-install:
-    - go get code.google.com/p/go.tools/cmd/cover
-    - mkdir -pv "${GOPATH%%:*}/src/github.com/docker" && [ -d "${GOPATH%%:*}/src/github.com/docker/libcontainer" ] || ln -sv "$(readlink -f .)" "${GOPATH%%:*}/src/github.com/docker/libcontainer"
-    - if [ -z "$TRAVIS_GLOBAL_WTF" ]; then
-          gvm cross "$_GOOS" "$_GOARCH";
-          export GOOS="$_GOOS" GOARCH="$_GOARCH";
-      fi
-    - export GOPATH="$GOPATH:$(pwd)/vendor"
-    - if [ -z "$TRAVIS_GLOBAL_WTF" ]; then go env; fi
-    - go get -d -v ./... # TODO remove this if /docker/docker gets purged from our includes
-    - if [ "$TRAVIS_GLOBAL_WTF" ]; then
-          export DOCKER_PATH="${GOPATH%%:*}/src/github.com/docker/docker";
-          mkdir -p "$DOCKER_PATH/hack/make";
-          ( cd "$DOCKER_PATH/hack/make" && wget -c 'https://raw.githubusercontent.com/docker/docker/master/hack/make/'{.validate,validate-dco,validate-gofmt} );
-          sed -i 's!docker/docker!docker/libcontainer!' "$DOCKER_PATH/hack/make/.validate";
-      fi
-
-script:
-    - if [ "$TRAVIS_GLOBAL_WTF" ]; then bash "$DOCKER_PATH/hack/make/validate-dco"; fi
-    - if [ "$TRAVIS_GLOBAL_WTF" ]; then bash "$DOCKER_PATH/hack/make/validate-gofmt"; fi
-    - if [ -z "$TRAVIS_GLOBAL_WTF" ]; then make direct-build; fi
-    - if [ -z "$TRAVIS_GLOBAL_WTF" -a "$GOARCH" != 'arm' ]; then make direct-test-short; fi
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/Dockerfile
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/Dockerfile
@ -1,7 +1,7 @@
 FROM crosbymichael/golang

 RUN apt-get update && apt-get install -y gcc make
-RUN go get code.google.com/p/go.tools/cmd/cover
+RUN go get golang.org/x/tools/cmd/cover

 ENV GOPATH $GOPATH:/go/src/github.com/docker/libcontainer/vendor
 RUN go get github.com/docker/docker/pkg/term
@ -10,7 +10,7 @@ RUN go get github.com/docker/docker/pkg/term
 RUN mkdir /busybox && \
    curl -sSL 'https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.02/rootfs.tar' | tar -xC /busybox

-RUN curl -sSL https://raw.githubusercontent.com/docker/docker/master/hack/dind -o /dind && \
+RUN curl -sSL https://raw.githubusercontent.com/docker/docker/master/project/dind -o /dind && \
    chmod +x /dind

 COPY . /go/src/github.com/docker/libcontainer
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/MAINTAINERS
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/MAINTAINERS
@ -2,5 +2,4 @@ Michael Crosby <michael@docker.com> (@crosbymichael)
 Rohit Jnagal <jnagal@google.com> (@rjnagal)
 Victor Marmol <vmarmol@google.com> (@vmarmol)
 Mrunal Patel <mpatel@redhat.com> (@mrunalp)
-.travis.yml: Tianon Gravi <admwiggin@gmail.com> (@tianon)
 update-vendor.sh: Tianon Gravi <admwiggin@gmail.com> (@tianon)
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/Makefile
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/Makefile
@ -12,10 +12,10 @@ sh:
 GO_PACKAGES = $(shell find . -not \( -wholename ./vendor -prune -o -wholename ./.git -prune \) -name '*.go' -print0 | xargs -0n1 dirname | sort -u)

 direct-test:
-	go test -cover -v $(GO_PACKAGES)
+	go test $(TEST_TAGS) -cover -v $(GO_PACKAGES)

 direct-test-short:
-	go test -cover -test.short -v $(GO_PACKAGES)
+	go test $(TEST_TAGS) -cover -test.short -v $(GO_PACKAGES)

 direct-build:
 	go build -v $(GO_PACKAGES)
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/README.md
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/README.md
@ -1,4 +1,4 @@
-## libcontainer - reference implementation for containers [![Build Status](https://travis-ci.org/docker/libcontainer.png?branch=master)](https://travis-ci.org/docker/libcontainer)
+## libcontainer - reference implementation for containers [![Build Status](https://ci.dockerproject.com/github.com/docker/libcontainer/status.svg?branch=master)](https://ci.dockerproject.com/github.com/docker/libcontainer) 

 ### Note on API changes:

--- a/Godeps/_workspace/src/github.com/docker/libcontainer/SPEC.md
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/SPEC.md
@ -0,0 +1,321 @@
+## Container Specification - v1
+
+This is the standard configuration for version 1 containers.  It includes
+namespaces, standard filesystem setup, a default Linux capability set, and
+information about resource reservations.  It also has information about any 
+populated environment settings for the processes running inside a container.
+
+Along with the configuration of how a container is created the standard also
+discusses actions that can be performed on a container to manage and inspect
+information about the processes running inside.
+
+The v1 profile is meant to be able to accommodate the majority of applications
+with a strong security configuration.
+
+### System Requirements and Compatibility
+
+Minimum requirements:
+* Kernel version - 3.8 recommended, 2.6.2x minimum (with backported patches)
+* Mounted cgroups with each subsystem in its own hierarchy
+
+
+### Namespaces
+
+|     Flag      | Enabled | 
+| ------------  | ------- |
+| CLONE_NEWPID  |    1    |
+| CLONE_NEWUTS  |    1    |
+| CLONE_NEWIPC  |    1    |
+| CLONE_NEWNET  |    1    |
+| CLONE_NEWNS   |    1    |
+| CLONE_NEWUSER |    0    |
+
+In v1 the user namespace is not enabled by default for support of older kernels
+where the user namespace feature is not fully implemented.  Namespaces are 
+created for the container via the `clone` syscall.  
+
+
+### Filesystem
+
+A root filesystem must be provided to a container for execution.  The container
+will use this root filesystem (rootfs) to jail and spawn processes inside where
+the binaries and system libraries are local to that directory.  Any binaries
+to be executed must be contained within this rootfs.
+
+Mounts that happen inside the container are automatically cleaned up when the
+container exits as the mount namespace is destroyed and the kernel will 
+unmount all the mounts that were setup within that namespace.
+
+For a container to execute properly there are certain filesystems that 
+are required to be mounted within the rootfs that the runtime will setup.
+
+|     Path    |  Type  |                  Flags                 |                 Data                    |
+| ----------- | ------ | -------------------------------------- | --------------------------------------- |
+| /proc       | proc   | MS_NOEXEC,MS_NOSUID,MS_NODEV           |                                         |
+| /dev        | tmpfs  | MS_NOEXEC,MS_STRICTATIME               | mode=755                                |
+| /dev/shm    | shm    | MS_NOEXEC,MS_NOSUID,MS_NODEV           | mode=1777,size=65536k                   |
+| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV           |                                         |
+| /dev/pts    | devpts | MS_NOEXEC,MS_NOSUID                    | newinstance,ptmxmode=0666,mode=620,gid=5 |
+| /sys        | sysfs  | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY |                                         |
+
+
+After a container's filesystems are mounted within the newly created 
+mount namespace `/dev` will need to be populated with a set of device nodes.
+It is expected that a rootfs does not need to have any device nodes specified
+for `/dev` within the rootfs as the container will setup the correct devices
+that are required for executing a container's process.
+
+|      Path    | Mode |   Access   |
+| ------------ | ---- | ---------- |
+| /dev/null    | 0666 |  rwm       |
+| /dev/zero    | 0666 |  rwm       |
+| /dev/full    | 0666 |  rwm       |
+| /dev/tty     | 0666 |  rwm       |
+| /dev/random  | 0666 |  rwm       |
+| /dev/urandom | 0666 |  rwm       |
+| /dev/fuse    | 0666 |  rwm       |
+
+
+**ptmx**
+`/dev/ptmx` will need to be a symlink to the host's `/dev/ptmx` within
+the container.  
+
+The use of a pseudo TTY is optional within a container, and a container should support running both with and without one.
+If a pseudo TTY is provided to the container, `/dev/console` will need to be
+setup by binding the console in `/dev/` after it has been populated and mounted
+in tmpfs.
+
+|      Source     | Destination  | UID GID | Mode | Type |
+| --------------- | ------------ | ------- | ---- | ---- |
+| *pty host path* | /dev/console | 0 0     | 0600 | bind | 
+
+
+After `/dev/null` has been setup we check for any external links between
+the container's io, STDIN, STDOUT, STDERR.  If the container's io is pointing
+to `/dev/null` outside the container we close and `dup2` the `/dev/null`
+that is local to the container's rootfs.
+
+
+After the container has `/proc` mounted a few standard symlinks are setup 
+within `/dev/` for the io.
+
+|    Source    | Destination |
+| ------------ | ----------- |
+| /proc/1/fd   | /dev/fd     |
+| /proc/1/fd/0 | /dev/stdin  |
+| /proc/1/fd/1 | /dev/stdout |
+| /proc/1/fd/2 | /dev/stderr |
+
+A `pivot_root` is used to change the root for the process, effectively 
+jailing the process inside the rootfs.
+
+```c
+put_old = mkdir(...);
+pivot_root(rootfs, put_old);
+chdir("/");
+unmount(put_old, MNT_DETACH);
+rmdir(put_old);
+```
+
+For containers running with a rootfs inside `ramfs` a `MS_MOVE` combined
+with a `chroot` is required as `pivot_root` is not supported in `ramfs`.
+
+```c
+mount(rootfs, "/", NULL, MS_MOVE, NULL);
+chroot(".");
+chdir("/");
+```
+
+The `umask` is set back to `0022` after the filesystem setup has been completed.
+
+### Resources
+
+Cgroups are used to handle resource allocation for containers.  This includes
+system resources like cpu, memory, and device access.
+
+| Subsystem  | Enabled |
+| ---------- | ------- |
+| devices    | 1       |
+| memory     | 1       |
+| cpu        | 1       |
+| cpuacct    | 1       |
+| cpuset     | 1       |
+| blkio      | 1       |
+| perf_event | 1       |
+| freezer    | 1       |
+
+
+All cgroup subsystems are joined so that statistics can be collected from
+each of the subsystems.  Freezer does not expose any stats but is joined
+so that containers can be paused and resumed.
+
+The parent process of the container's init must place the init pid inside
+the correct cgroups before the initialization begins.  This is done so
+that no processes or threads escape the cgroups.  This synchronization is
+done via a pipe ( specified in the runtime section below ) on which the container's
+init process will block while waiting for the parent to finish setup.
+
+### Security 
+
+The standard set of Linux capabilities that are set in a container
+provide a good default for security and flexibility for the applications.
+
+
+|     Capability       | Enabled |
+| -------------------- | ------- |
+| CAP_NET_RAW          | 1       |
+| CAP_NET_BIND_SERVICE | 1       |
+| CAP_AUDIT_WRITE      | 1       |
+| CAP_DAC_OVERRIDE     | 1       |
+| CAP_SETFCAP          | 1       |
+| CAP_SETPCAP          | 1       |
+| CAP_SETGID           | 1       |
+| CAP_SETUID           | 1       |
+| CAP_MKNOD            | 1       |
+| CAP_CHOWN            | 1       |
+| CAP_FOWNER           | 1       |
+| CAP_FSETID           | 1       |
+| CAP_KILL             | 1       |
+| CAP_SYS_CHROOT       | 1       |
+| CAP_NET_BROADCAST    | 0       |
+| CAP_SYS_MODULE       | 0       |
+| CAP_SYS_RAWIO        | 0       |
+| CAP_SYS_PACCT        | 0       |
+| CAP_SYS_ADMIN        | 0       |
+| CAP_SYS_NICE         | 0       |
+| CAP_SYS_RESOURCE     | 0       |
+| CAP_SYS_TIME         | 0       |
+| CAP_SYS_TTY_CONFIG   | 0       |
+| CAP_AUDIT_CONTROL    | 0       |
+| CAP_MAC_OVERRIDE     | 0       |
+| CAP_MAC_ADMIN        | 0       |
+| CAP_NET_ADMIN        | 0       |
+| CAP_SYSLOG           | 0       |
+| CAP_DAC_READ_SEARCH  | 0       |
+| CAP_LINUX_IMMUTABLE  | 0       |
+| CAP_IPC_LOCK         | 0       |
+| CAP_IPC_OWNER        | 0       |
+| CAP_SYS_PTRACE       | 0       |
+| CAP_SYS_BOOT         | 0       |
+| CAP_LEASE            | 0       |
+| CAP_WAKE_ALARM       | 0       |
+| CAP_BLOCK_SUSPEND    | 0       |
+
+
+Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor)
+and [selinux](http://selinuxproject.org/page/Main_Page) can be used with
+the containers.  A container should support setting an apparmor profile or 
+selinux process and mount labels if provided in the configuration.  
+
+Standard apparmor profile:
+```c
+#include <tunables/global>
+profile <profile_name> flags=(attach_disconnected,mediate_deleted) {
+  #include <abstractions/base>
+  network,
+  capability,
+  file,
+  umount,
+
+  mount fstype=tmpfs,
+  mount fstype=mqueue,
+  mount fstype=fuse.*,
+  mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,
+  mount fstype=efivarfs -> /sys/firmware/efi/efivars/,
+  mount fstype=fusectl -> /sys/fs/fuse/connections/,
+  mount fstype=securityfs -> /sys/kernel/security/,
+  mount fstype=debugfs -> /sys/kernel/debug/,
+  mount fstype=proc -> /proc/,
+  mount fstype=sysfs -> /sys/,
+
+  deny @{PROC}/sys/fs/** wklx,
+  deny @{PROC}/sysrq-trigger rwklx,
+  deny @{PROC}/mem rwklx,
+  deny @{PROC}/kmem rwklx,
+  deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx,
+  deny @{PROC}/sys/kernel/*/** wklx,
+
+  deny mount options=(ro, remount) -> /,
+  deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
+  deny mount fstype=devpts,
+
+  deny /sys/[^f]*/** wklx,
+  deny /sys/f[^s]*/** wklx,
+  deny /sys/fs/[^c]*/** wklx,
+  deny /sys/fs/c[^g]*/** wklx,
+  deny /sys/fs/cg[^r]*/** wklx,
+  deny /sys/firmware/efi/efivars/** rwklx,
+  deny /sys/kernel/security/** rwklx,
+}
+```
+
+*TODO: seccomp work is being done to find a good default config*
+
+### Runtime and Init Process
+
+During container creation the parent process needs to talk to the container's init 
+process and have a form of synchronization.  This is accomplished by creating
+a pipe that is passed to the container's init.  When the init process first spawns 
+it will block on its side of the pipe until the parent closes its side.  This
+allows the parent to have time to set the new process inside a cgroup hierarchy 
+and/or write any uid/gid mappings required for user namespaces.  
+The pipe is passed to the init process via FD 3.
+
+The application consuming libcontainer should be compiled statically.  libcontainer
+does not define any init process and the arguments provided are used to `exec` the
+process inside the application.  There should be no long running init within the 
+container spec.
+
+If a pseudo tty is provided to a container it will open and `dup2` the console
+as the container's STDIN, STDOUT, STDERR as well as mounting the console
+as `/dev/console`.
+
+An extra set of mounts are provided to a container and setup for use.  A container's
+rootfs can contain some non portable files inside that can cause side effects during
+execution of a process.  These files are usually created and populated with the container
+specific information via the runtime.  
+
+**Extra runtime files:**
+* /etc/hosts 
+* /etc/resolv.conf
+* /etc/hostname
+* /etc/localtime
+
+
+#### Defaults
+
+There are a few defaults that can be overridden by users, but in their omission
+these apply to processes within a container.
+
+|       Type          |             Value              |
+| ------------------- | ------------------------------ |
+| Parent Death Signal | SIGKILL                        | 
+| UID                 | 0                              |
+| GID                 | 0                              |
+| GROUPS              | 0, NULL                        |
+| CWD                 | "/"                            |
+| $HOME               | Current user's home dir or "/" |
+| Readonly rootfs     | false                          |
+| Pseudo TTY          | false                          |
+
+
+## Actions
+
+After a container is created there is a standard set of actions that can
+be done to the container.  These actions are part of the public API for 
+a container.
+
+|     Action     |                         Description                                |
+| -------------- | ------------------------------------------------------------------ |
+| Get processes  | Return all the pids for processes running inside a container       | 
+| Get Stats      | Return resource statistics for the container as a whole            |
+| Wait           | Wait on the container's init process ( pid 1 )                     |
+| Wait Process   | Wait on any of the container's processes returning the exit status | 
+| Destroy        | Kill the container's init process and remove any filesystem state  |
+| Signal         | Send a signal to the container's init process                      |
+| Signal Process | Send a signal to any of the container's processes                  |
+| Pause          | Pause all processes inside the container                           |
+| Resume         | Resume all processes inside the container if paused                |
+| Exec           | Execute a new process inside of the container  ( requires setns )  |
+
+
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/api_temp.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/api_temp.go
@ -5,30 +5,17 @@ package libcontainer

 import (
 	"github.com/docker/libcontainer/cgroups/fs"
-	"github.com/docker/libcontainer/cgroups/systemd"
 	"github.com/docker/libcontainer/network"
 )

 // TODO(vmarmol): Complete Stats() in final libcontainer API and move users to that.
 // DEPRECATED: The below portions are only to be used during the transition to the official API.
 // Returns all available stats for the given container.
-func GetStats(container *Config, state *State) (*ContainerStats, error) {
-	var (
-		err   error
-		stats = &ContainerStats{}
-	)
-
-	if systemd.UseSystemd() {
-		stats.CgroupStats, err = systemd.GetStats(container.Cgroups)
-	} else {
-		stats.CgroupStats, err = fs.GetStats(container.Cgroups)
-	}
-
-	if err != nil {
+func GetStats(container *Config, state *State) (stats *ContainerStats, err error) {
+	stats = &ContainerStats{}
+	if stats.CgroupStats, err = fs.GetStats(state.CgroupPaths); err != nil {
 		return stats, err
 	}
-
 	stats.NetworkStats, err = network.GetStats(&state.NetworkState)
-
 	return stats, err
 }
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgroups.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgroups.go
@ -50,11 +50,7 @@ type Cgroup struct {
 	CpuQuota          int64             `json:"cpu_quota,omitempty"`          // CPU hardcap limit (in usecs). Allowed cpu time in a given period.
 	CpuPeriod         int64             `json:"cpu_period,omitempty"`         // CPU period to be used for hardcapping (in usecs). 0 to use system default.
 	CpusetCpus        string            `json:"cpuset_cpus,omitempty"`        // CPU to use
+	CpusetMems        string            `json:"cpuset_mems,omitempty"`        // MEM to use
 	Freezer           FreezerState      `json:"freezer,omitempty"`            // set the freeze value for the process
 	Slice             string            `json:"slice,omitempty"`              // Parent slice to use for systemd
 }
-
-type ActiveCgroup interface {
-	Cleanup() error
-	Paths() (map[string]string, error)
-}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgutil/cgutil.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgutil/cgutil.go
@ -1,264 +0,0 @@
-package main
-
-import (
-	"encoding/json"
-	"fmt"
-	"log"
-	"os"
-	"syscall"
-	"time"
-
-	"github.com/codegangsta/cli"
-	"github.com/docker/libcontainer/cgroups"
-	"github.com/docker/libcontainer/cgroups/fs"
-	"github.com/docker/libcontainer/cgroups/systemd"
-)
-
-var createCommand = cli.Command{
-	Name:  "create",
-	Usage: "Create a cgroup container using the supplied configuration and initial process.",
-	Flags: []cli.Flag{
-		cli.StringFlag{Name: "config, c", Value: "cgroup.json", Usage: "path to container configuration (cgroups.Cgroup object)"},
-		cli.IntFlag{Name: "pid, p", Value: 0, Usage: "pid of the initial process in the container"},
-	},
-	Action: createAction,
-}
-
-var destroyCommand = cli.Command{
-	Name:  "destroy",
-	Usage: "Destroy an existing cgroup container.",
-	Flags: []cli.Flag{
-		cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
-		cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
-	},
-	Action: destroyAction,
-}
-
-var statsCommand = cli.Command{
-	Name:  "stats",
-	Usage: "Get stats for cgroup",
-	Flags: []cli.Flag{
-		cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
-		cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
-	},
-	Action: statsAction,
-}
-
-var pauseCommand = cli.Command{
-	Name:  "pause",
-	Usage: "Pause cgroup",
-	Flags: []cli.Flag{
-		cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
-		cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
-	},
-	Action: pauseAction,
-}
-
-var resumeCommand = cli.Command{
-	Name:  "resume",
-	Usage: "Resume a paused cgroup",
-	Flags: []cli.Flag{
-		cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
-		cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
-	},
-	Action: resumeAction,
-}
-
-var psCommand = cli.Command{
-	Name:  "ps",
-	Usage: "Get list of pids for a cgroup",
-	Flags: []cli.Flag{
-		cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
-		cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
-	},
-	Action: psAction,
-}
-
-func getConfigFromFile(c *cli.Context) (*cgroups.Cgroup, error) {
-	f, err := os.Open(c.String("config"))
-	if err != nil {
-		return nil, err
-	}
-	defer f.Close()
-
-	var config *cgroups.Cgroup
-	if err := json.NewDecoder(f).Decode(&config); err != nil {
-		log.Fatal(err)
-	}
-	return config, nil
-}
-
-func openLog(name string) error {
-	f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0755)
-	if err != nil {
-		return err
-	}
-
-	log.SetOutput(f)
-	return nil
-}
-
-func getConfig(context *cli.Context) (*cgroups.Cgroup, error) {
-	name := context.String("name")
-	if name == "" {
-		log.Fatal(fmt.Errorf("Missing container name"))
-	}
-	parent := context.String("parent")
-	return &cgroups.Cgroup{
-		Name:   name,
-		Parent: parent,
-	}, nil
-}
-
-func killAll(config *cgroups.Cgroup) {
-	// We could use freezer here to prevent process spawning while we are trying
-	// to kill everything. But going with more portable solution of retrying for
-	// now.
-	pids := getPids(config)
-	retry := 10
-	for len(pids) != 0 || retry > 0 {
-		killPids(pids)
-		time.Sleep(100 * time.Millisecond)
-		retry--
-		pids = getPids(config)
-	}
-	if len(pids) != 0 {
-		log.Fatal(fmt.Errorf("Could not kill existing processes in the container."))
-	}
-}
-
-func getPids(config *cgroups.Cgroup) []int {
-	pids, err := fs.GetPids(config)
-	if err != nil {
-		log.Fatal(err)
-	}
-	return pids
-}
-
-func killPids(pids []int) {
-	for _, pid := range pids {
-		// pids might go away on their own. Ignore errors.
-		syscall.Kill(pid, syscall.SIGKILL)
-	}
-}
-
-func setFreezerState(context *cli.Context, state cgroups.FreezerState) {
-	config, err := getConfig(context)
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	if systemd.UseSystemd() {
-		err = systemd.Freeze(config, state)
-	} else {
-		err = fs.Freeze(config, state)
-	}
-	if err != nil {
-		log.Fatal(err)
-	}
-}
-
-func createAction(context *cli.Context) {
-	config, err := getConfigFromFile(context)
-	if err != nil {
-		log.Fatal(err)
-	}
-	pid := context.Int("pid")
-	if pid <= 0 {
-		log.Fatal(fmt.Errorf("Invalid pid : %d", pid))
-	}
-	if systemd.UseSystemd() {
-		_, err := systemd.Apply(config, pid)
-		if err != nil {
-			log.Fatal(err)
-		}
-	} else {
-		_, err := fs.Apply(config, pid)
-		if err != nil {
-			log.Fatal(err)
-		}
-	}
-}
-
-func destroyAction(context *cli.Context) {
-	config, err := getConfig(context)
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	killAll(config)
-	// Systemd will clean up cgroup state for empty container.
-	if !systemd.UseSystemd() {
-		err := fs.Cleanup(config)
-		if err != nil {
-			log.Fatal(err)
-		}
-	}
-}
-
-func statsAction(context *cli.Context) {
-	config, err := getConfig(context)
-	if err != nil {
-		log.Fatal(err)
-	}
-	stats, err := fs.GetStats(config)
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	out, err := json.MarshalIndent(stats, "", "\t")
-	if err != nil {
-		log.Fatal(err)
-	}
-	fmt.Printf("Usage stats for '%s':\n %v\n", config.Name, string(out))
-}
-
-func pauseAction(context *cli.Context) {
-	setFreezerState(context, cgroups.Frozen)
-}
-
-func resumeAction(context *cli.Context) {
-	setFreezerState(context, cgroups.Thawed)
-}
-
-func psAction(context *cli.Context) {
-	config, err := getConfig(context)
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	pids, err := fs.GetPids(config)
-	if err != nil {
-		log.Fatal(err)
-	}
-
-	fmt.Printf("Pids in '%s':\n", config.Name)
-	fmt.Println(pids)
-}
-
-func main() {
-	logPath := os.Getenv("log")
-	if logPath != "" {
-		if err := openLog(logPath); err != nil {
-			log.Fatal(err)
-		}
-	}
-
-	app := cli.NewApp()
-	app.Name = "cgutil"
-	app.Usage = "Test utility for libcontainer cgroups package"
-	app.Version = "0.1"
-
-	app.Commands = []cli.Command{
-		createCommand,
-		destroyCommand,
-		statsCommand,
-		pauseCommand,
-		resumeCommand,
-		psCommand,
-	}
-
-	if err := app.Run(os.Args); err != nil {
-		log.Fatal(err)
-	}
-}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgutil/sample_cgroup.json
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/cgutil/sample_cgroup.json
@ -1,10 +0,0 @@
-{
-	"name": "luke",
-	"parent": "darth",
-	"allow_all_devices": true,
-	"memory": 1073741824,
-	"memory_swap": -1,
-	"cpu_shares": 2048,
-	"cpu_quota": 500000,
-	"cpu_period": 250000
-}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/apply_raw.go
@ -57,20 +57,35 @@ type data struct {
 	pid    int
 }

-func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
+func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
 	d, err := getCgroupData(c, pid)
 	if err != nil {
 		return nil, err
 	}

-	for _, sys := range subsystems {
+	paths := make(map[string]string)
+	defer func() {
+		if err != nil {
+			cgroups.RemovePaths(paths)
+		}
+	}()
+	for name, sys := range subsystems {
 		if err := sys.Set(d); err != nil {
-			d.Cleanup()
 			return nil, err
 		}
+		// FIXME: Apply should, ideally, be reentrant or be broken up into a separate
+		// create and join phase so that the cgroup hierarchy for a container can be
+		// created then join consists of writing the process pids to cgroup.procs
+		p, err := d.path(name)
+		if err != nil {
+			if cgroups.IsNotFound(err) {
+				continue
+			}
+			return nil, err
+		}
+		paths[name] = p
 	}
-
-	return d, nil
+	return paths, nil
 }

 // Symmetrical public function to update device based cgroups.  Also available
@ -86,33 +101,13 @@ func ApplyDevices(c *cgroups.Cgroup, pid int) error {
 	return devices.Set(d)
 }

-func Cleanup(c *cgroups.Cgroup) error {
-	d, err := getCgroupData(c, 0)
-	if err != nil {
-		return fmt.Errorf("Could not get Cgroup data %s", err)
-	}
-	return d.Cleanup()
-}
-
-func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) {
+func GetStats(systemPaths map[string]string) (*cgroups.Stats, error) {
 	stats := cgroups.NewStats()
-
-	d, err := getCgroupData(c, 0)
-	if err != nil {
-		return nil, fmt.Errorf("getting CgroupData %s", err)
-	}
-
-	for sysname, sys := range subsystems {
-		path, err := d.path(sysname)
-		if err != nil {
-			// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
-			if cgroups.IsNotFound(err) {
-				continue
-			}
-
-			return nil, err
+	for name, path := range systemPaths {
+		sys, ok := subsystems[name]
+		if !ok {
+			continue
 		}
-
 		if err := sys.GetStats(path, stats); err != nil {
 			return nil, err
 		}
@ -176,26 +171,6 @@ func (raw *data) parent(subsystem string) (string, error) {
 	return filepath.Join(raw.root, subsystem, initPath), nil
 }

-func (raw *data) Paths() (map[string]string, error) {
-	paths := make(map[string]string)
-
-	for sysname := range subsystems {
-		path, err := raw.path(sysname)
-		if err != nil {
-			// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
-			if cgroups.IsNotFound(err) {
-				continue
-			}
-
-			return nil, err
-		}
-
-		paths[sysname] = path
-	}
-
-	return paths, nil
-}
-
 func (raw *data) path(subsystem string) (string, error) {
 	// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
 	if filepath.IsAbs(raw.cgroup) {
@ -234,13 +209,6 @@ func (raw *data) join(subsystem string) (string, error) {
 	return path, nil
 }

-func (raw *data) Cleanup() error {
-	for _, sys := range subsystems {
-		sys.Remove(raw)
-	}
-	return nil
-}
-
 func writeFile(dir, file, data string) error {
 	return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
 }
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/cpuset.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/cpuset.go
@ -14,17 +14,11 @@ type CpusetGroup struct {
 }

 func (s *CpusetGroup) Set(d *data) error {
-	// we don't want to join this cgroup unless it is specified
-	if d.c.CpusetCpus != "" {
-		dir, err := d.path("cpuset")
-		if err != nil {
-			return err
-		}
-
-		return s.SetDir(dir, d.c.CpusetCpus, d.pid)
+	dir, err := d.path("cpuset")
+	if err != nil {
+		return err
 	}
-
-	return nil
+	return s.SetDir(dir, d.c.CpusetCpus, d.c.CpusetMems, d.pid)
 }

 func (s *CpusetGroup) Remove(d *data) error {
@ -35,7 +29,7 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
 	return nil
 }

-func (s *CpusetGroup) SetDir(dir, value string, pid int) error {
+func (s *CpusetGroup) SetDir(dir, cpus string, mems string, pid int) error {
 	if err := s.ensureParent(dir); err != nil {
 		return err
 	}
@ -46,8 +40,17 @@ func (s *CpusetGroup) SetDir(dir, value string, pid int) error {
 		return err
 	}

-	if err := writeFile(dir, "cpuset.cpus", value); err != nil {
-		return err
+	// If --cpuset-xxx is not used, the default value inherited from the parent
+	// cgroup is set in s.ensureParent; otherwise, use the value we set.
+	if cpus != "" {
+		if err := writeFile(dir, "cpuset.cpus", cpus); err != nil {
+			return err
+		}
+	}
+	if mems != "" {
+		if err := writeFile(dir, "cpuset.mems", mems); err != nil {
+			return err
+		}
 	}

 	return nil
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/utils_test.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/fs/utils_test.go
@ -57,7 +57,7 @@ func TestGetCgroupParamsInt(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	} else if value != 0 {
-		t.Fatalf("Expected %d to equal %f", value, 0)
+		t.Fatalf("Expected %d to equal %d", value, 0)
 	}

 	// Success with negative values lesser than min int64
@ -70,7 +70,7 @@ func TestGetCgroupParamsInt(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	} else if value != 0 {
-		t.Fatalf("Expected %d to equal %f", value, 0)
+		t.Fatalf("Expected %d to equal %d", value, 0)
 	}

 	// Not a float.
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/stats.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/stats.go
@ -27,7 +27,7 @@ type CpuUsage struct {

 type CpuStats struct {
 	CpuUsage       CpuUsage       `json:"cpu_usage,omitempty"`
-	ThrottlingData ThrottlingData `json:"throlling_data,omitempty"`
+	ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
 }

 type MemoryStats struct {
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/systemd/apply_nosystemd.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/systemd/apply_nosystemd.go
@ -12,7 +12,7 @@ func UseSystemd() bool {
 	return false
 }

-func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
+func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
 	return nil, fmt.Errorf("Systemd not supported")
 }

@ -27,7 +27,3 @@ func ApplyDevices(c *cgroups.Cgroup, pid int) error {
 func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error {
 	return fmt.Errorf("Systemd not supported")
 }
-
-func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) {
-	return nil, fmt.Errorf("Systemd not supported")
-}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/systemd/apply_systemd.go
@ -31,18 +31,15 @@ var (
 	connLock              sync.Mutex
 	theConn               *systemd.Conn
 	hasStartTransientUnit bool
-	subsystems            = map[string]subsystem{
-		"devices":    &fs.DevicesGroup{},
-		"memory":     &fs.MemoryGroup{},
-		"cpu":        &fs.CpuGroup{},
-		"cpuset":     &fs.CpusetGroup{},
-		"cpuacct":    &fs.CpuacctGroup{},
-		"blkio":      &fs.BlkioGroup{},
-		"perf_event": &fs.PerfEventGroup{},
-		"freezer":    &fs.FreezerGroup{},
-	}
 )

+// newProp returns a systemd.Property called name whose value is units
+// wrapped in a dbus variant.
+func newProp(name string, units interface{}) systemd.Property {
+	return systemd.Property{
+		Name:  name,
+		Value: dbus.MakeVariant(units),
+	}
+}
+
 func UseSystemd() bool {
 	s, err := os.Stat("/run/systemd/system")
 	if err != nil || !s.IsDir() {
@ -84,7 +81,7 @@ func getIfaceForUnit(unitName string) string {
 	return "Unit"
 }

-func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
+func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
 	var (
 		unitName   = getUnitName(c)
 		slice      = "system.slice"
@ -99,27 +96,27 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
 	}

 	properties = append(properties,
-		systemd.Property{"Slice", dbus.MakeVariant(slice)},
-		systemd.Property{"Description", dbus.MakeVariant("docker container " + c.Name)},
-		systemd.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})},
+		systemd.PropSlice(slice),
+		systemd.PropDescription("docker container "+c.Name),
+		newProp("PIDs", []uint32{uint32(pid)}),
 	)

 	// Always enable accounting, this gets us the same behaviour as the fs implementation,
 	// plus the kernel has some problems with joining the memory cgroup at a later time.
 	properties = append(properties,
-		systemd.Property{"MemoryAccounting", dbus.MakeVariant(true)},
-		systemd.Property{"CPUAccounting", dbus.MakeVariant(true)},
-		systemd.Property{"BlockIOAccounting", dbus.MakeVariant(true)})
+		newProp("MemoryAccounting", true),
+		newProp("CPUAccounting", true),
+		newProp("BlockIOAccounting", true))

 	if c.Memory != 0 {
 		properties = append(properties,
-			systemd.Property{"MemoryLimit", dbus.MakeVariant(uint64(c.Memory))})
+			newProp("MemoryLimit", uint64(c.Memory)))
 	}
 	// TODO: MemoryReservation and MemorySwap not available in systemd

 	if c.CpuShares != 0 {
 		properties = append(properties,
-			systemd.Property{"CPUShares", dbus.MakeVariant(uint64(c.CpuShares))})
+			newProp("CPUShares", uint64(c.CpuShares)))
 	}

 	if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil {
@ -140,57 +137,42 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {

 	}

-	// we need to manually join the freezer cgroup in systemd because it does not currently support it
-	// via the dbus api
+	// we need to manually join the freezer and cpuset cgroups in systemd
+	// because it does not currently support them via the dbus api.
 	if err := joinFreezer(c, pid); err != nil {
 		return nil, err
 	}

-	if c.CpusetCpus != "" {
-		if err := joinCpuset(c, pid); err != nil {
-			return nil, err
-		}
+	if err := joinCpuset(c, pid); err != nil {
+		return nil, err
 	}

-	return res, nil
-}
-
-func writeFile(dir, file, data string) error {
-	return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
-}
-
-func (c *systemdCgroup) Paths() (map[string]string, error) {
 	paths := make(map[string]string)
-
-	for sysname := range subsystems {
-		subsystemPath, err := getSubsystemPath(c.cgroup, sysname)
+	for _, sysname := range []string{
+		"devices",
+		"memory",
+		"cpu",
+		"cpuset",
+		"cpuacct",
+		"blkio",
+		"perf_event",
+		"freezer",
+	} {
+		subsystemPath, err := getSubsystemPath(res.cgroup, sysname)
 		if err != nil {
 			// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
 			if cgroups.IsNotFound(err) {
 				continue
 			}
-
 			return nil, err
 		}
-
 		paths[sysname] = subsystemPath
 	}
-
 	return paths, nil
 }

-func (c *systemdCgroup) Cleanup() error {
-	// systemd cleans up, we don't need to do much
-	paths, err := c.Paths()
-	if err != nil {
-		return err
-	}
-
-	for _, path := range paths {
-		os.RemoveAll(path)
-	}
-
-	return nil
+// writeFile writes data to the file named file inside dir, passing 0700 as
+// the permission argument to ioutil.WriteFile.
+func writeFile(dir, file, data string) error {
+	return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
+}

 func joinFreezer(c *cgroups.Cgroup, pid int) error {
@ -260,35 +242,6 @@ func getUnitName(c *cgroups.Cgroup) string {
 	return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name)
 }

-/*
- * This would be nicer to get from the systemd API when accounting
- * is enabled, but sadly there is no way to do that yet.
- * The lack of this functionality in the API & the approach taken
- * is guided by
- * http://www.freedesktop.org/wiki/Software/systemd/ControlGroupInterface/#readingaccountinginformation.
- */
-func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) {
-	stats := cgroups.NewStats()
-
-	for sysname, sys := range subsystems {
-		subsystemPath, err := getSubsystemPath(c, sysname)
-		if err != nil {
-			// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
-			if cgroups.IsNotFound(err) {
-				continue
-			}
-
-			return nil, err
-		}
-
-		if err := sys.GetStats(subsystemPath, stats); err != nil {
-			return nil, err
-		}
-	}
-
-	return stats, nil
-}
-
 // Atm we can't use the systemd device support because of two missing things:
 // * Support for wildcards to allow mknod on any device
 // * Support for wildcards to allow /dev/pts support
@ -360,5 +313,5 @@ func joinCpuset(c *cgroups.Cgroup, pid int) error {

 	s := &fs.CpusetGroup{}

-	return s.SetDir(path, c.CpusetCpus, pid)
+	return s.SetDir(path, c.CpusetCpus, c.CpusetMems, pid)
 }
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/utils.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/cgroups/utils.go
@ -189,6 +189,17 @@ func EnterPid(cgroupPaths map[string]string, pid int) error {
 			}
 		}
 	}
-
 	return nil
 }
+
+// RemovePaths calls os.RemoveAll on every value in paths (the map keys are
+// not used). A failure does not stop the loop: the remaining paths are still
+// removed, and the first error encountered is returned (nil if there was none).
+func RemovePaths(paths map[string]string) (err error) {
+	for _, path := range paths {
+		// Record rerr only while err is still nil, so the first failure is kept.
+		if rerr := os.RemoveAll(path); err == nil {
+			err = rerr
+		}
+	}
+	return err
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/config.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/config.go
@ -10,6 +10,13 @@ type MountConfig mount.MountConfig

 type Network network.Network

+// Namespace defines configuration for each namespace.  It specifies an
+// alternate path that is able to be joined via setns.
+type Namespace struct {
+	Name string `json:"name"`
+	Path string `json:"path,omitempty"`
+}
+
 // Config defines configuration options for executing a process inside a contained environment.
 type Config struct {
 	// Mount specific options.
@ -38,7 +45,7 @@ type Config struct {

 	// Namespaces specifies the container's namespaces that it should setup when cloning the init process
 	// If a namespace is not provided that namespace is shared from the container's parent process
-	Namespaces map[string]bool `json:"namespaces,omitempty"`
+	Namespaces []Namespace `json:"namespaces,omitempty"`

 	// Capabilities specify the capabilities to keep when executing the process inside the container
 	// All capabilities not specified will be dropped from the processes capability mask
@ -65,6 +72,10 @@ type Config struct {
 	// RestrictSys will remount /proc/sys, /sys, and mask over sysrq-trigger as well as /proc/irq and
 	// /proc/bus
 	RestrictSys bool `json:"restrict_sys,omitempty"`
+
+	// Rlimits specifies the resource limits, such as max open files, to set in the container
+	// If Rlimits are not set, the container will inherit rlimits from the parent process
+	Rlimits []Rlimit `json:"rlimits,omitempty"`
 }

 // Routes can be specified to create entries in the route table as the container is started
@ -87,3 +98,9 @@ type Route struct {
 	// The device to set this route up for, for example: eth0
 	InterfaceName string `json:"interface_name,omitempty"`
 }
+
+// Rlimit describes one resource limit to set in the container (see
+// Config.Rlimits). Type identifies the resource, e.g. syscall.RLIMIT_NOFILE,
+// and Hard and Soft hold the hard and soft limit values.
+type Rlimit struct {
+	Type int    `json:"type,omitempty"`
+	Hard uint64 `json:"hard,omitempty"`
+	Soft uint64 `json:"soft,omitempty"`
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/config_test.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/config_test.go
@ -64,12 +64,12 @@ func TestConfigJsonFormat(t *testing.T) {
 		t.Fail()
 	}

-	if !container.Namespaces["NEWNET"] {
+	if getNamespaceIndex(container, "NEWNET") == -1 {
 		t.Log("namespaces should contain NEWNET")
 		t.Fail()
 	}

-	if container.Namespaces["NEWUSER"] {
+	if getNamespaceIndex(container, "NEWUSER") != -1 {
 		t.Log("namespaces should not contain NEWUSER")
 		t.Fail()
 	}
@ -158,3 +158,12 @@ func TestSelinuxLabels(t *testing.T) {
 		t.Fatalf("expected mount label %q but received %q", label, container.MountConfig.MountLabel)
 	}
 }
+
+// getNamespaceIndex returns the index of the first entry in config.Namespaces
+// whose Name equals name, or -1 when no entry matches.
+func getNamespaceIndex(config *Config, name string) int {
+	for i, v := range config.Namespaces {
+		if v.Name == name {
+			return i
+		}
+	}
+	return -1
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/devices/devices.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/devices/devices.go
@ -103,7 +103,7 @@ func getDeviceNodes(path string) ([]*Device, error) {
 		switch {
 		case f.IsDir():
 			switch f.Name() {
-			case "pts", "shm", "fd":
+			case "pts", "shm", "fd", "mqueue":
 				continue
 			default:
 				sub, err := getDeviceNodes(filepath.Join(path, f.Name()))
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/integration/exec_test.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/integration/exec_test.go
@ -1,8 +1,11 @@
 package integration

 import (
+	"os"
 	"strings"
 	"testing"
+
+	"github.com/docker/libcontainer"
 )

 func TestExecPS(t *testing.T) {
@ -36,3 +39,152 @@ func TestExecPS(t *testing.T) {
 		t.Fatalf("expected output %q but received %q", expected, actual)
 	}
 }
+
+// TestIPCPrivate runs "readlink /proc/self/ns/ipc" in a container built from
+// the unmodified template config and fails if the result equals the link read
+// from /proc/1/ns/ipc outside the container. It returns early in -short mode.
+func TestIPCPrivate(t *testing.T) {
+	if testing.Short() {
+		return
+	}
+
+	rootfs, err := newRootFs()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer remove(rootfs)
+
+	l, err := os.Readlink("/proc/1/ns/ipc")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	config := newTemplateConfig(rootfs)
+	buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if exitCode != 0 {
+		t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
+	}
+
+	if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l {
+		t.Fatalf("ipc link should be private to the conatiner but equals host %q %q", actual, l)
+	}
+}
+
+// TestIPCHost removes the NEWIPC entry from the template config's Namespaces
+// and then expects "readlink /proc/self/ns/ipc" inside the container to equal
+// the link read from /proc/1/ns/ipc. It returns early in -short mode.
+func TestIPCHost(t *testing.T) {
+	if testing.Short() {
+		return
+	}
+
+	rootfs, err := newRootFs()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer remove(rootfs)
+
+	l, err := os.Readlink("/proc/1/ns/ipc")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	config := newTemplateConfig(rootfs)
+	i := getNamespaceIndex(config, "NEWIPC")
+	// Drop element i from the slice so the container is started without NEWIPC.
+	config.Namespaces = append(config.Namespaces[:i], config.Namespaces[i+1:]...)
+	buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if exitCode != 0 {
+		t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
+	}
+
+	if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l {
+		t.Fatalf("ipc link not equal to host link %q %q", actual, l)
+	}
+}
+
+// TestIPCJoinPath sets the Path of the template's NEWIPC namespace to
+// "/proc/1/ns/ipc" and then expects "readlink /proc/self/ns/ipc" inside the
+// container to equal the link read from that same path outside it.
+// It returns early in -short mode.
+func TestIPCJoinPath(t *testing.T) {
+	if testing.Short() {
+		return
+	}
+
+	rootfs, err := newRootFs()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer remove(rootfs)
+
+	l, err := os.Readlink("/proc/1/ns/ipc")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	config := newTemplateConfig(rootfs)
+	i := getNamespaceIndex(config, "NEWIPC")
+	config.Namespaces[i].Path = "/proc/1/ns/ipc"
+
+	buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if exitCode != 0 {
+		t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
+	}
+
+	if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l {
+		t.Fatalf("ipc link not equal to host link %q %q", actual, l)
+	}
+}
+
+// TestIPCBadPath sets the Path of the template's NEWIPC namespace to
+// "/proc/1/ns/ipcc" (note the extra "c") and fails unless runContainer
+// returns an error. It returns early in -short mode.
+func TestIPCBadPath(t *testing.T) {
+	if testing.Short() {
+		return
+	}
+
+	rootfs, err := newRootFs()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer remove(rootfs)
+
+	config := newTemplateConfig(rootfs)
+	i := getNamespaceIndex(config, "NEWIPC")
+	config.Namespaces[i].Path = "/proc/1/ns/ipcc"
+
+	_, _, err = runContainer(config, "", "true")
+	if err == nil {
+		t.Fatal("container succeded with bad ipc path")
+	}
+}
+
+// TestRlimit runs "ulimit -n" via /bin/sh in a container built from the
+// template config and expects the trimmed output to be "1024".
+// It returns early in -short mode.
+func TestRlimit(t *testing.T) {
+	if testing.Short() {
+		return
+	}
+
+	rootfs, err := newRootFs()
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer remove(rootfs)
+
+	config := newTemplateConfig(rootfs)
+	out, _, err := runContainer(config, "", "/bin/sh", "-c", "ulimit -n")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if limit := strings.TrimSpace(out.Stdout.String()); limit != "1024" {
+		t.Fatalf("expected rlimit to be 1024, got %s", limit)
+	}
+}
+
+// getNamespaceIndex returns the index of the first entry in config.Namespaces
+// whose Name equals name, or -1 when no entry matches.
+func getNamespaceIndex(config *libcontainer.Config, name string) int {
+	for i, v := range config.Namespaces {
+		if v.Name == name {
+			return i
+		}
+	}
+	return -1
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/integration/init_test.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/integration/init_test.go
@ -6,7 +6,6 @@ import (
 	"runtime"

 	"github.com/docker/libcontainer/namespaces"
-	"github.com/docker/libcontainer/syncpipe"
 )

 // init runs the libcontainer initialization code because of the busybox style needs
@ -27,12 +26,7 @@ func init() {
 		log.Fatal(err)
 	}

-	syncPipe, err := syncpipe.NewSyncPipeFromFd(0, 3)
-	if err != nil {
-		log.Fatalf("unable to create sync pipe: %s", err)
-	}
-
-	if err := namespaces.Init(container, rootfs, "", syncPipe, os.Args[3:]); err != nil {
+	if err := namespaces.Init(container, rootfs, "", os.NewFile(3, "pipe"), os.Args[3:]); err != nil {
 		log.Fatalf("unable to initialize for container: %s", err)
 	}
 	os.Exit(1)
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/integration/template_test.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/integration/template_test.go
@ -1,6 +1,8 @@
 package integration

 import (
+	"syscall"
+
 	"github.com/docker/libcontainer"
 	"github.com/docker/libcontainer/cgroups"
 	"github.com/docker/libcontainer/devices"
@ -30,12 +32,12 @@ func newTemplateConfig(rootfs string) *libcontainer.Config {
 			"KILL",
 			"AUDIT_WRITE",
 		},
-		Namespaces: map[string]bool{
-			"NEWNS":  true,
-			"NEWUTS": true,
-			"NEWIPC": true,
-			"NEWPID": true,
-			"NEWNET": true,
+		Namespaces: []libcontainer.Namespace{
+			{Name: "NEWNS"},
+			{Name: "NEWUTS"},
+			{Name: "NEWIPC"},
+			{Name: "NEWPID"},
+			{Name: "NEWNET"},
 		},
 		Cgroups: &cgroups.Cgroup{
 			Parent:          "integration",
@ -60,5 +62,12 @@ func newTemplateConfig(rootfs string) *libcontainer.Config {
 				Gateway: "localhost",
 			},
 		},
+		Rlimits: []libcontainer.Rlimit{
+			{
+				Type: syscall.RLIMIT_NOFILE,
+				Hard: uint64(1024),
+				Soft: uint64(1024),
+			},
+		},
 	}
 }
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/label/label.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/label/label.go
@ -25,6 +25,10 @@ func SetFileLabel(path string, fileLabel string) error {
 	return nil
 }

+// SetFileCreateLabel is a no-op in this implementation: fileLabel is ignored
+// and nil is always returned.
+func SetFileCreateLabel(fileLabel string) error {
+	return nil
+}
+
 func Relabel(path string, fileLabel string, relabel string) error {
 	return nil
 }
@ -43,3 +47,15 @@ func ReserveLabel(label string) error {
 func UnreserveLabel(label string) error {
 	return nil
 }
+
+// DupSecOpt takes a process label and returns security options that
+// can be used to set duplicate labels on future container processes.
+// This implementation ignores src and always returns nil.
+func DupSecOpt(src string) []string {
+	return nil
+}
+
+// DisableSecOpt returns a security opt that can disable labeling
+// support for future container processes.
+// This implementation always returns nil.
+func DisableSecOpt() []string {
+	return nil
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/label/label_selinux.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/label/label_selinux.go
@ -17,7 +17,6 @@ func InitLabels(options []string) (string, string, error) {
 	if !selinux.SelinuxEnabled() {
 		return "", "", nil
 	}
-	var err error
 	processLabel, mountLabel := selinux.GetLxcContexts()
 	if processLabel != "" {
 		pcon := selinux.NewContext(processLabel)
@ -38,7 +37,7 @@ func InitLabels(options []string) (string, string, error) {
 		processLabel = pcon.Get()
 		mountLabel = mcon.Get()
 	}
-	return processLabel, mountLabel, err
+	return processLabel, mountLabel, nil
 }

 // DEPRECATED: The GenLabels function is only to be used during the transition to the official API.
@ -88,6 +87,14 @@ func SetFileLabel(path string, fileLabel string) error {
 	return nil
 }

+// SetFileCreateLabel tells the kernel the label for all files to be created.
+// When selinux.SelinuxEnabled() is true it returns the result of
+// selinux.Setfscreatecon(fileLabel); otherwise it does nothing and returns nil.
+func SetFileCreateLabel(fileLabel string) error {
+	if selinux.SelinuxEnabled() {
+		return selinux.Setfscreatecon(fileLabel)
+	}
+	return nil
+}
+
 // Change the label of path to the filelabel string.  If the relabel string
 // is "z", relabel will change the MCS label to s0.  This will allow all
 // containers to share the content.  If the relabel string is a "Z" then
@ -130,3 +137,15 @@ func UnreserveLabel(label string) error {
 	selinux.FreeLxcContexts(label)
 	return nil
 }
+
+// DupSecOpt takes a process label and returns security options that
+// can be used to set duplicate labels on future container processes.
+// It delegates to selinux.DupSecOpt.
+func DupSecOpt(src string) []string {
+	return selinux.DupSecOpt(src)
+}
+
+// DisableSecOpt returns a security opt that can disable labeling
+// support for future container processes.
+// It delegates to selinux.DisableSecOpt.
+func DisableSecOpt() []string {
+	return selinux.DisableSecOpt()
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/label/label_selinux_test.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/label/label_selinux_test.go
@ -3,6 +3,7 @@
 package label

 import (
+	"strings"
 	"testing"

 	"github.com/docker/libcontainer/selinux"
@ -33,7 +34,7 @@ func TestInit(t *testing.T) {
 			t.Fatal(err)
 		}
 		if plabel != "user_u:user_r:user_t:s0:c1,c15" || mlabel != "user_u:object_r:svirt_sandbox_file_t:s0:c1,c15" {
-			t.Log("InitLabels User Failed")
+			t.Log("InitLabels User Match Failed")
 			t.Log(plabel, mlabel)
 			t.Fatal(err)
 		}
@ -46,3 +47,43 @@ func TestInit(t *testing.T) {
 		}
 	}
 }
+// TestDuplicateLabel checks the options returned by DupSecOpt for the label
+// "system_u:system_r:svirt_lxc_net_t:s0:c1,c2": each option must split on ":"
+// into "label", a field name (user, role, type or level) and the matching
+// component of the label. It then checks that the first option returned by
+// DisableSecOpt is "label:disable".
+// NOTE(review): secopt[0] is indexed without a length check, so an empty
+// result from DisableSecOpt would panic rather than fail the test.
+func TestDuplicateLabel(t *testing.T) {
+	secopt := DupSecOpt("system_u:system_r:svirt_lxc_net_t:s0:c1,c2")
+	t.Log(secopt)
+	for _, opt := range secopt {
+		// Split into at most three parts so the level ("s0:c1,c2") keeps its own colon.
+		con := strings.SplitN(opt, ":", 3)
+		if len(con) != 3 || con[0] != "label" {
+			t.Errorf("Invalid DupSecOpt return value")
+			continue
+		}
+		if con[1] == "user" {
+			if con[2] != "system_u" {
+				t.Errorf("DupSecOpt Failed user incorrect")
+			}
+			continue
+		}
+		if con[1] == "role" {
+			if con[2] != "system_r" {
+				t.Errorf("DupSecOpt Failed role incorrect")
+			}
+			continue
+		}
+		if con[1] == "type" {
+			if con[2] != "svirt_lxc_net_t" {
+				t.Errorf("DupSecOpt Failed type incorrect")
+			}
+			continue
+		}
+		if con[1] == "level" {
+			if con[2] != "s0:c1,c2" {
+				t.Errorf("DupSecOpt Failed level incorrect")
+			}
+			continue
+		}
+		t.Errorf("DupSecOpt Failed invalid field %q", con[1])
+	}
+	secopt = DisableSecOpt()
+	if secopt[0] != "label:disable" {
+		t.Errorf("DisableSecOpt Failed level incorrect")
+	}
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/mount/init.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/mount/init.go
@ -97,7 +97,7 @@ func InitializeMountNamespace(rootfs, console string, sysReadonly bool, mountCon
 	return nil
 }

-// mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts
+// mountSystem sets up linux specific system mounts like mqueue, sys, proc, shm, and devpts
 // inside the mount namespace
 func mountSystem(rootfs string, sysReadonly bool, mountConfig *MountConfig) error {
 	for _, m := range newSystemMounts(rootfs, mountConfig.MountLabel, sysReadonly) {
@ -168,6 +168,7 @@ func newSystemMounts(rootfs, mountLabel string, sysReadonly bool) []mount {
 		{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
 		{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)},
 		{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
+		{source: "mqueue", path: filepath.Join(rootfs, "dev", "mqueue"), device: "mqueue", flags: defaultMountFlags},
 		{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
 	}

--- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/exec.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/exec.go
@ -3,6 +3,7 @@
 package namespaces

 import (
+	"encoding/json"
 	"io"
 	"os"
 	"os/exec"
@ -13,7 +14,6 @@ import (
 	"github.com/docker/libcontainer/cgroups/fs"
 	"github.com/docker/libcontainer/cgroups/systemd"
 	"github.com/docker/libcontainer/network"
-	"github.com/docker/libcontainer/syncpipe"
 	"github.com/docker/libcontainer/system"
 )

@ -22,19 +22,17 @@ import (
 // Exec performs setup outside of a namespace so that a container can be
 // executed.  Exec is a high level function for working with container namespaces.
 func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Writer, console, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) {
-	var (
-		err error
-	)
+	var err error

 	// create a pipe so that we can synchronize with the namespaced process and
-	// pass the veth name to the child
-	syncPipe, err := syncpipe.NewSyncPipe()
+	// pass the state and configuration to the child process
+	parent, child, err := newInitPipe()
 	if err != nil {
 		return -1, err
 	}
-	defer syncPipe.Close()
+	defer parent.Close()

-	command := createCommand(container, console, dataPath, os.Args[0], syncPipe.Child(), args)
+	command := createCommand(container, console, dataPath, os.Args[0], child, args)
 	// Note: these are only used in non-tty mode
 	// if there is a tty for the container it will be opened within the namespace and the
 	// fds will be duped to stdin, stdout, and stderr
@ -43,39 +41,42 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri
 	command.Stderr = stderr

 	if err := command.Start(); err != nil {
+		child.Close()
 		return -1, err
 	}
+	child.Close()

-	// Now we passed the pipe to the child, close our side
-	syncPipe.CloseChild()
+	terminate := func(terr error) (int, error) {
+		// TODO: log the errors for kill and wait
+		command.Process.Kill()
+		command.Wait()
+		return -1, terr
+	}

 	started, err := system.GetProcessStartTime(command.Process.Pid)
 	if err != nil {
-		return -1, err
+		return terminate(err)
 	}

 	// Do this before syncing with child so that no children
 	// can escape the cgroup
-	cgroupRef, err := SetupCgroups(container, command.Process.Pid)
+	cgroupPaths, err := SetupCgroups(container, command.Process.Pid)
 	if err != nil {
-		command.Process.Kill()
-		command.Wait()
-		return -1, err
-	}
-	defer cgroupRef.Cleanup()
-
-	cgroupPaths, err := cgroupRef.Paths()
-	if err != nil {
-		command.Process.Kill()
-		command.Wait()
-		return -1, err
+		return terminate(err)
 	}
+	defer cgroups.RemovePaths(cgroupPaths)

 	var networkState network.NetworkState
-	if err := InitializeNetworking(container, command.Process.Pid, syncPipe, &networkState); err != nil {
-		command.Process.Kill()
-		command.Wait()
-		return -1, err
+	if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil {
+		return terminate(err)
+	}
+	// send the state to the container's init process then shutdown writes for the parent
+	if err := json.NewEncoder(parent).Encode(networkState); err != nil {
+		return terminate(err)
+	}
+	// shutdown writes for the parent side of the pipe
+	if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
+		return terminate(err)
 	}

 	state := &libcontainer.State{
@ -86,17 +87,18 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri
 	}

 	if err := libcontainer.SaveState(dataPath, state); err != nil {
-		command.Process.Kill()
-		command.Wait()
-		return -1, err
+		return terminate(err)
 	}
 	defer libcontainer.DeleteState(dataPath)

-	// Sync with child
-	if err := syncPipe.ReadFromChild(); err != nil {
-		command.Process.Kill()
-		command.Wait()
-		return -1, err
+	// wait for the child process to fully complete and receive an error message
+	// if one was encountered
+	var ierr *initError
+	if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF {
+		return terminate(err)
+	}
+	if ierr != nil {
+		return terminate(ierr)
 	}

 	if startCallback != nil {
@ -108,7 +110,6 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri
 			return -1, err
 		}
 	}
-
 	return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
 }

@ -129,16 +130,6 @@ func DefaultCreateCommand(container *libcontainer.Config, console, dataPath, ini
 		"data_path=" + dataPath,
 	}

-	/*
-	   TODO: move user and wd into env
-	   if user != "" {
-	       env = append(env, "user="+user)
-	   }
-	   if workingDir != "" {
-	       env = append(env, "wd="+workingDir)
-	   }
-	*/
-
 	command := exec.Command(init, append([]string{"init", "--"}, args...)...)
 	// make sure the process is executed inside the context of the rootfs
 	command.Dir = container.RootFs
@ -157,23 +148,20 @@ func DefaultCreateCommand(container *libcontainer.Config, console, dataPath, ini

 // SetupCgroups applies the cgroup restrictions to the process running in the container based
 // on the container's configuration
-func SetupCgroups(container *libcontainer.Config, nspid int) (cgroups.ActiveCgroup, error) {
+func SetupCgroups(container *libcontainer.Config, nspid int) (map[string]string, error) {
 	if container.Cgroups != nil {
 		c := container.Cgroups
-
 		if systemd.UseSystemd() {
 			return systemd.Apply(c, nspid)
 		}
-
 		return fs.Apply(c, nspid)
 	}
-
-	return nil, nil
+	return map[string]string{}, nil
 }

 // InitializeNetworking creates the container's network stack outside of the namespace and moves
 // interfaces into the container's net namespaces if necessary
-func InitializeNetworking(container *libcontainer.Config, nspid int, pipe *syncpipe.SyncPipe, networkState *network.NetworkState) error {
+func InitializeNetworking(container *libcontainer.Config, nspid int, networkState *network.NetworkState) error {
 	for _, config := range container.Networks {
 		strategy, err := network.GetStrategy(config.Type)
 		if err != nil {
@ -183,18 +171,5 @@ func InitializeNetworking(container *libcontainer.Config, nspid int, pipe *syncp
 			return err
 		}
 	}
-	return pipe.SendToChild(networkState)
-}
-
-// GetNamespaceFlags parses the container's Namespaces options to set the correct
-// flags on clone, unshare, and setns
-func GetNamespaceFlags(namespaces map[string]bool) (flag int) {
-	for key, enabled := range namespaces {
-		if enabled {
-			if ns := GetNamespace(key); ns != nil {
-				flag |= ns.Value
-			}
-		}
-	}
-	return flag
+	return nil
 }
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/execin.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/execin.go
@ -3,6 +3,7 @@
 package namespaces

 import (
+	"encoding/json"
 	"fmt"
 	"io"
 	"os"
@ -15,7 +16,6 @@ import (
 	"github.com/docker/libcontainer/apparmor"
 	"github.com/docker/libcontainer/cgroups"
 	"github.com/docker/libcontainer/label"
-	"github.com/docker/libcontainer/syncpipe"
 	"github.com/docker/libcontainer/system"
 )

@ -41,11 +41,11 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs
 		}
 	}

-	pipe, err := syncpipe.NewSyncPipe()
+	parent, child, err := newInitPipe()
 	if err != nil {
 		return -1, err
 	}
-	defer pipe.Close()
+	defer parent.Close()

 	// Note: these are only used in non-tty mode
 	// if there is a tty for the container it will be opened within the namespace and the
@ -53,23 +53,28 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs
 	cmd.Stdin = stdin
 	cmd.Stdout = stdout
 	cmd.Stderr = stderr
-
-	cmd.ExtraFiles = []*os.File{pipe.Child()}
+	cmd.ExtraFiles = []*os.File{child}

 	if err := cmd.Start(); err != nil {
+		child.Close()
 		return -1, err
 	}
-	pipe.CloseChild()
+	child.Close()
+
+	terminate := func(terr error) (int, error) {
+		// TODO: log the errors for kill and wait
+		cmd.Process.Kill()
+		cmd.Wait()
+		return -1, terr
+	}

 	// Enter cgroups.
 	if err := EnterCgroups(state, cmd.Process.Pid); err != nil {
-		return -1, err
+		return terminate(err)
 	}

-	if err := pipe.SendToChild(container); err != nil {
-		cmd.Process.Kill()
-		cmd.Wait()
-		return -1, err
+	if err := json.NewEncoder(parent).Encode(container); err != nil {
+		return terminate(err)
 	}

 	if startCallback != nil {
@ -81,7 +86,6 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs
 			return -1, err
 		}
 	}
-
 	return cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
 }

@ -107,7 +111,7 @@ func FinalizeSetns(container *libcontainer.Config, args []string) error {
 		}
 	}

-	if err := system.Execv(args[0], args[0:], container.Env); err != nil {
+	if err := system.Execv(args[0], args[0:], os.Environ()); err != nil {
 		return err
 	}

--- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/init.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/init.go
@ -3,7 +3,9 @@
 package namespaces

 import (
+	"encoding/json"
 	"fmt"
+	"io/ioutil"
 	"os"
 	"strings"
 	"syscall"
@ -17,7 +19,6 @@ import (
 	"github.com/docker/libcontainer/network"
 	"github.com/docker/libcontainer/security/capabilities"
 	"github.com/docker/libcontainer/security/restrict"
-	"github.com/docker/libcontainer/syncpipe"
 	"github.com/docker/libcontainer/system"
 	"github.com/docker/libcontainer/user"
 	"github.com/docker/libcontainer/utils"
@ -29,11 +30,22 @@ import (
 // and other options required for the new container.
 // The caller of Init function has to ensure that the go runtime is locked to an OS thread
 // (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended.
-func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syncPipe *syncpipe.SyncPipe, args []string) (err error) {
+func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pipe *os.File, args []string) (err error) {
 	defer func() {
+		// if we have an error during the initialization of the container's init then send it back to the
+		// parent process in the form of an initError.
 		if err != nil {
-			syncPipe.ReportChildError(err)
+			// ensure that any data sent from the parent is consumed so it doesn't
+			// receive ECONNRESET when the child writes to the pipe.
+			ioutil.ReadAll(pipe)
+			if err := json.NewEncoder(pipe).Encode(initError{
+				Message: err.Error(),
+			}); err != nil {
+				panic(err)
+			}
 		}
+		// ensure that this pipe is always closed
+		pipe.Close()
 	}()

 	rootfs, err := utils.ResolveRootfs(uncleanRootfs)
@ -49,10 +61,13 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn

 	// We always read this as it is a way to sync with the parent as well
 	var networkState *network.NetworkState
-	if err := syncPipe.ReadFromParent(&networkState); err != nil {
+	if err := json.NewDecoder(pipe).Decode(&networkState); err != nil {
+		return err
+	}
+	// join any namespaces via a path to the namespace fd if provided
+	if err := joinExistingNamespaces(container.Namespaces); err != nil {
 		return err
 	}
-
 	if consolePath != "" {
 		if err := console.OpenAndDup(consolePath); err != nil {
 			return err
@ -66,6 +81,7 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
 			return fmt.Errorf("setctty %s", err)
 		}
 	}
+
 	if err := setupNetwork(container, networkState); err != nil {
 		return fmt.Errorf("setup networking %s", err)
 	}
@ -73,6 +89,10 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
 		return fmt.Errorf("setup route %s", err)
 	}

+	if err := setupRlimits(container); err != nil {
+		return fmt.Errorf("setup rlimits %s", err)
+	}
+
 	label.Init()

 	if err := mount.InitializeMountNamespace(rootfs,
@ -84,7 +104,7 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn

 	if container.Hostname != "" {
 		if err := syscall.Sethostname([]byte(container.Hostname)); err != nil {
-			return fmt.Errorf("sethostname %s", err)
+			return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err)
 		}
 	}

@ -151,26 +171,43 @@ func RestoreParentDeathSignal(old int) error {

 // SetupUser changes the groups, gid, and uid for the user inside the container
 func SetupUser(u string) error {
-	uid, gid, suppGids, home, err := user.GetUserGroupSupplementaryHome(u, syscall.Getuid(), syscall.Getgid(), "/")
+	// Set up defaults.
+	defaultExecUser := user.ExecUser{
+		Uid:  syscall.Getuid(),
+		Gid:  syscall.Getgid(),
+		Home: "/",
+	}
+
+	passwdFile, err := user.GetPasswdFile()
+	if err != nil {
+		return err
+	}
+
+	groupFile, err := user.GetGroupFile()
+	if err != nil {
+		return err
+	}
+
+	execUser, err := user.GetExecUserFile(u, &defaultExecUser, passwdFile, groupFile)
 	if err != nil {
 		return fmt.Errorf("get supplementary groups %s", err)
 	}

-	if err := syscall.Setgroups(suppGids); err != nil {
+	if err := syscall.Setgroups(execUser.Sgids); err != nil {
 		return fmt.Errorf("setgroups %s", err)
 	}

-	if err := syscall.Setgid(gid); err != nil {
+	if err := system.Setgid(execUser.Gid); err != nil {
 		return fmt.Errorf("setgid %s", err)
 	}

-	if err := syscall.Setuid(uid); err != nil {
+	if err := system.Setuid(execUser.Uid); err != nil {
 		return fmt.Errorf("setuid %s", err)
 	}

 	// if we didn't get HOME already, set it based on the user's HOME
 	if envHome := os.Getenv("HOME"); envHome == "" {
-		if err := os.Setenv("HOME", home); err != nil {
+		if err := os.Setenv("HOME", execUser.Home); err != nil {
 			return fmt.Errorf("set HOME %s", err)
 		}
 	}
@ -205,6 +242,16 @@ func setupRoute(container *libcontainer.Config) error {
 	return nil
 }

+// setupRlimits applies every resource limit listed in container.Rlimits to the
+// current process via setrlimit, stopping at the first limit that fails.
+func setupRlimits(container *libcontainer.Config) error {
+	for _, rl := range container.Rlimits {
+		limit := syscall.Rlimit{Cur: rl.Soft, Max: rl.Hard}
+		err := syscall.Setrlimit(rl.Type, &limit)
+		if err == nil {
+			continue
+		}
+		return fmt.Errorf("error setting rlimit type %v: %v", rl.Type, err)
+	}
+	return nil
+}
+
 // FinalizeNamespace drops the caps, sets the correct user
 // and working dir, and closes any leaky file descriptors
 // before execing the command inside the namespace
@ -261,3 +308,22 @@ func LoadContainerEnvironment(container *libcontainer.Config) error {
 	}
 	return nil
 }
+
+// joinExistingNamespaces walks the container's namespaces and, for each entry
+// that carries a path, opens that path and calls setns on the resulting fd so
+// that the current process joins the namespace. Entries without a path are skipped.
+func joinExistingNamespaces(namespaces []libcontainer.Namespace) error {
+	for _, entry := range namespaces {
+		if entry.Path == "" {
+			continue
+		}
+		nsFile, err := os.Open(entry.Path)
+		if err != nil {
+			return err
+		}
+		// close the fd right after setns instead of deferring, so that no
+		// descriptor stays open across loop iterations.
+		setnsErr := system.Setns(nsFile.Fd(), uintptr(namespaceInfo[entry.Name]))
+		nsFile.Close()
+		if setnsErr != nil {
+			return setnsErr
+		}
+	}
+	return nil
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/nsenter/nsenter.c
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/nsenter/nsenter.c
@ -10,6 +10,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/prctl.h>
 #include <sys/types.h>
 #include <unistd.h>
 #include <getopt.h>
@ -88,6 +89,11 @@ void nsenter()
 		return;
 	}

+	if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) == -1) {
+                fprintf(stderr, "nsenter: failed to set child subreaper: %s", strerror(errno));
+                exit(1);
+        }
+
 	static const struct option longopts[] = {
 		{"nspid", required_argument, NULL, 'n'},
 		{"console", required_argument, NULL, 't'},
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types.go
@ -1,50 +0,0 @@
-package namespaces
-
-import "errors"
-
-type (
-	Namespace struct {
-		Key   string `json:"key,omitempty"`
-		Value int    `json:"value,omitempty"`
-		File  string `json:"file,omitempty"`
-	}
-	Namespaces []*Namespace
-)
-
-// namespaceList is used to convert the libcontainer types
-// into the names of the files located in /proc/<pid>/ns/* for
-// each namespace
-var (
-	namespaceList      = Namespaces{}
-	ErrUnkownNamespace = errors.New("Unknown namespace")
-	ErrUnsupported     = errors.New("Unsupported method")
-)
-
-func (ns *Namespace) String() string {
-	return ns.Key
-}
-
-func GetNamespace(key string) *Namespace {
-	for _, ns := range namespaceList {
-		if ns.Key == key {
-			cpy := *ns
-			return &cpy
-		}
-	}
-	return nil
-}
-
-// Contains returns true if the specified Namespace is
-// in the slice
-func (n Namespaces) Contains(ns string) bool {
-	return n.Get(ns) != nil
-}
-
-func (n Namespaces) Get(ns string) *Namespace {
-	for _, nsp := range n {
-		if nsp != nil && nsp.Key == ns {
-			return nsp
-		}
-	}
-	return nil
-}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types_linux.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types_linux.go
@ -1,16 +0,0 @@
-package namespaces
-
-import (
-	"syscall"
-)
-
-func init() {
-	namespaceList = Namespaces{
-		{Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt"},
-		{Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts"},
-		{Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc"},
-		{Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user"},
-		{Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid"},
-		{Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net"},
-	}
-}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types_test.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/types_test.go
@ -1,30 +0,0 @@
-package namespaces
-
-import (
-	"testing"
-)
-
-func TestNamespacesContains(t *testing.T) {
-	ns := Namespaces{
-		GetNamespace("NEWPID"),
-		GetNamespace("NEWNS"),
-		GetNamespace("NEWUTS"),
-	}
-
-	if ns.Contains("NEWNET") {
-		t.Fatal("namespaces should not contain NEWNET")
-	}
-
-	if !ns.Contains("NEWPID") {
-		t.Fatal("namespaces should contain NEWPID but does not")
-	}
-
-	withNil := Namespaces{
-		GetNamespace("UNDEFINED"), // this element will be nil
-		GetNamespace("NEWPID"),
-	}
-
-	if !withNil.Contains("NEWPID") {
-		t.Fatal("namespaces should contain NEWPID but does not")
-	}
-}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/utils.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/namespaces/utils.go
@ -0,0 +1,45 @@
+// +build linux
+
+package namespaces
+
+import (
+	"os"
+	"syscall"
+
+	"github.com/docker/libcontainer"
+)
+
+// initError is a JSON-serializable error: it carries only a message, encoded
+// under the "message" key (omitted when empty).
+type initError struct {
+	Message string `json:"message,omitempty"`
+}
+
+// Error returns the stored message, which makes initError satisfy the error interface.
+func (i initError) Error() string {
+	return i.Message
+}
+
+// namespaceInfo maps a namespace name ("NEWNET", "NEWNS", ...) to the matching
+// syscall.CLONE_NEW* flag. Looking up a name that is not listed yields 0.
+var namespaceInfo = map[string]int{
+	"NEWNET":  syscall.CLONE_NEWNET,
+	"NEWNS":   syscall.CLONE_NEWNS,
+	"NEWUSER": syscall.CLONE_NEWUSER,
+	"NEWIPC":  syscall.CLONE_NEWIPC,
+	"NEWUTS":  syscall.CLONE_NEWUTS,
+	"NEWPID":  syscall.CLONE_NEWPID,
+}
+
+// newInitPipe creates a connected pair of close-on-exec AF_LOCAL stream sockets
+// and returns them wrapped as files: the second descriptor of the pair as
+// parent and the first as child. On failure both files are nil.
+func newInitPipe() (parent *os.File, child *os.File, err error) {
+	pair, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
+	if err != nil {
+		return nil, nil, err
+	}
+	parent = os.NewFile(uintptr(pair[1]), "parent")
+	child = os.NewFile(uintptr(pair[0]), "child")
+	return parent, child, nil
+}
+
+// GetNamespaceFlags folds the container's namespace list into a single bitmask
+// of CLONE_NEW* values suitable for clone, unshare, and setns. Names that are
+// not present in namespaceInfo contribute nothing to the result.
+func GetNamespaceFlags(namespaces []libcontainer.Namespace) (flag int) {
+	for i := range namespaces {
+		flag |= namespaceInfo[namespaces[i].Name]
+	}
+	return flag
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_linux.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_linux.go
@ -7,6 +7,7 @@ import (
 	"math/rand"
 	"net"
 	"os"
+	"path/filepath"
 	"sync/atomic"
 	"syscall"
 	"unsafe"
@ -575,6 +576,31 @@ func NetworkSetMTU(iface *net.Interface, mtu int) error {
 	return s.HandleAck(wb.Seq)
 }

+// NetworkSetTxQueueLen sets the transmit queue length of iface to txQueueLen.
+// This is identical to running: ip link set dev $name txqueuelen $QLEN
+func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error {
+	s, err := getNetlinkSocket()
+	if err != nil {
+		return err
+	}
+	defer s.Close()
+
+	// NLM_F_ACK is requested so that the result can be collected by HandleAck below.
+	wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
+
+	// the interface to modify is identified by its index
+	msg := newIfInfomsg(syscall.AF_UNSPEC)
+	msg.Type = syscall.RTM_SETLINK
+	msg.Flags = syscall.NLM_F_REQUEST
+	msg.Index = int32(iface.Index)
+	msg.Change = DEFAULT_CHANGE
+	wb.AddData(msg)
+	// the new queue length travels as a 32-bit IFLA_TXQLEN attribute
+	wb.AddData(uint32Attr(syscall.IFLA_TXQLEN, uint32(txQueueLen)))
+
+	if err := s.Send(wb); err != nil {
+		return err
+	}
+	return s.HandleAck(wb.Seq)
+}
+
 func networkMasterAction(iface *net.Interface, rtattr *RtAttr) error {
 	s, err := getNetlinkSocket()
 	if err != nil {
@ -768,26 +794,38 @@ func NetworkLinkAddVlan(masterDev, vlanDev string, vlanId uint16) error {
 	return s.HandleAck(wb.Seq)
 }

-// Add MAC VLAN network interface with masterDev as its upper device
-// This is identical to running:
-// ip link add name $name link $masterdev type macvlan mode $mode
-func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
-	s, err := getNetlinkSocket()
-	if err != nil {
-		return err
-	}
-	defer s.Close()
+// MacVlanLink describes a link with a MasterDev, a SlaveDev and the mode it operates in.
+// This simplifies the code when creating a MacVlan or MacVtap interface
+type MacVlanLink struct {
+	MasterDev string
+	SlaveDev  string
+	mode      string
+}

-	macVlan := map[string]uint32{
+func (m MacVlanLink) Mode() uint32 {
+	modeMap := map[string]uint32{
 		"private":  MACVLAN_MODE_PRIVATE,
 		"vepa":     MACVLAN_MODE_VEPA,
 		"bridge":   MACVLAN_MODE_BRIDGE,
 		"passthru": MACVLAN_MODE_PASSTHRU,
 	}

+	return modeMap[m.mode]
+}
+
+// networkLinkMacVlan adds a network interface of kind dev_type (e.g. "macvlan") on top of mcvln.MasterDev
+// This is identical to running:
+// ip link add name $name link $masterdev type $dev_type mode $mode
+func networkLinkMacVlan(dev_type string, mcvln *MacVlanLink) error {
+	s, err := getNetlinkSocket()
+	if err != nil {
+		return err
+	}
+	defer s.Close()
+
 	wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)

-	masterDevIfc, err := net.InterfaceByName(masterDev)
+	masterDevIfc, err := net.InterfaceByName(mcvln.MasterDev)
 	if err != nil {
 		return err
 	}
@ -796,16 +834,16 @@ func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
 	wb.AddData(msg)

 	nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
-	newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated("macvlan"))
+	newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated(dev_type))

 	nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
 	macVlanData := make([]byte, 4)
-	native.PutUint32(macVlanData, macVlan[mode])
+	native.PutUint32(macVlanData, mcvln.Mode())
 	newRtAttrChild(nest2, IFLA_MACVLAN_MODE, macVlanData)
 	wb.AddData(nest1)

 	wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index)))
-	wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(macVlanDev)))
+	wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(mcvln.SlaveDev)))

 	if err := s.Send(wb); err != nil {
 		return err
@ -813,6 +851,22 @@ func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
 	return s.HandleAck(wb.Seq)
 }

+// NetworkLinkAddMacVlan creates a "macvlan" interface named macVlanDev on top
+// of masterDev, operating in the given mode.
+func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
+	link := MacVlanLink{MasterDev: masterDev, SlaveDev: macVlanDev, mode: mode}
+	return networkLinkMacVlan("macvlan", &link)
+}
+
+// NetworkLinkAddMacVtap creates a "macvtap" interface named macVlanDev on top
+// of masterDev, operating in the given mode.
+func NetworkLinkAddMacVtap(masterDev, macVlanDev string, mode string) error {
+	link := MacVlanLink{MasterDev: masterDev, SlaveDev: macVlanDev, mode: mode}
+	return networkLinkMacVlan("macvtap", &link)
+}
+
 func networkLinkIpAction(action, flags int, ifa IfAddr) error {
 	s, err := getNetlinkSocket()
 	if err != nil {
@ -1002,28 +1056,23 @@ func AddRoute(destination, source, gateway, device string) error {
 	}

 	if source != "" {
-		srcIP, srcNet, err := net.ParseCIDR(source)
-		if err != nil {
-			return fmt.Errorf("source CIDR %s couldn't be parsed", source)
+		srcIP := net.ParseIP(source)
+		if srcIP == nil {
+			return fmt.Errorf("source IP %s couldn't be parsed", source)
 		}
 		srcFamily := getIpFamily(srcIP)
 		if currentFamily != -1 && currentFamily != srcFamily {
 			return fmt.Errorf("source and destination ip were not the same IP family")
 		}
 		currentFamily = srcFamily
-		srcLen, bits := srcNet.Mask.Size()
-		if srcLen == 0 && bits == 0 {
-			return fmt.Errorf("source CIDR %s generated a non-canonical Mask", source)
-		}
 		msg.Family = uint8(srcFamily)
-		msg.Src_len = uint8(srcLen)
 		var srcData []byte
 		if srcFamily == syscall.AF_INET {
 			srcData = srcIP.To4()
 		} else {
 			srcData = srcIP.To16()
 		}
-		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_SRC, srcData))
+		rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_PREFSRC, srcData))
 	}

 	if gateway != "" {
@ -1204,6 +1253,28 @@ func SetMacAddress(name, addr string) error {
 	return nil
 }

+// SetHairpinMode turns hairpin mode on or off for iface by writing "1" or "0"
+// to /sys/class/net/<iface.Name>/brport/hairpin_mode. Any error from opening
+// or writing that file is returned unchanged.
+func SetHairpinMode(iface *net.Interface, enabled bool) error {
+	value := []byte("0")
+	if enabled {
+		value = []byte("1")
+	}
+
+	f, err := os.OpenFile(filepath.Join("/sys/class/net", iface.Name, "brport/hairpin_mode"), os.O_WRONLY, 0)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	_, err = f.Write(value)
+	return err
+}
+
 func ChangeName(iface *net.Interface, newName string) error {
 	if len(newName) >= IFNAMSIZ {
 		return fmt.Errorf("Interface name %s too long", newName)
@ -1224,5 +1295,6 @@ func ChangeName(iface *net.Interface, newName string) error {
 	if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 {
 		return errno
 	}
+
 	return nil
 }
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_linux_test.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_linux_test.go
@ -116,7 +116,7 @@ func TestNetworkSetMacAddress(t *testing.T) {
 	ifcBeforeSet := readLink(t, tl.name)

 	if err := NetworkSetMacAddress(ifcBeforeSet, macaddr); err != nil {
-		t.Fatalf("Could not set %s MAC address on %#v interface: err", macaddr, tl, err)
+		t.Fatalf("Could not set %s MAC address on %#v interface: %s", macaddr, tl, err)
 	}

 	ifcAfterSet := readLink(t, tl.name)
@ -140,7 +140,7 @@ func TestNetworkSetMTU(t *testing.T) {
 	ifcBeforeSet := readLink(t, tl.name)

 	if err := NetworkSetMTU(ifcBeforeSet, mtu); err != nil {
-		t.Fatalf("Could not set %d MTU on %#v interface: err", mtu, tl, err)
+		t.Fatalf("Could not set %d MTU on %#v interface: %s", mtu, tl, err)
 	}

 	ifcAfterSet := readLink(t, tl.name)
@ -248,6 +248,30 @@ func TestNetworkLinkAddMacVlan(t *testing.T) {
 	readLink(t, tl.name)
 }

+// TestNetworkLinkAddMacVtap adds a dummy master link, creates a MAC VTAP
+// interface in "private" mode on top of it with NetworkLinkAddMacVtap, and then
+// reads the new link back with readLink. It returns immediately in -short mode.
+func TestNetworkLinkAddMacVtap(t *testing.T) {
+	if testing.Short() {
+		return
+	}
+
+	tl := struct {
+		name string
+		mode string
+	}{
+		name: "tstVtap",
+		mode: "private",
+	}
+	masterLink := testLink{"tstEth", "dummy"}
+
+	// only the master link is removed explicitly on exit
+	addLink(t, masterLink.name, masterLink.linkType)
+	defer deleteLink(t, masterLink.name)
+
+	if err := NetworkLinkAddMacVtap(masterLink.name, tl.name, tl.mode); err != nil {
+		t.Fatalf("Unable to create %#v MAC VTAP interface: %s", tl, err)
+	}
+
+	readLink(t, tl.name)
+}
+
 func TestAddDelNetworkIp(t *testing.T) {
 	if testing.Short() {
 		return
@ -280,6 +304,34 @@ func TestAddDelNetworkIp(t *testing.T) {
 	}
 }

+// TestAddRouteSourceSelection creates a dummy link, assigns it a /32 address,
+// brings it up and verifies that AddRoute accepts that address as the route's
+// source. Like the other tests here that manipulate links, it returns
+// immediately in -short mode.
+func TestAddRouteSourceSelection(t *testing.T) {
+	if testing.Short() {
+		return
+	}
+
+	tstIp := "127.1.1.1"
+	tl := testLink{name: "tstEth", linkType: "dummy"}
+
+	addLink(t, tl.name, tl.linkType)
+	defer deleteLink(t, tl.name)
+
+	ip := net.ParseIP(tstIp)
+	mask := net.IPv4Mask(255, 255, 255, 255)
+	ipNet := &net.IPNet{IP: ip, Mask: mask}
+
+	iface, err := net.InterfaceByName(tl.name)
+	if err != nil {
+		// include the lookup error so the failure is diagnosable
+		t.Fatalf("Lost created link %#v: %s", tl, err)
+	}
+
+	if err := NetworkLinkAddIp(iface, ip, ipNet); err != nil {
+		t.Fatalf("Could not add IP address %s to interface %#v: %s", ip.String(), iface, err)
+	}
+
+	upLink(t, tl.name)
+	defer downLink(t, tl.name)
+
+	if err := AddRoute("127.0.0.0/8", tstIp, "", tl.name); err != nil {
+		// report the underlying error instead of silently dropping it
+		t.Fatalf("Failed to add route with source address: %s", err)
+	}
+}
+
 func TestCreateVethPair(t *testing.T) {
 	if testing.Short() {
 		return
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_unsupported.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/netlink/netlink_unsupported.go
@ -47,6 +47,10 @@ func NetworkSetMTU(iface *net.Interface, mtu int) error {
 	return ErrNotImplemented
 }

+// NetworkSetTxQueueLen is a stub that ignores its arguments and always returns ErrNotImplemented.
+func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error {
+	return ErrNotImplemented
+}
+
 func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error {
 	return ErrNotImplemented
 }
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/network/netns.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/network/netns.go
@ -1,39 +0,0 @@
-// +build linux
-
-package network
-
-import (
-	"fmt"
-	"os"
-	"syscall"
-
-	"github.com/docker/libcontainer/system"
-)
-
-//  crosbymichael: could make a network strategy that instead of returning veth pair names it returns a pid to an existing network namespace
-type NetNS struct {
-}
-
-func (v *NetNS) Create(n *Network, nspid int, networkState *NetworkState) error {
-	networkState.NsPath = n.NsPath
-	return nil
-}
-
-func (v *NetNS) Initialize(config *Network, networkState *NetworkState) error {
-	if networkState.NsPath == "" {
-		return fmt.Errorf("nspath does is not specified in NetworkState")
-	}
-
-	f, err := os.OpenFile(networkState.NsPath, os.O_RDONLY, 0)
-	if err != nil {
-		return fmt.Errorf("failed get network namespace fd: %v", err)
-	}
-
-	if err := system.Setns(f.Fd(), syscall.CLONE_NEWNET); err != nil {
-		f.Close()
-		return fmt.Errorf("failed to setns current network namespace: %v", err)
-	}
-
-	f.Close()
-	return nil
-}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/network/network.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/network/network.go
@ -95,3 +95,11 @@ func SetMtu(name string, mtu int) error {
 	}
 	return netlink.NetworkSetMTU(iface, mtu)
 }
+
+// SetHairpinMode looks up the interface called name and enables or disables
+// hairpin mode on it through netlink.SetHairpinMode. A failed lookup is
+// returned as-is.
+func SetHairpinMode(name string, enabled bool) error {
+	link, lookupErr := net.InterfaceByName(name)
+	if lookupErr != nil {
+		return lookupErr
+	}
+	return netlink.SetHairpinMode(link, enabled)
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/network/strategy.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/network/strategy.go
@ -13,7 +13,6 @@ var (
 var strategies = map[string]NetworkStrategy{
 	"veth":     &Veth{},
 	"loopback": &Loopback{},
-	"netns":    &NetNS{},
 }

 // NetworkStrategy represents a specific network configuration for
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/network/types.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/network/types.go
@ -8,9 +8,6 @@ type Network struct {
 	// Type sets the networks type, commonly veth and loopback
 	Type string `json:"type,omitempty"`

-	// Path to network namespace
-	NsPath string `json:"ns_path,omitempty"`
-
 	// The bridge to use.
 	Bridge string `json:"bridge,omitempty"`

@ -50,6 +47,4 @@ type NetworkState struct {
 	VethHost string `json:"veth_host,omitempty"`
 	// The name of the veth interface created inside the container for the child.
 	VethChild string `json:"veth_child,omitempty"`
-	// Net namespace path.
-	NsPath string `json:"ns_path,omitempty"`
 }
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/nsinit/init.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/nsinit/init.go
@ -8,7 +8,6 @@ import (

 	"github.com/codegangsta/cli"
 	"github.com/docker/libcontainer/namespaces"
-	"github.com/docker/libcontainer/syncpipe"
 )

 var (
@ -41,12 +40,8 @@ func initAction(context *cli.Context) {
 		log.Fatal(err)
 	}

-	syncPipe, err := syncpipe.NewSyncPipeFromFd(0, uintptr(pipeFd))
-	if err != nil {
-		log.Fatalf("unable to create sync pipe: %s", err)
-	}
-
-	if err := namespaces.Init(container, rootfs, console, syncPipe, []string(context.Args())); err != nil {
+	pipe := os.NewFile(uintptr(pipeFd), "pipe")
+	if err := namespaces.Init(container, rootfs, console, pipe, []string(context.Args())); err != nil {
 		log.Fatalf("unable to initialize for container: %s", err)
 	}
 }
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/nsinit/utils.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/nsinit/utils.go
@ -8,7 +8,6 @@ import (

 	"github.com/codegangsta/cli"
 	"github.com/docker/libcontainer"
-	"github.com/docker/libcontainer/syncpipe"
 )

 // rFunc is a function registration for calling after an execin
@ -59,16 +58,13 @@ func findUserArgs() []string {
 // loadConfigFromFd loads a container's config from the sync pipe that is provided by
 // fd 3 when running a process
 func loadConfigFromFd() (*libcontainer.Config, error) {
-	syncPipe, err := syncpipe.NewSyncPipeFromFd(0, 3)
-	if err != nil {
-		return nil, err
-	}
+	pipe := os.NewFile(3, "pipe")
+	defer pipe.Close()

 	var config *libcontainer.Config
-	if err := syncPipe.ReadFromParent(&config); err != nil {
+	if err := json.NewDecoder(pipe).Decode(&config); err != nil {
 		return nil, err
 	}
-
 	return config, nil
 }

--- a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/apparmor.json
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/apparmor.json
@ -176,13 +176,13 @@
        "TERM=xterm"
    ],
    "hostname": "koye",
-    "namespaces": {
-        "NEWIPC": true,
-        "NEWNET": true,
-        "NEWNS": true,
-        "NEWPID": true,
-        "NEWUTS": true
-    },
+    "namespaces": [
+        {"name":"NEWIPC"},
+        {"name": "NEWNET"},
+        {"name": "NEWNS"},
+        {"name": "NEWPID"},
+        {"name": "NEWUTS"}
+    ],
    "networks": [
        {
            "address": "127.0.0.1/0",
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/attach_to_bridge.json
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/attach_to_bridge.json
@ -175,13 +175,13 @@
        "TERM=xterm"
    ],
    "hostname": "koye",
-    "namespaces": {
-        "NEWIPC": true,
-        "NEWNET": true,
-        "NEWNS": true,
-        "NEWPID": true,
-        "NEWUTS": true
-    },
+    "namespaces": [
+        {"name": "NEWIPC"},
+        {"name": "NEWNET"},
+        {"name": "NEWNS"},
+        {"name": "NEWPID"},
+        {"name": "NEWUTS"}
+    ],
    "networks": [
        {
            "address": "127.0.0.1/0",
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/minimal.json
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/minimal.json
@ -181,13 +181,13 @@
        "TERM=xterm"
    ],
    "hostname": "koye",
-    "namespaces": {
-        "NEWIPC": true,
-        "NEWNET": true,
-        "NEWNS": true,
-        "NEWPID": true,
-        "NEWUTS": true
-    },
+    "namespaces": [
+        {"name": "NEWIPC"},
+        {"name": "NEWNET"},
+        {"name": "NEWNS"},
+        {"name": "NEWPID"},
+        {"name": "NEWUTS"}
+    ],
    "networks": [
        {
            "address": "127.0.0.1/0",
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/route_source_address_selection.json
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/route_source_address_selection.json
@ -0,0 +1,209 @@
+{
+    "capabilities": [
+        "CHOWN",
+        "DAC_OVERRIDE",
+        "FOWNER",
+        "MKNOD",
+        "NET_RAW",
+        "SETGID",
+        "SETUID",
+        "SETFCAP",
+        "SETPCAP",
+        "NET_BIND_SERVICE",
+        "SYS_CHROOT",
+        "KILL"
+    ],
+    "cgroups": {
+        "allowed_devices": [
+            {
+                "cgroup_permissions": "m",
+                "major_number": -1,
+                "minor_number": -1,
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "m",
+                "major_number": -1,
+                "minor_number": -1,
+                "type": 98
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "major_number": 5,
+                "minor_number": 1,
+                "path": "/dev/console",
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "major_number": 4,
+                "path": "/dev/tty0",
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "major_number": 4,
+                "minor_number": 1,
+                "path": "/dev/tty1",
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "major_number": 136,
+                "minor_number": -1,
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "major_number": 5,
+                "minor_number": 2,
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "major_number": 10,
+                "minor_number": 200,
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "file_mode": 438,
+                "major_number": 1,
+                "minor_number": 3,
+                "path": "/dev/null",
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "file_mode": 438,
+                "major_number": 1,
+                "minor_number": 5,
+                "path": "/dev/zero",
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "file_mode": 438,
+                "major_number": 1,
+                "minor_number": 7,
+                "path": "/dev/full",
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "file_mode": 438,
+                "major_number": 5,
+                "path": "/dev/tty",
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "file_mode": 438,
+                "major_number": 1,
+                "minor_number": 9,
+                "path": "/dev/urandom",
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "file_mode": 438,
+                "major_number": 1,
+                "minor_number": 8,
+                "path": "/dev/random",
+                "type": 99
+            }
+        ],
+        "name": "docker-koye",
+        "parent": "docker"
+    },
+    "restrict_sys": true,
+    "mount_config": {
+        "device_nodes": [
+            {
+                "cgroup_permissions": "rwm",
+                "file_mode": 438,
+                "major_number": 1,
+                "minor_number": 3,
+                "path": "/dev/null",
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "file_mode": 438,
+                "major_number": 1,
+                "minor_number": 5,
+                "path": "/dev/zero",
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "file_mode": 438,
+                "major_number": 1,
+                "minor_number": 7,
+                "path": "/dev/full",
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "file_mode": 438,
+                "major_number": 5,
+                "path": "/dev/tty",
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "file_mode": 438,
+                "major_number": 1,
+                "minor_number": 9,
+                "path": "/dev/urandom",
+                "type": 99
+            },
+            {
+                "cgroup_permissions": "rwm",
+                "file_mode": 438,
+                "major_number": 1,
+                "minor_number": 8,
+                "path": "/dev/random",
+                "type": 99
+            }
+        ]
+    },
+    "environment": [
+        "HOME=/",
+        "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
+        "HOSTNAME=koye",
+        "TERM=xterm"
+    ],
+    "hostname": "koye",
+    "namespaces": [
+        {"name": "NEWIPC"},
+        {"name": "NEWNET"},
+        {"name": "NEWNS"},
+        {"name": "NEWPID"},
+        {"name": "NEWUTS"}
+    ],
+    "networks": [
+        {
+            "address": "127.0.0.1/0",
+            "gateway": "localhost",
+            "mtu": 1500,
+            "type": "loopback"
+        },
+        {
+            "address": "172.17.0.101/16",
+            "bridge": "docker0",
+            "veth_prefix": "veth",
+            "mtu": 1500,
+            "type": "veth"
+        }
+    ],
+    "routes": [
+        {
+            "destination": "0.0.0.0/0",
+            "source": "172.17.0.101",
+            "gateway": "172.17.42.1",
+            "interface_name": "eth0"
+        }
+    ],
+    "tty": true
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/selinux.json
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/sample_configs/selinux.json
@ -177,13 +177,13 @@
        "TERM=xterm"
    ],
    "hostname": "koye",
-    "namespaces": {
-        "NEWIPC": true,
-        "NEWNET": true,
-        "NEWNS": true,
-        "NEWPID": true,
-        "NEWUTS": true
-    },
+    "namespaces": [
+        {"name": "NEWIPC"},
+        {"name": "NEWNET"},
+        {"name": "NEWNS"},
+        {"name": "NEWPID"},
+        {"name": "NEWUTS"}
+    ],
    "networks": [
        {
            "address": "127.0.0.1/0",
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/selinux/selinux.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/selinux/selinux.go
@ -434,3 +434,28 @@ func Chcon(fpath string, scon string, recurse bool) error {

 	return Setfilecon(fpath, scon)
 }
+
+// DupSecOpt takes an SELinux process label and returns security options that
+// can be used to set the SELinux Type and Level for future container processes
+func DupSecOpt(src string) []string {
+	if src == "" {
+		return nil
+	}
+	con := NewContext(src)
+	if con["user"] == "" ||
+		con["role"] == "" ||
+		con["type"] == "" ||
+		con["level"] == "" {
+		return nil
+	}
+	return []string{"label:user:" + con["user"],
+		"label:role:" + con["role"],
+		"label:type:" + con["type"],
+		"label:level:" + con["level"]}
+}
+
+// DisableSecOpt returns a security opt that can be used to disable SELinux
+// labeling support for future container processes
+func DisableSecOpt() []string {
+	return []string{"label:disable"}
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/selinux/selinux_test.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/selinux/selinux_test.go
@ -42,7 +42,7 @@ func TestSELinux(t *testing.T) {
 		t.Log("getenforce ", selinux.SelinuxGetEnforce())
 		t.Log("getenforcemode ", selinux.SelinuxGetEnforceMode())
 		pid := os.Getpid()
-		t.Log("PID:%d MCS:%s\n", pid, selinux.IntToMcs(pid, 1023))
+		t.Logf("PID:%d MCS:%s\n", pid, selinux.IntToMcs(pid, 1023))
 		err = selinux.Setfscreatecon("unconfined_u:unconfined_r:unconfined_t:s0")
 		if err == nil {
 			t.Log(selinux.Getfscreatecon())
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe.go
@ -1,105 +0,0 @@
-package syncpipe
-
-import (
-	"encoding/json"
-	"fmt"
-	"io/ioutil"
-	"os"
-	"syscall"
-)
-
-// SyncPipe allows communication to and from the child processes
-// to it's parent and allows the two independent processes to
-// syncronize their state.
-type SyncPipe struct {
-	parent, child *os.File
-}
-
-func NewSyncPipeFromFd(parentFd, childFd uintptr) (*SyncPipe, error) {
-	s := &SyncPipe{}
-
-	if parentFd > 0 {
-		s.parent = os.NewFile(parentFd, "parentPipe")
-	} else if childFd > 0 {
-		s.child = os.NewFile(childFd, "childPipe")
-	} else {
-		return nil, fmt.Errorf("no valid sync pipe fd specified")
-	}
-
-	return s, nil
-}
-
-func (s *SyncPipe) Child() *os.File {
-	return s.child
-}
-
-func (s *SyncPipe) Parent() *os.File {
-	return s.parent
-}
-
-func (s *SyncPipe) SendToChild(v interface{}) error {
-	data, err := json.Marshal(v)
-	if err != nil {
-		return err
-	}
-
-	s.parent.Write(data)
-
-	return syscall.Shutdown(int(s.parent.Fd()), syscall.SHUT_WR)
-}
-
-func (s *SyncPipe) ReadFromChild() error {
-	data, err := ioutil.ReadAll(s.parent)
-	if err != nil {
-		return err
-	}
-
-	if len(data) > 0 {
-		return fmt.Errorf("%s", data)
-	}
-
-	return nil
-}
-
-func (s *SyncPipe) ReadFromParent(v interface{}) error {
-	data, err := ioutil.ReadAll(s.child)
-	if err != nil {
-		return fmt.Errorf("error reading from sync pipe %s", err)
-	}
-
-	if len(data) > 0 {
-		if err := json.Unmarshal(data, v); err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
-func (s *SyncPipe) ReportChildError(err error) {
-	// ensure that any data sent from the parent is consumed so it doesn't
-	// receive ECONNRESET when the child writes to the pipe.
-	ioutil.ReadAll(s.child)
-
-	s.child.Write([]byte(err.Error()))
-	s.CloseChild()
-}
-
-func (s *SyncPipe) Close() error {
-	if s.parent != nil {
-		s.parent.Close()
-	}
-
-	if s.child != nil {
-		s.child.Close()
-	}
-
-	return nil
-}
-
-func (s *SyncPipe) CloseChild() {
-	if s.child != nil {
-		s.child.Close()
-		s.child = nil
-	}
-}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe_linux.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe_linux.go
@ -1,20 +0,0 @@
-package syncpipe
-
-import (
-	"os"
-	"syscall"
-)
-
-func NewSyncPipe() (s *SyncPipe, err error) {
-	s = &SyncPipe{}
-
-	fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
-	if err != nil {
-		return nil, err
-	}
-
-	s.child = os.NewFile(uintptr(fds[0]), "child syncpipe")
-	s.parent = os.NewFile(uintptr(fds[1]), "parent syncpipe")
-
-	return s, nil
-}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe_test.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/syncpipe/sync_pipe_test.go
@ -1,72 +0,0 @@
-package syncpipe
-
-import (
-	"fmt"
-	"syscall"
-	"testing"
-)
-
-type testStruct struct {
-	Name string
-}
-
-func TestSendErrorFromChild(t *testing.T) {
-	pipe, err := NewSyncPipe()
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer func() {
-		if err := pipe.Close(); err != nil {
-			t.Fatal(err)
-		}
-	}()
-
-	childfd, err := syscall.Dup(int(pipe.Child().Fd()))
-	if err != nil {
-		t.Fatal(err)
-	}
-	childPipe, _ := NewSyncPipeFromFd(0, uintptr(childfd))
-
-	pipe.CloseChild()
-	pipe.SendToChild(nil)
-
-	expected := "something bad happened"
-	childPipe.ReportChildError(fmt.Errorf(expected))
-
-	childError := pipe.ReadFromChild()
-	if childError == nil {
-		t.Fatal("expected an error to be returned but did not receive anything")
-	}
-
-	if childError.Error() != expected {
-		t.Fatalf("expected %q but received error message %q", expected, childError.Error())
-	}
-}
-
-func TestSendPayloadToChild(t *testing.T) {
-	pipe, err := NewSyncPipe()
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	defer func() {
-		if err := pipe.Close(); err != nil {
-			t.Fatal(err)
-		}
-	}()
-
-	expected := "libcontainer"
-
-	if err := pipe.SendToChild(testStruct{Name: expected}); err != nil {
-		t.Fatal(err)
-	}
-
-	var s *testStruct
-	if err := pipe.ReadFromParent(&s); err != nil {
-		t.Fatal(err)
-	}
-
-	if s.Name != expected {
-		t.Fatalf("expected name %q but received %q", expected, s.Name)
-	}
-}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/system/setns_linux.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/system/setns_linux.go
@ -11,9 +11,12 @@ import (
 // We need different setns values for the different platforms and arch
 // We are declaring the macro here because the SETNS syscall does not exist in th stdlib
 var setNsMap = map[string]uintptr{
-	"linux/386":   346,
-	"linux/amd64": 308,
-	"linux/arm":   374,
+	"linux/386":     346,
+	"linux/amd64":   308,
+	"linux/arm":     374,
+	"linux/ppc64":   350,
+	"linux/ppc64le": 350,
+	"linux/s390x":   339,
 }

 func Setns(fd uintptr, flags uintptr) error {
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_amd64.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_amd64.go
@ -1,4 +1,5 @@
-// +build linux,amd64
+// +build linux,amd64 linux,ppc64 linux,ppc64le linux,s390x
+
 package system

 import (
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_arm.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/system/syscall_linux_arm.go
@ -7,7 +7,7 @@ import (

 // Setuid sets the uid of the calling thread to the specified uid.
 func Setuid(uid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0)
+	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup.go
@ -0,0 +1,108 @@
+package user
+
+import (
+	"errors"
+	"fmt"
+	"syscall"
+)
+
+var (
+	// The current operating system does not provide the required data for user lookups.
+	ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data")
+)
+
+func lookupUser(filter func(u User) bool) (User, error) {
+	// Get operating system-specific passwd reader-closer.
+	passwd, err := GetPasswd()
+	if err != nil {
+		return User{}, err
+	}
+	defer passwd.Close()
+
+	// Get the users.
+	users, err := ParsePasswdFilter(passwd, filter)
+	if err != nil {
+		return User{}, err
+	}
+
+	// No user entries found.
+	if len(users) == 0 {
+		return User{}, fmt.Errorf("no matching entries in passwd file")
+	}
+
+	// Assume the first entry is the "correct" one.
+	return users[0], nil
+}
+
+// CurrentUser looks up the current user by their user id in /etc/passwd. If the
+// user cannot be found (or there is no /etc/passwd file on the filesystem),
+// then CurrentUser returns an error.
+func CurrentUser() (User, error) {
+	return LookupUid(syscall.Getuid())
+}
+
+// LookupUser looks up a user by their username in /etc/passwd. If the user
+// cannot be found (or there is no /etc/passwd file on the filesystem), then
+// LookupUser returns an error.
+func LookupUser(username string) (User, error) {
+	return lookupUser(func(u User) bool {
+		return u.Name == username
+	})
+}
+
+// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot
+// be found (or there is no /etc/passwd file on the filesystem), then LookupUid
+// returns an error.
+func LookupUid(uid int) (User, error) {
+	return lookupUser(func(u User) bool {
+		return u.Uid == uid
+	})
+}
+
+func lookupGroup(filter func(g Group) bool) (Group, error) {
+	// Get operating system-specific group reader-closer.
+	group, err := GetGroup()
+	if err != nil {
+		return Group{}, err
+	}
+	defer group.Close()
+
+	// Get the groups.
+	groups, err := ParseGroupFilter(group, filter)
+	if err != nil {
+		return Group{}, err
+	}
+
+	// No group entries found.
+	if len(groups) == 0 {
+		return Group{}, fmt.Errorf("no matching entries in group file")
+	}
+
+	// Assume the first entry is the "correct" one.
+	return groups[0], nil
+}
+
+// CurrentGroup looks up the current user's group by their primary group id's
+// entry in /etc/group. If the group cannot be found (or there is no
+// /etc/group file on the filesystem), then CurrentGroup returns an error.
+func CurrentGroup() (Group, error) {
+	return LookupGid(syscall.Getgid())
+}
+
+// LookupGroup looks up a group by its name in /etc/group. If the group cannot
+// be found (or there is no /etc/group file on the filesystem), then LookupGroup
+// returns an error.
+func LookupGroup(groupname string) (Group, error) {
+	return lookupGroup(func(g Group) bool {
+		return g.Name == groupname
+	})
+}
+
+// LookupGid looks up a group by its group id in /etc/group. If the group cannot
+// be found (or there is no /etc/group file on the filesystem), then LookupGid
+// returns an error.
+func LookupGid(gid int) (Group, error) {
+	return lookupGroup(func(g Group) bool {
+		return g.Gid == gid
+	})
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup_unix.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup_unix.go
@ -0,0 +1,30 @@
+// +build darwin dragonfly freebsd linux netbsd openbsd solaris
+
+package user
+
+import (
+	"io"
+	"os"
+)
+
+// Unix-specific path to the passwd and group formatted files.
+const (
+	unixPasswdFile = "/etc/passwd"
+	unixGroupFile  = "/etc/group"
+)
+
+func GetPasswdFile() (string, error) {
+	return unixPasswdFile, nil
+}
+
+func GetPasswd() (io.ReadCloser, error) {
+	return os.Open(unixPasswdFile)
+}
+
+func GetGroupFile() (string, error) {
+	return unixGroupFile, nil
+}
+
+func GetGroup() (io.ReadCloser, error) {
+	return os.Open(unixGroupFile)
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup_unsupported.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/user/lookup_unsupported.go
@ -0,0 +1,21 @@
+// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris
+
+package user
+
+import "io"
+
+func GetPasswdFile() (string, error) {
+	return "", ErrUnsupported
+}
+
+func GetPasswd() (io.ReadCloser, error) {
+	return nil, ErrUnsupported
+}
+
+func GetGroupFile() (string, error) {
+	return "", ErrUnsupported
+}
+
+func GetGroup() (io.ReadCloser, error) {
+	return nil, ErrUnsupported
+}
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/user/user.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/user/user.go
@ -69,23 +69,36 @@ func parseLine(line string, v ...interface{}) {
 	}
 }

-func ParsePasswd() ([]*User, error) {
-	return ParsePasswdFilter(nil)
-}
-
-func ParsePasswdFilter(filter func(*User) bool) ([]*User, error) {
-	f, err := os.Open("/etc/passwd")
+func ParsePasswdFile(path string) ([]User, error) {
+	passwd, err := os.Open(path)
 	if err != nil {
 		return nil, err
 	}
-	defer f.Close()
-	return parsePasswdFile(f, filter)
+	defer passwd.Close()
+	return ParsePasswd(passwd)
 }

-func parsePasswdFile(r io.Reader, filter func(*User) bool) ([]*User, error) {
+func ParsePasswd(passwd io.Reader) ([]User, error) {
+	return ParsePasswdFilter(passwd, nil)
+}
+
+func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) {
+	passwd, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer passwd.Close()
+	return ParsePasswdFilter(passwd, filter)
+}
+
+func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
+	if r == nil {
+		return nil, fmt.Errorf("nil source for passwd-formatted data")
+	}
+
 	var (
 		s   = bufio.NewScanner(r)
-		out = []*User{}
+		out = []User{}
 	)

 	for s.Scan() {
@ -103,7 +116,7 @@ func parsePasswdFile(r io.Reader, filter func(*User) bool) ([]*User, error) {
 		// Name:Pass:Uid:Gid:Gecos:Home:Shell
 		//  root:x:0:0:root:/root:/bin/bash
 		//  adm:x:3:4:adm:/var/adm:/bin/false
-		p := &User{}
+		p := User{}
 		parseLine(
 			text,
 			&p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell,
@ -117,23 +130,36 @@ func parsePasswdFile(r io.Reader, filter func(*User) bool) ([]*User, error) {
 	return out, nil
 }

-func ParseGroup() ([]*Group, error) {
-	return ParseGroupFilter(nil)
-}
-
-func ParseGroupFilter(filter func(*Group) bool) ([]*Group, error) {
-	f, err := os.Open("/etc/group")
+func ParseGroupFile(path string) ([]Group, error) {
+	group, err := os.Open(path)
 	if err != nil {
 		return nil, err
 	}
-	defer f.Close()
-	return parseGroupFile(f, filter)
+	defer group.Close()
+	return ParseGroup(group)
 }

-func parseGroupFile(r io.Reader, filter func(*Group) bool) ([]*Group, error) {
+func ParseGroup(group io.Reader) ([]Group, error) {
+	return ParseGroupFilter(group, nil)
+}
+
+func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) {
+	group, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer group.Close()
+	return ParseGroupFilter(group, filter)
+}
+
+func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
+	if r == nil {
+		return nil, fmt.Errorf("nil source for group-formatted data")
+	}
+
 	var (
 		s   = bufio.NewScanner(r)
-		out = []*Group{}
+		out = []Group{}
 	)

 	for s.Scan() {
@ -151,7 +177,7 @@ func parseGroupFile(r io.Reader, filter func(*Group) bool) ([]*Group, error) {
 		// Name:Pass:Gid:List
 		//  root:x:0:root
 		//  adm:x:4:root,adm,daemon
-		p := &Group{}
+		p := Group{}
 		parseLine(
 			text,
 			&p.Name, &p.Pass, &p.Gid, &p.List,
@ -165,94 +191,160 @@ func parseGroupFile(r io.Reader, filter func(*Group) bool) ([]*Group, error) {
 	return out, nil
 }

-// Given a string like "user", "1000", "user:group", "1000:1000", returns the uid, gid, list of supplementary group IDs, and home directory, if available and/or applicable.
-func GetUserGroupSupplementaryHome(userSpec string, defaultUid, defaultGid int, defaultHome string) (int, int, []int, string, error) {
-	var (
-		uid      = defaultUid
-		gid      = defaultGid
-		suppGids = []int{}
-		home     = defaultHome
+type ExecUser struct {
+	Uid, Gid int
+	Sgids    []int
+	Home     string
+}

+// GetExecUserFile is a wrapper for GetExecUser. It reads data from each of the
+// given file paths and uses that data as the arguments to GetExecUser. If the
+// files cannot be opened for any reason, the error is ignored and a nil
+// io.Reader is passed instead.
+func GetExecUserFile(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
+	passwd, err := os.Open(passwdPath)
+	if err != nil {
+		passwd = nil
+	} else {
+		defer passwd.Close()
+	}
+
+	group, err := os.Open(groupPath)
+	if err != nil {
+		group = nil
+	} else {
+		defer group.Close()
+	}
+
+	return GetExecUser(userSpec, defaults, passwd, group)
+}
+
+// GetExecUser parses a user specification string (using the passwd and group
+// readers as sources for /etc/passwd and /etc/group data, respectively). In
+// the case of blank fields or missing data from the sources, the values in
+// defaults are used.
+//
+// GetExecUser will return an error if a user or group literal could not be
+// found in any entry in passwd and group respectively.
+//
+// Examples of valid user specifications are:
+//     * ""
+//     * "user"
+//     * "uid"
+//     * "user:group"
+//     * "uid:gid"
+//     * "user:gid"
+//     * "uid:group"
+func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) {
+	var (
 		userArg, groupArg string
+		name              string
 	)

+	if defaults == nil {
+		defaults = new(ExecUser)
+	}
+
+	// Copy over defaults.
+	user := &ExecUser{
+		Uid:   defaults.Uid,
+		Gid:   defaults.Gid,
+		Sgids: defaults.Sgids,
+		Home:  defaults.Home,
+	}
+
+	// Sgids slice *cannot* be nil.
+	if user.Sgids == nil {
+		user.Sgids = []int{}
+	}
+
 	// allow for userArg to have either "user" syntax, or optionally "user:group" syntax
 	parseLine(userSpec, &userArg, &groupArg)

-	users, err := ParsePasswdFilter(func(u *User) bool {
+	users, err := ParsePasswdFilter(passwd, func(u User) bool {
 		if userArg == "" {
-			return u.Uid == uid
+			return u.Uid == user.Uid
 		}
 		return u.Name == userArg || strconv.Itoa(u.Uid) == userArg
 	})
-	if err != nil && !os.IsNotExist(err) {
+	if err != nil && passwd != nil {
 		if userArg == "" {
-			userArg = strconv.Itoa(uid)
+			userArg = strconv.Itoa(user.Uid)
 		}
-		return 0, 0, nil, "", fmt.Errorf("Unable to find user %v: %v", userArg, err)
+		return nil, fmt.Errorf("Unable to find user %v: %v", userArg, err)
 	}

 	haveUser := users != nil && len(users) > 0
 	if haveUser {
 		// if we found any user entries that matched our filter, let's take the first one as "correct"
-		uid = users[0].Uid
-		gid = users[0].Gid
-		home = users[0].Home
+		name = users[0].Name
+		user.Uid = users[0].Uid
+		user.Gid = users[0].Gid
+		user.Home = users[0].Home
 	} else if userArg != "" {
 		// we asked for a user but didn't find them...  let's check to see if we wanted a numeric user
-		uid, err = strconv.Atoi(userArg)
+		user.Uid, err = strconv.Atoi(userArg)
 		if err != nil {
 			// not numeric - we have to bail
-			return 0, 0, nil, "", fmt.Errorf("Unable to find user %v", userArg)
+			return nil, fmt.Errorf("Unable to find user %v", userArg)
 		}
-		if uid < minId || uid > maxId {
-			return 0, 0, nil, "", ErrRange
+
+		// Must be inside valid uid range.
+		if user.Uid < minId || user.Uid > maxId {
+			return nil, ErrRange
 		}

 		// if userArg couldn't be found in /etc/passwd but is numeric, just roll with it - this is legit
 	}

-	if groupArg != "" || (haveUser && users[0].Name != "") {
-		groups, err := ParseGroupFilter(func(g *Group) bool {
+	if groupArg != "" || name != "" {
+		groups, err := ParseGroupFilter(group, func(g Group) bool {
+			// Explicit group format takes precedence.
 			if groupArg != "" {
 				return g.Name == groupArg || strconv.Itoa(g.Gid) == groupArg
 			}
+
+			// Check if user is a member.
 			for _, u := range g.List {
-				if u == users[0].Name {
+				if u == name {
 					return true
 				}
 			}
+
 			return false
 		})
-		if err != nil && !os.IsNotExist(err) {
-			return 0, 0, nil, "", fmt.Errorf("Unable to find groups for user %v: %v", users[0].Name, err)
+		if err != nil && group != nil {
+			return nil, fmt.Errorf("Unable to find groups for user %v: %v", users[0].Name, err)
 		}

 		haveGroup := groups != nil && len(groups) > 0
 		if groupArg != "" {
 			if haveGroup {
 				// if we found any group entries that matched our filter, let's take the first one as "correct"
-				gid = groups[0].Gid
+				user.Gid = groups[0].Gid
 			} else {
 				// we asked for a group but didn't find id...  let's check to see if we wanted a numeric group
-				gid, err = strconv.Atoi(groupArg)
+				user.Gid, err = strconv.Atoi(groupArg)
 				if err != nil {
 					// not numeric - we have to bail
-					return 0, 0, nil, "", fmt.Errorf("Unable to find group %v", groupArg)
+					return nil, fmt.Errorf("Unable to find group %v", groupArg)
 				}
-				if gid < minId || gid > maxId {
-					return 0, 0, nil, "", ErrRange
+
+				// Ensure gid is inside gid range.
+				if user.Gid < minId || user.Gid > maxId {
+					return nil, ErrRange
 				}

 				// if groupArg couldn't be found in /etc/group but is numeric, just roll with it - this is legit
 			}
 		} else if haveGroup {
-			suppGids = make([]int, len(groups))
+			// If implicit group format, fill supplementary gids.
+			user.Sgids = make([]int, len(groups))
 			for i, group := range groups {
-				suppGids[i] = group.Gid
+				user.Sgids[i] = group.Gid
 			}
 		}
 	}

-	return uid, gid, suppGids, home, nil
+	return user, nil
 }
--- a/Godeps/_workspace/src/github.com/docker/libcontainer/user/user_test.go
+++ b/Godeps/_workspace/src/github.com/docker/libcontainer/user/user_test.go
@ -1,6 +1,8 @@
 package user

 import (
+	"io"
+	"reflect"
 	"strings"
 	"testing"
 )
@ -54,7 +56,7 @@ func TestUserParseLine(t *testing.T) {
 }

 func TestUserParsePasswd(t *testing.T) {
-	users, err := parsePasswdFile(strings.NewReader(`
+	users, err := ParsePasswdFilter(strings.NewReader(`
 root:x:0:0:root:/root:/bin/bash
 adm:x:3:4:adm:/var/adm:/bin/false
 this is just some garbage data
@ -74,7 +76,7 @@ this is just some garbage data
 }

 func TestUserParseGroup(t *testing.T) {
-	groups, err := parseGroupFile(strings.NewReader(`
+	groups, err := ParseGroupFilter(strings.NewReader(`
 root:x:0:root
 adm:x:4:root,adm,daemon
 this is just some garbage data
@ -92,3 +94,259 @@ this is just some garbage data
 		t.Fatalf("Expected groups[1] to be 4 - adm - 3 members, got %v - %v - %v", groups[1].Gid, groups[1].Name, len(groups[1].List))
 	}
 }
+
+func TestValidGetExecUser(t *testing.T) {
+	const passwdContent = `
+root:x:0:0:root user:/root:/bin/bash
+adm:x:42:43:adm:/var/adm:/bin/false
+this is just some garbage data
+`
+	const groupContent = `
+root:x:0:root
+adm:x:43:
+grp:x:1234:root,adm
+this is just some garbage data
+`
+	defaultExecUser := ExecUser{
+		Uid:   8888,
+		Gid:   8888,
+		Sgids: []int{8888},
+		Home:  "/8888",
+	}
+
+	tests := []struct {
+		ref      string
+		expected ExecUser
+	}{
+		{
+			ref: "root",
+			expected: ExecUser{
+				Uid:   0,
+				Gid:   0,
+				Sgids: []int{0, 1234},
+				Home:  "/root",
+			},
+		},
+		{
+			ref: "adm",
+			expected: ExecUser{
+				Uid:   42,
+				Gid:   43,
+				Sgids: []int{1234},
+				Home:  "/var/adm",
+			},
+		},
+		{
+			ref: "root:adm",
+			expected: ExecUser{
+				Uid:   0,
+				Gid:   43,
+				Sgids: defaultExecUser.Sgids,
+				Home:  "/root",
+			},
+		},
+		{
+			ref: "adm:1234",
+			expected: ExecUser{
+				Uid:   42,
+				Gid:   1234,
+				Sgids: defaultExecUser.Sgids,
+				Home:  "/var/adm",
+			},
+		},
+		{
+			ref: "42:1234",
+			expected: ExecUser{
+				Uid:   42,
+				Gid:   1234,
+				Sgids: defaultExecUser.Sgids,
+				Home:  "/var/adm",
+			},
+		},
+		{
+			ref: "1337:1234",
+			expected: ExecUser{
+				Uid:   1337,
+				Gid:   1234,
+				Sgids: defaultExecUser.Sgids,
+				Home:  defaultExecUser.Home,
+			},
+		},
+		{
+			ref: "1337",
+			expected: ExecUser{
+				Uid:   1337,
+				Gid:   defaultExecUser.Gid,
+				Sgids: defaultExecUser.Sgids,
+				Home:  defaultExecUser.Home,
+			},
+		},
+		{
+			ref: "",
+			expected: ExecUser{
+				Uid:   defaultExecUser.Uid,
+				Gid:   defaultExecUser.Gid,
+				Sgids: defaultExecUser.Sgids,
+				Home:  defaultExecUser.Home,
+			},
+		},
+	}
+
+	for _, test := range tests {
+		passwd := strings.NewReader(passwdContent)
+		group := strings.NewReader(groupContent)
+
+		execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group)
+		if err != nil {
+			t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error())
+			t.Fail()
+			continue
+		}
+
+		if !reflect.DeepEqual(test.expected, *execUser) {
+			t.Logf("got:      %#v", execUser)
+			t.Logf("expected: %#v", test.expected)
+			t.Fail()
+			continue
+		}
+	}
+}
+
+func TestInvalidGetExecUser(t *testing.T) {
+	const passwdContent = `
+root:x:0:0:root user:/root:/bin/bash
+adm:x:42:43:adm:/var/adm:/bin/false
+this is just some garbage data
+`
+	const groupContent = `
+root:x:0:root
+adm:x:43:
+grp:x:1234:root,adm
+this is just some garbage data
+`
+
+	tests := []string{
+		// No such user/group.
+		"notuser",
+		"notuser:notgroup",
+		"root:notgroup",
+		"notuser:adm",
+		"8888:notgroup",
+		"notuser:8888",
+
+		// Invalid user/group values.
+		"-1:0",
+		"0:-3",
+		"-5:-2",
+	}
+
+	for _, test := range tests {
+		passwd := strings.NewReader(passwdContent)
+		group := strings.NewReader(groupContent)
+
+		execUser, err := GetExecUser(test, nil, passwd, group)
+		if err == nil {
+			t.Logf("got unexpected success when parsing '%s': %#v", test, execUser)
+			t.Fail()
+			continue
+		}
+	}
+}
+
+func TestGetExecUserNilSources(t *testing.T) {
+	const passwdContent = `
+root:x:0:0:root user:/root:/bin/bash
+adm:x:42:43:adm:/var/adm:/bin/false
+this is just some garbage data
+`
+	const groupContent = `
+root:x:0:root
+adm:x:43:
+grp:x:1234:root,adm
+this is just some garbage data
+`
+
+	defaultExecUser := ExecUser{
+		Uid:   8888,
+		Gid:   8888,
+		Sgids: []int{8888},
+		Home:  "/8888",
+	}
+
+	tests := []struct {
+		ref           string
+		passwd, group bool
+		expected      ExecUser
+	}{
+		{
+			ref:    "",
+			passwd: false,
+			group:  false,
+			expected: ExecUser{
+				Uid:   8888,
+				Gid:   8888,
+				Sgids: []int{8888},
+				Home:  "/8888",
+			},
+		},
+		{
+			ref:    "root",
+			passwd: true,
+			group:  false,
+			expected: ExecUser{
+				Uid:   0,
+				Gid:   0,
+				Sgids: []int{8888},
+				Home:  "/root",
+			},
+		},
+		{
+			ref:    "0",
+			passwd: false,
+			group:  false,
+			expected: ExecUser{
+				Uid:   0,
+				Gid:   8888,
+				Sgids: []int{8888},
+				Home:  "/8888",
+			},
+		},
+		{
+			ref:    "0:0",
+			passwd: false,
+			group:  false,
+			expected: ExecUser{
+				Uid:   0,
+				Gid:   0,
+				Sgids: []int{8888},
+				Home:  "/8888",
+			},
+		},
+	}
+
+	for _, test := range tests {
+		var passwd, group io.Reader
+
+		if test.passwd {
+			passwd = strings.NewReader(passwdContent)
+		}
+
+		if test.group {
+			group = strings.NewReader(groupContent)
+		}
+
+		execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group)
+		if err != nil {
+			t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error())
+			t.Fail()
+			continue
+		}
+
+		if !reflect.DeepEqual(test.expected, *execUser) {
+			t.Logf("got:      %#v", execUser)
+			t.Logf("expected: %#v", test.expected)
+			t.Fail()
+			continue
+		}
+	}
+}
--- a/container/docker/factory.go
+++ b/container/docker/factory.go
@ -27,6 +27,7 @@ import (
 	"github.com/fsouza/go-dockerclient"
 	"github.com/golang/glog"
 	"github.com/google/cadvisor/container"
+	"github.com/google/cadvisor/container/libcontainer"
 	"github.com/google/cadvisor/info"
 	"github.com/google/cadvisor/utils"
 )
@ -68,6 +69,9 @@ type dockerFactory struct {
 	usesAufsDriver bool

 	client *docker.Client
+
+	// Information about the mounted cgroup subsystems.
+	cgroupSubsystems libcontainer.CgroupSubsystems
 }

 func (self *dockerFactory) String() string {
@ -85,6 +89,7 @@ func (self *dockerFactory) NewContainerHandler(name string) (handler container.C
 		self.machineInfoFactory,
 		*dockerRootDir,
 		self.usesAufsDriver,
+		&self.cgroupSubsystems,
 	)
 	return
 }
@ -218,11 +223,17 @@ func Register(factory info.MachineInfoFactory) error {
 		glog.Infof("System is using systemd")
 	}

+	cgroupSubsystems, err := libcontainer.GetCgroupSubsystems()
+	if err != nil {
+		return fmt.Errorf("failed to get cgroup subsystems: %v", err)
+	}
+
 	glog.Infof("Registering Docker factory")
 	f := &dockerFactory{
 		machineInfoFactory: factory,
 		client:             client,
 		usesAufsDriver:     usesAufsDriver,
+		cgroupSubsystems:   cgroupSubsystems,
 	}
 	container.RegisterContainerHandlerFactory(f)
 	return nil
--- a/container/docker/handler.go
+++ b/container/docker/handler.go
@ -59,6 +59,10 @@ type dockerContainerHandler struct {
 	// Path to the libcontainer pid file.
 	libcontainerPidPath string

+	// Absolute path to the cgroup hierarchies of this container.
+	// (e.g.: "cpu" -> "/sys/fs/cgroup/cpu/test")
+	cgroupPaths map[string]string
+
 	cgroup         cgroups.Cgroup
 	usesAufsDriver bool
 	fsInfo         fs.FsInfo
@ -71,11 +75,19 @@ func newDockerContainerHandler(
 	machineInfoFactory info.MachineInfoFactory,
 	dockerRootDir string,
 	usesAufsDriver bool,
+	cgroupSubsystems *containerLibcontainer.CgroupSubsystems,
 ) (container.ContainerHandler, error) {
 	fsInfo, err := fs.NewFsInfo()
 	if err != nil {
 		return nil, err
 	}
+
+	// Create the cgroup paths.
+	cgroupPaths := make(map[string]string, len(cgroupSubsystems.MountPoints))
+	for key, val := range cgroupSubsystems.MountPoints {
+		cgroupPaths[key] = path.Join(val, name)
+	}
+
 	id := ContainerNameToDockerId(name)
 	handler := &dockerContainerHandler{
 		id:                     id,
@ -85,6 +97,7 @@ func newDockerContainerHandler(
 		libcontainerConfigPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "container.json"),
 		libcontainerStatePath:  path.Join(dockerRootDir, pathToLibcontainerState, id, "state.json"),
 		libcontainerPidPath:    path.Join(dockerRootDir, pathToLibcontainerState, id, "pid"),
+		cgroupPaths:            cgroupPaths,
 		cgroup: cgroups.Cgroup{
 			Parent: "/",
 			Name:   name,
@ -159,6 +172,11 @@ func (self *dockerContainerHandler) readLibcontainerState() (state *libcontainer
 	}
 	state = retState

+	// Fall back to the computed cgroup paths when the state has none, since older Docker clients don't write them.
+	if len(state.CgroupPaths) == 0 {
+		state.CgroupPaths = self.cgroupPaths
+	}
+
 	return
 }

@ -259,7 +277,7 @@ func (self *dockerContainerHandler) GetStats() (stats *info.ContainerStats, err
 		return
 	}

-	stats, err = containerLibcontainer.GetStats(&self.cgroup, state)
+	stats, err = containerLibcontainer.GetStats(state)
 	if err != nil {
 		return
 	}
--- a/container/libcontainer/helpers.go
+++ b/container/libcontainer/helpers.go
@ -15,6 +15,7 @@
 package libcontainer

 import (
+	"fmt"
 	"time"

 	"github.com/docker/libcontainer"
@ -24,13 +25,60 @@ import (
 	"github.com/google/cadvisor/info"
 )

+type CgroupSubsystems struct {
+	// Cgroup mounts, each listed with the subsystems attached to it.
+	// e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"]
+	Mounts []cgroups.Mount
+
+	// Maps each cgroup subsystem to its mount location.
+	// e.g.: "cpu" -> "/sys/fs/cgroup/cpu"
+	MountPoints map[string]string
+}
+
+// GetCgroupSubsystems returns information about the mounted cgroup
+// subsystems, restricted to the ones listed in supportedSubsystems.
+//
+// Mounts contains every cgroup mount that carries at least one supported
+// subsystem, and each such mount appears exactly once even when several
+// supported subsystems share it (e.g. a co-mounted "cpu,cpuacct" hierarchy).
+// MountPoints maps each supported subsystem name to its mount location.
+//
+// An error is returned if the cgroup mounts cannot be read or if no cgroup
+// mounts are found at all. Finding mounts but none with a supported subsystem
+// is not an error here; callers can check for an empty Mounts slice.
+func GetCgroupSubsystems() (CgroupSubsystems, error) {
+	// Get all cgroup mounts.
+	allCgroups, err := cgroups.GetCgroupMounts()
+	if err != nil {
+		return CgroupSubsystems{}, err
+	}
+	if len(allCgroups) == 0 {
+		return CgroupSubsystems{}, fmt.Errorf("failed to find cgroup mounts")
+	}
+
+	// Trim the mounts to only the subsystems we care about.
+	supportedCgroups := make([]cgroups.Mount, 0, len(allCgroups))
+	mountPoints := make(map[string]string, len(allCgroups))
+	for _, mount := range allCgroups {
+		supported := false
+		for _, subsystem := range mount.Subsystems {
+			if _, ok := supportedSubsystems[subsystem]; ok {
+				mountPoints[subsystem] = mount.Mountpoint
+				supported = true
+			}
+		}
+
+		// Record the mount once, no matter how many supported subsystems
+		// it carries, so consumers of Mounts do not see duplicates.
+		if supported {
+			supportedCgroups = append(supportedCgroups, mount)
+		}
+	}
+
+	return CgroupSubsystems{
+		Mounts:      supportedCgroups,
+		MountPoints: mountPoints,
+	}, nil
+}
+
+// supportedSubsystems is the set of cgroup subsystems we support listing
+// (should be the minimal set we need stats from).
+var supportedSubsystems = map[string]struct{}{
+	"cpu":     {},
+	"cpuacct": {},
+	"cpuset":  {},
+	"memory":  {},
+}
+
 // Get stats of the specified container
-func GetStats(cgroup *cgroups.Cgroup, state *libcontainer.State) (*info.ContainerStats, error) {
+func GetStats(state *libcontainer.State) (*info.ContainerStats, error) {
 	// TODO(vmarmol): Use libcontainer's Stats() in the new API when that is ready.
 	stats := &libcontainer.ContainerStats{}

 	var err error
-	stats.CgroupStats, err = cgroupfs.GetStats(cgroup)
+	stats.CgroupStats, err = cgroupfs.GetStats(state.CgroupPaths)
 	if err != nil {
 		return &info.ContainerStats{}, err
 	}
@ -43,14 +91,6 @@ func GetStats(cgroup *cgroups.Cgroup, state *libcontainer.State) (*info.Containe
 	return toContainerStats(stats), nil
 }

-func GetStatsCgroupOnly(cgroup *cgroups.Cgroup) (*info.ContainerStats, error) {
-	s, err := cgroupfs.GetStats(cgroup)
-	if err != nil {
-		return nil, err
-	}
-	return toContainerStats(&libcontainer.ContainerStats{CgroupStats: s}), nil
-}
-
 func DiskStatsCopy(blkio_stats []cgroups.BlkioStatEntry) (stat []info.PerDiskStats) {
 	if len(blkio_stats) == 0 {
 		return
--- a/container/raw/factory.go
+++ b/container/raw/factory.go
@ -17,26 +17,18 @@ package raw
 import (
 	"fmt"

-	"github.com/docker/libcontainer/cgroups"
 	"github.com/golang/glog"
 	"github.com/google/cadvisor/container"
+	"github.com/google/cadvisor/container/libcontainer"
 	"github.com/google/cadvisor/info"
 )

-type cgroupSubsystems struct {
-	// Cgroup subsystem mounts.
-	// e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"]
-	mounts []cgroups.Mount
-
-	// Cgroup subsystem to their mount location.
-	// e.g.: "cpu" -> "/sys/fs/cgroup/cpu"
-	mountPoints map[string]string
-}
-
 type rawFactory struct {
 	// Factory for machine information.
 	machineInfoFactory info.MachineInfoFactory
-	cgroupSubsystems   *cgroupSubsystems
+
+	// Information about the cgroup subsystems.
+	cgroupSubsystems *libcontainer.CgroupSubsystems
 }

 func (self *rawFactory) String() string {
@ -53,46 +45,19 @@ func (self *rawFactory) CanHandle(name string) (bool, error) {
 }

 func Register(machineInfoFactory info.MachineInfoFactory) error {
-	// Get all cgroup mounts.
-	allCgroups, err := cgroups.GetCgroupMounts()
+	cgroupSubsystems, err := libcontainer.GetCgroupSubsystems()
 	if err != nil {
-		return err
+		return fmt.Errorf("failed to get cgroup subsystems: %v", err)
 	}
-	if len(allCgroups) == 0 {
-		return fmt.Errorf("failed to find cgroup mounts for the raw factory")
-	}
-
-	// Trim the mounts to only the subsystems we care about.
-	supportedCgroups := make([]cgroups.Mount, 0, len(allCgroups))
-	mountPoints := make(map[string]string, len(allCgroups))
-	for _, mount := range allCgroups {
-		for _, subsystem := range mount.Subsystems {
-			if _, ok := supportedSubsystems[subsystem]; ok {
-				supportedCgroups = append(supportedCgroups, mount)
-				mountPoints[subsystem] = mount.Mountpoint
-			}
-		}
-	}
-	if len(supportedCgroups) == 0 {
+	if len(cgroupSubsystems.Mounts) == 0 {
 		return fmt.Errorf("failed to find supported cgroup mounts for the raw factory")
 	}

 	glog.Infof("Registering Raw factory")
 	factory := &rawFactory{
 		machineInfoFactory: machineInfoFactory,
-		cgroupSubsystems: &cgroupSubsystems{
-			mounts:      supportedCgroups,
-			mountPoints: mountPoints,
-		},
+		cgroupSubsystems:   &cgroupSubsystems,
 	}
 	container.RegisterContainerHandlerFactory(factory)
 	return nil
 }
-
-// Cgroup subsystems we support listing (should be the minimal set we need stats from).
-var supportedSubsystems map[string]struct{} = map[string]struct{}{
-	"cpu":     {},
-	"cpuacct": {},
-	"memory":  {},
-	"cpuset":  {},
-}
--- a/container/raw/handler.go
+++ b/container/raw/handler.go
@ -39,7 +39,7 @@ type rawContainerHandler struct {
 	// Name of the container for this handler.
 	name               string
 	cgroup             *cgroups.Cgroup
-	cgroupSubsystems   *cgroupSubsystems
+	cgroupSubsystems   *libcontainer.CgroupSubsystems
 	machineInfoFactory info.MachineInfoFactory

 	// Inotify event watcher.
@ -54,12 +54,16 @@ type rawContainerHandler struct {
 	// Cgroup paths being watched for new subcontainers
 	cgroupWatches map[string]struct{}

+	// Absolute path to the cgroup hierarchies of this container.
+	// (e.g.: "cpu" -> "/sys/fs/cgroup/cpu/test")
+	cgroupPaths map[string]string
+
 	fsInfo           fs.FsInfo
 	networkInterface *networkInterface
 	externalMounts   []mount
 }

-func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, machineInfoFactory info.MachineInfoFactory) (container.ContainerHandler, error) {
+func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSubsystems, machineInfoFactory info.MachineInfoFactory) (container.ContainerHandler, error) {
 	fsInfo, err := fs.NewFsInfo()
 	if err != nil {
 		return nil, err
@ -77,6 +81,13 @@ func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, mac
 			break
 		}
 	}
+
+	// Create the cgroup paths.
+	cgroupPaths := make(map[string]string, len(cgroupSubsystems.MountPoints))
+	for key, val := range cgroupSubsystems.MountPoints {
+		cgroupPaths[key] = path.Join(val, name)
+	}
+
 	return &rawContainerHandler{
 		name: name,
 		cgroup: &cgroups.Cgroup{
@ -88,6 +99,7 @@ func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, mac
 		stopWatcher:        make(chan error),
 		watches:            make(map[string]struct{}),
 		cgroupWatches:      make(map[string]struct{}),
+		cgroupPaths:        cgroupPaths,
 		fsInfo:             fsInfo,
 		networkInterface:   networkInterface,
 		externalMounts:     externalMounts,
@ -145,9 +157,8 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {
 	}

 	// CPU.
-	cpuRoot, ok := self.cgroupSubsystems.mountPoints["cpu"]
+	cpuRoot, ok := self.cgroupPaths["cpu"]
 	if ok {
-		cpuRoot = path.Join(cpuRoot, self.name)
 		if utils.FileExists(cpuRoot) {
 			spec.HasCpu = true
 			spec.Cpu.Limit = readInt64(cpuRoot, "cpu.shares")
@ -156,9 +167,8 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {

 	// Cpu Mask.
 	// This will fail for non-unified hierarchies. We'll return the whole machine mask in that case.
-	cpusetRoot, ok := self.cgroupSubsystems.mountPoints["cpuset"]
+	cpusetRoot, ok := self.cgroupPaths["cpuset"]
 	if ok {
-		cpusetRoot = path.Join(cpusetRoot, self.name)
 		if utils.FileExists(cpusetRoot) {
 			spec.HasCpu = true
 			spec.Cpu.Mask = readString(cpusetRoot, "cpuset.cpus")
@ -169,9 +179,8 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {
 	}

 	// Memory.
-	memoryRoot, ok := self.cgroupSubsystems.mountPoints["memory"]
+	memoryRoot, ok := self.cgroupPaths["memory"]
 	if ok {
-		memoryRoot = path.Join(memoryRoot, self.name)
 		if utils.FileExists(memoryRoot) {
 			spec.HasMemory = true
 			spec.Memory.Limit = readInt64(memoryRoot, "memory.limit_in_bytes")
@ -227,7 +236,10 @@ func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
 		}
 		for _, fs := range filesystems {
 			stats.Filesystem = append(stats.Filesystem,
-				info.FsStats{fs.Device, fs.Capacity, fs.Capacity - fs.Free,
+				info.FsStats{
+					fs.Device,
+					fs.Capacity,
+					fs.Capacity - fs.Free,
 					fs.DiskStats.ReadsCompleted,
 					fs.DiskStats.ReadsMerged,
 					fs.DiskStats.SectorsRead,
@ -246,18 +258,20 @@ func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
 }

 func (self *rawContainerHandler) GetStats() (*info.ContainerStats, error) {
-	state := dockerlibcontainer.State{}
+	// TODO(vmarmol): Don't re-create this every time.
+	state := dockerlibcontainer.State{
+		CgroupPaths: self.cgroupPaths,
+	}
 	if self.networkInterface != nil {
 		state = dockerlibcontainer.State{
 			NetworkState: network.NetworkState{
 				VethHost:  self.networkInterface.VethHost,
 				VethChild: self.networkInterface.VethChild,
-				NsPath:    "unknown",
 			},
 		}
 	}

-	stats, err := libcontainer.GetStats(self.cgroup, &state)
+	stats, err := libcontainer.GetStats(&state)
 	if err != nil {
 		return nil, err
 	}
@ -301,8 +315,8 @@ func listDirectories(dirpath string, parent string, recursive bool, output map[s

 func (self *rawContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
 	containers := make(map[string]struct{})
-	for _, subsystem := range self.cgroupSubsystems.mounts {
-		err := listDirectories(path.Join(subsystem.Mountpoint, self.name), self.name, listType == container.ListRecursive, containers)
+	for _, cgroupPath := range self.cgroupPaths {
+		err := listDirectories(cgroupPath, self.name, listType == container.ListRecursive, containers)
 		if err != nil {
 			return nil, err
 		}
@ -372,7 +386,7 @@ func (self *rawContainerHandler) processEvent(event *inotify.Event, events chan

 	// Derive the container name from the path name.
 	var containerName string
-	for _, mount := range self.cgroupSubsystems.mounts {
+	for _, mount := range self.cgroupSubsystems.Mounts {
 		mountLocation := path.Clean(mount.Mountpoint) + "/"
 		if strings.HasPrefix(event.Name, mountLocation) {
 			containerName = event.Name[len(mountLocation)-1:]
@ -437,8 +451,8 @@ func (self *rawContainerHandler) WatchSubcontainers(events chan container.Subcon
 	}

 	// Watch this container (all its cgroups) and all subdirectories.
-	for _, mnt := range self.cgroupSubsystems.mounts {
-		err := self.watchDirectory(path.Join(mnt.Mountpoint, self.name), self.name)
+	for _, cgroupPath := range self.cgroupPaths {
+		err := self.watchDirectory(cgroupPath, self.name)
 		if err != nil {
 			return err
 		}
@ -481,8 +495,8 @@ func (self *rawContainerHandler) StopWatchingSubcontainers() error {

 func (self *rawContainerHandler) Exists() bool {
 	// If any cgroup exists, the container is still alive.
-	for _, subsystem := range self.cgroupSubsystems.mounts {
-		if utils.FileExists(path.Join(subsystem.Mountpoint, self.name)) {
+	for _, cgroupPath := range self.cgroupPaths {
+		if utils.FileExists(cgroupPath) {
 			return true
 		}
 	}