Merge pull request #930 from jimmidyson/docker-client-update

Docker client update including reuse
This commit is contained in:
Vish Kannan 2015-10-20 14:20:03 -07:00
commit ec2a73d0c0
297 changed files with 27229 additions and 3594 deletions

54
Godeps/Godeps.json generated
View File

@ -1,6 +1,6 @@
{
"ImportPath": "github.com/google/cadvisor",
"GoVersion": "go1.4",
"GoVersion": "go1.5.1",
"Packages": [
"./..."
],
@ -32,60 +32,30 @@
"Comment": "v2-27-g97e243d",
"Rev": "97e243d21a8e232e9d8af38ba2366dfcfceebeba"
},
{
"ImportPath": "github.com/docker/docker/pkg/archive",
"Comment": "v1.4.1-1379-g8e107a9",
"Rev": "8e107a93210c54f22ec1354d969c771b1abfbe05"
},
{
"ImportPath": "github.com/docker/docker/pkg/fileutils",
"Comment": "v1.4.1-1379-g8e107a9",
"Rev": "8e107a93210c54f22ec1354d969c771b1abfbe05"
},
{
"ImportPath": "github.com/docker/docker/pkg/ioutils",
"Comment": "v1.4.1-1379-g8e107a9",
"Rev": "8e107a93210c54f22ec1354d969c771b1abfbe05"
},
{
"ImportPath": "github.com/docker/docker/pkg/mount",
"Comment": "v1.4.1-1379-g8e107a9",
"Rev": "8e107a93210c54f22ec1354d969c771b1abfbe05"
"Comment": "v1.8.3-1-g2732df1",
"Rev": "2732df1dfb35f797a90e163afffe23564600407b"
},
{
"ImportPath": "github.com/docker/docker/pkg/pools",
"Comment": "v1.4.1-1379-g8e107a9",
"Rev": "8e107a93210c54f22ec1354d969c771b1abfbe05"
},
{
"ImportPath": "github.com/docker/docker/pkg/promise",
"Comment": "v1.4.1-1379-g8e107a9",
"Rev": "8e107a93210c54f22ec1354d969c771b1abfbe05"
},
{
"ImportPath": "github.com/docker/docker/pkg/system",
"Comment": "v1.4.1-1379-g8e107a9",
"Rev": "8e107a93210c54f22ec1354d969c771b1abfbe05"
"ImportPath": "github.com/docker/docker/pkg/symlink",
"Comment": "v1.8.3-1-g2732df1",
"Rev": "2732df1dfb35f797a90e163afffe23564600407b"
},
{
"ImportPath": "github.com/docker/docker/pkg/units",
"Comment": "v1.4.1-1379-g8e107a9",
"Rev": "8e107a93210c54f22ec1354d969c771b1abfbe05"
},
{
"ImportPath": "github.com/docker/docker/vendor/src/code.google.com/p/go/src/pkg/archive/tar",
"Comment": "v1.4.1-1379-g8e107a9",
"Rev": "8e107a93210c54f22ec1354d969c771b1abfbe05"
"Comment": "v1.8.3-1-g2732df1",
"Rev": "2732df1dfb35f797a90e163afffe23564600407b"
},
{
"ImportPath": "github.com/docker/libcontainer",
"Comment": "v1.4.0-501-ga1fe3f1",
"Rev": "a1fe3f1c7ad2e8eebe6d59e573f04d2b10961cf6"
"Comment": "v2.2.1",
"Rev": "5dc7ba0f24332273461e45bc49edcb4d5aa6c44c"
},
{
"ImportPath": "github.com/fsouza/go-dockerclient",
"Comment": "0.2.1-461-g09334c5",
"Rev": "09334c56c63bab2cd6c4ccab924d89e2419a361f"
"Comment": "0.2.1-764-g412c004",
"Rev": "412c004d923b7b89701e7a1632de83f843657a03"
},
{
"ImportPath": "github.com/garyburd/redigo/internal",

View File

@ -1,2 +0,0 @@
Cristian Staretu <cristian.staretu@gmail.com> (@unclejack)
Tibor Vass <teabee89@gmail.com> (@tiborvass)

View File

@ -1,46 +0,0 @@
// +build !windows
package archive
import (
"errors"
"syscall"
"github.com/docker/docker/vendor/src/code.google.com/p/go/src/pkg/archive/tar"
)
// canonicalTarNameForPath returns platform-specific filepath
// to canonical posix-style path for tar archival. p is relative
// path.
func CanonicalTarNameForPath(p string) (string, error) {
return p, nil // already unix-style
}
func setHeaderForSpecialDevice(hdr *tar.Header, ta *tarAppender, name string, stat interface{}) (nlink uint32, inode uint64, err error) {
s, ok := stat.(*syscall.Stat_t)
if !ok {
err = errors.New("cannot convert stat value to syscall.Stat_t")
return
}
nlink = uint32(s.Nlink)
inode = uint64(s.Ino)
// Currently go does not fil in the major/minors
if s.Mode&syscall.S_IFBLK == syscall.S_IFBLK ||
s.Mode&syscall.S_IFCHR == syscall.S_IFCHR {
hdr.Devmajor = int64(major(uint64(s.Rdev)))
hdr.Devminor = int64(minor(uint64(s.Rdev)))
}
return
}
func major(device uint64) uint64 {
return (device >> 8) & 0xfff
}
func minor(device uint64) uint64 {
return (device & 0xff) | ((device >> 12) & 0xfff00)
}

View File

@ -1,29 +0,0 @@
// +build windows
package archive
import (
"fmt"
"strings"
"github.com/docker/docker/vendor/src/code.google.com/p/go/src/pkg/archive/tar"
)
// canonicalTarNameForPath returns platform-specific filepath
// to canonical posix-style path for tar archival. p is relative
// path.
func CanonicalTarNameForPath(p string) (string, error) {
// windows: convert windows style relative path with backslashes
// into forward slashes. since windows does not allow '/' or '\'
// in file names, it is mostly safe to replace however we must
// check just in case
if strings.Contains(p, "/") {
return "", fmt.Errorf("windows path contains forward slash: %s", p)
}
return strings.Replace(p, "\\", "/", -1), nil
}
func setHeaderForSpecialDevice(hdr *tar.Header, ta *tarAppender, name string, stat interface{}) (nlink uint32, inode uint64, err error) {
// do nothing. no notion of Rdev, Inode, Nlink in stat on Windows
return
}

View File

@ -1,26 +0,0 @@
package fileutils
import (
log "github.com/Sirupsen/logrus"
"path/filepath"
)
// Matches returns true if relFilePath matches any of the patterns
func Matches(relFilePath string, patterns []string) (bool, error) {
for _, exclude := range patterns {
matched, err := filepath.Match(exclude, relFilePath)
if err != nil {
log.Errorf("Error matching: %s (pattern: %s)", relFilePath, exclude)
return false, err
}
if matched {
if filepath.Clean(relFilePath) == "." {
log.Errorf("Can't exclude whole path, excluding pattern: %s", exclude)
continue
}
log.Debugf("Skipping excluded path: %s", relFilePath)
return true, nil
}
}
return false, nil
}

View File

@ -1,114 +0,0 @@
package ioutils
import (
"bytes"
"io"
"sync"
)
type readCloserWrapper struct {
io.Reader
closer func() error
}
func (r *readCloserWrapper) Close() error {
return r.closer()
}
func NewReadCloserWrapper(r io.Reader, closer func() error) io.ReadCloser {
return &readCloserWrapper{
Reader: r,
closer: closer,
}
}
type readerErrWrapper struct {
reader io.Reader
closer func()
}
func (r *readerErrWrapper) Read(p []byte) (int, error) {
n, err := r.reader.Read(p)
if err != nil {
r.closer()
}
return n, err
}
func NewReaderErrWrapper(r io.Reader, closer func()) io.Reader {
return &readerErrWrapper{
reader: r,
closer: closer,
}
}
type bufReader struct {
sync.Mutex
buf *bytes.Buffer
reader io.Reader
err error
wait sync.Cond
drainBuf []byte
}
func NewBufReader(r io.Reader) *bufReader {
reader := &bufReader{
buf: &bytes.Buffer{},
drainBuf: make([]byte, 1024),
reader: r,
}
reader.wait.L = &reader.Mutex
go reader.drain()
return reader
}
func NewBufReaderWithDrainbufAndBuffer(r io.Reader, drainBuffer []byte, buffer *bytes.Buffer) *bufReader {
reader := &bufReader{
buf: buffer,
drainBuf: drainBuffer,
reader: r,
}
reader.wait.L = &reader.Mutex
go reader.drain()
return reader
}
func (r *bufReader) drain() {
for {
n, err := r.reader.Read(r.drainBuf)
r.Lock()
if err != nil {
r.err = err
} else {
r.buf.Write(r.drainBuf[0:n])
}
r.wait.Signal()
r.Unlock()
if err != nil {
break
}
}
}
func (r *bufReader) Read(p []byte) (n int, err error) {
r.Lock()
defer r.Unlock()
for {
n, err = r.buf.Read(p)
if n > 0 {
return n, err
}
if r.err != nil {
return 0, r.err
}
r.wait.Wait()
}
}
func (r *bufReader) Close() error {
closer, ok := r.reader.(io.ReadCloser)
if !ok {
return nil
}
return closer.Close()
}

View File

@ -1 +0,0 @@
Michael Crosby <michael@crosbymichael.com> (@crosbymichael)

View File

@ -8,12 +8,25 @@ package mount
import "C"
const (
RDONLY = C.MNT_RDONLY
NOSUID = C.MNT_NOSUID
NOEXEC = C.MNT_NOEXEC
SYNCHRONOUS = C.MNT_SYNCHRONOUS
NOATIME = C.MNT_NOATIME
// RDONLY will mount the filesystem as read-only.
RDONLY = C.MNT_RDONLY
// NOSUID will not allow set-user-identifier or set-group-identifier bits to
// take effect.
NOSUID = C.MNT_NOSUID
// NOEXEC will not allow execution of any binaries on the mounted file system.
NOEXEC = C.MNT_NOEXEC
// SYNCHRONOUS will allow any I/O to the file system to be done synchronously.
SYNCHRONOUS = C.MNT_SYNCHRONOUS
// NOATIME will not update the file access time when reading from a file.
NOATIME = C.MNT_NOATIME
)
// These flags are unsupported.
const (
BIND = 0
DIRSYNC = 0
MANDLOCK = 0

View File

@ -5,26 +5,81 @@ import (
)
const (
RDONLY = syscall.MS_RDONLY
NOSUID = syscall.MS_NOSUID
NODEV = syscall.MS_NODEV
NOEXEC = syscall.MS_NOEXEC
// RDONLY will mount the file system read-only.
RDONLY = syscall.MS_RDONLY
// NOSUID will not allow set-user-identifier or set-group-identifier bits to
// take effect.
NOSUID = syscall.MS_NOSUID
// NODEV will not interpret character or block special devices on the file
// system.
NODEV = syscall.MS_NODEV
// NOEXEC will not allow execution of any binaries on the mounted file system.
NOEXEC = syscall.MS_NOEXEC
// SYNCHRONOUS will allow I/O to the file system to be done synchronously.
SYNCHRONOUS = syscall.MS_SYNCHRONOUS
DIRSYNC = syscall.MS_DIRSYNC
REMOUNT = syscall.MS_REMOUNT
MANDLOCK = syscall.MS_MANDLOCK
NOATIME = syscall.MS_NOATIME
NODIRATIME = syscall.MS_NODIRATIME
BIND = syscall.MS_BIND
RBIND = syscall.MS_BIND | syscall.MS_REC
UNBINDABLE = syscall.MS_UNBINDABLE
// DIRSYNC will force all directory updates within the file system to be done
// synchronously. This affects the following system calls: creat, link,
// unlink, symlink, mkdir, rmdir, mknod and rename.
DIRSYNC = syscall.MS_DIRSYNC
// REMOUNT will attempt to remount an already-mounted file system. This is
// commonly used to change the mount flags for a file system, especially to
// make a readonly file system writeable. It does not change device or mount
// point.
REMOUNT = syscall.MS_REMOUNT
// MANDLOCK will force mandatory locks on a filesystem.
MANDLOCK = syscall.MS_MANDLOCK
// NOATIME will not update the file access time when reading from a file.
NOATIME = syscall.MS_NOATIME
// NODIRATIME will not update the directory access time.
NODIRATIME = syscall.MS_NODIRATIME
// BIND remounts a subtree somewhere else.
BIND = syscall.MS_BIND
// RBIND remounts a subtree and all possible submounts somewhere else.
RBIND = syscall.MS_BIND | syscall.MS_REC
// UNBINDABLE creates a mount which cannot be cloned through a bind operation.
UNBINDABLE = syscall.MS_UNBINDABLE
// RUNBINDABLE marks the entire mount tree as UNBINDABLE.
RUNBINDABLE = syscall.MS_UNBINDABLE | syscall.MS_REC
PRIVATE = syscall.MS_PRIVATE
RPRIVATE = syscall.MS_PRIVATE | syscall.MS_REC
SLAVE = syscall.MS_SLAVE
RSLAVE = syscall.MS_SLAVE | syscall.MS_REC
SHARED = syscall.MS_SHARED
RSHARED = syscall.MS_SHARED | syscall.MS_REC
RELATIME = syscall.MS_RELATIME
// PRIVATE creates a mount which carries no propagation abilities.
PRIVATE = syscall.MS_PRIVATE
// RPRIVATE marks the entire mount tree as PRIVATE.
RPRIVATE = syscall.MS_PRIVATE | syscall.MS_REC
// SLAVE creates a mount which receives propagation from its master, but not
// vice versa.
SLAVE = syscall.MS_SLAVE
// RSLAVE marks the entire mount tree as SLAVE.
RSLAVE = syscall.MS_SLAVE | syscall.MS_REC
// SHARED creates a mount which provides the ability to create mirrors of
// that mount such that mounts and unmounts within any of the mirrors
// propagate to the other mirrors.
SHARED = syscall.MS_SHARED
// RSHARED marks the entire mount tree as SHARED.
RSHARED = syscall.MS_SHARED | syscall.MS_REC
// RELATIME updates inode access times relative to modify or change time.
RELATIME = syscall.MS_RELATIME
// STRICTATIME allows to explicitly request full atime updates. This makes
// it possible for the kernel to default to relatime or noatime but still
// allow userspace to override it.
STRICTATIME = syscall.MS_STRICTATIME
)

View File

@ -2,6 +2,7 @@
package mount
// These flags are unsupported.
const (
BIND = 0
DIRSYNC = 0

View File

@ -4,11 +4,12 @@ import (
"time"
)
func GetMounts() ([]*MountInfo, error) {
// GetMounts retrieves a list of mounts for the current running process.
func GetMounts() ([]*Info, error) {
return parseMountTable()
}
// Looks at /proc/self/mountinfo to determine of the specified
// Mounted looks at /proc/self/mountinfo to determine of the specified
// mountpoint has been mounted
func Mounted(mountpoint string) (bool, error) {
entries, err := parseMountTable()
@ -25,9 +26,10 @@ func Mounted(mountpoint string) (bool, error) {
return false, nil
}
// Mount the specified options at the target path only if
// the target is not mounted
// Options must be specified as fstab style
// Mount will mount filesystem according to the specified configuration, on the
// condition that the target path is *not* already mounted. Options must be
// specified like the mount or fstab unix commands: "opt1=val1,opt2=val2". See
// flags.go for supported option flags.
func Mount(device, target, mType, options string) error {
flag, _ := parseOptions(options)
if flag&REMOUNT != REMOUNT {
@ -38,9 +40,10 @@ func Mount(device, target, mType, options string) error {
return ForceMount(device, target, mType, options)
}
// Mount the specified options at the target path
// reguardless if the target is mounted or not
// Options must be specified as fstab style
// ForceMount will mount a filesystem according to the specified configuration,
// *regardless* if the target path is not already mounted. Options must be
// specified like the mount or fstab unix commands: "opt1=val1,opt2=val2". See
// flags.go for supported option flags.
func ForceMount(device, target, mType, options string) error {
flag, data := parseOptions(options)
if err := mount(device, target, mType, uintptr(flag), data); err != nil {
@ -49,7 +52,7 @@ func ForceMount(device, target, mType, options string) error {
return nil
}
// Unmount the target only if it is mounted
// Unmount will unmount the target filesystem, so long as it is mounted.
func Unmount(target string) error {
if mounted, err := Mounted(target); err != nil || !mounted {
return err
@ -57,7 +60,8 @@ func Unmount(target string) error {
return ForceUnmount(target)
}
// Unmount the target reguardless if it is mounted or not
// ForceUnmount will force an unmount of the target filesystem, regardless if
// it is mounted or not.
func ForceUnmount(target string) (err error) {
// Simple retry logic for unmount
for i := 0; i < 10; i++ {

View File

@ -1,7 +1,40 @@
package mount
type MountInfo struct {
Id, Parent, Major, Minor int
Root, Mountpoint, Opts, Optional string
Fstype, Source, VfsOpts string
// Info reveals information about a particular mounted filesystem. This
// struct is populated from the content in the /proc/<pid>/mountinfo file.
type Info struct {
// ID is a unique identifier of the mount (may be reused after umount).
ID int
// Parent indicates the ID of the mount parent (or of self for the top of the
// mount tree).
Parent int
// Major indicates one half of the device ID which identifies the device class.
Major int
// Minor indicates one half of the device ID which identifies a specific
// instance of device.
Minor int
// Root of the mount within the filesystem.
Root string
// Mountpoint indicates the mount point relative to the process's root.
Mountpoint string
// Opts represents mount-specific options.
Opts string
// Optional represents optional fields.
Optional string
// Fstype indicates the type of filesystem, such as EXT3.
Fstype string
// Source indicates filesystem specific information or "none".
Source string
// VfsOpts represents per super block options.
VfsOpts string
}

View File

@ -13,8 +13,9 @@ import (
"unsafe"
)
// Parse /proc/self/mountinfo because comparing Dev and ino does not work from bind mounts
func parseMountTable() ([]*MountInfo, error) {
// Parse /proc/self/mountinfo because comparing Dev and ino does not work from
// bind mounts.
func parseMountTable() ([]*Info, error) {
var rawEntries *C.struct_statfs
count := int(C.getmntinfo(&rawEntries, C.MNT_WAIT))
@ -28,9 +29,9 @@ func parseMountTable() ([]*MountInfo, error) {
header.Len = count
header.Data = uintptr(unsafe.Pointer(rawEntries))
var out []*MountInfo
var out []*Info
for _, entry := range entries {
var mountinfo MountInfo
var mountinfo Info
mountinfo.Mountpoint = C.GoString(&entry.f_mntonname[0])
mountinfo.Source = C.GoString(&entry.f_mntfromname[0])
mountinfo.Fstype = C.GoString(&entry.f_fstypename[0])

View File

@ -28,8 +28,9 @@ const (
mountinfoFormat = "%d %d %d:%d %s %s %s %s"
)
// Parse /proc/self/mountinfo because comparing Dev and ino does not work from bind mounts
func parseMountTable() ([]*MountInfo, error) {
// Parse /proc/self/mountinfo because comparing Dev and ino does not work from
// bind mounts
func parseMountTable() ([]*Info, error) {
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return nil, err
@ -39,10 +40,10 @@ func parseMountTable() ([]*MountInfo, error) {
return parseInfoFile(f)
}
func parseInfoFile(r io.Reader) ([]*MountInfo, error) {
func parseInfoFile(r io.Reader) ([]*Info, error) {
var (
s = bufio.NewScanner(r)
out = []*MountInfo{}
out = []*Info{}
)
for s.Scan() {
@ -51,13 +52,13 @@ func parseInfoFile(r io.Reader) ([]*MountInfo, error) {
}
var (
p = &MountInfo{}
p = &Info{}
text = s.Text()
optionalFields string
)
if _, err := fmt.Sscanf(text, mountinfoFormat,
&p.Id, &p.Parent, &p.Major, &p.Minor,
&p.ID, &p.Parent, &p.Major, &p.Minor,
&p.Root, &p.Mountpoint, &p.Opts, &optionalFields); err != nil {
return nil, fmt.Errorf("Scanning '%s' failed: %s", text, err)
}
@ -80,8 +81,10 @@ func parseInfoFile(r io.Reader) ([]*MountInfo, error) {
return out, nil
}
// PidMountInfo collects the mounts for a specific Pid
func PidMountInfo(pid int) ([]*MountInfo, error) {
// PidMountInfo collects the mounts for a specific process ID. If the process
// ID is unknown, it is better to use `GetMounts` which will inspect
// "/proc/self/mountinfo" instead.
func PidMountInfo(pid int) ([]*Info, error) {
f, err := os.Open(fmt.Sprintf("/proc/%d/mountinfo", pid))
if err != nil {
return nil, err

View File

@ -7,6 +7,6 @@ import (
"runtime"
)
func parseMountTable() ([]*MountInfo, error) {
func parseMountTable() ([]*Info, error) {
return nil, fmt.Errorf("mount.parseMountTable is not implemented on %s/%s", runtime.GOOS, runtime.GOARCH)
}

View File

@ -2,34 +2,50 @@
package mount
// MakeShared ensures a mounted filesystem has the SHARED mount option enabled.
// See the supported options in flags.go for further reference.
func MakeShared(mountPoint string) error {
return ensureMountedAs(mountPoint, "shared")
}
// MakeRShared ensures a mounted filesystem has the RSHARED mount option enabled.
// See the supported options in flags.go for further reference.
func MakeRShared(mountPoint string) error {
return ensureMountedAs(mountPoint, "rshared")
}
// MakePrivate ensures a mounted filesystem has the PRIVATE mount option enabled.
// See the supported options in flags.go for further reference.
func MakePrivate(mountPoint string) error {
return ensureMountedAs(mountPoint, "private")
}
// MakeRPrivate ensures a mounted filesystem has the RPRIVATE mount option
// enabled. See the supported options in flags.go for further reference.
func MakeRPrivate(mountPoint string) error {
return ensureMountedAs(mountPoint, "rprivate")
}
// MakeSlave ensures a mounted filesystem has the SLAVE mount option enabled.
// See the supported options in flags.go for further reference.
func MakeSlave(mountPoint string) error {
return ensureMountedAs(mountPoint, "slave")
}
// MakeRSlave ensures a mounted filesystem has the RSLAVE mount option enabled.
// See the supported options in flags.go for further reference.
func MakeRSlave(mountPoint string) error {
return ensureMountedAs(mountPoint, "rslave")
}
// MakeUnbindable ensures a mounted filesystem has the UNBINDABLE mount option
// enabled. See the supported options in flags.go for further reference.
func MakeUnbindable(mountPoint string) error {
return ensureMountedAs(mountPoint, "unbindable")
}
// MakeRUnbindable ensures a mounted filesystem has the RUNBINDABLE mount
// option enabled. See the supported options in flags.go for further reference.
func MakeRUnbindable(mountPoint string) error {
return ensureMountedAs(mountPoint, "runbindable")
}

View File

@ -1,73 +0,0 @@
// +build !go1.3
package pools
import (
"bufio"
"io"
"github.com/docker/docker/pkg/ioutils"
)
var (
BufioReader32KPool *BufioReaderPool
BufioWriter32KPool *BufioWriterPool
)
const buffer32K = 32 * 1024
type BufioReaderPool struct {
size int
}
func init() {
BufioReader32KPool = newBufioReaderPoolWithSize(buffer32K)
BufioWriter32KPool = newBufioWriterPoolWithSize(buffer32K)
}
func newBufioReaderPoolWithSize(size int) *BufioReaderPool {
return &BufioReaderPool{size: size}
}
func (bufPool *BufioReaderPool) Get(r io.Reader) *bufio.Reader {
return bufio.NewReaderSize(r, bufPool.size)
}
func (bufPool *BufioReaderPool) Put(b *bufio.Reader) {
b.Reset(nil)
}
func (bufPool *BufioReaderPool) NewReadCloserWrapper(buf *bufio.Reader, r io.Reader) io.ReadCloser {
return ioutils.NewReadCloserWrapper(r, func() error {
if readCloser, ok := r.(io.ReadCloser); ok {
return readCloser.Close()
}
return nil
})
}
type BufioWriterPool struct {
size int
}
func newBufioWriterPoolWithSize(size int) *BufioWriterPool {
return &BufioWriterPool{size: size}
}
func (bufPool *BufioWriterPool) Get(w io.Writer) *bufio.Writer {
return bufio.NewWriterSize(w, bufPool.size)
}
func (bufPool *BufioWriterPool) Put(b *bufio.Writer) {
b.Reset(nil)
}
func (bufPool *BufioWriterPool) NewWriteCloserWrapper(buf *bufio.Writer, w io.Writer) io.WriteCloser {
return ioutils.NewWriteCloserWrapper(w, func() error {
buf.Flush()
if writeCloser, ok := w.(io.WriteCloser); ok {
return writeCloser.Close()
}
return nil
})
}

View File

@ -0,0 +1,191 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright 2014-2015 Docker, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,27 @@
Copyright (c) 2014-2015 The Docker & Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,5 @@
Package symlink implements EvalSymlinksInScope which is an extension of filepath.EvalSymlinks
from the [Go standard library](https://golang.org/pkg/path/filepath).
The code from filepath.EvalSymlinks has been adapted in fs.go.
Please read the LICENSE.BSD file that governs fs.go and LICENSE.APACHE for fs_test.go.

View File

@ -0,0 +1,131 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.BSD file.
// This code is a modified version of path/filepath/symlink.go from the Go standard library.
package symlink
import (
"bytes"
"errors"
"os"
"path/filepath"
"strings"
)
// FollowSymlinkInScope is a wrapper around evalSymlinksInScope that returns an absolute path
func FollowSymlinkInScope(path, root string) (string, error) {
path, err := filepath.Abs(path)
if err != nil {
return "", err
}
root, err = filepath.Abs(root)
if err != nil {
return "", err
}
return evalSymlinksInScope(path, root)
}
// evalSymlinksInScope will evaluate symlinks in `path` within a scope `root` and return
// a result guaranteed to be contained within the scope `root`, at the time of the call.
// Symlinks in `root` are not evaluated and left as-is.
// Errors encountered while attempting to evaluate symlinks in path will be returned.
// Non-existing paths are valid and do not constitute an error.
// `path` has to contain `root` as a prefix, or else an error will be returned.
// Trying to break out from `root` does not constitute an error.
//
// Example:
// If /foo/bar -> /outside,
// FollowSymlinkInScope("/foo/bar", "/foo") == "/foo/outside" instead of "/oustide"
//
// IMPORTANT: it is the caller's responsibility to call evalSymlinksInScope *after* relevant symlinks
// are created and not to create subsequently, additional symlinks that could potentially make a
// previously-safe path, unsafe. Example: if /foo/bar does not exist, evalSymlinksInScope("/foo/bar", "/foo")
// would return "/foo/bar". If one makes /foo/bar a symlink to /baz subsequently, then "/foo/bar" should
// no longer be considered safely contained in "/foo".
func evalSymlinksInScope(path, root string) (string, error) {
root = filepath.Clean(root)
if path == root {
return path, nil
}
if !strings.HasPrefix(path, root) {
return "", errors.New("evalSymlinksInScope: " + path + " is not in " + root)
}
const maxIter = 255
originalPath := path
// given root of "/a" and path of "/a/b/../../c" we want path to be "/b/../../c"
path = path[len(root):]
if root == string(filepath.Separator) {
path = string(filepath.Separator) + path
}
if !strings.HasPrefix(path, string(filepath.Separator)) {
return "", errors.New("evalSymlinksInScope: " + path + " is not in " + root)
}
path = filepath.Clean(path)
// consume path by taking each frontmost path element,
// expanding it if it's a symlink, and appending it to b
var b bytes.Buffer
// b here will always be considered to be the "current absolute path inside
// root" when we append paths to it, we also append a slash and use
// filepath.Clean after the loop to trim the trailing slash
for n := 0; path != ""; n++ {
if n > maxIter {
return "", errors.New("evalSymlinksInScope: too many links in " + originalPath)
}
// find next path component, p
i := strings.IndexRune(path, filepath.Separator)
var p string
if i == -1 {
p, path = path, ""
} else {
p, path = path[:i], path[i+1:]
}
if p == "" {
continue
}
// this takes a b.String() like "b/../" and a p like "c" and turns it
// into "/b/../c" which then gets filepath.Cleaned into "/c" and then
// root gets prepended and we Clean again (to remove any trailing slash
// if the first Clean gave us just "/")
cleanP := filepath.Clean(string(filepath.Separator) + b.String() + p)
if cleanP == string(filepath.Separator) {
// never Lstat "/" itself
b.Reset()
continue
}
fullP := filepath.Clean(root + cleanP)
fi, err := os.Lstat(fullP)
if os.IsNotExist(err) {
// if p does not exist, accept it
b.WriteString(p)
b.WriteRune(filepath.Separator)
continue
}
if err != nil {
return "", err
}
if fi.Mode()&os.ModeSymlink == 0 {
b.WriteString(p + string(filepath.Separator))
continue
}
// it's a symlink, put it at the front of path
dest, err := os.Readlink(fullP)
if err != nil {
return "", err
}
if filepath.IsAbs(dest) {
b.Reset()
}
path = dest + string(filepath.Separator) + path
}
// see note above on "fullP := ..." for why this is double-cleaned and
// what's happening here
return filepath.Clean(root + filepath.Clean(string(filepath.Separator)+b.String())), nil
}

View File

@ -1,2 +0,0 @@
Michael Crosby <michael@crosbymichael.com> (@crosbymichael)
Victor Vieux <vieux@docker.com> (@vieux)

View File

@ -1,16 +0,0 @@
// +build !windows
package system
import (
"syscall"
)
func Lstat(path string) (*Stat, error) {
s := &syscall.Stat_t{}
err := syscall.Lstat(path, s)
if err != nil {
return nil, err
}
return fromStatT(s)
}

View File

@ -1,8 +0,0 @@
// +build windows
package system
func Lstat(path string) (*Stat, error) {
// should not be called on cli code path
return nil, ErrNotSupportedPlatform
}

View File

@ -1,42 +0,0 @@
package system
import (
"syscall"
)
type Stat struct {
mode uint32
uid uint32
gid uint32
rdev uint64
size int64
mtim syscall.Timespec
}
func (s Stat) Mode() uint32 {
return s.mode
}
func (s Stat) Uid() uint32 {
return s.uid
}
func (s Stat) Gid() uint32 {
return s.gid
}
func (s Stat) Rdev() uint64 {
return s.rdev
}
func (s Stat) Size() int64 {
return s.size
}
func (s Stat) Mtim() syscall.Timespec {
return s.mtim
}
func (s Stat) GetLastModification() syscall.Timespec {
return s.Mtim()
}

View File

@ -1,14 +0,0 @@
package system
import (
"syscall"
)
func fromStatT(s *syscall.Stat_t) (*Stat, error) {
return &Stat{size: s.Size,
mode: s.Mode,
uid: s.Uid,
gid: s.Gid,
rdev: s.Rdev,
mtim: s.Mtim}, nil
}

View File

@ -1,16 +0,0 @@
// +build !linux,!windows
package system
import (
"syscall"
)
func fromStatT(s *syscall.Stat_t) (*Stat, error) {
return &Stat{size: s.Size,
mode: uint32(s.Mode),
uid: s.Uid,
gid: s.Gid,
rdev: uint64(s.Rdev),
mtim: s.Mtimespec}, nil
}

View File

@ -1,12 +0,0 @@
// +build windows
package system
import (
"errors"
"syscall"
)
func fromStatT(s *syscall.Win32FileAttributeData) (*Stat, error) {
return nil, errors.New("fromStatT should not be called on windows path")
}

View File

@ -1,2 +0,0 @@
Victor Vieux <vieux@docker.com> (@vieux)
Jessie Frazelle <jess@docker.com> (@jfrazelle)

View File

@ -27,5 +27,5 @@ func HumanDuration(d time.Duration) string {
} else if hours < 24*365*2 {
return fmt.Sprintf("%d months", hours/24/30)
}
return fmt.Sprintf("%f years", d.Hours()/24/365)
return fmt.Sprintf("%d years", int(d.Hours())/24/365)
}

View File

@ -37,23 +37,25 @@ var (
var decimapAbbrs = []string{"B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"}
var binaryAbbrs = []string{"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"}
// CustomSize returns a human-readable approximation of a size
// using custom format
func CustomSize(format string, size float64, base float64, _map []string) string {
i := 0
for size >= base {
size = size / base
i++
}
return fmt.Sprintf(format, size, _map[i])
}
// HumanSize returns a human-readable approximation of a size
// using SI standard (eg. "44kB", "17MB")
func HumanSize(size float64) string {
return intToString(float64(size), 1000.0, decimapAbbrs)
return CustomSize("%.4g %s", size, 1000.0, decimapAbbrs)
}
func BytesSize(size float64) string {
return intToString(size, 1024.0, binaryAbbrs)
}
func intToString(size, unit float64, _map []string) string {
i := 0
for size >= unit {
size = size / unit
i++
}
return fmt.Sprintf("%.4g %s", size, _map[i])
return CustomSize("%.4g %s", size, 1024.0, binaryAbbrs)
}
// FromHumanSize returns an integer from a human-readable specification of a

View File

@ -1,305 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package tar implements access to tar archives.
// It aims to cover most of the variations, including those produced
// by GNU and BSD tars.
//
// References:
// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5
// http://www.gnu.org/software/tar/manual/html_node/Standard.html
// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html
package tar
import (
"bytes"
"errors"
"fmt"
"os"
"path"
"time"
)
const (
blockSize = 512
// Types
TypeReg = '0' // regular file
TypeRegA = '\x00' // regular file
TypeLink = '1' // hard link
TypeSymlink = '2' // symbolic link
TypeChar = '3' // character device node
TypeBlock = '4' // block device node
TypeDir = '5' // directory
TypeFifo = '6' // fifo node
TypeCont = '7' // reserved
TypeXHeader = 'x' // extended header
TypeXGlobalHeader = 'g' // global extended header
TypeGNULongName = 'L' // Next file has a long name
TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name
TypeGNUSparse = 'S' // sparse file
)
// A Header represents a single header in a tar archive.
// Some fields may not be populated.
type Header struct {
Name string // name of header file entry
Mode int64 // permission and mode bits
Uid int // user id of owner
Gid int // group id of owner
Size int64 // length in bytes
ModTime time.Time // modified time
Typeflag byte // type of header entry
Linkname string // target name of link
Uname string // user name of owner
Gname string // group name of owner
Devmajor int64 // major number of character or block device
Devminor int64 // minor number of character or block device
AccessTime time.Time // access time
ChangeTime time.Time // status change time
Xattrs map[string]string
}
// File name constants from the tar spec.
const (
fileNameSize = 100 // Maximum number of bytes in a standard tar name.
fileNamePrefixSize = 155 // Maximum number of ustar extension bytes.
)
// FileInfo returns an os.FileInfo for the Header.
func (h *Header) FileInfo() os.FileInfo {
return headerFileInfo{h}
}
// headerFileInfo implements os.FileInfo.
type headerFileInfo struct {
h *Header
}
func (fi headerFileInfo) Size() int64 { return fi.h.Size }
func (fi headerFileInfo) IsDir() bool { return fi.Mode().IsDir() }
func (fi headerFileInfo) ModTime() time.Time { return fi.h.ModTime }
func (fi headerFileInfo) Sys() interface{} { return fi.h }
// Name returns the base name of the file.
func (fi headerFileInfo) Name() string {
if fi.IsDir() {
return path.Base(path.Clean(fi.h.Name))
}
return path.Base(fi.h.Name)
}
// Mode returns the permission and mode bits for the headerFileInfo.
func (fi headerFileInfo) Mode() (mode os.FileMode) {
// Set file permission bits.
mode = os.FileMode(fi.h.Mode).Perm()
// Set setuid, setgid and sticky bits.
if fi.h.Mode&c_ISUID != 0 {
// setuid
mode |= os.ModeSetuid
}
if fi.h.Mode&c_ISGID != 0 {
// setgid
mode |= os.ModeSetgid
}
if fi.h.Mode&c_ISVTX != 0 {
// sticky
mode |= os.ModeSticky
}
// Set file mode bits.
// clear perm, setuid, setgid and sticky bits.
m := os.FileMode(fi.h.Mode) &^ 07777
if m == c_ISDIR {
// directory
mode |= os.ModeDir
}
if m == c_ISFIFO {
// named pipe (FIFO)
mode |= os.ModeNamedPipe
}
if m == c_ISLNK {
// symbolic link
mode |= os.ModeSymlink
}
if m == c_ISBLK {
// device file
mode |= os.ModeDevice
}
if m == c_ISCHR {
// Unix character device
mode |= os.ModeDevice
mode |= os.ModeCharDevice
}
if m == c_ISSOCK {
// Unix domain socket
mode |= os.ModeSocket
}
switch fi.h.Typeflag {
case TypeLink, TypeSymlink:
// hard link, symbolic link
mode |= os.ModeSymlink
case TypeChar:
// character device node
mode |= os.ModeDevice
mode |= os.ModeCharDevice
case TypeBlock:
// block device node
mode |= os.ModeDevice
case TypeDir:
// directory
mode |= os.ModeDir
case TypeFifo:
// fifo node
mode |= os.ModeNamedPipe
}
return mode
}
// sysStat, if non-nil, populates h from system-dependent fields of fi.
var sysStat func(fi os.FileInfo, h *Header) error
// Mode constants from the tar spec.
const (
c_ISUID = 04000 // Set uid
c_ISGID = 02000 // Set gid
c_ISVTX = 01000 // Save text (sticky bit)
c_ISDIR = 040000 // Directory
c_ISFIFO = 010000 // FIFO
c_ISREG = 0100000 // Regular file
c_ISLNK = 0120000 // Symbolic link
c_ISBLK = 060000 // Block special file
c_ISCHR = 020000 // Character special file
c_ISSOCK = 0140000 // Socket
)
// Keywords for the PAX Extended Header
const (
paxAtime = "atime"
paxCharset = "charset"
paxComment = "comment"
paxCtime = "ctime" // please note that ctime is not a valid pax header.
paxGid = "gid"
paxGname = "gname"
paxLinkpath = "linkpath"
paxMtime = "mtime"
paxPath = "path"
paxSize = "size"
paxUid = "uid"
paxUname = "uname"
paxXattr = "SCHILY.xattr."
paxNone = ""
)
// FileInfoHeader creates a partially-populated Header from fi.
// If fi describes a symlink, FileInfoHeader records link as the link target.
// If fi describes a directory, a slash is appended to the name.
// Because os.FileInfo's Name method returns only the base name of
// the file it describes, it may be necessary to modify the Name field
// of the returned header to provide the full path name of the file.
func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) {
if fi == nil {
return nil, errors.New("tar: FileInfo is nil")
}
fm := fi.Mode()
h := &Header{
Name: fi.Name(),
ModTime: fi.ModTime(),
Mode: int64(fm.Perm()), // or'd with c_IS* constants later
}
switch {
case fm.IsRegular():
h.Mode |= c_ISREG
h.Typeflag = TypeReg
h.Size = fi.Size()
case fi.IsDir():
h.Typeflag = TypeDir
h.Mode |= c_ISDIR
h.Name += "/"
case fm&os.ModeSymlink != 0:
h.Typeflag = TypeSymlink
h.Mode |= c_ISLNK
h.Linkname = link
case fm&os.ModeDevice != 0:
if fm&os.ModeCharDevice != 0 {
h.Mode |= c_ISCHR
h.Typeflag = TypeChar
} else {
h.Mode |= c_ISBLK
h.Typeflag = TypeBlock
}
case fm&os.ModeNamedPipe != 0:
h.Typeflag = TypeFifo
h.Mode |= c_ISFIFO
case fm&os.ModeSocket != 0:
h.Mode |= c_ISSOCK
default:
return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm)
}
if fm&os.ModeSetuid != 0 {
h.Mode |= c_ISUID
}
if fm&os.ModeSetgid != 0 {
h.Mode |= c_ISGID
}
if fm&os.ModeSticky != 0 {
h.Mode |= c_ISVTX
}
if sysStat != nil {
return h, sysStat(fi, h)
}
return h, nil
}
var zeroBlock = make([]byte, blockSize)
// POSIX specifies a sum of the unsigned byte values, but the Sun tar uses signed byte values.
// We compute and return both.
func checksum(header []byte) (unsigned int64, signed int64) {
for i := 0; i < len(header); i++ {
if i == 148 {
// The chksum field (header[148:156]) is special: it should be treated as space bytes.
unsigned += ' ' * 8
signed += ' ' * 8
i += 7
continue
}
unsigned += int64(header[i])
signed += int64(int8(header[i]))
}
return
}
type slicer []byte
func (sp *slicer) next(n int) (b []byte) {
s := *sp
b, *sp = s[0:n], s[n:]
return
}
func isASCII(s string) bool {
for _, c := range s {
if c >= 0x80 {
return false
}
}
return true
}
func toASCII(s string) string {
if isASCII(s) {
return s
}
var buf bytes.Buffer
for _, c := range s {
if c < 0x80 {
buf.WriteByte(byte(c))
}
}
return buf.String()
}

View File

@ -1,820 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tar
// TODO(dsymonds):
// - pax extensions
import (
"bytes"
"errors"
"io"
"io/ioutil"
"os"
"strconv"
"strings"
"time"
)
var (
ErrHeader = errors.New("archive/tar: invalid tar header")
)
const maxNanoSecondIntSize = 9
// A Reader provides sequential access to the contents of a tar archive.
// A tar archive consists of a sequence of files.
// The Next method advances to the next file in the archive (including the first),
// and then it can be treated as an io.Reader to access the file's data.
type Reader struct {
r io.Reader
err error
pad int64 // amount of padding (ignored) after current file entry
curr numBytesReader // reader for current file entry
hdrBuff [blockSize]byte // buffer to use in readHeader
}
// A numBytesReader is an io.Reader with a numBytes method, returning the number
// of bytes remaining in the underlying encoded data.
type numBytesReader interface {
io.Reader
numBytes() int64
}
// A regFileReader is a numBytesReader for reading file data from a tar archive.
type regFileReader struct {
r io.Reader // underlying reader
nb int64 // number of unread bytes for current file entry
}
// A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive.
type sparseFileReader struct {
rfr *regFileReader // reads the sparse-encoded file data
sp []sparseEntry // the sparse map for the file
pos int64 // keeps track of file position
tot int64 // total size of the file
}
// Keywords for GNU sparse files in a PAX extended header
const (
paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
paxGNUSparseOffset = "GNU.sparse.offset"
paxGNUSparseNumBytes = "GNU.sparse.numbytes"
paxGNUSparseMap = "GNU.sparse.map"
paxGNUSparseName = "GNU.sparse.name"
paxGNUSparseMajor = "GNU.sparse.major"
paxGNUSparseMinor = "GNU.sparse.minor"
paxGNUSparseSize = "GNU.sparse.size"
paxGNUSparseRealSize = "GNU.sparse.realsize"
)
// Keywords for old GNU sparse headers
const (
oldGNUSparseMainHeaderOffset = 386
oldGNUSparseMainHeaderIsExtendedOffset = 482
oldGNUSparseMainHeaderNumEntries = 4
oldGNUSparseExtendedHeaderIsExtendedOffset = 504
oldGNUSparseExtendedHeaderNumEntries = 21
oldGNUSparseOffsetSize = 12
oldGNUSparseNumBytesSize = 12
)
// NewReader creates a new Reader reading from r.
func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
// Next advances to the next entry in the tar archive.
func (tr *Reader) Next() (*Header, error) {
var hdr *Header
if tr.err == nil {
tr.skipUnread()
}
if tr.err != nil {
return hdr, tr.err
}
hdr = tr.readHeader()
if hdr == nil {
return hdr, tr.err
}
// Check for PAX/GNU header.
switch hdr.Typeflag {
case TypeXHeader:
// PAX extended header
headers, err := parsePAX(tr)
if err != nil {
return nil, err
}
// We actually read the whole file,
// but this skips alignment padding
tr.skipUnread()
hdr = tr.readHeader()
mergePAX(hdr, headers)
// Check for a PAX format sparse file
sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers)
if err != nil {
tr.err = err
return nil, err
}
if sp != nil {
// Current file is a PAX format GNU sparse file.
// Set the current file reader to a sparse file reader.
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
}
return hdr, nil
case TypeGNULongName:
// We have a GNU long name header. Its contents are the real file name.
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
hdr, err := tr.Next()
hdr.Name = cString(realname)
return hdr, err
case TypeGNULongLink:
// We have a GNU long link header.
realname, err := ioutil.ReadAll(tr)
if err != nil {
return nil, err
}
hdr, err := tr.Next()
hdr.Linkname = cString(realname)
return hdr, err
}
return hdr, tr.err
}
// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
// be treated as a regular file.
func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
var sparseFormat string
// Check for sparse format indicators
major, majorOk := headers[paxGNUSparseMajor]
minor, minorOk := headers[paxGNUSparseMinor]
sparseName, sparseNameOk := headers[paxGNUSparseName]
_, sparseMapOk := headers[paxGNUSparseMap]
sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
// Identify which, if any, sparse format applies from which PAX headers are set
if majorOk && minorOk {
sparseFormat = major + "." + minor
} else if sparseNameOk && sparseMapOk {
sparseFormat = "0.1"
} else if sparseSizeOk {
sparseFormat = "0.0"
} else {
// Not a PAX format GNU sparse file.
return nil, nil
}
// Check for unknown sparse format
if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
return nil, nil
}
// Update hdr from GNU sparse PAX headers
if sparseNameOk {
hdr.Name = sparseName
}
if sparseSizeOk {
realSize, err := strconv.ParseInt(sparseSize, 10, 0)
if err != nil {
return nil, ErrHeader
}
hdr.Size = realSize
} else if sparseRealSizeOk {
realSize, err := strconv.ParseInt(sparseRealSize, 10, 0)
if err != nil {
return nil, ErrHeader
}
hdr.Size = realSize
}
// Set up the sparse map, according to the particular sparse format in use
var sp []sparseEntry
var err error
switch sparseFormat {
case "0.0", "0.1":
sp, err = readGNUSparseMap0x1(headers)
case "1.0":
sp, err = readGNUSparseMap1x0(tr.curr)
}
return sp, err
}
// mergePAX merges well known headers according to PAX standard.
// In general headers with the same name as those found
// in the header struct overwrite those found in the header
// struct with higher precision or longer values. Esp. useful
// for name and linkname fields.
func mergePAX(hdr *Header, headers map[string]string) error {
for k, v := range headers {
switch k {
case paxPath:
hdr.Name = v
case paxLinkpath:
hdr.Linkname = v
case paxGname:
hdr.Gname = v
case paxUname:
hdr.Uname = v
case paxUid:
uid, err := strconv.ParseInt(v, 10, 0)
if err != nil {
return err
}
hdr.Uid = int(uid)
case paxGid:
gid, err := strconv.ParseInt(v, 10, 0)
if err != nil {
return err
}
hdr.Gid = int(gid)
case paxAtime:
t, err := parsePAXTime(v)
if err != nil {
return err
}
hdr.AccessTime = t
case paxMtime:
t, err := parsePAXTime(v)
if err != nil {
return err
}
hdr.ModTime = t
case paxCtime:
t, err := parsePAXTime(v)
if err != nil {
return err
}
hdr.ChangeTime = t
case paxSize:
size, err := strconv.ParseInt(v, 10, 0)
if err != nil {
return err
}
hdr.Size = int64(size)
default:
if strings.HasPrefix(k, paxXattr) {
if hdr.Xattrs == nil {
hdr.Xattrs = make(map[string]string)
}
hdr.Xattrs[k[len(paxXattr):]] = v
}
}
}
return nil
}
// parsePAXTime takes a string of the form %d.%d as described in
// the PAX specification.
func parsePAXTime(t string) (time.Time, error) {
buf := []byte(t)
pos := bytes.IndexByte(buf, '.')
var seconds, nanoseconds int64
var err error
if pos == -1 {
seconds, err = strconv.ParseInt(t, 10, 0)
if err != nil {
return time.Time{}, err
}
} else {
seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
if err != nil {
return time.Time{}, err
}
nano_buf := string(buf[pos+1:])
// Pad as needed before converting to a decimal.
// For example .030 -> .030000000 -> 30000000 nanoseconds
if len(nano_buf) < maxNanoSecondIntSize {
// Right pad
nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
} else if len(nano_buf) > maxNanoSecondIntSize {
// Right truncate
nano_buf = nano_buf[:maxNanoSecondIntSize]
}
nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
if err != nil {
return time.Time{}, err
}
}
ts := time.Unix(seconds, nanoseconds)
return ts, nil
}
// parsePAX parses PAX headers.
// If an extended header (type 'x') is invalid, ErrHeader is returned
func parsePAX(r io.Reader) (map[string]string, error) {
buf, err := ioutil.ReadAll(r)
if err != nil {
return nil, err
}
// For GNU PAX sparse format 0.0 support.
// This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
var sparseMap bytes.Buffer
headers := make(map[string]string)
// Each record is constructed as
// "%d %s=%s\n", length, keyword, value
for len(buf) > 0 {
// or the header was empty to start with.
var sp int
// The size field ends at the first space.
sp = bytes.IndexByte(buf, ' ')
if sp == -1 {
return nil, ErrHeader
}
// Parse the first token as a decimal integer.
n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
if err != nil {
return nil, ErrHeader
}
// Extract everything between the decimal and the n -1 on the
// beginning to eat the ' ', -1 on the end to skip the newline.
var record []byte
record, buf = buf[sp+1:n-1], buf[n:]
// The first equals is guaranteed to mark the end of the key.
// Everything else is value.
eq := bytes.IndexByte(record, '=')
if eq == -1 {
return nil, ErrHeader
}
key, value := record[:eq], record[eq+1:]
keyStr := string(key)
if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
// GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
sparseMap.Write(value)
sparseMap.Write([]byte{','})
} else {
// Normal key. Set the value in the headers map.
headers[keyStr] = string(value)
}
}
if sparseMap.Len() != 0 {
// Add sparse info to headers, chopping off the extra comma
sparseMap.Truncate(sparseMap.Len() - 1)
headers[paxGNUSparseMap] = sparseMap.String()
}
return headers, nil
}
// cString parses bytes as a NUL-terminated C-style string.
// If a NUL byte is not found then the whole slice is returned as a string.
func cString(b []byte) string {
n := 0
for n < len(b) && b[n] != 0 {
n++
}
return string(b[0:n])
}
func (tr *Reader) octal(b []byte) int64 {
// Check for binary format first.
if len(b) > 0 && b[0]&0x80 != 0 {
var x int64
for i, c := range b {
if i == 0 {
c &= 0x7f // ignore signal bit in first byte
}
x = x<<8 | int64(c)
}
return x
}
// Because unused fields are filled with NULs, we need
// to skip leading NULs. Fields may also be padded with
// spaces or NULs.
// So we remove leading and trailing NULs and spaces to
// be sure.
b = bytes.Trim(b, " \x00")
if len(b) == 0 {
return 0
}
x, err := strconv.ParseUint(cString(b), 8, 64)
if err != nil {
tr.err = err
}
return int64(x)
}
// skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
func (tr *Reader) skipUnread() {
nr := tr.numBytes() + tr.pad // number of bytes to skip
tr.curr, tr.pad = nil, 0
if sr, ok := tr.r.(io.Seeker); ok {
if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
return
}
}
_, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
}
func (tr *Reader) verifyChecksum(header []byte) bool {
if tr.err != nil {
return false
}
given := tr.octal(header[148:156])
unsigned, signed := checksum(header)
return given == unsigned || given == signed
}
func (tr *Reader) readHeader() *Header {
header := tr.hdrBuff[:]
copy(header, zeroBlock)
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
return nil
}
// Two blocks of zero bytes marks the end of the archive.
if bytes.Equal(header, zeroBlock[0:blockSize]) {
if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
return nil
}
if bytes.Equal(header, zeroBlock[0:blockSize]) {
tr.err = io.EOF
} else {
tr.err = ErrHeader // zero block and then non-zero block
}
return nil
}
if !tr.verifyChecksum(header) {
tr.err = ErrHeader
return nil
}
// Unpack
hdr := new(Header)
s := slicer(header)
hdr.Name = cString(s.next(100))
hdr.Mode = tr.octal(s.next(8))
hdr.Uid = int(tr.octal(s.next(8)))
hdr.Gid = int(tr.octal(s.next(8)))
hdr.Size = tr.octal(s.next(12))
hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
s.next(8) // chksum
hdr.Typeflag = s.next(1)[0]
hdr.Linkname = cString(s.next(100))
// The remainder of the header depends on the value of magic.
// The original (v7) version of tar had no explicit magic field,
// so its magic bytes, like the rest of the block, are NULs.
magic := string(s.next(8)) // contains version field as well.
var format string
switch {
case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988)
if string(header[508:512]) == "tar\x00" {
format = "star"
} else {
format = "posix"
}
case magic == "ustar \x00": // old GNU tar
format = "gnu"
}
switch format {
case "posix", "gnu", "star":
hdr.Uname = cString(s.next(32))
hdr.Gname = cString(s.next(32))
devmajor := s.next(8)
devminor := s.next(8)
if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
hdr.Devmajor = tr.octal(devmajor)
hdr.Devminor = tr.octal(devminor)
}
var prefix string
switch format {
case "posix", "gnu":
prefix = cString(s.next(155))
case "star":
prefix = cString(s.next(131))
hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0)
hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0)
}
if len(prefix) > 0 {
hdr.Name = prefix + "/" + hdr.Name
}
}
if tr.err != nil {
tr.err = ErrHeader
return nil
}
// Maximum value of hdr.Size is 64 GB (12 octal digits),
// so there's no risk of int64 overflowing.
nb := int64(hdr.Size)
tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
// Set the current file reader.
tr.curr = &regFileReader{r: tr.r, nb: nb}
// Check for old GNU sparse format entry.
if hdr.Typeflag == TypeGNUSparse {
// Get the real size of the file.
hdr.Size = tr.octal(header[483:495])
// Read the sparse map.
sp := tr.readOldGNUSparseMap(header)
if tr.err != nil {
return nil
}
// Current file is a GNU sparse file. Update the current file reader.
tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
}
return hdr
}
// A sparseEntry holds a single entry in a sparse file's sparse map.
// A sparse entry indicates the offset and size in a sparse file of a
// block of data.
type sparseEntry struct {
offset int64
numBytes int64
}
// readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
// The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
// then one or more extension headers are used to store the rest of the sparse map.
func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
spCap := oldGNUSparseMainHeaderNumEntries
if isExtended {
spCap += oldGNUSparseExtendedHeaderNumEntries
}
sp := make([]sparseEntry, 0, spCap)
s := slicer(header[oldGNUSparseMainHeaderOffset:])
// Read the four entries from the main tar header
for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
offset := tr.octal(s.next(oldGNUSparseOffsetSize))
numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
if tr.err != nil {
tr.err = ErrHeader
return nil
}
if offset == 0 && numBytes == 0 {
break
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
}
for isExtended {
// There are more entries. Read an extension header and parse its entries.
sparseHeader := make([]byte, blockSize)
if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil {
return nil
}
isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
s = slicer(sparseHeader)
for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
offset := tr.octal(s.next(oldGNUSparseOffsetSize))
numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
if tr.err != nil {
tr.err = ErrHeader
return nil
}
if offset == 0 && numBytes == 0 {
break
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
}
}
return sp
}
// readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0.
// The sparse map is stored just before the file data and padded out to the nearest block boundary.
func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
buf := make([]byte, 2*blockSize)
sparseHeader := buf[:blockSize]
// readDecimal is a helper function to read a decimal integer from the sparse map
// while making sure to read from the file in blocks of size blockSize
readDecimal := func() (int64, error) {
// Look for newline
nl := bytes.IndexByte(sparseHeader, '\n')
if nl == -1 {
if len(sparseHeader) >= blockSize {
// This is an error
return 0, ErrHeader
}
oldLen := len(sparseHeader)
newLen := oldLen + blockSize
if cap(sparseHeader) < newLen {
// There's more header, but we need to make room for the next block
copy(buf, sparseHeader)
sparseHeader = buf[:newLen]
} else {
// There's more header, and we can just reslice
sparseHeader = sparseHeader[:newLen]
}
// Now that sparseHeader is large enough, read next block
if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil {
return 0, err
}
// Look for a newline in the new data
nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n')
if nl == -1 {
// This is an error
return 0, ErrHeader
}
nl += oldLen // We want the position from the beginning
}
// Now that we've found a newline, read a number
n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0)
if err != nil {
return 0, ErrHeader
}
// Update sparseHeader to consume this number
sparseHeader = sparseHeader[nl+1:]
return n, nil
}
// Read the first block
if _, err := io.ReadFull(r, sparseHeader); err != nil {
return nil, err
}
// The first line contains the number of entries
numEntries, err := readDecimal()
if err != nil {
return nil, err
}
// Read all the entries
sp := make([]sparseEntry, 0, numEntries)
for i := int64(0); i < numEntries; i++ {
// Read the offset
offset, err := readDecimal()
if err != nil {
return nil, err
}
// Read numBytes
numBytes, err := readDecimal()
if err != nil {
return nil, err
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
}
return sp, nil
}
// readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1.
// The sparse map is stored in the PAX headers.
func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) {
// Get number of entries
numEntriesStr, ok := headers[paxGNUSparseNumBlocks]
if !ok {
return nil, ErrHeader
}
numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0)
if err != nil {
return nil, ErrHeader
}
sparseMap := strings.Split(headers[paxGNUSparseMap], ",")
// There should be two numbers in sparseMap for each entry
if int64(len(sparseMap)) != 2*numEntries {
return nil, ErrHeader
}
// Loop through the entries in the sparse map
sp := make([]sparseEntry, 0, numEntries)
for i := int64(0); i < numEntries; i++ {
offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0)
if err != nil {
return nil, ErrHeader
}
numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0)
if err != nil {
return nil, ErrHeader
}
sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
}
return sp, nil
}
// numBytes returns the number of bytes left to read in the current file's entry
// in the tar archive, or 0 if there is no current file.
func (tr *Reader) numBytes() int64 {
if tr.curr == nil {
// No current file, so no bytes
return 0
}
return tr.curr.numBytes()
}
// Read reads from the current entry in the tar archive.
// It returns 0, io.EOF when it reaches the end of that entry,
// until Next is called to advance to the next entry.
func (tr *Reader) Read(b []byte) (n int, err error) {
if tr.curr == nil {
return 0, io.EOF
}
n, err = tr.curr.Read(b)
if err != nil && err != io.EOF {
tr.err = err
}
return
}
func (rfr *regFileReader) Read(b []byte) (n int, err error) {
if rfr.nb == 0 {
// file consumed
return 0, io.EOF
}
if int64(len(b)) > rfr.nb {
b = b[0:rfr.nb]
}
n, err = rfr.r.Read(b)
rfr.nb -= int64(n)
if err == io.EOF && rfr.nb > 0 {
err = io.ErrUnexpectedEOF
}
return
}
// numBytes returns the number of bytes left to read in the file's data in the tar archive.
func (rfr *regFileReader) numBytes() int64 {
return rfr.nb
}
// readHole reads a sparse file hole ending at offset toOffset
func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
n64 := toOffset - sfr.pos
if n64 > int64(len(b)) {
n64 = int64(len(b))
}
n := int(n64)
for i := 0; i < n; i++ {
b[i] = 0
}
sfr.pos += n64
return n
}
// Read reads the sparse file data in expanded form.
func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
if len(sfr.sp) == 0 {
// No more data fragments to read from.
if sfr.pos < sfr.tot {
// We're in the last hole
n = sfr.readHole(b, sfr.tot)
return
}
// Otherwise, we're at the end of the file
return 0, io.EOF
}
if sfr.pos < sfr.sp[0].offset {
// We're in a hole
n = sfr.readHole(b, sfr.sp[0].offset)
return
}
// We're not in a hole, so we'll read from the next data fragment
posInFragment := sfr.pos - sfr.sp[0].offset
bytesLeft := sfr.sp[0].numBytes - posInFragment
if int64(len(b)) > bytesLeft {
b = b[0:bytesLeft]
}
n, err = sfr.rfr.Read(b)
sfr.pos += int64(n)
if int64(n) == bytesLeft {
// We're done with this fragment
sfr.sp = sfr.sp[1:]
}
if err == io.EOF && sfr.pos < sfr.tot {
// We reached the end of the last fragment's data, but there's a final hole
err = nil
}
return
}
// numBytes returns the number of bytes left to read in the sparse file's
// sparse-encoded data in the tar archive.
func (sfr *sparseFileReader) numBytes() int64 {
return sfr.rfr.nb
}

View File

@ -1,20 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build linux dragonfly openbsd solaris
package tar
import (
"syscall"
"time"
)
func statAtime(st *syscall.Stat_t) time.Time {
return time.Unix(st.Atim.Unix())
}
func statCtime(st *syscall.Stat_t) time.Time {
return time.Unix(st.Ctim.Unix())
}

View File

@ -1,20 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build darwin freebsd netbsd
package tar
import (
"syscall"
"time"
)
func statAtime(st *syscall.Stat_t) time.Time {
return time.Unix(st.Atimespec.Unix())
}
func statCtime(st *syscall.Stat_t) time.Time {
return time.Unix(st.Ctimespec.Unix())
}

View File

@ -1,32 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build linux darwin dragonfly freebsd openbsd netbsd solaris
package tar
import (
"os"
"syscall"
)
func init() {
sysStat = statUnix
}
func statUnix(fi os.FileInfo, h *Header) error {
sys, ok := fi.Sys().(*syscall.Stat_t)
if !ok {
return nil
}
h.Uid = int(sys.Uid)
h.Gid = int(sys.Gid)
// TODO(bradfitz): populate username & group. os/user
// doesn't cache LookupId lookups, and lacks group
// lookup functions.
h.AccessTime = statAtime(sys)
h.ChangeTime = statCtime(sys)
// TODO(bradfitz): major/minor device numbers?
return nil
}

View File

@ -1,396 +0,0 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tar
// TODO(dsymonds):
// - catch more errors (no first header, etc.)
import (
"bytes"
"errors"
"fmt"
"io"
"os"
"path"
"strconv"
"strings"
"time"
)
var (
ErrWriteTooLong = errors.New("archive/tar: write too long")
ErrFieldTooLong = errors.New("archive/tar: header field too long")
ErrWriteAfterClose = errors.New("archive/tar: write after close")
errNameTooLong = errors.New("archive/tar: name too long")
errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values")
)
// A Writer provides sequential writing of a tar archive in POSIX.1 format.
// A tar archive consists of a sequence of files.
// Call WriteHeader to begin a new file, and then call Write to supply that file's data,
// writing at most hdr.Size bytes in total.
type Writer struct {
w io.Writer
err error
nb int64 // number of unwritten bytes for current file entry
pad int64 // amount of padding to write after current file entry
closed bool
usedBinary bool // whether the binary numeric field extension was used
preferPax bool // use pax header instead of binary numeric header
hdrBuff [blockSize]byte // buffer to use in writeHeader when writing a regular header
paxHdrBuff [blockSize]byte // buffer to use in writeHeader when writing a pax header
}
// NewWriter creates a new Writer writing to w.
func NewWriter(w io.Writer) *Writer { return &Writer{w: w} }
// Flush finishes writing the current file (optional).
func (tw *Writer) Flush() error {
if tw.nb > 0 {
tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb)
return tw.err
}
n := tw.nb + tw.pad
for n > 0 && tw.err == nil {
nr := n
if nr > blockSize {
nr = blockSize
}
var nw int
nw, tw.err = tw.w.Write(zeroBlock[0:nr])
n -= int64(nw)
}
tw.nb = 0
tw.pad = 0
return tw.err
}
// Write s into b, terminating it with a NUL if there is room.
// If the value is too long for the field and allowPax is true add a paxheader record instead
func (tw *Writer) cString(b []byte, s string, allowPax bool, paxKeyword string, paxHeaders map[string]string) {
needsPaxHeader := allowPax && len(s) > len(b) || !isASCII(s)
if needsPaxHeader {
paxHeaders[paxKeyword] = s
return
}
if len(s) > len(b) {
if tw.err == nil {
tw.err = ErrFieldTooLong
}
return
}
ascii := toASCII(s)
copy(b, ascii)
if len(ascii) < len(b) {
b[len(ascii)] = 0
}
}
// Encode x as an octal ASCII string and write it into b with leading zeros.
func (tw *Writer) octal(b []byte, x int64) {
s := strconv.FormatInt(x, 8)
// leading zeros, but leave room for a NUL.
for len(s)+1 < len(b) {
s = "0" + s
}
tw.cString(b, s, false, paxNone, nil)
}
// Write x into b, either as octal or as binary (GNUtar/star extension).
// If the value is too long for the field and writingPax is enabled both for the field and the add a paxheader record instead
func (tw *Writer) numeric(b []byte, x int64, allowPax bool, paxKeyword string, paxHeaders map[string]string) {
// Try octal first.
s := strconv.FormatInt(x, 8)
if len(s) < len(b) {
tw.octal(b, x)
return
}
// If it is too long for octal, and pax is preferred, use a pax header
if allowPax && tw.preferPax {
tw.octal(b, 0)
s := strconv.FormatInt(x, 10)
paxHeaders[paxKeyword] = s
return
}
// Too big: use binary (big-endian).
tw.usedBinary = true
for i := len(b) - 1; x > 0 && i >= 0; i-- {
b[i] = byte(x)
x >>= 8
}
b[0] |= 0x80 // highest bit indicates binary format
}
var (
minTime = time.Unix(0, 0)
// There is room for 11 octal digits (33 bits) of mtime.
maxTime = minTime.Add((1<<33 - 1) * time.Second)
)
// WriteHeader writes hdr and prepares to accept the file's contents.
// WriteHeader calls Flush if it is not the first header.
// Calling after a Close will return ErrWriteAfterClose.
func (tw *Writer) WriteHeader(hdr *Header) error {
return tw.writeHeader(hdr, true)
}
// WriteHeader writes hdr and prepares to accept the file's contents.
// WriteHeader calls Flush if it is not the first header.
// Calling after a Close will return ErrWriteAfterClose.
// As this method is called internally by writePax header to allow it to
// suppress writing the pax header.
func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error {
if tw.closed {
return ErrWriteAfterClose
}
if tw.err == nil {
tw.Flush()
}
if tw.err != nil {
return tw.err
}
// a map to hold pax header records, if any are needed
paxHeaders := make(map[string]string)
// TODO(shanemhansen): we might want to use PAX headers for
// subsecond time resolution, but for now let's just capture
// too long fields or non ascii characters
var header []byte
// We need to select which scratch buffer to use carefully,
// since this method is called recursively to write PAX headers.
// If allowPax is true, this is the non-recursive call, and we will use hdrBuff.
// If allowPax is false, we are being called by writePAXHeader, and hdrBuff is
// already being used by the non-recursive call, so we must use paxHdrBuff.
header = tw.hdrBuff[:]
if !allowPax {
header = tw.paxHdrBuff[:]
}
copy(header, zeroBlock)
s := slicer(header)
// keep a reference to the filename to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
pathHeaderBytes := s.next(fileNameSize)
tw.cString(pathHeaderBytes, hdr.Name, true, paxPath, paxHeaders)
// Handle out of range ModTime carefully.
var modTime int64
if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) {
modTime = hdr.ModTime.Unix()
}
tw.octal(s.next(8), hdr.Mode) // 100:108
tw.numeric(s.next(8), int64(hdr.Uid), true, paxUid, paxHeaders) // 108:116
tw.numeric(s.next(8), int64(hdr.Gid), true, paxGid, paxHeaders) // 116:124
tw.numeric(s.next(12), hdr.Size, true, paxSize, paxHeaders) // 124:136
tw.numeric(s.next(12), modTime, false, paxNone, nil) // 136:148 --- consider using pax for finer granularity
s.next(8) // chksum (148:156)
s.next(1)[0] = hdr.Typeflag // 156:157
tw.cString(s.next(100), hdr.Linkname, true, paxLinkpath, paxHeaders)
copy(s.next(8), []byte("ustar\x0000")) // 257:265
tw.cString(s.next(32), hdr.Uname, true, paxUname, paxHeaders) // 265:297
tw.cString(s.next(32), hdr.Gname, true, paxGname, paxHeaders) // 297:329
tw.numeric(s.next(8), hdr.Devmajor, false, paxNone, nil) // 329:337
tw.numeric(s.next(8), hdr.Devminor, false, paxNone, nil) // 337:345
// keep a reference to the prefix to allow to overwrite it later if we detect that we can use ustar longnames instead of pax
prefixHeaderBytes := s.next(155)
tw.cString(prefixHeaderBytes, "", false, paxNone, nil) // 345:500 prefix
// Use the GNU magic instead of POSIX magic if we used any GNU extensions.
if tw.usedBinary {
copy(header[257:265], []byte("ustar \x00"))
}
_, paxPathUsed := paxHeaders[paxPath]
// try to use a ustar header when only the name is too long
if !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed {
suffix := hdr.Name
prefix := ""
if len(hdr.Name) > fileNameSize && isASCII(hdr.Name) {
var err error
prefix, suffix, err = tw.splitUSTARLongName(hdr.Name)
if err == nil {
// ok we can use a ustar long name instead of pax, now correct the fields
// remove the path field from the pax header. this will suppress the pax header
delete(paxHeaders, paxPath)
// update the path fields
tw.cString(pathHeaderBytes, suffix, false, paxNone, nil)
tw.cString(prefixHeaderBytes, prefix, false, paxNone, nil)
// Use the ustar magic if we used ustar long names.
if len(prefix) > 0 && !tw.usedBinary {
copy(header[257:265], []byte("ustar\x00"))
}
}
}
}
// The chksum field is terminated by a NUL and a space.
// This is different from the other octal fields.
chksum, _ := checksum(header)
tw.octal(header[148:155], chksum)
header[155] = ' '
if tw.err != nil {
// problem with header; probably integer too big for a field.
return tw.err
}
if allowPax {
for k, v := range hdr.Xattrs {
paxHeaders[paxXattr+k] = v
}
}
if len(paxHeaders) > 0 {
if !allowPax {
return errInvalidHeader
}
if err := tw.writePAXHeader(hdr, paxHeaders); err != nil {
return err
}
}
tw.nb = int64(hdr.Size)
tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize
_, tw.err = tw.w.Write(header)
return tw.err
}
// writeUSTARLongName splits a USTAR long name hdr.Name.
// name must be < 256 characters. errNameTooLong is returned
// if hdr.Name can't be split. The splitting heuristic
// is compatible with gnu tar.
func (tw *Writer) splitUSTARLongName(name string) (prefix, suffix string, err error) {
length := len(name)
if length > fileNamePrefixSize+1 {
length = fileNamePrefixSize + 1
} else if name[length-1] == '/' {
length--
}
i := strings.LastIndex(name[:length], "/")
// nlen contains the resulting length in the name field.
// plen contains the resulting length in the prefix field.
nlen := len(name) - i - 1
plen := i
if i <= 0 || nlen > fileNameSize || nlen == 0 || plen > fileNamePrefixSize {
err = errNameTooLong
return
}
prefix, suffix = name[:i], name[i+1:]
return
}
// writePaxHeader writes an extended pax header to the
// archive.
func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error {
// Prepare extended header
ext := new(Header)
ext.Typeflag = TypeXHeader
// Setting ModTime is required for reader parsing to
// succeed, and seems harmless enough.
ext.ModTime = hdr.ModTime
// The spec asks that we namespace our pseudo files
// with the current pid.
pid := os.Getpid()
dir, file := path.Split(hdr.Name)
fullName := path.Join(dir,
fmt.Sprintf("PaxHeaders.%d", pid), file)
ascii := toASCII(fullName)
if len(ascii) > 100 {
ascii = ascii[:100]
}
ext.Name = ascii
// Construct the body
var buf bytes.Buffer
for k, v := range paxHeaders {
fmt.Fprint(&buf, paxHeader(k+"="+v))
}
ext.Size = int64(len(buf.Bytes()))
if err := tw.writeHeader(ext, false); err != nil {
return err
}
if _, err := tw.Write(buf.Bytes()); err != nil {
return err
}
if err := tw.Flush(); err != nil {
return err
}
return nil
}
// paxHeader formats a single pax record, prefixing it with the appropriate length
func paxHeader(msg string) string {
const padding = 2 // Extra padding for space and newline
size := len(msg) + padding
size += len(strconv.Itoa(size))
record := fmt.Sprintf("%d %s\n", size, msg)
if len(record) != size {
// Final adjustment if adding size increased
// the number of digits in size
size = len(record)
record = fmt.Sprintf("%d %s\n", size, msg)
}
return record
}
// Write writes to the current entry in the tar archive.
// Write returns the error ErrWriteTooLong if more than
// hdr.Size bytes are written after WriteHeader.
func (tw *Writer) Write(b []byte) (n int, err error) {
if tw.closed {
err = ErrWriteTooLong
return
}
overwrite := false
if int64(len(b)) > tw.nb {
b = b[0:tw.nb]
overwrite = true
}
n, err = tw.w.Write(b)
tw.nb -= int64(n)
if err == nil && overwrite {
err = ErrWriteTooLong
return
}
tw.err = err
return
}
// Close closes the tar archive, flushing any unwritten
// data to the underlying writer.
func (tw *Writer) Close() error {
if tw.err != nil || tw.closed {
return tw.err
}
tw.Flush()
tw.closed = true
if tw.err != nil {
return tw.err
}
// trailer: two zero blocks
for i := 0; i < 2; i++ {
_, tw.err = tw.w.Write(zeroBlock)
if tw.err != nil {
break
}
}
return tw.err
}

View File

@ -1,2 +1,3 @@
bundles
nsinit/nsinit
vendor/pkg

View File

@ -1,5 +1,8 @@
FROM golang:1.4
RUN echo "deb http://ftp.us.debian.org/debian testing main contrib" >> /etc/apt/sources.list
RUN apt-get update && apt-get install -y iptables criu=1.5.2-1 && rm -rf /var/lib/apt/lists/*
RUN go get golang.org/x/tools/cmd/cover
ENV GOPATH $GOPATH:/go/src/github.com/docker/libcontainer/vendor
@ -16,7 +19,6 @@ COPY . /go/src/github.com/docker/libcontainer
WORKDIR /go/src/github.com/docker/libcontainer
RUN cp sample_configs/minimal.json /busybox/container.json
RUN go get -d -v ./...
RUN make direct-install
ENTRYPOINT ["/dind"]

View File

@ -1,13 +1,13 @@
## libcontainer - reference implementation for containers [![Build Status](https://jenkins.dockerproject.com/buildStatus/icon?job=Libcontainer Master)](https://jenkins.dockerproject.com/job/Libcontainer%20Master/)
## libcontainer - reference implementation for containers [![Build Status](https://jenkins.dockerproject.org/buildStatus/icon?job=Libcontainer%20Master)](https://jenkins.dockerproject.org/job/Libcontainer%20Master/)
Libcontainer provides a native Go implementation for creating containers
Libcontainer provides a native Go implementation for creating containers
with namespaces, cgroups, capabilities, and filesystem access controls.
It allows you to manage the lifecycle of the container performing additional operations
after the container is created.
#### Container
A container is a self contained execution environment that shares the kernel of the
A container is a self contained execution environment that shares the kernel of the
host system and which is (optionally) isolated from other containers in the system.
#### Using libcontainer
@ -27,7 +27,7 @@ if err != nil {
}
```
Once you have an instance of the factory created we can create a configuration
Once you have an instance of the factory created we can create a configuration
struct describing how the container is to be created. A sample would look similar to this:
```go
@ -120,9 +120,9 @@ Additional ways to interact with a running container are:
```go
// return all the pids for all processes running inside the container.
processes, err := container.Processes()
processes, err := container.Processes()
// get detailed cpu, memory, io, and network statistics for the container and
// get detailed cpu, memory, io, and network statistics for the container and
// it's processes.
stats, err := container.Stats()
@ -137,18 +137,18 @@ container.Resume()
#### nsinit
`nsinit` is a cli application which demonstrates the use of libcontainer.
`nsinit` is a cli application which demonstrates the use of libcontainer.
It is able to spawn new containers or join existing containers. A root
filesystem must be provided for use along with a container configuration file.
To build `nsinit`, run `make binary`. It will save the binary into
`bundles/nsinit`.
To use `nsinit`, cd into a Linux rootfs and copy a `container.json` file into
the directory with your specified configuration. Environment, networking,
and different capabilities for the container are specified in this file.
To use `nsinit`, cd into a Linux rootfs and copy a `container.json` file into
the directory with your specified configuration. Environment, networking,
and different capabilities for the container are specified in this file.
The configuration is used for each process executed inside the container.
See the `sample_configs` folder for examples of what the container configuration should look like.
To execute `/bin/bash` in the current directory as a container just run the following **as root**:
@ -156,18 +156,50 @@ To execute `/bin/bash` in the current directory as a container just run the foll
nsinit exec --tty /bin/bash
```
If you wish to spawn another process inside the container while your
current bash session is running, run the same command again to
get another bash shell (or change the command). If the original
process (PID 1) dies, all other processes spawned inside the container
will be killed and the namespace will be removed.
If you wish to spawn another process inside the container while your
current bash session is running, run the same command again to
get another bash shell (or change the command). If the original
process (PID 1) dies, all other processes spawned inside the container
will be killed and the namespace will be removed.
You can identify if a process is running in a container by
You can identify if a process is running in a container by
looking to see if `state.json` is in the root of the directory.
You may also specify an alternate root place where
You may also specify an alternate root place where
the `container.json` file is read and where the `state.json` file will be saved.
#### Checkpoint & Restore
libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
This let's you save the state of a process running inside a container to disk, and then restore
that state into a new process, on the same machine or on another machine.
`criu` version 1.5.2 or higher is required to use checkpoint and restore.
If you don't already have `criu` installed, you can build it from source, following the
[online instructions](http://criu.org/Installation). `criu` is also installed in the docker image
generated when building libcontainer with docker.
To try an example with `nsinit`, open two terminals to the same busybox directory.
In the first terminal, run a command like this one:
```bash
nsinit exec -- sh -c 'i=0; while true; do echo $i; i=$(expr $i + 1); sleep 1; done'
```
You should see logs printing to the terminal every second. Now, in the second terminal, run:
```bash
nsinit checkpoint --image-path=/tmp/criu
```
The logs in your first terminal will stop and the process will exit. Finally, in the second
terminal, run the restore command:
```bash
nsinit restore --image-path=/tmp/criu
```
The process will resume counting where it left off and printing to the new terminal window.
#### Future
See the [roadmap](ROADMAP.md).

View File

@ -15,7 +15,7 @@ with a strong security configuration.
### System Requirements and Compatibility
Minimum requirements:
* Kernel version - 3.8 recommended 2.6.2x minimum(with backported patches)
* Kernel version - 3.10 recommended 2.6.2x minimum(with backported patches)
* Mounted cgroups with each subsystem in its own hierarchy
@ -28,11 +28,9 @@ Minimum requirements:
| CLONE_NEWIPC | 1 |
| CLONE_NEWNET | 1 |
| CLONE_NEWNS | 1 |
| CLONE_NEWUSER | 0 |
| CLONE_NEWUSER | 1 |
In v1 the user namespace is not enabled by default for support of older kernels
where the user namespace feature is not fully implemented. Namespaces are
created for the container via the `clone` syscall.
Namespaces are created for the container via the `clone` syscall.
### Filesystem
@ -49,14 +47,14 @@ unmount all the mounts that were setup within that namespace.
For a container to execute properly there are certain filesystems that
are required to be mounted within the rootfs that the runtime will setup.
| Path | Type | Flags | Data |
| ----------- | ------ | -------------------------------------- | --------------------------------------- |
| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 |
| /dev/shm | shm | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k |
| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid5 |
| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | |
| Path | Type | Flags | Data |
| ----------- | ------ | -------------------------------------- | ---------------------------------------- |
| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 |
| /dev/shm | tmpfs | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k |
| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 |
| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | |
After a container's filesystems are mounted within the newly created
@ -143,6 +141,7 @@ system resources like cpu, memory, and device access.
| blkio | 1 |
| perf_event | 1 |
| freezer | 1 |
| hugetlb | 1 |
All cgroup subsystem are joined so that statistics can be collected from
@ -165,6 +164,7 @@ provide a good default for security and flexibility for the applications.
| -------------------- | ------- |
| CAP_NET_RAW | 1 |
| CAP_NET_BIND_SERVICE | 1 |
| CAP_AUDIT_READ | 1 |
| CAP_AUDIT_WRITE | 1 |
| CAP_DAC_OVERRIDE | 1 |
| CAP_SETFCAP | 1 |
@ -217,17 +217,6 @@ profile <profile_name> flags=(attach_disconnected,mediate_deleted) {
file,
umount,
mount fstype=tmpfs,
mount fstype=mqueue,
mount fstype=fuse.*,
mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,
mount fstype=efivarfs -> /sys/firmware/efi/efivars/,
mount fstype=fusectl -> /sys/fs/fuse/connections/,
mount fstype=securityfs -> /sys/kernel/security/,
mount fstype=debugfs -> /sys/kernel/debug/,
mount fstype=proc -> /proc/,
mount fstype=sysfs -> /sys/,
deny @{PROC}/sys/fs/** wklx,
deny @{PROC}/sysrq-trigger rwklx,
deny @{PROC}/mem rwklx,
@ -235,9 +224,7 @@ profile <profile_name> flags=(attach_disconnected,mediate_deleted) {
deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx,
deny @{PROC}/sys/kernel/*/** wklx,
deny mount options=(ro, remount) -> /,
deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
deny mount fstype=devpts,
deny mount,
deny /sys/[^f]*/** wklx,
deny /sys/f[^s]*/** wklx,
@ -317,6 +304,7 @@ a container.
| Pause | Pause all processes inside the container |
| Resume | Resume all processes inside the container if paused |
| Exec | Execute a new process inside of the container ( requires setns ) |
| Set | Setup configs of the container after it's created |
### Execute a new process inside of a running container.

View File

@ -1,3 +1,5 @@
// +build linux
package apparmor
import (
@ -27,17 +29,6 @@ profile {{.Name}} flags=(attach_disconnected,mediate_deleted) {
file,
umount,
mount fstype=tmpfs,
mount fstype=mqueue,
mount fstype=fuse.*,
mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,
mount fstype=efivarfs -> /sys/firmware/efi/efivars/,
mount fstype=fusectl -> /sys/fs/fuse/connections/,
mount fstype=securityfs -> /sys/kernel/security/,
mount fstype=debugfs -> /sys/kernel/debug/,
mount fstype=proc -> /proc/,
mount fstype=sysfs -> /sys/,
deny @{PROC}/sys/fs/** wklx,
deny @{PROC}/sysrq-trigger rwklx,
deny @{PROC}/mem rwklx,
@ -45,9 +36,7 @@ profile {{.Name}} flags=(attach_disconnected,mediate_deleted) {
deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx,
deny @{PROC}/sys/kernel/*/** wklx,
deny mount options=(ro, remount) -> /,
deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
deny mount fstype=devpts,
deny mount,
deny /sys/[^f]*/** wklx,
deny /sys/f[^s]*/** wklx,

View File

@ -1,3 +1,5 @@
// +build linux
package apparmor
import (

View File

@ -1,3 +1,5 @@
// +build linux
package cgroups
import (

View File

@ -0,0 +1,3 @@
// +build !linux
package cgroups

View File

@ -1,3 +1,5 @@
// +build linux
package fs
import (
@ -22,10 +24,13 @@ var (
"cpuacct": &CpuacctGroup{},
"blkio": &BlkioGroup{},
"hugetlb": &HugetlbGroup{},
"net_cls": &NetClsGroup{},
"net_prio": &NetPrioGroup{},
"perf_event": &PerfEventGroup{},
"freezer": &FreezerGroup{},
}
CgroupProcesses = "cgroup.procs"
HugePageSizes, _ = cgroups.GetHugePageSize()
)
type subsystem interface {
@ -40,6 +45,7 @@ type subsystem interface {
}
type Manager struct {
mu sync.Mutex
Cgroups *configs.Cgroup
Paths map[string]string
}
@ -78,7 +84,6 @@ type data struct {
}
func (m *Manager) Apply(pid int) error {
if m.Cgroups == nil {
return nil
}
@ -124,14 +129,25 @@ func (m *Manager) Apply(pid int) error {
}
func (m *Manager) Destroy() error {
return cgroups.RemovePaths(m.Paths)
m.mu.Lock()
defer m.mu.Unlock()
if err := cgroups.RemovePaths(m.Paths); err != nil {
return err
}
m.Paths = make(map[string]string)
return nil
}
func (m *Manager) GetPaths() map[string]string {
return m.Paths
m.mu.Lock()
paths := m.Paths
m.mu.Unlock()
return paths
}
func (m *Manager) GetStats() (*cgroups.Stats, error) {
m.mu.Lock()
defer m.mu.Unlock()
stats := cgroups.NewStats()
for name, path := range m.Paths {
sys, ok := subsystems[name]
@ -262,6 +278,11 @@ func (raw *data) join(subsystem string) (string, error) {
}
func writeFile(dir, file, data string) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s.", file)
}
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}

View File

@ -1,3 +1,5 @@
// +build linux
package fs
import (

View File

@ -1,3 +1,5 @@
// +build linux
package fs
import (
@ -17,7 +19,7 @@ func (s *CpuGroup) Apply(d *data) error {
// We always want to join the cpu group, to allow fair cpu scheduling
// on a container basis
dir, err := d.join("cpu")
if err != nil {
if err != nil && !cgroups.IsNotFound(err) {
return err
}
@ -44,6 +46,16 @@ func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
return err
}
}
if cgroup.CpuRtPeriod != 0 {
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.CpuRtPeriod, 10)); err != nil {
return err
}
}
if cgroup.CpuRtRuntime != 0 {
if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.CpuRtRuntime, 10)); err != nil {
return err
}
}
return nil
}

View File

@ -1,3 +1,5 @@
// +build linux
package fs
import (

View File

@ -1,3 +1,5 @@
// +build linux
package fs
import (
@ -16,7 +18,7 @@ type CpusetGroup struct {
func (s *CpusetGroup) Apply(d *data) error {
dir, err := d.path("cpuset")
if err != nil {
if err != nil && !cgroups.IsNotFound(err) {
return err
}
@ -48,6 +50,11 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
}
func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error {
// This might happen if we have no cpuset cgroup mounted.
// Just do nothing and don't fail.
if dir == "" {
return nil
}
if err := s.ensureParent(dir); err != nil {
return err
}

View File

@ -1,3 +1,5 @@
// +build linux
package fs
import (
@ -11,6 +13,8 @@ type DevicesGroup struct {
func (s *DevicesGroup) Apply(d *data) error {
dir, err := d.join("devices")
if err != nil {
// We will return error even it's `not found` error, devices
// cgroup is hard requirement for container's security.
return err
}

View File

@ -1,3 +1,5 @@
// +build linux
package fs
import (

View File

@ -0,0 +1,3 @@
// +build !linux
package fs

View File

@ -1,6 +1,12 @@
// +build linux
package fs
import (
"fmt"
"strconv"
"strings"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
)
@ -9,14 +15,25 @@ type HugetlbGroup struct {
}
func (s *HugetlbGroup) Apply(d *data) error {
// we just want to join this group even though we don't set anything
if _, err := d.join("hugetlb"); err != nil && !cgroups.IsNotFound(err) {
dir, err := d.join("hugetlb")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if err := s.Set(dir, d.c); err != nil {
return err
}
return nil
}
func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
for _, hugetlb := range cgroup.HugetlbLimit {
if err := writeFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.Itoa(hugetlb.Limit)); err != nil {
return err
}
}
return nil
}
@ -25,5 +42,31 @@ func (s *HugetlbGroup) Remove(d *data) error {
}
func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
hugetlbStats := cgroups.HugetlbStats{}
for _, pageSize := range HugePageSizes {
usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".")
value, err := getCgroupParamUint(path, usage)
if err != nil {
return fmt.Errorf("failed to parse %s - %v", usage, err)
}
hugetlbStats.Usage = value
maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".")
value, err = getCgroupParamUint(path, maxUsage)
if err != nil {
return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
}
hugetlbStats.MaxUsage = value
failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".")
value, err = getCgroupParamUint(path, failcnt)
if err != nil {
return fmt.Errorf("failed to parse %s - %v", failcnt, err)
}
hugetlbStats.Failcnt = value
stats.HugetlbStats[pageSize] = hugetlbStats
}
return nil
}

View File

@ -1,3 +1,5 @@
// +build linux
package fs
import (
@ -6,6 +8,7 @@ import (
"os"
"path/filepath"
"strconv"
"strings"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
@ -16,8 +19,7 @@ type MemoryGroup struct {
func (s *MemoryGroup) Apply(d *data) error {
dir, err := d.join("memory")
// only return an error for memory if it was specified
if err != nil && (d.c.Memory != 0 || d.c.MemoryReservation != 0 || d.c.MemorySwap != 0) {
if err != nil && !cgroups.IsNotFound(err) {
return err
}
defer func() {
@ -44,14 +46,13 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
return err
}
}
// By default, MemorySwap is set to twice the size of Memory.
if cgroup.MemorySwap == 0 && cgroup.Memory != 0 {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Memory*2, 10)); err != nil {
if cgroup.MemorySwap > 0 {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.MemorySwap, 10)); err != nil {
return err
}
}
if cgroup.MemorySwap > 0 {
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.MemorySwap, 10)); err != nil {
if cgroup.KernelMemory > 0 {
if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.KernelMemory, 10)); err != nil {
return err
}
}
@ -61,6 +62,11 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
return err
}
}
if cgroup.MemorySwappiness >= 0 && cgroup.MemorySwappiness <= 100 {
if err := writeFile(path, "memory.swappiness", strconv.FormatInt(cgroup.MemorySwappiness, 10)); err != nil {
return err
}
}
return nil
}
@ -88,24 +94,62 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
}
stats.MemoryStats.Stats[t] = v
}
// Set memory usage and max historical usage.
value, err := getCgroupParamUint(path, "memory.usage_in_bytes")
if err != nil {
return fmt.Errorf("failed to parse memory.usage_in_bytes - %v", err)
}
stats.MemoryStats.Usage = value
stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
value, err = getCgroupParamUint(path, "memory.max_usage_in_bytes")
memoryUsage, err := getMemoryData(path, "")
if err != nil {
return fmt.Errorf("failed to parse memory.max_usage_in_bytes - %v", err)
return err
}
stats.MemoryStats.MaxUsage = value
value, err = getCgroupParamUint(path, "memory.failcnt")
stats.MemoryStats.Usage = memoryUsage
swapUsage, err := getMemoryData(path, "memsw")
if err != nil {
return fmt.Errorf("failed to parse memory.failcnt - %v", err)
return err
}
stats.MemoryStats.Failcnt = value
stats.MemoryStats.SwapUsage = swapUsage
kernelUsage, err := getMemoryData(path, "kmem")
if err != nil {
return err
}
stats.MemoryStats.KernelUsage = kernelUsage
return nil
}
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
memoryData := cgroups.MemoryData{}
moduleName := "memory"
if name != "" {
moduleName = strings.Join([]string{"memory", name}, ".")
}
usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".")
maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".")
failcnt := strings.Join([]string{moduleName, "failcnt"}, ".")
value, err := getCgroupParamUint(path, usage)
if err != nil {
if moduleName != "memory" && os.IsNotExist(err) {
return cgroups.MemoryData{}, nil
}
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
}
memoryData.Usage = value
value, err = getCgroupParamUint(path, maxUsage)
if err != nil {
if moduleName != "memory" && os.IsNotExist(err) {
return cgroups.MemoryData{}, nil
}
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
}
memoryData.MaxUsage = value
value, err = getCgroupParamUint(path, failcnt)
if err != nil {
if moduleName != "memory" && os.IsNotExist(err) {
return cgroups.MemoryData{}, nil
}
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
}
memoryData.Failcnt = value
return memoryData, nil
}

View File

@ -0,0 +1,40 @@
package fs
import (
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
)
type NetClsGroup struct {
}
func (s *NetClsGroup) Apply(d *data) error {
dir, err := d.join("net_cls")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if err := s.Set(dir, d.c); err != nil {
return err
}
return nil
}
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
if cgroup.NetClsClassid != "" {
if err := writeFile(path, "net_cls.classid", cgroup.NetClsClassid); err != nil {
return err
}
}
return nil
}
func (s *NetClsGroup) Remove(d *data) error {
return removePath(d.path("net_cls"))
}
func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}

View File

@ -0,0 +1,40 @@
package fs
import (
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
)
type NetPrioGroup struct {
}
func (s *NetPrioGroup) Apply(d *data) error {
dir, err := d.join("net_prio")
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if err := s.Set(dir, d.c); err != nil {
return err
}
return nil
}
func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
for _, prioMap := range cgroup.NetPrioIfpriomap {
if err := writeFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
return err
}
}
return nil
}
func (s *NetPrioGroup) Remove(d *data) error {
return removePath(d.path("net_prio"))
}
func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}

View File

@ -1,3 +1,5 @@
// +build linux
package fs
import (

View File

@ -1,3 +1,5 @@
// +build linux
package fs
import (

View File

@ -1,3 +1,5 @@
// +build linux
package cgroups
type ThrottlingData struct {
@ -30,18 +32,21 @@ type CpuStats struct {
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
}
type MemoryData struct {
Usage uint64 `json:"usage,omitempty"`
MaxUsage uint64 `json:"max_usage,omitempty"`
Failcnt uint64 `json:"failcnt"`
}
type MemoryStats struct {
// current res_counter usage for memory
Usage uint64 `json:"usage,omitempty"`
// memory used for cache
Cache uint64 `json:"cache,omitempty"`
// maximum usage ever recorded.
MaxUsage uint64 `json:"max_usage,omitempty"`
// TODO(vishh): Export these as stronger types.
// all the stats exported via memory.stat.
Stats map[string]uint64 `json:"stats,omitempty"`
// number of times memory usage hits limits.
Failcnt uint64 `json:"failcnt"`
// usage of memory
Usage MemoryData `json:"usage,omitempty"`
// usage of memory + swap
SwapUsage MemoryData `json:"swap_usage,omitempty"`
// usafe of kernel memory
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
Stats map[string]uint64 `json:"stats,omitempty"`
}
type BlkioStatEntry struct {
@ -63,13 +68,25 @@ type BlkioStats struct {
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
}
type HugetlbStats struct {
// current res_counter usage for hugetlb
Usage uint64 `json:"usage,omitempty"`
// maximum usage ever recorded.
MaxUsage uint64 `json:"max_usage,omitempty"`
// number of times htgetlb usage allocation failure.
Failcnt uint64 `json:"failcnt"`
}
type Stats struct {
CpuStats CpuStats `json:"cpu_stats,omitempty"`
MemoryStats MemoryStats `json:"memory_stats,omitempty"`
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
// the map is in the format "size of hugepage: stats of the hugepage"
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
}
func NewStats() *Stats {
memoryStats := MemoryStats{Stats: make(map[string]uint64)}
return &Stats{MemoryStats: memoryStats}
hugetlbStats := make(map[string]HugetlbStats)
return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats}
}

View File

@ -20,6 +20,7 @@ import (
)
type Manager struct {
mu sync.Mutex
Cgroups *configs.Cgroup
Paths map[string]string
}
@ -41,6 +42,8 @@ var subsystems = map[string]subsystem{
"hugetlb": &fs.HugetlbGroup{},
"perf_event": &fs.PerfEventGroup{},
"freezer": &fs.FreezerGroup{},
"net_prio": &fs.NetPrioGroup{},
"net_cls": &fs.NetClsGroup{},
}
const (
@ -199,24 +202,30 @@ func (m *Manager) Apply(pid int) error {
return err
}
// -1 disables memorySwap
if c.MemorySwap >= 0 && c.Memory != 0 {
if err := joinMemory(c, pid); err != nil {
return err
}
if err := joinMemory(c, pid); err != nil {
return err
}
// we need to manually join the freezer and cpuset cgroup in systemd
// we need to manually join the freezer, net_cls, net_prio and cpuset cgroup in systemd
// because it does not currently support it via the dbus api.
if err := joinFreezer(c, pid); err != nil {
return err
}
if err := joinNetPrio(c, pid); err != nil {
return err
}
if err := joinNetCls(c, pid); err != nil {
return err
}
if err := joinCpuset(c, pid); err != nil {
return err
}
if err := joinHugetlb(c, pid); err != nil {
return err
}
// FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem
// using that (at least on systemd 208, see https://github.com/docker/libcontainer/pull/354),
// so use fs work around for now.
@ -248,14 +257,29 @@ func (m *Manager) Apply(pid int) error {
}
func (m *Manager) Destroy() error {
return cgroups.RemovePaths(m.Paths)
m.mu.Lock()
defer m.mu.Unlock()
theConn.StopUnit(getUnitName(m.Cgroups), "replace")
if err := cgroups.RemovePaths(m.Paths); err != nil {
return err
}
m.Paths = make(map[string]string)
return nil
}
func (m *Manager) GetPaths() map[string]string {
return m.Paths
m.mu.Lock()
paths := m.Paths
m.mu.Unlock()
return paths
}
func writeFile(dir, file, data string) error {
// Normally dir should not be empty, one case is that cgroup subsystem
// is not mounted, we will get empty dir, and we want it fail here.
if dir == "" {
return fmt.Errorf("no such directory for %s.", file)
}
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
@ -276,28 +300,61 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
func joinCpu(c *configs.Cgroup, pid int) error {
path, err := getSubsystemPath(c, "cpu")
if err != nil {
if err != nil && !cgroups.IsNotFound(err) {
return err
}
if c.CpuQuota != 0 {
if err = ioutil.WriteFile(filepath.Join(path, "cpu.cfs_quota_us"), []byte(strconv.FormatInt(c.CpuQuota, 10)), 0700); err != nil {
if err = writeFile(path, "cpu.cfs_quota_us", strconv.FormatInt(c.CpuQuota, 10)); err != nil {
return err
}
}
if c.CpuPeriod != 0 {
if err = ioutil.WriteFile(filepath.Join(path, "cpu.cfs_period_us"), []byte(strconv.FormatInt(c.CpuPeriod, 10)), 0700); err != nil {
if err = writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(c.CpuPeriod, 10)); err != nil {
return err
}
}
if c.CpuRtPeriod != 0 {
if err = writeFile(path, "cpu.rt_period_us", strconv.FormatInt(c.CpuRtPeriod, 10)); err != nil {
return err
}
}
if c.CpuRtRuntime != 0 {
if err = writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(c.CpuRtRuntime, 10)); err != nil {
return err
}
}
return nil
}
func joinFreezer(c *configs.Cgroup, pid int) error {
if _, err := join(c, "freezer", pid); err != nil {
path, err := join(c, "freezer", pid)
if err != nil && !cgroups.IsNotFound(err) {
return err
}
return nil
freezer := subsystems["freezer"]
return freezer.Set(path, c)
}
func joinNetPrio(c *configs.Cgroup, pid int) error {
path, err := join(c, "net_prio", pid)
if err != nil && !cgroups.IsNotFound(err) {
return err
}
netPrio := subsystems["net_prio"]
return netPrio.Set(path, c)
}
func joinNetCls(c *configs.Cgroup, pid int) error {
path, err := join(c, "net_cls", pid)
if err != nil && !cgroups.IsNotFound(err) {
return err
}
netcls := subsystems["net_cls"]
return netcls.Set(path, c)
}
func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
@ -348,6 +405,8 @@ func (m *Manager) GetPids() ([]int, error) {
}
func (m *Manager) GetStats() (*cgroups.Stats, error) {
m.mu.Lock()
defer m.mu.Unlock()
stats := cgroups.NewStats()
for name, path := range m.Paths {
sys, ok := subsystems[name]
@ -393,6 +452,8 @@ func getUnitName(c *configs.Cgroup) string {
// This happens at least for v208 when any sibling unit is started.
func joinDevices(c *configs.Cgroup, pid int) error {
path, err := join(c, "devices", pid)
// Even if it's `not found` error, we'll return err because devices cgroup
// is hard requirement for container security.
if err != nil {
return err
}
@ -402,19 +463,33 @@ func joinDevices(c *configs.Cgroup, pid int) error {
}
func joinMemory(c *configs.Cgroup, pid int) error {
memorySwap := c.MemorySwap
if memorySwap == 0 {
// By default, MemorySwap is set to twice the size of RAM.
memorySwap = c.Memory * 2
}
path, err := getSubsystemPath(c, "memory")
if err != nil {
if err != nil && !cgroups.IsNotFound(err) {
return err
}
return ioutil.WriteFile(filepath.Join(path, "memory.memsw.limit_in_bytes"), []byte(strconv.FormatInt(memorySwap, 10)), 0700)
// -1 disables memoryswap
if c.MemorySwap > 0 {
err = writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(c.MemorySwap, 10))
if err != nil {
return err
}
}
if c.KernelMemory > 0 {
err = writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(c.KernelMemory, 10))
if err != nil {
return err
}
}
if c.MemorySwappiness >= 0 && c.MemorySwappiness <= 100 {
err = writeFile(path, "memory.swappiness", strconv.FormatInt(c.MemorySwappiness, 10))
if err != nil {
return err
}
}
return nil
}
// systemd does not atm set up the cpuset controller, so we must manually
@ -422,7 +497,7 @@ func joinMemory(c *configs.Cgroup, pid int) error {
// level must have a full setup as the default for a new directory is "no cpus"
func joinCpuset(c *configs.Cgroup, pid int) error {
path, err := getSubsystemPath(c, "cpuset")
if err != nil {
if err != nil && !cgroups.IsNotFound(err) {
return err
}
@ -467,3 +542,13 @@ func joinBlkio(c *configs.Cgroup, pid int) error {
return nil
}
func joinHugetlb(c *configs.Cgroup, pid int) error {
path, err := join(c, "hugetlb", pid)
if err != nil && !cgroups.IsNotFound(err) {
return err
}
hugetlb := subsystems["hugetlb"]
return hugetlb.Set(path, c)
}

View File

@ -1,3 +1,5 @@
// +build linux
package cgroups
import (
@ -12,24 +14,28 @@ import (
"time"
"github.com/docker/docker/pkg/mount"
"github.com/docker/docker/pkg/units"
)
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
func FindCgroupMountpoint(subsystem string) (string, error) {
mounts, err := mount.GetMounts()
f, err := os.Open("/proc/self/mountinfo")
if err != nil {
return "", err
}
for _, mount := range mounts {
if mount.Fstype == "cgroup" {
for _, opt := range strings.Split(mount.VfsOpts, ",") {
if opt == subsystem {
return mount.Mountpoint, nil
}
scanner := bufio.NewScanner(f)
for scanner.Scan() {
txt := scanner.Text()
fields := strings.Split(txt, " ")
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
if opt == subsystem {
return fields[4], nil
}
}
}
if err := scanner.Err(); err != nil {
return "", err
}
return "", NewNotFoundError(subsystem)
}
@ -236,3 +242,23 @@ func RemovePaths(paths map[string]string) (err error) {
}
return fmt.Errorf("Failed to remove paths: %s", paths)
}
func GetHugePageSize() ([]string, error) {
var pageSizes []string
sizeList := []string{"B", "kB", "MB", "GB", "TB", "PB"}
files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
if err != nil {
return pageSizes, err
}
for _, st := range files {
nameArray := strings.Split(st.Name(), "-")
pageSize, err := units.RAMInBytes(nameArray[1])
if err != nil {
return []string{}, err
}
sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, sizeList)
pageSizes = append(pageSizes, sizeString)
}
return pageSizes, nil
}

View File

@ -8,6 +8,9 @@ const (
Thawed FreezerState = "THAWED"
)
// TODO Windows: This can be factored out in the future as Cgroups are not
// supported on the Windows platform.
type Cgroup struct {
Name string `json:"name"`
@ -30,6 +33,9 @@ type Cgroup struct {
// Total memory usage (memory + swap); set `-1' to disable swap
MemorySwap int64 `json:"memory_swap"`
// Kernel memory limit (in bytes)
KernelMemory int64 `json:"kernel_memory"`
// CPU shares (relative weight vs. other containers)
CpuShares int64 `json:"cpu_shares"`
@ -39,6 +45,12 @@ type Cgroup struct {
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpuPeriod int64 `json:"cpu_period"`
// How many time CPU will use in realtime scheduling (in usecs).
CpuRtRuntime int64 `json:"cpu_quota"`
// CPU period to be used for realtime scheduling (in usecs).
CpuRtPeriod int64 `json:"cpu_period"`
// CPU to use
CpusetCpus string `json:"cpuset_cpus"`
@ -66,9 +78,21 @@ type Cgroup struct {
// set the freeze value for the process
Freezer FreezerState `json:"freezer"`
// Hugetlb limit (in bytes)
HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"`
// Parent slice to use for systemd TODO: remove in favor or parent
Slice string `json:"slice"`
// Whether to disable OOM Killer
OomKillDisable bool `json:"oom_kill_disable"`
// Tuning swappiness behaviour per cgroup
MemorySwappiness int64 `json:"memory_swappiness"`
// Set priority of network traffic for container
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
// Set class identifier for container's network packets
NetClsClassid string `json:"net_cls_classid"`
}

View File

@ -1,7 +1,5 @@
package configs
import "fmt"
type Rlimit struct {
Type int `json:"type"`
Hard uint64 `json:"hard"`
@ -15,6 +13,43 @@ type IDMap struct {
Size int `json:"size"`
}
type Seccomp struct {
Syscalls []*Syscall `json:"syscalls"`
}
type Action int
const (
Kill Action = iota - 3
Trap
Allow
)
type Operator int
const (
EqualTo Operator = iota
NotEqualTo
GreatherThan
LessThan
MaskEqualTo
)
type Arg struct {
Index int `json:"index"`
Value uint32 `json:"value"`
Op Operator `json:"op"`
}
type Syscall struct {
Value int `json:"value"`
Action Action `json:"action"`
Args []*Arg `json:"args"`
}
// TODO Windows. Many of these fields should be factored out into those parts
// which are common across platforms, and those which are platform specific.
// Config defines configuration options for executing a process inside a contained environment.
type Config struct {
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
@ -84,7 +119,7 @@ type Config struct {
// AdditionalGroups specifies the gids that should be added to supplementary groups
// in addition to those that the user belongs to.
AdditionalGroups []int `json:"additional_groups"`
AdditionalGroups []string `json:"additional_groups"`
// UidMappings is an array of User ID mappings for User Namespaces
UidMappings []IDMap `json:"uid_mappings"`
@ -103,50 +138,9 @@ type Config struct {
// SystemProperties is a map of properties and their values. It is the equivalent of using
// sysctl -w my.property.name value in Linux.
SystemProperties map[string]string `json:"system_properties"`
}
// Gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostUID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
}
id, found := c.hostIDFromMapping(0, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
}
return id, nil
}
// Return default root uid 0
return 0, nil
}
// Gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostGID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
}
id, found := c.hostIDFromMapping(0, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
}
return id, nil
}
// Return default root uid 0
return 0, nil
}
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)
return hostID, true
}
}
return -1, false
// Seccomp allows actions to be taken whenever a syscall is made within the container.
// By default, all syscalls are allowed with actions to allow, trap, kill, or return an errno
// can be specified on a per syscall basis.
Seccomp *Seccomp `json:"seccomp"`
}

View File

@ -0,0 +1,51 @@
// +build freebsd linux
package configs
import "fmt"
// Gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostUID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.UidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
}
id, found := c.hostIDFromMapping(0, c.UidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
}
return id, nil
}
// Return default root uid 0
return 0, nil
}
// Gets the root uid for the process on host which could be non-zero
// when user namespaces are enabled.
func (c Config) HostGID() (int, error) {
if c.Namespaces.Contains(NEWUSER) {
if c.GidMappings == nil {
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
}
id, found := c.hostIDFromMapping(0, c.GidMappings)
if !found {
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
}
return id, nil
}
// Return default root uid 0
return 0, nil
}
// Utility function that gets a host ID for a container ID from user namespace map
// if that ID is present in the map.
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
for _, m := range uMap {
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
hostID := m.HostID + (containerID - m.ContainerID)
return hostID, true
}
}
return -1, false
}

View File

@ -9,6 +9,8 @@ const (
Wildcard = -1
)
// TODO Windows: This can be factored out in the future
type Device struct {
// Device type, block, char, etc.
Type rune `json:"type"`

View File

@ -1,3 +1,5 @@
// +build linux freebsd
package configs
var (

View File

@ -0,0 +1,9 @@
package configs
type HugepageLimit struct {
// which type of hugepage to limit.
Pagesize string `json:"page_size"`
// usage limit for hugepage.
Limit int `json:"limit"`
}

View File

@ -0,0 +1,14 @@
package configs
import (
"fmt"
)
type IfPrioMap struct {
Interface string `json:"interface"`
Priority int64 `json:"priority"`
}
func (i *IfPrioMap) CgroupString() string {
return fmt.Sprintf("%s %d", i.Interface, i.Priority)
}

View File

@ -1,91 +1,5 @@
package configs
import "fmt"
type NamespaceType string
const (
NEWNET NamespaceType = "NEWNET"
NEWPID NamespaceType = "NEWPID"
NEWNS NamespaceType = "NEWNS"
NEWUTS NamespaceType = "NEWUTS"
NEWIPC NamespaceType = "NEWIPC"
NEWUSER NamespaceType = "NEWUSER"
)
func NamespaceTypes() []NamespaceType {
return []NamespaceType{
NEWNET,
NEWPID,
NEWNS,
NEWUTS,
NEWIPC,
NEWUSER,
}
}
// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
Type NamespaceType `json:"type"`
Path string `json:"path"`
}
func (n *Namespace) GetPath(pid int) string {
if n.Path != "" {
return n.Path
}
return fmt.Sprintf("/proc/%d/ns/%s", pid, n.file())
}
func (n *Namespace) file() string {
file := ""
switch n.Type {
case NEWNET:
file = "net"
case NEWNS:
file = "mnt"
case NEWPID:
file = "pid"
case NEWIPC:
file = "ipc"
case NEWUSER:
file = "user"
case NEWUTS:
file = "uts"
}
return file
}
type Namespaces []Namespace
func (n *Namespaces) Remove(t NamespaceType) bool {
i := n.index(t)
if i == -1 {
return false
}
*n = append((*n)[:i], (*n)[i+1:]...)
return true
}
func (n *Namespaces) Add(t NamespaceType, path string) {
i := n.index(t)
if i == -1 {
*n = append(*n, Namespace{Type: t, Path: path})
return
}
(*n)[i].Path = path
}
func (n *Namespaces) index(t NamespaceType) int {
for i, ns := range *n {
if ns.Type == t {
return i
}
}
return -1
}
func (n *Namespaces) Contains(t NamespaceType) bool {
return n.index(t) != -1
}

View File

@ -1,4 +1,4 @@
// +build !linux
// +build !linux,!windows
package configs

View File

@ -0,0 +1,89 @@
// +build linux freebsd
package configs
import "fmt"
const (
NEWNET NamespaceType = "NEWNET"
NEWPID NamespaceType = "NEWPID"
NEWNS NamespaceType = "NEWNS"
NEWUTS NamespaceType = "NEWUTS"
NEWIPC NamespaceType = "NEWIPC"
NEWUSER NamespaceType = "NEWUSER"
)
func NamespaceTypes() []NamespaceType {
return []NamespaceType{
NEWNET,
NEWPID,
NEWNS,
NEWUTS,
NEWIPC,
NEWUSER,
}
}
// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
Type NamespaceType `json:"type"`
Path string `json:"path"`
}
func (n *Namespace) GetPath(pid int) string {
if n.Path != "" {
return n.Path
}
return fmt.Sprintf("/proc/%d/ns/%s", pid, n.file())
}
func (n *Namespace) file() string {
file := ""
switch n.Type {
case NEWNET:
file = "net"
case NEWNS:
file = "mnt"
case NEWPID:
file = "pid"
case NEWIPC:
file = "ipc"
case NEWUSER:
file = "user"
case NEWUTS:
file = "uts"
}
return file
}
func (n *Namespaces) Remove(t NamespaceType) bool {
i := n.index(t)
if i == -1 {
return false
}
*n = append((*n)[:i], (*n)[i+1:]...)
return true
}
func (n *Namespaces) Add(t NamespaceType, path string) {
i := n.index(t)
if i == -1 {
*n = append(*n, Namespace{Type: t, Path: path})
return
}
(*n)[i].Path = path
}
func (n *Namespaces) index(t NamespaceType) int {
for i, ns := range *n {
if ns.Type == t {
return i
}
}
return -1
}
func (n *Namespaces) Contains(t NamespaceType) bool {
return n.index(t) != -1
}

View File

@ -0,0 +1,6 @@
package configs
// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
}

View File

@ -0,0 +1,13 @@
// +build freebsd
package libcontainer
import (
"errors"
)
// newConsole returns an initalized console that can be used within a container by copying bytes
// from the master side to the slave that is attached as the tty for the container's init process.
func newConsole(uid, gid int) (Console, error) {
return nil, errors.New("libcontainer console is not supported on FreeBSD")
}

View File

@ -1,5 +1,3 @@
// +build linux
package libcontainer
import (
@ -94,7 +92,7 @@ func (c *linuxConsole) mount(rootfs, mountLabel string, uid, gid int) error {
return syscall.Mount(c.slavePath, dest, "bind", syscall.MS_BIND, "")
}
// dupStdio opens the slavePath for the console and dup2s the fds to the current
// dupStdio opens the slavePath for the console and dups the fds to the current
// processes stdio, fd 0,1,2.
func (c *linuxConsole) dupStdio() error {
slave, err := c.open(syscall.O_RDWR)
@ -103,7 +101,7 @@ func (c *linuxConsole) dupStdio() error {
}
fd := int(slave.Fd())
for _, i := range []int{0, 1, 2} {
if err := syscall.Dup2(fd, i); err != nil {
if err := syscall.Dup3(fd, i, 0); err != nil {
return err
}
}

View File

@ -0,0 +1,30 @@
package libcontainer
// newConsole returns an initalized console that can be used within a container
func newConsole(uid, gid int) (Console, error) {
return &windowsConsole{}, nil
}
// windowsConsole is a Windows psuedo TTY for use within a container.
type windowsConsole struct {
}
func (c *windowsConsole) Fd() uintptr {
return 0
}
func (c *windowsConsole) Path() string {
return ""
}
func (c *windowsConsole) Read(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Write(b []byte) (int, error) {
return 0, nil
}
func (c *windowsConsole) Close() error {
return nil
}

View File

@ -21,6 +21,9 @@ const (
// The container exists, but all its processes are paused.
Paused
// The container exists, but its state is saved on disk
Checkpointed
// The container does not exist.
Destroyed
)
@ -46,6 +49,9 @@ type State struct {
// Config is the container's configuration.
Config configs.Config `json:"config"`
// Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore
ExternalDescriptors []string `json:"external_descriptors,omitempty"`
}
// A libcontainer container object.
@ -108,6 +114,18 @@ type Container interface {
// Systemerror - System error.
Start(process *Process) (err error)
// Checkpoint checkpoints the running container's state to disk using the criu(8) utility.
//
// errors:
// Systemerror - System error.
Checkpoint(criuOpts *CriuOpts) error
// Restore restores the checkpointed container to a running state using the criu(8) utiity.
//
// errors:
// Systemerror - System error.
Restore(process *Process, criuOpts *CriuOpts) error
// Destroys the container after killing all running processes.
//
// Any event registrations are removed before the container is destroyed.

View File

@ -5,15 +5,19 @@ package libcontainer
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"syscall"
log "github.com/Sirupsen/logrus"
"github.com/Sirupsen/logrus"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/criurpc"
"github.com/golang/protobuf/proto"
)
const stdioFdCount = 3
@ -26,6 +30,7 @@ type linuxContainer struct {
initPath string
initArgs []string
initProcess parentProcess
criuPath string
m sync.Mutex
}
@ -102,13 +107,12 @@ func (c *linuxContainer) Start(process *Process) error {
if err := parent.start(); err != nil {
// terminate the process to ensure that it properly is reaped.
if err := parent.terminate(); err != nil {
log.Warn(err)
logrus.Warn(err)
}
return newSystemError(err)
}
process.ops = parent
if doInit {
c.updateState(parent)
}
return nil
@ -227,7 +231,7 @@ func (c *linuxContainer) Destroy() error {
}
if !c.config.Namespaces.Contains(configs.NEWPID) {
if err := killCgroupProcesses(c.cgroupManager); err != nil {
log.Warn(err)
logrus.Warn(err)
}
}
err = c.cgroupManager.Destroy()
@ -254,6 +258,458 @@ func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) {
return notifyOnOOM(c.cgroupManager.GetPaths())
}
// XXX debug support, remove when debugging done.
func addArgsFromEnv(evar string, args *[]string) {
if e := os.Getenv(evar); e != "" {
for _, f := range strings.Fields(e) {
*args = append(*args, f)
}
}
fmt.Printf(">>> criu %v\n", *args)
}
func (c *linuxContainer) checkCriuVersion() error {
var x, y, z int
out, err := exec.Command(c.criuPath, "-V").Output()
if err != nil {
return err
}
n, err := fmt.Sscanf(string(out), "Version: %d.%d.%d\n", &x, &y, &z) // 1.5.2
if err != nil {
n, err = fmt.Sscanf(string(out), "Version: %d.%d\n", &x, &y) // 1.6
}
if n < 2 || err != nil {
return fmt.Errorf("Unable to parse the CRIU version: %s %d %s", out, n, err)
}
if x*10000+y*100+z < 10502 {
return fmt.Errorf("CRIU version must be 1.5.2 or higher")
}
return nil
}
const descriptors_filename = "descriptors.json"
func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
if err := c.checkCriuVersion(); err != nil {
return err
}
if criuOpts.ImagesDirectory == "" {
criuOpts.ImagesDirectory = filepath.Join(c.root, "criu.image")
}
// Since a container can be C/R'ed multiple times,
// the checkpoint directory may already exist.
if err := os.Mkdir(criuOpts.ImagesDirectory, 0755); err != nil && !os.IsExist(err) {
return err
}
if criuOpts.WorkDirectory == "" {
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
}
if err := os.Mkdir(criuOpts.WorkDirectory, 0755); err != nil && !os.IsExist(err) {
return err
}
workDir, err := os.Open(criuOpts.WorkDirectory)
if err != nil {
return err
}
defer workDir.Close()
imageDir, err := os.Open(criuOpts.ImagesDirectory)
if err != nil {
return err
}
defer imageDir.Close()
rpcOpts := criurpc.CriuOpts{
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
WorkDirFd: proto.Int32(int32(workDir.Fd())),
LogLevel: proto.Int32(4),
LogFile: proto.String("dump.log"),
Root: proto.String(c.config.Rootfs),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
Pid: proto.Int32(int32(c.initProcess.pid())),
ShellJob: proto.Bool(criuOpts.ShellJob),
LeaveRunning: proto.Bool(criuOpts.LeaveRunning),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
}
// append optional criu opts, e.g., page-server and port
if criuOpts.PageServer.Address != "" && criuOpts.PageServer.Port != 0 {
rpcOpts.Ps = &criurpc.CriuPageServerInfo{
Address: proto.String(criuOpts.PageServer.Address),
Port: proto.Int32(criuOpts.PageServer.Port),
}
}
t := criurpc.CriuReqType_DUMP
req := criurpc.CriuReq{
Type: &t,
Opts: &rpcOpts,
}
for _, m := range c.config.Mounts {
if m.Device == "bind" {
mountDest := m.Destination
if strings.HasPrefix(mountDest, c.config.Rootfs) {
mountDest = mountDest[len(c.config.Rootfs):]
}
extMnt := new(criurpc.ExtMountMap)
extMnt.Key = proto.String(mountDest)
extMnt.Val = proto.String(mountDest)
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
}
}
// Write the FD info to a file in the image directory
fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors())
if err != nil {
return err
}
err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptors_filename), fdsJSON, 0655)
if err != nil {
return err
}
err = c.criuSwrk(nil, &req, criuOpts)
if err != nil {
return err
}
return nil
}
func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error {
c.m.Lock()
defer c.m.Unlock()
if err := c.checkCriuVersion(); err != nil {
return err
}
if criuOpts.WorkDirectory == "" {
criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work")
}
// Since a container can be C/R'ed multiple times,
// the work directory may already exist.
if err := os.Mkdir(criuOpts.WorkDirectory, 0655); err != nil && !os.IsExist(err) {
return err
}
workDir, err := os.Open(criuOpts.WorkDirectory)
if err != nil {
return err
}
defer workDir.Close()
if criuOpts.ImagesDirectory == "" {
criuOpts.ImagesDirectory = filepath.Join(c.root, "criu.image")
}
imageDir, err := os.Open(criuOpts.ImagesDirectory)
if err != nil {
return err
}
defer imageDir.Close()
// CRIU has a few requirements for a root directory:
// * it must be a mount point
// * its parent must not be overmounted
// c.config.Rootfs is bind-mounted to a temporary directory
// to satisfy these requirements.
root := filepath.Join(c.root, "criu-root")
if err := os.Mkdir(root, 0755); err != nil {
return err
}
defer os.Remove(root)
root, err = filepath.EvalSymlinks(root)
if err != nil {
return err
}
err = syscall.Mount(c.config.Rootfs, root, "", syscall.MS_BIND|syscall.MS_REC, "")
if err != nil {
return err
}
defer syscall.Unmount(root, syscall.MNT_DETACH)
t := criurpc.CriuReqType_RESTORE
req := criurpc.CriuReq{
Type: &t,
Opts: &criurpc.CriuOpts{
ImagesDirFd: proto.Int32(int32(imageDir.Fd())),
WorkDirFd: proto.Int32(int32(workDir.Fd())),
EvasiveDevices: proto.Bool(true),
LogLevel: proto.Int32(4),
LogFile: proto.String("restore.log"),
RstSibling: proto.Bool(true),
Root: proto.String(root),
ManageCgroups: proto.Bool(true),
NotifyScripts: proto.Bool(true),
ShellJob: proto.Bool(criuOpts.ShellJob),
ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections),
TcpEstablished: proto.Bool(criuOpts.TcpEstablished),
},
}
for _, m := range c.config.Mounts {
if m.Device == "bind" {
mountDest := m.Destination
if strings.HasPrefix(mountDest, c.config.Rootfs) {
mountDest = mountDest[len(c.config.Rootfs):]
}
extMnt := new(criurpc.ExtMountMap)
extMnt.Key = proto.String(mountDest)
extMnt.Val = proto.String(m.Source)
req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt)
}
}
for _, iface := range c.config.Networks {
switch iface.Type {
case "veth":
veth := new(criurpc.CriuVethPair)
veth.IfOut = proto.String(iface.HostInterfaceName)
veth.IfIn = proto.String(iface.Name)
req.Opts.Veths = append(req.Opts.Veths, veth)
break
case "loopback":
break
}
}
var (
fds []string
fdJSON []byte
)
if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptors_filename)); err != nil {
return err
}
if err = json.Unmarshal(fdJSON, &fds); err != nil {
return err
}
for i := range fds {
if s := fds[i]; strings.Contains(s, "pipe:") {
inheritFd := new(criurpc.InheritFd)
inheritFd.Key = proto.String(s)
inheritFd.Fd = proto.Int32(int32(i))
req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd)
}
}
err = c.criuSwrk(process, &req, criuOpts)
if err != nil {
return err
}
return nil
}
func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts) error {
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET|syscall.SOCK_CLOEXEC, 0)
if err != nil {
return err
}
criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client")
criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server")
defer criuClient.Close()
defer criuServer.Close()
args := []string{"swrk", "3"}
cmd := exec.Command(c.criuPath, args...)
if process != nil {
cmd.Stdin = process.Stdin
cmd.Stdout = process.Stdout
cmd.Stderr = process.Stderr
}
cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer)
if err := cmd.Start(); err != nil {
return err
}
criuServer.Close()
defer func() {
criuClient.Close()
_, err := cmd.Process.Wait()
if err != nil {
return
}
}()
var extFds []string
if process != nil {
extFds, err = getPipeFds(cmd.Process.Pid)
if err != nil {
return err
}
}
data, err := proto.Marshal(req)
if err != nil {
return err
}
_, err = criuClient.Write(data)
if err != nil {
return err
}
buf := make([]byte, 10*4096)
for true {
n, err := criuClient.Read(buf)
if err != nil {
return err
}
if n == 0 {
return fmt.Errorf("unexpected EOF")
}
if n == len(buf) {
return fmt.Errorf("buffer is too small")
}
resp := new(criurpc.CriuResp)
err = proto.Unmarshal(buf[:n], resp)
if err != nil {
return err
}
if !resp.GetSuccess() {
return fmt.Errorf("criu failed: type %s errno %d", req.GetType().String(), resp.GetCrErrno())
}
t := resp.GetType()
switch {
case t == criurpc.CriuReqType_NOTIFY:
if err := c.criuNotifications(resp, process, opts, extFds); err != nil {
return err
}
t = criurpc.CriuReqType_NOTIFY
req = &criurpc.CriuReq{
Type: &t,
NotifySuccess: proto.Bool(true),
}
data, err = proto.Marshal(req)
if err != nil {
return err
}
n, err = criuClient.Write(data)
if err != nil {
return err
}
continue
case t == criurpc.CriuReqType_RESTORE:
case t == criurpc.CriuReqType_DUMP:
break
default:
return fmt.Errorf("unable to parse the response %s", resp.String())
}
break
}
// cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors.
// Here we want to wait only the CRIU process.
st, err := cmd.Process.Wait()
if err != nil {
return err
}
if !st.Success() {
return fmt.Errorf("criu failed: %s", st.String())
}
return nil
}
// block any external network activity
func lockNetwork(config *configs.Config) error {
for _, config := range config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
if err := strategy.detach(config); err != nil {
return err
}
}
return nil
}
func unlockNetwork(config *configs.Config) error {
for _, config := range config.Networks {
strategy, err := getStrategy(config.Type)
if err != nil {
return err
}
if err = strategy.attach(config); err != nil {
return err
}
}
return nil
}
func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, opts *CriuOpts, fds []string) error {
notify := resp.GetNotify()
if notify == nil {
return fmt.Errorf("invalid response: %s", resp.String())
}
switch {
case notify.GetScript() == "post-dump":
if !opts.LeaveRunning {
f, err := os.Create(filepath.Join(c.root, "checkpoint"))
if err != nil {
return err
}
f.Close()
}
break
case notify.GetScript() == "network-unlock":
if err := unlockNetwork(c.config); err != nil {
return err
}
break
case notify.GetScript() == "network-lock":
if err := lockNetwork(c.config); err != nil {
return err
}
break
case notify.GetScript() == "post-restore":
pid := notify.GetPid()
r, err := newRestoredProcess(int(pid), fds)
if err != nil {
return err
}
// TODO: crosbymichael restore previous process information by saving the init process information in
// the container's state file or separate process state files.
if err := c.updateState(r); err != nil {
return err
}
process.ops = r
break
}
return nil
}
func (c *linuxContainer) updateState(process parentProcess) error {
c.initProcess = process
state, err := c.currentState()
@ -265,10 +721,14 @@ func (c *linuxContainer) updateState(process parentProcess) error {
return err
}
defer f.Close()
os.Remove(filepath.Join(c.root, "checkpoint"))
return json.NewEncoder(f).Encode(state)
}
func (c *linuxContainer) currentStatus() (Status, error) {
if _, err := os.Stat(filepath.Join(c.root, "checkpoint")); err == nil {
return Checkpointed, nil
}
if c.initProcess == nil {
return Destroyed, nil
}
@ -304,6 +764,7 @@ func (c *linuxContainer) currentState() (*State, error) {
InitProcessStartTime: startTime,
CgroupPaths: c.cgroupManager.GetPaths(),
NamespacePaths: make(map[configs.NamespaceType]string),
ExternalDescriptors: c.initProcess.externalDescriptors(),
}
for _, ns := range c.config.Namespaces {
state.NamespacePaths[ns.Type] = ns.GetPath(c.initProcess.pid())

View File

@ -0,0 +1,16 @@
package libcontainer
type CriuPageServerInfo struct {
Address string // IP address of CRIU page server
Port int32 // port number of CRIU page server
}
type CriuOpts struct {
ImagesDirectory string // directory for storing image files
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
LeaveRunning bool // leave container in running state after checkpoint
TcpEstablished bool // checkpoint/restore established TCP connections
ExternalUnixConnections bool // allow external unix connections
ShellJob bool // allow to dump and restore shell jobs
PageServer CriuPageServerInfo // allow to dump to criu page server
}

View File

@ -0,0 +1,2 @@
gen: criurpc.proto
protoc --go_out=. criurpc.proto

View File

@ -0,0 +1,602 @@
// Code generated by protoc-gen-go.
// source: criurpc.proto
// DO NOT EDIT!
package criurpc
import proto "github.com/golang/protobuf/proto"
import json "encoding/json"
import math "math"
// Reference proto, json, and math imports to suppress error if they are not otherwise used.
var _ = proto.Marshal
var _ = &json.SyntaxError{}
var _ = math.Inf
type CriuReqType int32
const (
CriuReqType_EMPTY CriuReqType = 0
CriuReqType_DUMP CriuReqType = 1
CriuReqType_RESTORE CriuReqType = 2
CriuReqType_CHECK CriuReqType = 3
CriuReqType_PRE_DUMP CriuReqType = 4
CriuReqType_PAGE_SERVER CriuReqType = 5
CriuReqType_NOTIFY CriuReqType = 6
CriuReqType_CPUINFO_DUMP CriuReqType = 7
CriuReqType_CPUINFO_CHECK CriuReqType = 8
)
var CriuReqType_name = map[int32]string{
0: "EMPTY",
1: "DUMP",
2: "RESTORE",
3: "CHECK",
4: "PRE_DUMP",
5: "PAGE_SERVER",
6: "NOTIFY",
7: "CPUINFO_DUMP",
8: "CPUINFO_CHECK",
}
var CriuReqType_value = map[string]int32{
"EMPTY": 0,
"DUMP": 1,
"RESTORE": 2,
"CHECK": 3,
"PRE_DUMP": 4,
"PAGE_SERVER": 5,
"NOTIFY": 6,
"CPUINFO_DUMP": 7,
"CPUINFO_CHECK": 8,
}
func (x CriuReqType) Enum() *CriuReqType {
p := new(CriuReqType)
*p = x
return p
}
func (x CriuReqType) String() string {
return proto.EnumName(CriuReqType_name, int32(x))
}
func (x CriuReqType) MarshalJSON() ([]byte, error) {
return json.Marshal(x.String())
}
func (x *CriuReqType) UnmarshalJSON(data []byte) error {
value, err := proto.UnmarshalJSONEnum(CriuReqType_value, data, "CriuReqType")
if err != nil {
return err
}
*x = CriuReqType(value)
return nil
}
type CriuPageServerInfo struct {
Address *string `protobuf:"bytes,1,opt,name=address" json:"address,omitempty"`
Port *int32 `protobuf:"varint,2,opt,name=port" json:"port,omitempty"`
Pid *int32 `protobuf:"varint,3,opt,name=pid" json:"pid,omitempty"`
Fd *int32 `protobuf:"varint,4,opt,name=fd" json:"fd,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuPageServerInfo) Reset() { *m = CriuPageServerInfo{} }
func (m *CriuPageServerInfo) String() string { return proto.CompactTextString(m) }
func (*CriuPageServerInfo) ProtoMessage() {}
func (m *CriuPageServerInfo) GetAddress() string {
if m != nil && m.Address != nil {
return *m.Address
}
return ""
}
func (m *CriuPageServerInfo) GetPort() int32 {
if m != nil && m.Port != nil {
return *m.Port
}
return 0
}
func (m *CriuPageServerInfo) GetPid() int32 {
if m != nil && m.Pid != nil {
return *m.Pid
}
return 0
}
func (m *CriuPageServerInfo) GetFd() int32 {
if m != nil && m.Fd != nil {
return *m.Fd
}
return 0
}
type CriuVethPair struct {
IfIn *string `protobuf:"bytes,1,req,name=if_in" json:"if_in,omitempty"`
IfOut *string `protobuf:"bytes,2,req,name=if_out" json:"if_out,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuVethPair) Reset() { *m = CriuVethPair{} }
func (m *CriuVethPair) String() string { return proto.CompactTextString(m) }
func (*CriuVethPair) ProtoMessage() {}
func (m *CriuVethPair) GetIfIn() string {
if m != nil && m.IfIn != nil {
return *m.IfIn
}
return ""
}
func (m *CriuVethPair) GetIfOut() string {
if m != nil && m.IfOut != nil {
return *m.IfOut
}
return ""
}
type ExtMountMap struct {
Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"`
Val *string `protobuf:"bytes,2,req,name=val" json:"val,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *ExtMountMap) Reset() { *m = ExtMountMap{} }
func (m *ExtMountMap) String() string { return proto.CompactTextString(m) }
func (*ExtMountMap) ProtoMessage() {}
func (m *ExtMountMap) GetKey() string {
if m != nil && m.Key != nil {
return *m.Key
}
return ""
}
func (m *ExtMountMap) GetVal() string {
if m != nil && m.Val != nil {
return *m.Val
}
return ""
}
type InheritFd struct {
Key *string `protobuf:"bytes,1,req,name=key" json:"key,omitempty"`
Fd *int32 `protobuf:"varint,2,req,name=fd" json:"fd,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *InheritFd) Reset() { *m = InheritFd{} }
func (m *InheritFd) String() string { return proto.CompactTextString(m) }
func (*InheritFd) ProtoMessage() {}
func (m *InheritFd) GetKey() string {
if m != nil && m.Key != nil {
return *m.Key
}
return ""
}
func (m *InheritFd) GetFd() int32 {
if m != nil && m.Fd != nil {
return *m.Fd
}
return 0
}
type CgroupRoot struct {
Ctrl *string `protobuf:"bytes,1,opt,name=ctrl" json:"ctrl,omitempty"`
Path *string `protobuf:"bytes,2,req,name=path" json:"path,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CgroupRoot) Reset() { *m = CgroupRoot{} }
func (m *CgroupRoot) String() string { return proto.CompactTextString(m) }
func (*CgroupRoot) ProtoMessage() {}
func (m *CgroupRoot) GetCtrl() string {
if m != nil && m.Ctrl != nil {
return *m.Ctrl
}
return ""
}
func (m *CgroupRoot) GetPath() string {
if m != nil && m.Path != nil {
return *m.Path
}
return ""
}
type CriuOpts struct {
ImagesDirFd *int32 `protobuf:"varint,1,req,name=images_dir_fd" json:"images_dir_fd,omitempty"`
Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"`
LeaveRunning *bool `protobuf:"varint,3,opt,name=leave_running" json:"leave_running,omitempty"`
ExtUnixSk *bool `protobuf:"varint,4,opt,name=ext_unix_sk" json:"ext_unix_sk,omitempty"`
TcpEstablished *bool `protobuf:"varint,5,opt,name=tcp_established" json:"tcp_established,omitempty"`
EvasiveDevices *bool `protobuf:"varint,6,opt,name=evasive_devices" json:"evasive_devices,omitempty"`
ShellJob *bool `protobuf:"varint,7,opt,name=shell_job" json:"shell_job,omitempty"`
FileLocks *bool `protobuf:"varint,8,opt,name=file_locks" json:"file_locks,omitempty"`
LogLevel *int32 `protobuf:"varint,9,opt,name=log_level,def=2" json:"log_level,omitempty"`
LogFile *string `protobuf:"bytes,10,opt,name=log_file" json:"log_file,omitempty"`
Ps *CriuPageServerInfo `protobuf:"bytes,11,opt,name=ps" json:"ps,omitempty"`
NotifyScripts *bool `protobuf:"varint,12,opt,name=notify_scripts" json:"notify_scripts,omitempty"`
Root *string `protobuf:"bytes,13,opt,name=root" json:"root,omitempty"`
ParentImg *string `protobuf:"bytes,14,opt,name=parent_img" json:"parent_img,omitempty"`
TrackMem *bool `protobuf:"varint,15,opt,name=track_mem" json:"track_mem,omitempty"`
AutoDedup *bool `protobuf:"varint,16,opt,name=auto_dedup" json:"auto_dedup,omitempty"`
WorkDirFd *int32 `protobuf:"varint,17,opt,name=work_dir_fd" json:"work_dir_fd,omitempty"`
LinkRemap *bool `protobuf:"varint,18,opt,name=link_remap" json:"link_remap,omitempty"`
Veths []*CriuVethPair `protobuf:"bytes,19,rep,name=veths" json:"veths,omitempty"`
CpuCap *uint32 `protobuf:"varint,20,opt,name=cpu_cap,def=4294967295" json:"cpu_cap,omitempty"`
ForceIrmap *bool `protobuf:"varint,21,opt,name=force_irmap" json:"force_irmap,omitempty"`
ExecCmd []string `protobuf:"bytes,22,rep,name=exec_cmd" json:"exec_cmd,omitempty"`
ExtMnt []*ExtMountMap `protobuf:"bytes,23,rep,name=ext_mnt" json:"ext_mnt,omitempty"`
ManageCgroups *bool `protobuf:"varint,24,opt,name=manage_cgroups" json:"manage_cgroups,omitempty"`
CgRoot []*CgroupRoot `protobuf:"bytes,25,rep,name=cg_root" json:"cg_root,omitempty"`
RstSibling *bool `protobuf:"varint,26,opt,name=rst_sibling" json:"rst_sibling,omitempty"`
InheritFd []*InheritFd `protobuf:"bytes,27,rep,name=inherit_fd" json:"inherit_fd,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuOpts) Reset() { *m = CriuOpts{} }
func (m *CriuOpts) String() string { return proto.CompactTextString(m) }
func (*CriuOpts) ProtoMessage() {}
const Default_CriuOpts_LogLevel int32 = 2
const Default_CriuOpts_CpuCap uint32 = 4294967295
func (m *CriuOpts) GetImagesDirFd() int32 {
if m != nil && m.ImagesDirFd != nil {
return *m.ImagesDirFd
}
return 0
}
func (m *CriuOpts) GetPid() int32 {
if m != nil && m.Pid != nil {
return *m.Pid
}
return 0
}
func (m *CriuOpts) GetLeaveRunning() bool {
if m != nil && m.LeaveRunning != nil {
return *m.LeaveRunning
}
return false
}
func (m *CriuOpts) GetExtUnixSk() bool {
if m != nil && m.ExtUnixSk != nil {
return *m.ExtUnixSk
}
return false
}
func (m *CriuOpts) GetTcpEstablished() bool {
if m != nil && m.TcpEstablished != nil {
return *m.TcpEstablished
}
return false
}
func (m *CriuOpts) GetEvasiveDevices() bool {
if m != nil && m.EvasiveDevices != nil {
return *m.EvasiveDevices
}
return false
}
func (m *CriuOpts) GetShellJob() bool {
if m != nil && m.ShellJob != nil {
return *m.ShellJob
}
return false
}
func (m *CriuOpts) GetFileLocks() bool {
if m != nil && m.FileLocks != nil {
return *m.FileLocks
}
return false
}
func (m *CriuOpts) GetLogLevel() int32 {
if m != nil && m.LogLevel != nil {
return *m.LogLevel
}
return Default_CriuOpts_LogLevel
}
func (m *CriuOpts) GetLogFile() string {
if m != nil && m.LogFile != nil {
return *m.LogFile
}
return ""
}
func (m *CriuOpts) GetPs() *CriuPageServerInfo {
if m != nil {
return m.Ps
}
return nil
}
func (m *CriuOpts) GetNotifyScripts() bool {
if m != nil && m.NotifyScripts != nil {
return *m.NotifyScripts
}
return false
}
func (m *CriuOpts) GetRoot() string {
if m != nil && m.Root != nil {
return *m.Root
}
return ""
}
func (m *CriuOpts) GetParentImg() string {
if m != nil && m.ParentImg != nil {
return *m.ParentImg
}
return ""
}
func (m *CriuOpts) GetTrackMem() bool {
if m != nil && m.TrackMem != nil {
return *m.TrackMem
}
return false
}
func (m *CriuOpts) GetAutoDedup() bool {
if m != nil && m.AutoDedup != nil {
return *m.AutoDedup
}
return false
}
func (m *CriuOpts) GetWorkDirFd() int32 {
if m != nil && m.WorkDirFd != nil {
return *m.WorkDirFd
}
return 0
}
func (m *CriuOpts) GetLinkRemap() bool {
if m != nil && m.LinkRemap != nil {
return *m.LinkRemap
}
return false
}
func (m *CriuOpts) GetVeths() []*CriuVethPair {
if m != nil {
return m.Veths
}
return nil
}
func (m *CriuOpts) GetCpuCap() uint32 {
if m != nil && m.CpuCap != nil {
return *m.CpuCap
}
return Default_CriuOpts_CpuCap
}
func (m *CriuOpts) GetForceIrmap() bool {
if m != nil && m.ForceIrmap != nil {
return *m.ForceIrmap
}
return false
}
func (m *CriuOpts) GetExecCmd() []string {
if m != nil {
return m.ExecCmd
}
return nil
}
func (m *CriuOpts) GetExtMnt() []*ExtMountMap {
if m != nil {
return m.ExtMnt
}
return nil
}
func (m *CriuOpts) GetManageCgroups() bool {
if m != nil && m.ManageCgroups != nil {
return *m.ManageCgroups
}
return false
}
func (m *CriuOpts) GetCgRoot() []*CgroupRoot {
if m != nil {
return m.CgRoot
}
return nil
}
func (m *CriuOpts) GetRstSibling() bool {
if m != nil && m.RstSibling != nil {
return *m.RstSibling
}
return false
}
func (m *CriuOpts) GetInheritFd() []*InheritFd {
if m != nil {
return m.InheritFd
}
return nil
}
type CriuDumpResp struct {
Restored *bool `protobuf:"varint,1,opt,name=restored" json:"restored,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuDumpResp) Reset() { *m = CriuDumpResp{} }
func (m *CriuDumpResp) String() string { return proto.CompactTextString(m) }
func (*CriuDumpResp) ProtoMessage() {}
func (m *CriuDumpResp) GetRestored() bool {
if m != nil && m.Restored != nil {
return *m.Restored
}
return false
}
type CriuRestoreResp struct {
Pid *int32 `protobuf:"varint,1,req,name=pid" json:"pid,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuRestoreResp) Reset() { *m = CriuRestoreResp{} }
func (m *CriuRestoreResp) String() string { return proto.CompactTextString(m) }
func (*CriuRestoreResp) ProtoMessage() {}
func (m *CriuRestoreResp) GetPid() int32 {
if m != nil && m.Pid != nil {
return *m.Pid
}
return 0
}
type CriuNotify struct {
Script *string `protobuf:"bytes,1,opt,name=script" json:"script,omitempty"`
Pid *int32 `protobuf:"varint,2,opt,name=pid" json:"pid,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuNotify) Reset() { *m = CriuNotify{} }
func (m *CriuNotify) String() string { return proto.CompactTextString(m) }
func (*CriuNotify) ProtoMessage() {}
func (m *CriuNotify) GetScript() string {
if m != nil && m.Script != nil {
return *m.Script
}
return ""
}
func (m *CriuNotify) GetPid() int32 {
if m != nil && m.Pid != nil {
return *m.Pid
}
return 0
}
type CriuReq struct {
Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"`
Opts *CriuOpts `protobuf:"bytes,2,opt,name=opts" json:"opts,omitempty"`
NotifySuccess *bool `protobuf:"varint,3,opt,name=notify_success" json:"notify_success,omitempty"`
//
// When set service won't close the connection but
// will wait for more req-s to appear. Works not
// for all request types.
KeepOpen *bool `protobuf:"varint,4,opt,name=keep_open" json:"keep_open,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuReq) Reset() { *m = CriuReq{} }
func (m *CriuReq) String() string { return proto.CompactTextString(m) }
func (*CriuReq) ProtoMessage() {}
func (m *CriuReq) GetType() CriuReqType {
if m != nil && m.Type != nil {
return *m.Type
}
return 0
}
func (m *CriuReq) GetOpts() *CriuOpts {
if m != nil {
return m.Opts
}
return nil
}
func (m *CriuReq) GetNotifySuccess() bool {
if m != nil && m.NotifySuccess != nil {
return *m.NotifySuccess
}
return false
}
func (m *CriuReq) GetKeepOpen() bool {
if m != nil && m.KeepOpen != nil {
return *m.KeepOpen
}
return false
}
type CriuResp struct {
Type *CriuReqType `protobuf:"varint,1,req,name=type,enum=CriuReqType" json:"type,omitempty"`
Success *bool `protobuf:"varint,2,req,name=success" json:"success,omitempty"`
Dump *CriuDumpResp `protobuf:"bytes,3,opt,name=dump" json:"dump,omitempty"`
Restore *CriuRestoreResp `protobuf:"bytes,4,opt,name=restore" json:"restore,omitempty"`
Notify *CriuNotify `protobuf:"bytes,5,opt,name=notify" json:"notify,omitempty"`
Ps *CriuPageServerInfo `protobuf:"bytes,6,opt,name=ps" json:"ps,omitempty"`
CrErrno *int32 `protobuf:"varint,7,opt,name=cr_errno" json:"cr_errno,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *CriuResp) Reset() { *m = CriuResp{} }
func (m *CriuResp) String() string { return proto.CompactTextString(m) }
func (*CriuResp) ProtoMessage() {}
func (m *CriuResp) GetType() CriuReqType {
if m != nil && m.Type != nil {
return *m.Type
}
return 0
}
func (m *CriuResp) GetSuccess() bool {
if m != nil && m.Success != nil {
return *m.Success
}
return false
}
func (m *CriuResp) GetDump() *CriuDumpResp {
if m != nil {
return m.Dump
}
return nil
}
func (m *CriuResp) GetRestore() *CriuRestoreResp {
if m != nil {
return m.Restore
}
return nil
}
func (m *CriuResp) GetNotify() *CriuNotify {
if m != nil {
return m.Notify
}
return nil
}
func (m *CriuResp) GetPs() *CriuPageServerInfo {
if m != nil {
return m.Ps
}
return nil
}
func (m *CriuResp) GetCrErrno() int32 {
if m != nil && m.CrErrno != nil {
return *m.CrErrno
}
return 0
}
func init() {
proto.RegisterEnum("CriuReqType", CriuReqType_name, CriuReqType_value)
}

View File

@ -0,0 +1,127 @@
message criu_page_server_info {
optional string address = 1;
optional int32 port = 2;
optional int32 pid = 3;
optional int32 fd = 4;
}
message criu_veth_pair {
required string if_in = 1;
required string if_out = 2;
};
message ext_mount_map {
required string key = 1;
required string val = 2;
};
message inherit_fd {
required string key = 1;
required int32 fd = 2;
};
message cgroup_root {
optional string ctrl = 1;
required string path = 2;
};
message criu_opts {
required int32 images_dir_fd = 1;
optional int32 pid = 2; /* if not set on dump, will dump requesting process */
optional bool leave_running = 3;
optional bool ext_unix_sk = 4;
optional bool tcp_established = 5;
optional bool evasive_devices = 6;
optional bool shell_job = 7;
optional bool file_locks = 8;
optional int32 log_level = 9 [default = 2];
optional string log_file = 10; /* No subdirs are allowed. Consider using work-dir */
optional criu_page_server_info ps = 11;
optional bool notify_scripts = 12;
optional string root = 13;
optional string parent_img = 14;
optional bool track_mem = 15;
optional bool auto_dedup = 16;
optional int32 work_dir_fd = 17;
optional bool link_remap = 18;
repeated criu_veth_pair veths = 19;
optional uint32 cpu_cap = 20 [default = 0xffffffff];
optional bool force_irmap = 21;
repeated string exec_cmd = 22;
repeated ext_mount_map ext_mnt = 23;
optional bool manage_cgroups = 24;
repeated cgroup_root cg_root = 25;
optional bool rst_sibling = 26; /* swrk only */
repeated inherit_fd inherit_fd = 27;
}
message criu_dump_resp {
optional bool restored = 1;
}
message criu_restore_resp {
required int32 pid = 1;
}
message criu_notify {
optional string script = 1;
optional int32 pid = 2;
}
enum criu_req_type {
EMPTY = 0;
DUMP = 1;
RESTORE = 2;
CHECK = 3;
PRE_DUMP = 4;
PAGE_SERVER = 5;
NOTIFY = 6;
CPUINFO_DUMP = 7;
CPUINFO_CHECK = 8;
}
/*
* Request -- each type corresponds to must-be-there
* request arguments of respective type
*/
message criu_req {
required criu_req_type type = 1;
optional criu_opts opts = 2;
optional bool notify_success = 3;
/*
* When set service won't close the connection but
* will wait for more req-s to appear. Works not
* for all request types.
*/
optional bool keep_open = 4;
}
/*
* Responce -- it states whether the request was served
* and additional request-specific informarion
*/
message criu_resp {
required criu_req_type type = 1;
required bool success = 2;
optional criu_dump_resp dump = 3;
optional criu_restore_resp restore = 4;
optional criu_notify notify = 5;
optional criu_page_server_info ps = 6;
optional int32 cr_errno = 7;
}

View File

@ -0,0 +1,16 @@
package devices
import (
"github.com/docker/libcontainer/configs"
)
// TODO Windows. This can be factored out further - Devices are not supported
// by Windows Containers.
func DeviceFromPath(path, permissions string) (*configs.Device, error) {
return nil, nil
}
func HostDevices() ([]*configs.Device, error) {
return nil, nil
}

View File

@ -1,3 +1,5 @@
// +build linux freebsd
package devices
/*

View File

@ -106,6 +106,7 @@ func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
l := &LinuxFactory{
Root: root,
Validator: validate.New(),
CriuPath: "criu",
}
InitArgs(os.Args[0], "init")(l)
Cgroupfs(l)
@ -129,6 +130,10 @@ type LinuxFactory struct {
// a container.
InitArgs []string
// CriuPath is the path to the criu binary used for checkpoint and restore of
// containers.
CriuPath string
// Validator provides validation to container configurations.
Validator validate.Validator
@ -161,6 +166,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
config: config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
}, nil
}
@ -174,9 +180,10 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
if err != nil {
return nil, err
}
r := &restoredProcess{
r := &nonChildProcess{
processPid: state.InitProcessPid,
processStartTime: state.InitProcessStartTime,
fds: state.ExternalDescriptors,
}
return &linuxContainer{
initProcess: r,
@ -184,6 +191,7 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
config: &state.Config,
initPath: l.InitPath,
initArgs: l.InitArgs,
criuPath: l.CriuPath,
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
root: containerRoot,
}, nil
@ -253,35 +261,3 @@ func (l *LinuxFactory) validateID(id string) error {
}
return nil
}
// restoredProcess represents a process where the calling process may or may not be
// the parent process. This process is created when a factory loads a container from
// a persisted state.
type restoredProcess struct {
processPid int
processStartTime string
}
func (p *restoredProcess) start() error {
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
}
func (p *restoredProcess) pid() int {
return p.processPid
}
func (p *restoredProcess) terminate() error {
return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
}
func (p *restoredProcess) wait() (*os.ProcessState, error) {
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
}
func (p *restoredProcess) startTime() (string, error) {
return p.processStartTime, nil
}
func (p *restoredProcess) signal(s os.Signal) error {
return newGenericError(fmt.Errorf("restored process cannot be signaled"), SystemError)
}

View File

@ -4,6 +4,7 @@ set -e
# This script runs all validations
validate() {
export MAKEDIR=/go/src/github.com/docker/docker/hack/make
sed -i 's!docker/docker!docker/libcontainer!' /go/src/github.com/docker/docker/hack/make/.validate
bash /go/src/github.com/docker/docker/hack/make/validate-dco
bash /go/src/github.com/docker/docker/hack/make/validate-gofmt

View File

@ -9,10 +9,11 @@ import (
"strings"
"syscall"
log "github.com/Sirupsen/logrus"
"github.com/Sirupsen/logrus"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/netlink"
"github.com/docker/libcontainer/seccomp"
"github.com/docker/libcontainer/system"
"github.com/docker/libcontainer/user"
"github.com/docker/libcontainer/utils"
@ -176,10 +177,20 @@ func setupUser(config *initConfig) error {
if err != nil {
return err
}
suppGroups := append(execUser.Sgids, config.Config.AdditionalGroups...)
var addGroups []int
if len(config.Config.AdditionalGroups) > 0 {
addGroups, err = user.GetAdditionalGroupsPath(config.Config.AdditionalGroups, groupPath)
if err != nil {
return err
}
}
suppGroups := append(execUser.Sgids, addGroups...)
if err := syscall.Setgroups(suppGroups); err != nil {
return err
}
if err := system.Setgid(execUser.Gid); err != nil {
return err
}
@ -234,7 +245,7 @@ func setupRlimits(config *configs.Config) error {
func killCgroupProcesses(m cgroups.Manager) error {
var procs []*os.Process
if err := m.Freeze(configs.Frozen); err != nil {
log.Warn(err)
logrus.Warn(err)
}
pids, err := m.GetPids()
if err != nil {
@ -245,17 +256,75 @@ func killCgroupProcesses(m cgroups.Manager) error {
if p, err := os.FindProcess(pid); err == nil {
procs = append(procs, p)
if err := p.Kill(); err != nil {
log.Warn(err)
logrus.Warn(err)
}
}
}
if err := m.Freeze(configs.Thawed); err != nil {
log.Warn(err)
logrus.Warn(err)
}
for _, p := range procs {
if _, err := p.Wait(); err != nil {
log.Warn(err)
logrus.Warn(err)
}
}
return nil
}
func finalizeSeccomp(config *initConfig) error {
if config.Config.Seccomp == nil {
return nil
}
context := seccomp.New()
for _, s := range config.Config.Seccomp.Syscalls {
ss := &seccomp.Syscall{
Value: uint32(s.Value),
Action: seccompAction(s.Action),
}
if len(s.Args) > 0 {
ss.Args = seccompArgs(s.Args)
}
context.Add(ss)
}
return context.Load()
}
func seccompAction(a configs.Action) seccomp.Action {
switch a {
case configs.Kill:
return seccomp.Kill
case configs.Trap:
return seccomp.Trap
case configs.Allow:
return seccomp.Allow
}
return seccomp.Error(syscall.Errno(int(a)))
}
func seccompArgs(args []*configs.Arg) seccomp.Args {
var sa []seccomp.Arg
for _, a := range args {
sa = append(sa, seccomp.Arg{
Index: uint32(a.Index),
Op: seccompOperator(a.Op),
Value: uint(a.Value),
})
}
return seccomp.Args{sa}
}
func seccompOperator(o configs.Operator) seccomp.Operator {
switch o {
case configs.EqualTo:
return seccomp.EqualTo
case configs.NotEqualTo:
return seccomp.NotEqualTo
case configs.GreatherThan:
return seccomp.GreatherThan
case configs.LessThan:
return seccomp.LessThan
case configs.MaskEqualTo:
return seccomp.MaskEqualTo
}
return 0
}

View File

@ -105,14 +105,14 @@ func Relabel(path string, fileLabel string, relabel string) error {
if fileLabel == "" {
return nil
}
if !strings.ContainsAny(relabel, "zZ") {
return nil
}
for _, p := range exclude_path {
if path == p {
return fmt.Errorf("Relabeling of %s is not allowed", path)
}
}
if !strings.ContainsAny(relabel, "zZ") {
return nil
}
if strings.Contains(relabel, "z") && strings.Contains(relabel, "Z") {
return fmt.Errorf("Bad SELinux option z and Z can not be used together")
}

View File

@ -9,6 +9,7 @@ import (
"os"
"sync/atomic"
"syscall"
"time"
"unsafe"
)
@ -26,6 +27,7 @@ const (
SIOC_BRADDBR = 0x89a0
SIOC_BRDELBR = 0x89a1
SIOC_BRADDIF = 0x89a2
SIOC_BRDELIF = 0x89a3
)
const (
@ -54,6 +56,8 @@ type ifreqFlags struct {
var native binary.ByteOrder
var rnd = rand.New(rand.NewSource(time.Now().UnixNano()))
func init() {
var x uint32 = 0x01020304
if *(*byte)(unsafe.Pointer(&x)) == 0x01 {
@ -1188,9 +1192,7 @@ func DeleteBridge(name string) error {
return nil
}
// Add a slave to abridge device. This is more backward-compatible than
// netlink.NetworkSetMaster and works on RHEL 6.
func AddToBridge(iface, master *net.Interface) error {
func ifIoctBridge(iface, master *net.Interface, op uintptr) error {
if len(master.Name) >= IFNAMSIZ {
return fmt.Errorf("Interface name %s too long", master.Name)
}
@ -1205,17 +1207,29 @@ func AddToBridge(iface, master *net.Interface) error {
copy(ifr.IfrnName[:len(ifr.IfrnName)-1], master.Name)
ifr.IfruIndex = int32(iface.Index)
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), SIOC_BRADDIF, uintptr(unsafe.Pointer(&ifr))); err != 0 {
if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, uintptr(s), op, uintptr(unsafe.Pointer(&ifr))); err != 0 {
return err
}
return nil
}
// Add a slave to a bridge device. This is more backward-compatible than
// netlink.NetworkSetMaster and works on RHEL 6.
func AddToBridge(iface, master *net.Interface) error {
return ifIoctBridge(iface, master, SIOC_BRADDIF)
}
// Detach a slave from a bridge device. This is more backward-compatible than
// netlink.NetworkSetMaster and works on RHEL 6.
func DelFromBridge(iface, master *net.Interface) error {
return ifIoctBridge(iface, master, SIOC_BRDELIF)
}
func randMacAddr() string {
hw := make(net.HardwareAddr, 6)
for i := 0; i < 6; i++ {
hw[i] = byte(rand.Intn(255))
hw[i] = byte(rnd.Intn(255))
}
hw[0] &^= 0x1 // clear multicast bit
hw[0] |= 0x2 // set local assignment bit (IEEE802)

View File

@ -1,3 +1,5 @@
// +build arm ppc64 ppc64le
package netlink
func ifrDataByte(b byte) uint8 {

View File

@ -1,4 +1,4 @@
// +build !arm
// +build !arm,!ppc64,!ppc64le
package netlink

View File

@ -10,6 +10,7 @@ import (
"strconv"
"strings"
"github.com/docker/libcontainer/configs"
"github.com/docker/libcontainer/netlink"
"github.com/docker/libcontainer/utils"
)
@ -24,6 +25,8 @@ var strategies = map[string]networkStrategy{
type networkStrategy interface {
create(*network, int) error
initialize(*network) error
detach(*configs.Network) error
attach(*configs.Network) error
}
// getStrategy returns the specific network strategy for the
@ -97,32 +100,39 @@ func (l *loopback) initialize(config *network) error {
return netlink.NetworkLinkUp(iface)
}
func (l *loopback) attach(n *configs.Network) (err error) {
return nil
}
func (l *loopback) detach(n *configs.Network) (err error) {
return nil
}
// veth is a network strategy that uses a bridge and creates
// a veth pair, one that is attached to the bridge on the host and the other
// is placed inside the container's namespace
type veth struct {
}
func (v *veth) create(n *network, nspid int) (err error) {
tmpName, err := v.generateTempPeerName()
if err != nil {
return err
}
n.TempVethPeerName = tmpName
defer func() {
if err != nil {
netlink.NetworkLinkDel(n.HostInterfaceName)
netlink.NetworkLinkDel(n.TempVethPeerName)
}
}()
if n.Bridge == "" {
return fmt.Errorf("bridge is not specified")
}
func (v *veth) detach(n *configs.Network) (err error) {
bridge, err := net.InterfaceByName(n.Bridge)
if err != nil {
return err
}
if err := netlink.NetworkCreateVethPair(n.HostInterfaceName, n.TempVethPeerName, n.TxQueueLen); err != nil {
host, err := net.InterfaceByName(n.HostInterfaceName)
if err != nil {
return err
}
if err := netlink.DelFromBridge(host, bridge); err != nil {
return err
}
return nil
}
// attach a container network interface to an external network
func (v *veth) attach(n *configs.Network) (err error) {
bridge, err := net.InterfaceByName(n.Bridge)
if err != nil {
return err
}
host, err := net.InterfaceByName(n.HostInterfaceName)
@ -143,6 +153,31 @@ func (v *veth) create(n *network, nspid int) (err error) {
if err := netlink.NetworkLinkUp(host); err != nil {
return err
}
return nil
}
func (v *veth) create(n *network, nspid int) (err error) {
tmpName, err := v.generateTempPeerName()
if err != nil {
return err
}
n.TempVethPeerName = tmpName
defer func() {
if err != nil {
netlink.NetworkLinkDel(n.HostInterfaceName)
netlink.NetworkLinkDel(n.TempVethPeerName)
}
}()
if n.Bridge == "" {
return fmt.Errorf("bridge is not specified")
}
if err := netlink.NetworkCreateVethPair(n.HostInterfaceName, n.TempVethPeerName, n.TxQueueLen); err != nil {
return err
}
if err := v.attach(&n.Network); err != nil {
return err
}
child, err := net.InterfaceByName(n.TempVethPeerName)
if err != nil {
return err

View File

@ -1,3 +1,5 @@
// +build !linux !cgo
package nsenter
import "C"

View File

@ -148,15 +148,15 @@ void nsexec()
pr_perror("ioctl TIOCSCTTY failed");
exit(1);
}
if (dup2(consolefd, STDIN_FILENO) != STDIN_FILENO) {
if (dup3(consolefd, STDIN_FILENO, 0) != STDIN_FILENO) {
pr_perror("Failed to dup 0");
exit(1);
}
if (dup2(consolefd, STDOUT_FILENO) != STDOUT_FILENO) {
if (dup3(consolefd, STDOUT_FILENO, 0) != STDOUT_FILENO) {
pr_perror("Failed to dup 1");
exit(1);
}
if (dup2(consolefd, STDERR_FILENO) != STDERR_FILENO) {
if (dup3(consolefd, STDERR_FILENO, 0) != STDERR_FILENO) {
pr_perror("Failed to dup 2");
exit(1);
}

View File

@ -1,7 +1,7 @@
## nsinit
`nsinit` is a cli application which demonstrates the use of libcontainer.
It is able to spawn new containers or join existing containers.
`nsinit` is a cli application which demonstrates the use of libcontainer.
It is able to spawn new containers or join existing containers.
### How to build?
@ -58,7 +58,7 @@ If you wish to spawn another process inside the container while your current
bash session is running, run the same command again to get another bash shell
(or change the command). If the original process (PID 1) dies, all other
processes spawned inside the container will be killed and the namespace will
be removed.
be removed.
You can identify if a process is running in a container by looking to see if
`state.json` is in the root of the directory.
@ -68,45 +68,61 @@ file is read and where the `state.json` file will be saved.
### How to use?
Currently nsinit has 9 commands. Type `nsinit -h` to list all of them.
And for every alternative command, you can also use `--help` to get more
Currently nsinit has 9 commands. Type `nsinit -h` to list all of them.
And for every alternative command, you can also use `--help` to get more
detailed help documents. For example, `nsinit config --help`.
`nsinit` cli application is implemented using [cli.go](https://github.com/codegangsta/cli).
Lots of details are handled in cli.go, so the implementation of `nsinit` itself
`nsinit` cli application is implemented using [cli.go](https://github.com/codegangsta/cli).
Lots of details are handled in cli.go, so the implementation of `nsinit` itself
is very clean and clear.
* **config**
It will generate a standard configuration file for a container. By default, it
will generate as the template file in [config.go](https://github.com/docker/libcontainer/blob/master/nsinit/config.go#L192).
It will generate a standard configuration file for a container. By default, it
will generate as the template file in [config.go](https://github.com/docker/libcontainer/blob/f28dff5539855bac2adbc5699f57f84349605b5f/nsinit/config.go#L234).
It will modify the template if you have specified some configuration by options.
* **exec**
Starts a container and execute a new command inside it. Besides common options, it
has some special options as below.
- `--tty,-t`: allocate a TTY to the container.
- `--config`: you can specify a configuration file. By default, it will use
- `--config`: you can specify a configuration file. By default, it will use
template configuration.
- `--id`: specify the ID for a container. By default, the id is "nsinit".
- `--user,-u`: set the user, uid, and/or gid for the process. By default the
- `--user,-u`: set the user, uid, and/or gid for the process. By default the
value is "root".
- `--cwd`: set the current working dir.
- `--env`: set environment variables for the process.
* **init**
It's an internal command that is called inside the container's namespaces to
initialize the namespace and exec the user's process. It should not be called
It's an internal command that is called inside the container's namespaces to
initialize the namespace and exec the user's process. It should not be called
externally.
* **oom**
Display oom notifications for a container, you should specify container id.
* **pause**
Pause the container's processes, you should specify container id. It will use
Pause the container's processes, you should specify container id. It will use
cgroup freeze subsystem to help.
* **unpause**
Unpause the container's processes. Same with `pause`.
* **stats**
Display statistics for the container, it will mainly show cgroup and network
Display statistics for the container, it will mainly show cgroup and network
statistics.
* **state**
Get the container's current state. You can also read the state from `state.json`
in your container_id folder.
in your container_id folder.
* **checkpoint**
Checkpoint a running container. You can read [this](http://criu.org/Advanced_usage)
for more detailed information about options.
- `--id` specify the ID for a container. By default, the id is "nsinit".
- `--image-path` path for saving criu image files. You must specify this option.
- `--work-path`: path for saving work files and logs. By default it will
generate a folder named "criu.work" in root directory.
- `--leave-running`: leave the process running after checkpointing.
- `--tcp-established`: allow open tcp connections.
- `--ext-unix-sk`: allow external unix sockets.
- `--shell-job`: allow shell jobs.
- `--page-server`: ADDRESS:PORT of the page server. The dump image can be
sent to a criu page server if we have a page server.
* **restore**
Restore a container from a previous checkpoint. Options are almost the same
with checkpoint and `--image-path` must be specified.
* **help, h**
Shows a list of commands or help for one command.

View File

@ -0,0 +1,67 @@
package main
import (
"fmt"
"github.com/codegangsta/cli"
"github.com/docker/libcontainer"
"strconv"
"strings"
)
var checkpointCommand = cli.Command{
Name: "checkpoint",
Usage: "checkpoint a running container",
Flags: []cli.Flag{
cli.StringFlag{Name: "id", Value: "nsinit", Usage: "specify the ID for a container"},
cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"},
cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"},
cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"},
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
},
Action: func(context *cli.Context) {
imagePath := context.String("image-path")
if imagePath == "" {
fatal(fmt.Errorf("The --image-path option isn't specified"))
}
container, err := getContainer(context)
if err != nil {
fatal(err)
}
// these are the mandatory criu options for a container
criuOpts := &libcontainer.CriuOpts{
ImagesDirectory: imagePath,
WorkDirectory: context.String("work-path"),
LeaveRunning: context.Bool("leave-running"),
TcpEstablished: context.Bool("tcp-established"),
ExternalUnixConnections: context.Bool("ext-unix-sk"),
ShellJob: context.Bool("shell-job"),
}
// xxx following criu opts are optional
// The dump image can be sent to a criu page server
if psOpt := context.String("page-server"); psOpt != "" {
addressPort := strings.Split(psOpt, ":")
if len(addressPort) != 2 {
fatal(fmt.Errorf("Use --page-server ADDRESS:PORT to specify page server"))
}
port_int, err := strconv.Atoi(addressPort[1])
if err != nil {
fatal(fmt.Errorf("Invalid port number"))
}
criuOpts.PageServer = libcontainer.CriuPageServerInfo{
Address: addressPort[0],
Port: int32(port_int),
}
}
if err := container.Checkpoint(criuOpts); err != nil {
fatal(err)
}
},
}

View File

@ -19,31 +19,34 @@ import (
const defaultMountFlags = syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_NODEV
var createFlags = []cli.Flag{
cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process in case the parent dies"},
cli.BoolFlag{Name: "cgroup", Usage: "mount the cgroup data for the container"},
cli.BoolFlag{Name: "read-only", Usage: "set the container's rootfs as read-only"},
cli.StringSliceFlag{Name: "bind", Value: &cli.StringSlice{}, Usage: "add bind mounts to the container"},
cli.StringSliceFlag{Name: "tmpfs", Value: &cli.StringSlice{}, Usage: "add tmpfs mounts to the container"},
cli.IntFlag{Name: "cpushares", Usage: "set the cpushares for the container"},
cli.IntFlag{Name: "memory-limit", Usage: "set the memory limit for the container"},
cli.IntFlag{Name: "memory-swap", Usage: "set the memory swap limit for the container"},
cli.IntFlag{Name: "parent-death-signal", Usage: "set the signal that will be delivered to the process in case the parent dies"},
cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"},
cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"},
cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"},
cli.StringFlag{Name: "cpuset-cpus", Usage: "set the cpuset cpus"},
cli.StringFlag{Name: "cpuset-mems", Usage: "set the cpuset mems"},
cli.StringFlag{Name: "apparmor-profile", Usage: "set the apparmor profile"},
cli.StringFlag{Name: "process-label", Usage: "set the process label"},
cli.StringFlag{Name: "mount-label", Usage: "set the mount label"},
cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"},
cli.IntFlag{Name: "userns-root-uid", Usage: "set the user namespace root uid"},
cli.StringFlag{Name: "hostname", Value: "nsinit", Usage: "hostname value for the container"},
cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"},
cli.StringFlag{Name: "ipc", Value: "", Usage: "ipc namespace"},
cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"},
cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"},
cli.StringFlag{Name: "mnt", Value: "", Usage: "mount namespace"},
cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"},
cli.StringFlag{Name: "mount-label", Usage: "set the mount label"},
cli.StringFlag{Name: "net", Value: "", Usage: "network namespace"},
cli.StringFlag{Name: "pid", Value: "", Usage: "pid namespace"},
cli.StringFlag{Name: "process-label", Usage: "set the process label"},
cli.StringFlag{Name: "rootfs", Usage: "set the rootfs"},
cli.StringFlag{Name: "security", Value: "", Usage: "set the security profile (high, medium, low)"},
cli.StringFlag{Name: "uts", Value: "", Usage: "uts namespace"},
cli.StringFlag{Name: "veth-address", Usage: "veth ip address"},
cli.StringFlag{Name: "veth-bridge", Usage: "veth bridge"},
cli.StringFlag{Name: "veth-gateway", Usage: "veth gateway address"},
cli.IntFlag{Name: "veth-mtu", Usage: "veth mtu"},
cli.BoolFlag{Name: "cgroup", Usage: "mount the cgroup data for the container"},
cli.StringSliceFlag{Name: "bind", Value: &cli.StringSlice{}, Usage: "add bind mounts to the container"},
cli.StringSliceFlag{Name: "sysctl", Value: &cli.StringSlice{}, Usage: "set system properties in the container"},
cli.StringSliceFlag{Name: "tmpfs", Value: &cli.StringSlice{}, Usage: "add tmpfs mounts to the container"},
cli.StringSliceFlag{Name: "groups", Value: &cli.StringSlice{}, Usage: "add additional groups"},
}
var configCommand = cli.Command{
@ -111,6 +114,20 @@ func modify(config *configs.Config, context *cli.Context) {
node.Gid = uint32(userns_uid)
}
}
config.SystemProperties = make(map[string]string)
for _, sysProp := range context.StringSlice("sysctl") {
parts := strings.SplitN(sysProp, "=", 2)
if len(parts) != 2 {
logrus.Fatalf("invalid system property %s", sysProp)
}
config.SystemProperties[parts[0]] = parts[1]
}
for _, group := range context.StringSlice("groups") {
config.AdditionalGroups = append(config.AdditionalGroups, group)
}
for _, rawBind := range context.StringSlice("bind") {
mount := &configs.Mount{
Device: "bind",
@ -194,6 +211,24 @@ func modify(config *configs.Config, context *cli.Context) {
Device: "cgroup",
})
}
modifySecurityProfile(context, config)
}
func modifySecurityProfile(context *cli.Context, config *configs.Config) {
profileName := context.String("security")
if profileName == "" {
return
}
profile := profiles[profileName]
if profile == nil {
logrus.Fatalf("invalid profile name %q", profileName)
}
config.Rlimits = profile.Rlimits
config.Capabilities = profile.Capabilities
config.Seccomp = profile.Seccomp
config.AppArmorProfile = profile.ApparmorProfile
config.MountLabel = profile.MountLabel
config.ProcessLabel = profile.ProcessLabel
}
func getTemplate() *configs.Config {
@ -281,13 +316,5 @@ func getTemplate() *configs.Config {
Flags: defaultMountFlags | syscall.MS_RDONLY,
},
},
Rlimits: []configs.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Hard: 1024,
Soft: 1024,
},
},
}
}

View File

@ -93,10 +93,17 @@ func execAction(context *cli.Context) {
}
}
if created {
if err := container.Destroy(); err != nil {
status, err := container.Status()
if err != nil {
tty.Close()
fatal(err)
}
if status != libcontainer.Checkpointed {
if err := container.Destroy(); err != nil {
tty.Close()
fatal(err)
}
}
}
tty.Close()
os.Exit(utils.ExitStatus(status.Sys().(syscall.WaitStatus)))

View File

@ -3,7 +3,7 @@ package main
import (
"runtime"
log "github.com/Sirupsen/logrus"
"github.com/Sirupsen/logrus"
"github.com/codegangsta/cli"
"github.com/docker/libcontainer"
_ "github.com/docker/libcontainer/nsenter"
@ -13,7 +13,7 @@ var initCommand = cli.Command{
Name: "init",
Usage: "runs the init process inside the namespace",
Action: func(context *cli.Context) {
log.SetLevel(log.DebugLevel)
logrus.SetLevel(logrus.DebugLevel)
runtime.GOMAXPROCS(1)
runtime.LockOSThread()
factory, err := libcontainer.New("")

Some files were not shown because too many files have changed in this diff Show More