From 13d955d6a9faa2f70387354ff17df3d614a6c37b Mon Sep 17 00:00:00 2001 From: vikaschoudhary16 Date: Fri, 12 Jan 2018 13:55:09 -0500 Subject: [PATCH] Bump runc to d5b4a3eddbe4c890843da971b64f45a0f023f4db Signed-off-by: vikaschoudhary16 --- Godeps/Godeps.json | 90 +- .../github.com/containerd/console/.travis.yml | 17 + vendor/github.com/containerd/console/LICENSE | 201 ++ .../github.com/containerd/console/README.md | 17 + .../github.com/containerd/console/console.go | 62 + .../containerd/console/console_linux.go | 255 +++ .../containerd/console/console_unix.go | 142 ++ .../containerd/console/console_windows.go | 200 ++ .../containerd/console/tc_darwin.go | 37 + .../containerd/console/tc_freebsd.go | 29 + .../github.com/containerd/console/tc_linux.go | 37 + .../containerd/console/tc_solaris_cgo.go | 35 + .../containerd/console/tc_solaris_nocgo.go | 31 + .../github.com/containerd/console/tc_unix.go | 75 + .../cyphar/filepath-securejoin/.travis.yml | 19 + .../cyphar/filepath-securejoin/LICENSE | 28 + .../cyphar/filepath-securejoin/README.md | 65 + .../cyphar/filepath-securejoin/VERSION | 1 + .../cyphar/filepath-securejoin/join.go | 135 ++ .../cyphar/filepath-securejoin/vendor.conf | 1 + .../cyphar/filepath-securejoin/vfs.go | 41 + vendor/github.com/influxdata/influxdb/LICENSE | 20 + .../influxdb/LICENSE_OF_DEPENDENCIES.md | 23 + .../influxdata/influxdb/models/consistency.go | 46 + .../influxdata/influxdb/models/inline_fnv.go | 27 + .../influxdb/models/inline_strconv_parse.go | 38 + .../influxdata/influxdb/models/points.go | 1916 +++++++++++++++++ .../influxdata/influxdb/models/rows.go | 58 + .../influxdata/influxdb/models/statistic.go | 40 + .../influxdata/influxdb/models/time.go | 74 + .../influxdata/influxdb/pkg/escape/bytes.go | 95 + .../influxdata/influxdb/pkg/escape/strings.go | 38 + .../opencontainers/runc/libcontainer/SPEC.md | 86 +- .../runc/libcontainer/apparmor/apparmor.go | 37 +- .../runc/libcontainer/cgroups/fs/apply_raw.go | 13 + .../runc/libcontainer/cgroups/fs/freezer.go | 13 +- .../libcontainer/cgroups/rootless/rootless.go | 128 -- .../cgroups/systemd/apply_nosystemd.go | 4 +- .../cgroups/systemd/apply_systemd.go | 14 +- .../runc/libcontainer/compat_1.5_linux.go | 10 - .../configs/cgroup_unsupported.go | 6 - .../runc/libcontainer/configs/config.go | 4 + .../libcontainer/configs/device_defaults.go | 2 +- .../runc/libcontainer/configs/intelrdt.go | 7 + .../libcontainer/configs/validate/rootless.go | 68 +- .../configs/validate/validator.go | 17 + .../runc/libcontainer/console.go | 17 - .../runc/libcontainer/console_freebsd.go | 13 - .../runc/libcontainer/console_linux.go | 129 +- .../runc/libcontainer/console_solaris.go | 11 - .../runc/libcontainer/console_windows.go | 30 - .../runc/libcontainer/container_linux.go | 149 +- .../runc/libcontainer/container_solaris.go | 20 - .../runc/libcontainer/container_windows.go | 20 - .../runc/libcontainer/criu_opts_linux.go | 4 +- .../runc/libcontainer/criu_opts_windows.go | 6 - .../runc/libcontainer/factory_linux.go | 77 +- .../runc/libcontainer/init_linux.go | 103 +- .../runc/libcontainer/intelrdt/intelrdt.go | 553 +++++ .../runc/libcontainer/intelrdt/stats.go | 24 + .../runc/libcontainer/keys/keyctl.go | 2 +- .../runc/libcontainer/message_linux.go | 2 + .../runc/libcontainer/mount/mount.go | 23 + .../runc/libcontainer/mount/mount_linux.go | 82 + .../runc/libcontainer/mount/mountinfo.go | 40 + .../runc/libcontainer/process.go | 4 + .../runc/libcontainer/process_linux.go | 80 +- .../runc/libcontainer/rootfs_linux.go | 38 +- 
.../libcontainer/seccomp/seccomp_linux.go | 47 +- .../runc/libcontainer/setgroups_linux.go | 11 - .../runc/libcontainer/setns_init_linux.go | 13 +- .../runc/libcontainer/standard_init_linux.go | 42 +- .../runc/libcontainer/state_linux.go | 5 + .../runc/libcontainer/stats_freebsd.go | 5 - .../runc/libcontainer/stats_linux.go | 6 +- .../runc/libcontainer/stats_solaris.go | 7 - .../runc/libcontainer/stats_windows.go | 5 - .../runc/libcontainer/system/linux.go | 11 + ...scall_linux_arm.go => syscall_linux_32.go} | 3 +- .../libcontainer/system/syscall_linux_386.go | 25 - .../libcontainer/system/syscall_linux_64.go | 3 +- .../runc/libcontainer/system/sysconfig.go | 2 +- .../libcontainer/user/lookup_unsupported.go | 38 - .../runc/libcontainer/utils/cmsg.go | 10 +- 84 files changed, 5257 insertions(+), 705 deletions(-) create mode 100644 vendor/github.com/containerd/console/.travis.yml create mode 100644 vendor/github.com/containerd/console/LICENSE create mode 100644 vendor/github.com/containerd/console/README.md create mode 100644 vendor/github.com/containerd/console/console.go create mode 100644 vendor/github.com/containerd/console/console_linux.go create mode 100644 vendor/github.com/containerd/console/console_unix.go create mode 100644 vendor/github.com/containerd/console/console_windows.go create mode 100644 vendor/github.com/containerd/console/tc_darwin.go create mode 100644 vendor/github.com/containerd/console/tc_freebsd.go create mode 100644 vendor/github.com/containerd/console/tc_linux.go create mode 100644 vendor/github.com/containerd/console/tc_solaris_cgo.go create mode 100644 vendor/github.com/containerd/console/tc_solaris_nocgo.go create mode 100644 vendor/github.com/containerd/console/tc_unix.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/.travis.yml create mode 100644 vendor/github.com/cyphar/filepath-securejoin/LICENSE create mode 100644 vendor/github.com/cyphar/filepath-securejoin/README.md create mode 100644 vendor/github.com/cyphar/filepath-securejoin/VERSION create mode 100644 vendor/github.com/cyphar/filepath-securejoin/join.go create mode 100644 vendor/github.com/cyphar/filepath-securejoin/vendor.conf create mode 100644 vendor/github.com/cyphar/filepath-securejoin/vfs.go create mode 100644 vendor/github.com/influxdata/influxdb/LICENSE create mode 100644 vendor/github.com/influxdata/influxdb/LICENSE_OF_DEPENDENCIES.md create mode 100644 vendor/github.com/influxdata/influxdb/models/consistency.go create mode 100644 vendor/github.com/influxdata/influxdb/models/inline_fnv.go create mode 100644 vendor/github.com/influxdata/influxdb/models/inline_strconv_parse.go create mode 100644 vendor/github.com/influxdata/influxdb/models/points.go create mode 100644 vendor/github.com/influxdata/influxdb/models/rows.go create mode 100644 vendor/github.com/influxdata/influxdb/models/statistic.go create mode 100644 vendor/github.com/influxdata/influxdb/models/time.go create mode 100644 vendor/github.com/influxdata/influxdb/pkg/escape/bytes.go create mode 100644 vendor/github.com/influxdata/influxdb/pkg/escape/strings.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/console.go 
delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/console_freebsd.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/console_solaris.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/console_windows.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/container_solaris.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/container_windows.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/criu_opts_windows.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/mount/mount.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/mount/mount_linux.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/mount/mountinfo.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/stats_freebsd.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/stats_solaris.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/stats_windows.go rename vendor/github.com/opencontainers/runc/libcontainer/system/{syscall_linux_arm.go => syscall_linux_32.go} (93%) delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unsupported.go diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index dd0c9058..8c0a00a2 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -153,6 +153,10 @@ "Comment": "v3.1.0", "Rev": "aea32c919a18e5ef4537bbd283ff29594b1b0165" }, + { + "ImportPath": "github.com/containerd/console", + "Rev": "84eeaae905fa414d03e07bcd6c8d3f19e7cf180e" + }, { "ImportPath": "github.com/containerd/containerd/api/services/containers/v1", "Comment": "v1.0.0-beta.2-159-g27d450a0", @@ -222,6 +226,11 @@ "Comment": "v1.25.0", "Rev": "ec37f3cb649bfb72408906e7cbf330e4aeda1075" }, + { + "ImportPath": "github.com/cyphar/filepath-securejoin", + "Comment": "v0.2.1-1-gae69057", + "Rev": "ae69057f2299fb9e5ba2df738607e6a505b74ab6" + }, { "ImportPath": "github.com/davecgh/go-spew/spew", "Comment": "v1.1.0-9-gecdeabc", @@ -454,6 +463,16 @@ "ImportPath": "github.com/golang/snappy", "Rev": "723cc1e459b8eea2dea4583200fd60757d40097a" }, + { + "ImportPath": "github.com/influxdata/influxdb/models", + "Comment": "v1.1.1", + "Rev": "e47cf1f2e83a02443d7115c54f838be8ee959644" + }, + { + "ImportPath": "github.com/influxdata/influxdb/pkg/escape", + "Comment": "v1.1.1", + "Rev": "e47cf1f2e83a02443d7115c54f838be8ee959644" + }, { "ImportPath": "github.com/influxdb/influxdb/client", "Comment": "v0.9.5.1", @@ -521,78 +540,83 @@ }, { "ImportPath": "github.com/opencontainers/runc/libcontainer", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/apparmor", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/cgroups", - "Comment": 
"v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/cgroups/fs", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" - }, - { - "ImportPath": "github.com/opencontainers/runc/libcontainer/cgroups/rootless", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/cgroups/systemd", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/configs", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/configs/validate", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/criurpc", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" + }, + { + "ImportPath": "github.com/opencontainers/runc/libcontainer/intelrdt", + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/keys", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" + }, + { + "ImportPath": "github.com/opencontainers/runc/libcontainer/mount", + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/seccomp", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/stacktrace", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/system", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/user", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { "ImportPath": "github.com/opencontainers/runc/libcontainer/utils", - "Comment": "v1.0.0-rc4-50-g4d6e6720", - "Rev": "4d6e6720a7c885c37b4cb083c0d372dda3425120" + "Comment": "v1.0.0-rc4-197-gd5b4a3e", + "Rev": "d5b4a3eddbe4c890843da971b64f45a0f023f4db" }, { 
"ImportPath": "github.com/opencontainers/runtime-spec/specs-go", diff --git a/vendor/github.com/containerd/console/.travis.yml b/vendor/github.com/containerd/console/.travis.yml new file mode 100644 index 00000000..ba93012c --- /dev/null +++ b/vendor/github.com/containerd/console/.travis.yml @@ -0,0 +1,17 @@ +language: go +go: + - 1.9.x + - tip + +go_import_path: github.com/containerd/console + +install: + - go get -d + - GOOS=windows go get -d + - GOOS=solaris go get -d + +script: + - go test -race + - GOOS=windows go test + - GOOS=solaris go build + - GOOS=solaris go test -c diff --git a/vendor/github.com/containerd/console/LICENSE b/vendor/github.com/containerd/console/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/vendor/github.com/containerd/console/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/containerd/console/README.md b/vendor/github.com/containerd/console/README.md new file mode 100644 index 00000000..4c56d9d1 --- /dev/null +++ b/vendor/github.com/containerd/console/README.md @@ -0,0 +1,17 @@ +# console + +[![Build Status](https://travis-ci.org/containerd/console.svg?branch=master)](https://travis-ci.org/containerd/console) + +Golang package for dealing with consoles. Light on deps and a simple API. + +## Modifying the current process + +```go +current := console.Current() +defer current.Reset() + +if err := current.SetRaw(); err != nil { +} +ws, err := current.Size() +current.Resize(ws) +``` diff --git a/vendor/github.com/containerd/console/console.go b/vendor/github.com/containerd/console/console.go new file mode 100644 index 00000000..bf2798fd --- /dev/null +++ b/vendor/github.com/containerd/console/console.go @@ -0,0 +1,62 @@ +package console + +import ( + "errors" + "io" + "os" +) + +var ErrNotAConsole = errors.New("provided file is not a console") + +type Console interface { + io.Reader + io.Writer + io.Closer + + // Resize resizes the console to the provided window size + Resize(WinSize) error + // ResizeFrom resizes the calling console to the size of the + // provided console + ResizeFrom(Console) error + // SetRaw sets the console in raw mode + SetRaw() error + // DisableEcho disables echo on the console + DisableEcho() error + // Reset restores the console to its orignal state + Reset() error + // Size returns the window size of the console + Size() (WinSize, error) + // Fd returns the console's file descriptor + Fd() uintptr + // Name returns the console's file name + Name() string +} + +// WinSize specifies the window size of the console +type WinSize struct { + // Height of the console + Height uint16 + // Width of the console + Width uint16 + x uint16 + y uint16 +} + +// Current returns the current processes console +func Current() Console { + c, err := ConsoleFromFile(os.Stdin) + if err != nil { + // stdin should always be a console for the design + // of this function + panic(err) + } + return c +} + +// ConsoleFromFile returns a console using the provided file +func ConsoleFromFile(f *os.File) (Console, error) { + if err := checkConsole(f); err != nil { + return nil, err + } + return newMaster(f) +} diff --git a/vendor/github.com/containerd/console/console_linux.go 
b/vendor/github.com/containerd/console/console_linux.go new file mode 100644 index 00000000..c9637292 --- /dev/null +++ b/vendor/github.com/containerd/console/console_linux.go @@ -0,0 +1,255 @@ +// +build linux + +package console + +import ( + "io" + "os" + "sync" + + "golang.org/x/sys/unix" +) + +const ( + maxEvents = 128 +) + +// Epoller manages multiple epoll consoles using edge-triggered epoll api so we +// dont have to deal with repeated wake-up of EPOLLER or EPOLLHUP. +// For more details, see: +// - https://github.com/systemd/systemd/pull/4262 +// - https://github.com/moby/moby/issues/27202 +// +// Example usage of Epoller and EpollConsole can be as follow: +// +// epoller, _ := NewEpoller() +// epollConsole, _ := epoller.Add(console) +// go epoller.Wait() +// var ( +// b bytes.Buffer +// wg sync.WaitGroup +// ) +// wg.Add(1) +// go func() { +// io.Copy(&b, epollConsole) +// wg.Done() +// }() +// // perform I/O on the console +// epollConsole.Shutdown(epoller.CloseConsole) +// wg.Wait() +// epollConsole.Close() +type Epoller struct { + efd int + mu sync.Mutex + fdMapping map[int]*EpollConsole +} + +// NewEpoller returns an instance of epoller with a valid epoll fd. +func NewEpoller() (*Epoller, error) { + efd, err := unix.EpollCreate1(unix.EPOLL_CLOEXEC) + if err != nil { + return nil, err + } + return &Epoller{ + efd: efd, + fdMapping: make(map[int]*EpollConsole), + }, nil +} + +// Add creates a epoll console based on the provided console. The console will +// be registered with EPOLLET (i.e. using edge-triggered notification) and its +// file descriptor will be set to non-blocking mode. After this, user should use +// the return console to perform I/O. +func (e *Epoller) Add(console Console) (*EpollConsole, error) { + sysfd := int(console.Fd()) + // Set sysfd to non-blocking mode + if err := unix.SetNonblock(sysfd, true); err != nil { + return nil, err + } + + ev := unix.EpollEvent{ + Events: unix.EPOLLIN | unix.EPOLLOUT | unix.EPOLLRDHUP | unix.EPOLLET, + Fd: int32(sysfd), + } + if err := unix.EpollCtl(e.efd, unix.EPOLL_CTL_ADD, sysfd, &ev); err != nil { + return nil, err + } + ef := &EpollConsole{ + Console: console, + sysfd: sysfd, + readc: sync.NewCond(&sync.Mutex{}), + writec: sync.NewCond(&sync.Mutex{}), + } + e.mu.Lock() + e.fdMapping[sysfd] = ef + e.mu.Unlock() + return ef, nil +} + +// Wait starts the loop to wait for its consoles' notifications and signal +// appropriate console that it can perform I/O. 
+func (e *Epoller) Wait() error { + events := make([]unix.EpollEvent, maxEvents) + for { + n, err := unix.EpollWait(e.efd, events, -1) + if err != nil { + // EINTR: The call was interrupted by a signal handler before either + // any of the requested events occurred or the timeout expired + if err == unix.EINTR { + continue + } + return err + } + for i := 0; i < n; i++ { + ev := &events[i] + // the console is ready to be read from + if ev.Events&(unix.EPOLLIN|unix.EPOLLHUP|unix.EPOLLERR) != 0 { + if epfile := e.getConsole(int(ev.Fd)); epfile != nil { + epfile.signalRead() + } + } + // the console is ready to be written to + if ev.Events&(unix.EPOLLOUT|unix.EPOLLHUP|unix.EPOLLERR) != 0 { + if epfile := e.getConsole(int(ev.Fd)); epfile != nil { + epfile.signalWrite() + } + } + } + } +} + +// Close unregister the console's file descriptor from epoll interface +func (e *Epoller) CloseConsole(fd int) error { + e.mu.Lock() + defer e.mu.Unlock() + delete(e.fdMapping, fd) + return unix.EpollCtl(e.efd, unix.EPOLL_CTL_DEL, fd, &unix.EpollEvent{}) +} + +func (e *Epoller) getConsole(sysfd int) *EpollConsole { + e.mu.Lock() + f := e.fdMapping[sysfd] + e.mu.Unlock() + return f +} + +// Close the epoll fd +func (e *Epoller) Close() error { + return unix.Close(e.efd) +} + +// EpollConsole acts like a console but register its file descriptor with a +// epoll fd and uses epoll API to perform I/O. +type EpollConsole struct { + Console + readc *sync.Cond + writec *sync.Cond + sysfd int + closed bool +} + +// Read reads up to len(p) bytes into p. It returns the number of bytes read +// (0 <= n <= len(p)) and any error encountered. +// +// If the console's read returns EAGAIN or EIO, we assumes that its a +// temporary error because the other side went away and wait for the signal +// generated by epoll event to continue. +func (ec *EpollConsole) Read(p []byte) (n int, err error) { + var read int + ec.readc.L.Lock() + defer ec.readc.L.Unlock() + for { + read, err = ec.Console.Read(p[n:]) + n += read + if err != nil { + var hangup bool + if perr, ok := err.(*os.PathError); ok { + hangup = (perr.Err == unix.EAGAIN || perr.Err == unix.EIO) + } else { + hangup = (err == unix.EAGAIN || err == unix.EIO) + } + // if the other end disappear, assume this is temporary and wait for the + // signal to continue again. Unless we didnt read anything and the + // console is already marked as closed then we should exit + if hangup && !(n == 0 && len(p) > 0 && ec.closed) { + ec.readc.Wait() + continue + } + } + break + } + // if we didnt read anything then return io.EOF to end gracefully + if n == 0 && len(p) > 0 && err == nil { + err = io.EOF + } + // signal for others that we finished the read + ec.readc.Signal() + return n, err +} + +// Writes len(p) bytes from p to the console. It returns the number of bytes +// written from p (0 <= n <= len(p)) and any error encountered that caused +// the write to stop early. +// +// If writes to the console returns EAGAIN or EIO, we assumes that its a +// temporary error because the other side went away and wait for the signal +// generated by epoll event to continue. 
+func (ec *EpollConsole) Write(p []byte) (n int, err error) { + var written int + ec.writec.L.Lock() + defer ec.writec.L.Unlock() + for { + written, err = ec.Console.Write(p[n:]) + n += written + if err != nil { + var hangup bool + if perr, ok := err.(*os.PathError); ok { + hangup = (perr.Err == unix.EAGAIN || perr.Err == unix.EIO) + } else { + hangup = (err == unix.EAGAIN || err == unix.EIO) + } + // if the other end disappear, assume this is temporary and wait for the + // signal to continue again. + if hangup { + ec.writec.Wait() + continue + } + } + // unrecoverable error, break the loop and return the error + break + } + if n < len(p) && err == nil { + err = io.ErrShortWrite + } + // signal for others that we finished the write + ec.writec.Signal() + return n, err +} + +// Close closed the file descriptor and signal call waiters for this fd. +// It accepts a callback which will be called with the console's fd. The +// callback typically will be used to do further cleanup such as unregister the +// console's fd from the epoll interface. +// User should call Shutdown and wait for all I/O operation to be finished +// before closing the console. +func (ec *EpollConsole) Shutdown(close func(int) error) error { + ec.readc.L.Lock() + defer ec.readc.L.Unlock() + ec.writec.L.Lock() + defer ec.writec.L.Unlock() + + ec.readc.Broadcast() + ec.writec.Broadcast() + ec.closed = true + return close(ec.sysfd) +} + +// signalRead signals that the console is readable. +func (ec *EpollConsole) signalRead() { + ec.readc.Signal() +} + +// signalWrite signals that the console is writable. +func (ec *EpollConsole) signalWrite() { + ec.writec.Signal() +} diff --git a/vendor/github.com/containerd/console/console_unix.go b/vendor/github.com/containerd/console/console_unix.go new file mode 100644 index 00000000..118c8c3a --- /dev/null +++ b/vendor/github.com/containerd/console/console_unix.go @@ -0,0 +1,142 @@ +// +build darwin freebsd linux solaris + +package console + +import ( + "os" + + "golang.org/x/sys/unix" +) + +// NewPty creates a new pty pair +// The master is returned as the first console and a string +// with the path to the pty slave is returned as the second +func NewPty() (Console, string, error) { + f, err := os.OpenFile("/dev/ptmx", unix.O_RDWR|unix.O_NOCTTY|unix.O_CLOEXEC, 0) + if err != nil { + return nil, "", err + } + slave, err := ptsname(f) + if err != nil { + return nil, "", err + } + if err := unlockpt(f); err != nil { + return nil, "", err + } + m, err := newMaster(f) + if err != nil { + return nil, "", err + } + return m, slave, nil +} + +type master struct { + f *os.File + original *unix.Termios +} + +func (m *master) Read(b []byte) (int, error) { + return m.f.Read(b) +} + +func (m *master) Write(b []byte) (int, error) { + return m.f.Write(b) +} + +func (m *master) Close() error { + return m.f.Close() +} + +func (m *master) Resize(ws WinSize) error { + return tcswinsz(m.f.Fd(), ws) +} + +func (m *master) ResizeFrom(c Console) error { + ws, err := c.Size() + if err != nil { + return err + } + return m.Resize(ws) +} + +func (m *master) Reset() error { + if m.original == nil { + return nil + } + return tcset(m.f.Fd(), m.original) +} + +func (m *master) getCurrent() (unix.Termios, error) { + var termios unix.Termios + if err := tcget(m.f.Fd(), &termios); err != nil { + return unix.Termios{}, err + } + return termios, nil +} + +func (m *master) SetRaw() error { + rawState, err := m.getCurrent() + if err != nil { + return err + } + rawState = cfmakeraw(rawState) + rawState.Oflag = 
rawState.Oflag | unix.OPOST + return tcset(m.f.Fd(), &rawState) +} + +func (m *master) DisableEcho() error { + rawState, err := m.getCurrent() + if err != nil { + return err + } + rawState.Lflag = rawState.Lflag &^ unix.ECHO + return tcset(m.f.Fd(), &rawState) +} + +func (m *master) Size() (WinSize, error) { + return tcgwinsz(m.f.Fd()) +} + +func (m *master) Fd() uintptr { + return m.f.Fd() +} + +func (m *master) Name() string { + return m.f.Name() +} + +// checkConsole checks if the provided file is a console +func checkConsole(f *os.File) error { + var termios unix.Termios + if tcget(f.Fd(), &termios) != nil { + return ErrNotAConsole + } + return nil +} + +func newMaster(f *os.File) (Console, error) { + m := &master{ + f: f, + } + t, err := m.getCurrent() + if err != nil { + return nil, err + } + m.original = &t + return m, nil +} + +// ClearONLCR sets the necessary tty_ioctl(4)s to ensure that a pty pair +// created by us acts normally. In particular, a not-very-well-known default of +// Linux unix98 ptys is that they have +onlcr by default. While this isn't a +// problem for terminal emulators, because we relay data from the terminal we +// also relay that funky line discipline. +func ClearONLCR(fd uintptr) error { + return setONLCR(fd, false) +} + +// SetONLCR sets the necessary tty_ioctl(4)s to ensure that a pty pair +// created by us acts as intended for a terminal emulator. +func SetONLCR(fd uintptr) error { + return setONLCR(fd, true) +} diff --git a/vendor/github.com/containerd/console/console_windows.go b/vendor/github.com/containerd/console/console_windows.go new file mode 100644 index 00000000..d78a0b84 --- /dev/null +++ b/vendor/github.com/containerd/console/console_windows.go @@ -0,0 +1,200 @@ +package console + +import ( + "fmt" + "os" + + "github.com/pkg/errors" + "golang.org/x/sys/windows" +) + +var ( + vtInputSupported bool + ErrNotImplemented = errors.New("not implemented") +) + +func (m *master) initStdios() { + m.in = windows.Handle(os.Stdin.Fd()) + if err := windows.GetConsoleMode(m.in, &m.inMode); err == nil { + // Validate that windows.ENABLE_VIRTUAL_TERMINAL_INPUT is supported, but do not set it. + if err = windows.SetConsoleMode(m.in, m.inMode|windows.ENABLE_VIRTUAL_TERMINAL_INPUT); err == nil { + vtInputSupported = true + } + // Unconditionally set the console mode back even on failure because SetConsoleMode + // remembers invalid bits on input handles. 
+ windows.SetConsoleMode(m.in, m.inMode) + } else { + fmt.Printf("failed to get console mode for stdin: %v\n", err) + } + + m.out = windows.Handle(os.Stdout.Fd()) + if err := windows.GetConsoleMode(m.out, &m.outMode); err == nil { + if err := windows.SetConsoleMode(m.out, m.outMode|windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING); err == nil { + m.outMode |= windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING + } else { + windows.SetConsoleMode(m.out, m.outMode) + } + } else { + fmt.Printf("failed to get console mode for stdout: %v\n", err) + } + + m.err = windows.Handle(os.Stderr.Fd()) + if err := windows.GetConsoleMode(m.err, &m.errMode); err == nil { + if err := windows.SetConsoleMode(m.err, m.errMode|windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING); err == nil { + m.errMode |= windows.ENABLE_VIRTUAL_TERMINAL_PROCESSING + } else { + windows.SetConsoleMode(m.err, m.errMode) + } + } else { + fmt.Printf("failed to get console mode for stderr: %v\n", err) + } +} + +type master struct { + in windows.Handle + inMode uint32 + + out windows.Handle + outMode uint32 + + err windows.Handle + errMode uint32 +} + +func (m *master) SetRaw() error { + if err := makeInputRaw(m.in, m.inMode); err != nil { + return err + } + + // Set StdOut and StdErr to raw mode, we ignore failures since + // windows.DISABLE_NEWLINE_AUTO_RETURN might not be supported on this version of + // Windows. + + windows.SetConsoleMode(m.out, m.outMode|windows.DISABLE_NEWLINE_AUTO_RETURN) + + windows.SetConsoleMode(m.err, m.errMode|windows.DISABLE_NEWLINE_AUTO_RETURN) + + return nil +} + +func (m *master) Reset() error { + for _, s := range []struct { + fd windows.Handle + mode uint32 + }{ + {m.in, m.inMode}, + {m.out, m.outMode}, + {m.err, m.errMode}, + } { + if err := windows.SetConsoleMode(s.fd, s.mode); err != nil { + return errors.Wrap(err, "unable to restore console mode") + } + } + + return nil +} + +func (m *master) Size() (WinSize, error) { + var info windows.ConsoleScreenBufferInfo + err := windows.GetConsoleScreenBufferInfo(m.out, &info) + if err != nil { + return WinSize{}, errors.Wrap(err, "unable to get console info") + } + + winsize := WinSize{ + Width: uint16(info.Window.Right - info.Window.Left + 1), + Height: uint16(info.Window.Bottom - info.Window.Top + 1), + } + + return winsize, nil +} + +func (m *master) Resize(ws WinSize) error { + return ErrNotImplemented +} + +func (m *master) ResizeFrom(c Console) error { + return ErrNotImplemented +} + +func (m *master) DisableEcho() error { + mode := m.inMode &^ windows.ENABLE_ECHO_INPUT + mode |= windows.ENABLE_PROCESSED_INPUT + mode |= windows.ENABLE_LINE_INPUT + + if err := windows.SetConsoleMode(m.in, mode); err != nil { + return errors.Wrap(err, "unable to set console to disable echo") + } + + return nil +} + +func (m *master) Close() error { + return nil +} + +func (m *master) Read(b []byte) (int, error) { + panic("not implemented on windows") +} + +func (m *master) Write(b []byte) (int, error) { + panic("not implemented on windows") +} + +func (m *master) Fd() uintptr { + return uintptr(m.in) +} + +// on windows, console can only be made from os.Std{in,out,err}, hence there +// isnt a single name here we can use. Return a dummy "console" value in this +// case should be sufficient. 
+func (m *master) Name() string { + return "console" +} + +// makeInputRaw puts the terminal (Windows Console) connected to the given +// file descriptor into raw mode +func makeInputRaw(fd windows.Handle, mode uint32) error { + // See + // -- https://msdn.microsoft.com/en-us/library/windows/desktop/ms686033(v=vs.85).aspx + // -- https://msdn.microsoft.com/en-us/library/windows/desktop/ms683462(v=vs.85).aspx + + // Disable these modes + mode &^= windows.ENABLE_ECHO_INPUT + mode &^= windows.ENABLE_LINE_INPUT + mode &^= windows.ENABLE_MOUSE_INPUT + mode &^= windows.ENABLE_WINDOW_INPUT + mode &^= windows.ENABLE_PROCESSED_INPUT + + // Enable these modes + mode |= windows.ENABLE_EXTENDED_FLAGS + mode |= windows.ENABLE_INSERT_MODE + mode |= windows.ENABLE_QUICK_EDIT_MODE + + if vtInputSupported { + mode |= windows.ENABLE_VIRTUAL_TERMINAL_INPUT + } + + if err := windows.SetConsoleMode(fd, mode); err != nil { + return errors.Wrap(err, "unable to set console to raw mode") + } + + return nil +} + +func checkConsole(f *os.File) error { + var mode uint32 + if err := windows.GetConsoleMode(windows.Handle(f.Fd()), &mode); err != nil { + return err + } + return nil +} + +func newMaster(f *os.File) (Console, error) { + if f != os.Stdin && f != os.Stdout && f != os.Stderr { + return nil, errors.New("creating a console from a file is not supported on windows") + } + m := &master{} + m.initStdios() + return m, nil +} diff --git a/vendor/github.com/containerd/console/tc_darwin.go b/vendor/github.com/containerd/console/tc_darwin.go new file mode 100644 index 00000000..b102bad7 --- /dev/null +++ b/vendor/github.com/containerd/console/tc_darwin.go @@ -0,0 +1,37 @@ +package console + +import ( + "fmt" + "os" + "unsafe" + + "golang.org/x/sys/unix" +) + +const ( + cmdTcGet = unix.TIOCGETA + cmdTcSet = unix.TIOCSETA +) + +func ioctl(fd, flag, data uintptr) error { + if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, flag, data); err != 0 { + return err + } + return nil +} + +// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. +// unlockpt should be called before opening the slave side of a pty. +func unlockpt(f *os.File) error { + var u int32 + return ioctl(f.Fd(), unix.TIOCPTYUNLK, uintptr(unsafe.Pointer(&u))) +} + +// ptsname retrieves the name of the first available pts for the given master. +func ptsname(f *os.File) (string, error) { + n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCPTYGNAME) + if err != nil { + return "", err + } + return fmt.Sprintf("/dev/pts/%d", n), nil +} diff --git a/vendor/github.com/containerd/console/tc_freebsd.go b/vendor/github.com/containerd/console/tc_freebsd.go new file mode 100644 index 00000000..e2a10e44 --- /dev/null +++ b/vendor/github.com/containerd/console/tc_freebsd.go @@ -0,0 +1,29 @@ +package console + +import ( + "fmt" + "os" + + "golang.org/x/sys/unix" +) + +const ( + cmdTcGet = unix.TIOCGETA + cmdTcSet = unix.TIOCSETA +) + +// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. +// unlockpt should be called before opening the slave side of a pty. +// This does not exist on FreeBSD, it does not allocate controlling terminals on open +func unlockpt(f *os.File) error { + return nil +} + +// ptsname retrieves the name of the first available pts for the given master. 
+func ptsname(f *os.File) (string, error) { + n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN) + if err != nil { + return "", err + } + return fmt.Sprintf("/dev/pts/%d", n), nil +} diff --git a/vendor/github.com/containerd/console/tc_linux.go b/vendor/github.com/containerd/console/tc_linux.go new file mode 100644 index 00000000..80ef2f6f --- /dev/null +++ b/vendor/github.com/containerd/console/tc_linux.go @@ -0,0 +1,37 @@ +package console + +import ( + "fmt" + "os" + "unsafe" + + "golang.org/x/sys/unix" +) + +const ( + cmdTcGet = unix.TCGETS + cmdTcSet = unix.TCSETS +) + +func ioctl(fd, flag, data uintptr) error { + if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, flag, data); err != 0 { + return err + } + return nil +} + +// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. +// unlockpt should be called before opening the slave side of a pty. +func unlockpt(f *os.File) error { + var u int32 + return ioctl(f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) +} + +// ptsname retrieves the name of the first available pts for the given master. +func ptsname(f *os.File) (string, error) { + n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN) + if err != nil { + return "", err + } + return fmt.Sprintf("/dev/pts/%d", n), nil +} diff --git a/vendor/github.com/containerd/console/tc_solaris_cgo.go b/vendor/github.com/containerd/console/tc_solaris_cgo.go new file mode 100644 index 00000000..f8066d8e --- /dev/null +++ b/vendor/github.com/containerd/console/tc_solaris_cgo.go @@ -0,0 +1,35 @@ +// +build solaris,cgo + +package console + +import ( + "os" + + "golang.org/x/sys/unix" +) + +//#include +import "C" + +const ( + cmdTcGet = unix.TCGETS + cmdTcSet = unix.TCSETS +) + +// ptsname retrieves the name of the first available pts for the given master. +func ptsname(f *os.File) (string, error) { + ptspath, err := C.ptsname(C.int(f.Fd())) + if err != nil { + return "", err + } + return C.GoString(ptspath), nil +} + +// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. +// unlockpt should be called before opening the slave side of a pty. +func unlockpt(f *os.File) error { + if _, err := C.grantpt(C.int(f.Fd())); err != nil { + return err + } + return nil +} diff --git a/vendor/github.com/containerd/console/tc_solaris_nocgo.go b/vendor/github.com/containerd/console/tc_solaris_nocgo.go new file mode 100644 index 00000000..0aefa0d2 --- /dev/null +++ b/vendor/github.com/containerd/console/tc_solaris_nocgo.go @@ -0,0 +1,31 @@ +// +build solaris,!cgo + +// +// Implementing the functions below requires cgo support. Non-cgo stubs +// versions are defined below to enable cross-compilation of source code +// that depends on these functions, but the resultant cross-compiled +// binaries cannot actually be used. If the stub function(s) below are +// actually invoked they will display an error message and cause the +// calling process to exit. 
+// + +package console + +import ( + "os" + + "golang.org/x/sys/unix" +) + +const ( + cmdTcGet = unix.TCGETS + cmdTcSet = unix.TCSETS +) + +func ptsname(f *os.File) (string, error) { + panic("ptsname() support requires cgo.") +} + +func unlockpt(f *os.File) error { + panic("unlockpt() support requires cgo.") +} diff --git a/vendor/github.com/containerd/console/tc_unix.go b/vendor/github.com/containerd/console/tc_unix.go new file mode 100644 index 00000000..df7dcb93 --- /dev/null +++ b/vendor/github.com/containerd/console/tc_unix.go @@ -0,0 +1,75 @@ +// +build darwin freebsd linux solaris + +package console + +import ( + "golang.org/x/sys/unix" +) + +func tcget(fd uintptr, p *unix.Termios) error { + termios, err := unix.IoctlGetTermios(int(fd), cmdTcGet) + if err != nil { + return err + } + *p = *termios + return nil +} + +func tcset(fd uintptr, p *unix.Termios) error { + return unix.IoctlSetTermios(int(fd), cmdTcSet, p) +} + +func tcgwinsz(fd uintptr) (WinSize, error) { + var ws WinSize + + uws, err := unix.IoctlGetWinsize(int(fd), unix.TIOCGWINSZ) + if err != nil { + return ws, err + } + + // Translate from unix.Winsize to console.WinSize + ws.Height = uws.Row + ws.Width = uws.Col + ws.x = uws.Xpixel + ws.y = uws.Ypixel + return ws, nil +} + +func tcswinsz(fd uintptr, ws WinSize) error { + // Translate from console.WinSize to unix.Winsize + + var uws unix.Winsize + uws.Row = ws.Height + uws.Col = ws.Width + uws.Xpixel = ws.x + uws.Ypixel = ws.y + + return unix.IoctlSetWinsize(int(fd), unix.TIOCSWINSZ, &uws) +} + +func setONLCR(fd uintptr, enable bool) error { + var termios unix.Termios + if err := tcget(fd, &termios); err != nil { + return err + } + if enable { + // Set +onlcr so we can act like a real terminal + termios.Oflag |= unix.ONLCR + } else { + // Set -onlcr so we don't have to deal with \r. + termios.Oflag &^= unix.ONLCR + } + return tcset(fd, &termios) +} + +func cfmakeraw(t unix.Termios) unix.Termios { + t.Iflag &^= (unix.IGNBRK | unix.BRKINT | unix.PARMRK | unix.ISTRIP | unix.INLCR | unix.IGNCR | unix.ICRNL | unix.IXON) + t.Oflag &^= unix.OPOST + t.Lflag &^= (unix.ECHO | unix.ECHONL | unix.ICANON | unix.ISIG | unix.IEXTEN) + t.Cflag &^= (unix.CSIZE | unix.PARENB) + t.Cflag &^= unix.CS8 + t.Cc[unix.VMIN] = 1 + t.Cc[unix.VTIME] = 0 + + return t +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/.travis.yml b/vendor/github.com/cyphar/filepath-securejoin/.travis.yml new file mode 100644 index 00000000..3938f383 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/.travis.yml @@ -0,0 +1,19 @@ +# Copyright (C) 2017 SUSE LLC. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +language: go +go: + - 1.7.x + - 1.8.x + - tip + +os: + - linux + - osx + +script: + - go test -cover -v ./... + +notifications: + email: false diff --git a/vendor/github.com/cyphar/filepath-securejoin/LICENSE b/vendor/github.com/cyphar/filepath-securejoin/LICENSE new file mode 100644 index 00000000..bec842f2 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/LICENSE @@ -0,0 +1,28 @@ +Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. +Copyright (C) 2017 SUSE LLC. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/cyphar/filepath-securejoin/README.md b/vendor/github.com/cyphar/filepath-securejoin/README.md new file mode 100644 index 00000000..49b2baa9 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/README.md @@ -0,0 +1,65 @@ +## `filepath-securejoin` ## + +[![Build Status](https://travis-ci.org/cyphar/filepath-securejoin.svg?branch=master)](https://travis-ci.org/cyphar/filepath-securejoin) + +An implementation of `SecureJoin`, a [candidate for inclusion in the Go +standard library][go#20126]. The purpose of this function is to be a "secure" +alternative to `filepath.Join`, and in particular it provides certain +guarantees that are not provided by `filepath.Join`. + +This is the function prototype: + +```go +func SecureJoin(root, unsafePath string) (string, error) +``` + +This library **guarantees** the following: + +* If no error is set, the resulting string **must** be a child path of + `SecureJoin` and will not contain any symlink path components (they will all + be expanded). + +* When expanding symlinks, all symlink path components **must** be resolved + relative to the provided root. In particular, this can be considered a + userspace implementation of how `chroot(2)` operates on file paths. Note that + these symlinks will **not** be expanded lexically (`filepath.Clean` is not + called on the input before processing). + +* Non-existant path components are unaffected by `SecureJoin` (similar to + `filepath.EvalSymlinks`'s semantics). + +* The returned path will always be `filepath.Clean`ed and thus not contain any + `..` components. 
+ +A (trivial) implementation of this function on GNU/Linux systems could be done +with the following (note that this requires root privileges and is far more +opaque than the implementation in this library, and also requires that +`readlink` is inside the `root` path): + +```go +package securejoin + +import ( + "os/exec" + "path/filepath" +) + +func SecureJoin(root, unsafePath string) (string, error) { + unsafePath = string(filepath.Separator) + unsafePath + cmd := exec.Command("chroot", root, + "readlink", "--canonicalize-missing", "--no-newline", unsafePath) + output, err := cmd.CombinedOutput() + if err != nil { + return "", err + } + expanded := string(output) + return filepath.Join(root, expanded), nil +} +``` + +[go#20126]: https://github.com/golang/go/issues/20126 + +### License ### + +The license of this project is the same as Go, which is a BSD 3-clause license +available in the `LICENSE` file. diff --git a/vendor/github.com/cyphar/filepath-securejoin/VERSION b/vendor/github.com/cyphar/filepath-securejoin/VERSION new file mode 100644 index 00000000..1f5f8304 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/VERSION @@ -0,0 +1 @@ +0.2.1+dev diff --git a/vendor/github.com/cyphar/filepath-securejoin/join.go b/vendor/github.com/cyphar/filepath-securejoin/join.go new file mode 100644 index 00000000..f2098547 --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/join.go @@ -0,0 +1,135 @@ +// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. +// Copyright (C) 2017 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package securejoin is an implementation of the hopefully-soon-to-be-included +// SecureJoin helper that is meant to be part of the "path/filepath" package. +// The purpose of this project is to provide a PoC implementation to make the +// SecureJoin proposal (https://github.com/golang/go/issues/20126) more +// tangible. +package securejoin + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "strings" + "syscall" + + "github.com/pkg/errors" +) + +// ErrSymlinkLoop is returned by SecureJoinVFS when too many symlinks have been +// evaluated in attempting to securely join the two given paths. +var ErrSymlinkLoop = fmt.Errorf("SecureJoin: too many links") + +// IsNotExist tells you if err is an error that implies that either the path +// accessed does not exist (or path components don't exist). This is +// effectively a more broad version of os.IsNotExist. +func IsNotExist(err error) bool { + // If it's a bone-fide ENOENT just bail. + if os.IsNotExist(errors.Cause(err)) { + return true + } + + // Check that it's not actually an ENOTDIR, which in some cases is a more + // convoluted case of ENOENT (usually involving weird paths). + var errno error + switch err := errors.Cause(err).(type) { + case *os.PathError: + errno = err.Err + case *os.LinkError: + errno = err.Err + case *os.SyscallError: + errno = err.Err + } + return errno == syscall.ENOTDIR || errno == syscall.ENOENT +} + +// SecureJoinVFS joins the two given path components (similar to Join) except +// that the returned path is guaranteed to be scoped inside the provided root +// path (when evaluated). Any symbolic links in the path are evaluated with the +// given root treated as the root of the filesystem, similar to a chroot. The +// filesystem state is evaluated through the given VFS interface (if nil, the +// standard os.* family of functions are used). 
+// +// Note that the guarantees provided by this function only apply if the path +// components in the returned string are not modified (in other words are not +// replaced with symlinks on the filesystem) after this function has returned. +// Such a symlink race is necessarily out-of-scope of SecureJoin. +func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { + // Use the os.* VFS implementation if none was specified. + if vfs == nil { + vfs = osVFS{} + } + + var path bytes.Buffer + n := 0 + for unsafePath != "" { + if n > 255 { + return "", ErrSymlinkLoop + } + + // Next path component, p. + i := strings.IndexRune(unsafePath, filepath.Separator) + var p string + if i == -1 { + p, unsafePath = unsafePath, "" + } else { + p, unsafePath = unsafePath[:i], unsafePath[i+1:] + } + + // Create a cleaned path, using the lexical semantics of /../a, to + // create a "scoped" path component which can safely be joined to fullP + // for evaluation. At this point, path.String() doesn't contain any + // symlink components. + cleanP := filepath.Clean(string(filepath.Separator) + path.String() + p) + if cleanP == string(filepath.Separator) { + path.Reset() + continue + } + fullP := filepath.Clean(root + cleanP) + + // Figure out whether the path is a symlink. + fi, err := vfs.Lstat(fullP) + if err != nil && !IsNotExist(err) { + return "", err + } + // Treat non-existent path components the same as non-symlinks (we + // can't do any better here). + if IsNotExist(err) || fi.Mode()&os.ModeSymlink == 0 { + path.WriteString(p) + path.WriteRune(filepath.Separator) + continue + } + + // Only increment when we actually dereference a link. + n++ + + // It's a symlink, expand it by prepending it to the yet-unparsed path. + dest, err := vfs.Readlink(fullP) + if err != nil { + return "", err + } + // Absolute symlinks reset any work we've already done. + if filepath.IsAbs(dest) { + path.Reset() + } + unsafePath = dest + string(filepath.Separator) + unsafePath + } + + // We have to clean path.String() here because it may contain '..' + // components that are entirely lexical, but would be misleading otherwise. + // And finally do a final clean to ensure that root is also lexically + // clean. + fullP := filepath.Clean(string(filepath.Separator) + path.String()) + return filepath.Clean(root + fullP), nil +} + +// SecureJoin is a wrapper around SecureJoinVFS that just uses the os.* library +// of functions as the VFS. If in doubt, use this function over SecureJoinVFS. +func SecureJoin(root, unsafePath string) (string, error) { + return SecureJoinVFS(root, unsafePath, nil) +} diff --git a/vendor/github.com/cyphar/filepath-securejoin/vendor.conf b/vendor/github.com/cyphar/filepath-securejoin/vendor.conf new file mode 100644 index 00000000..66bb574b --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/vendor.conf @@ -0,0 +1 @@ +github.com/pkg/errors v0.8.0 diff --git a/vendor/github.com/cyphar/filepath-securejoin/vfs.go b/vendor/github.com/cyphar/filepath-securejoin/vfs.go new file mode 100644 index 00000000..a82a5eae --- /dev/null +++ b/vendor/github.com/cyphar/filepath-securejoin/vfs.go @@ -0,0 +1,41 @@ +// Copyright (C) 2017 SUSE LLC. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package securejoin + +import "os" + +// In future this should be moved into a separate package, because now there +// are several projects (umoci and go-mtree) that are using this sort of +// interface. 
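Because `SecureJoinVFS` touches the filesystem only through the small `VFS` interface defined just below, a test double is straightforward. The sketch here is hypothetical and not part of the vendored package: it delegates to the standard `os` functions while recording which paths were inspected during resolution.

```go
package main

import (
	"fmt"
	"os"

	securejoin "github.com/cyphar/filepath-securejoin"
)

// loggingVFS is an illustrative VFS implementation: it behaves like the
// default (nil) VFS but records every Lstat/Readlink call that
// SecureJoinVFS makes while resolving a path.
type loggingVFS struct{ calls []string }

func (v *loggingVFS) Lstat(name string) (os.FileInfo, error) {
	v.calls = append(v.calls, "lstat "+name)
	return os.Lstat(name)
}

func (v *loggingVFS) Readlink(name string) (string, error) {
	v.calls = append(v.calls, "readlink "+name)
	return os.Readlink(name)
}

func main() {
	vfs := &loggingVFS{}
	// The root and the unsafe path are placeholders for illustration.
	resolved, err := securejoin.SecureJoinVFS("/srv/rootfs", "var/log/../run", vfs)
	fmt.Println(resolved, err)
	for _, c := range vfs.calls {
		fmt.Println(c)
	}
}
```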
+ +// VFS is the minimal interface necessary to use SecureJoinVFS. A nil VFS is +// equivalent to using the standard os.* family of functions. This is mainly +// used for the purposes of mock testing, but also can be used to otherwise use +// SecureJoin with VFS-like system. +type VFS interface { + // Lstat returns a FileInfo describing the named file. If the file is a + // symbolic link, the returned FileInfo describes the symbolic link. Lstat + // makes no attempt to follow the link. These semantics are identical to + // os.Lstat. + Lstat(name string) (os.FileInfo, error) + + // Readlink returns the destination of the named symbolic link. These + // semantics are identical to os.Readlink. + Readlink(name string) (string, error) +} + +// osVFS is the "nil" VFS, in that it just passes everything through to the os +// module. +type osVFS struct{} + +// Lstat returns a FileInfo describing the named file. If the file is a +// symbolic link, the returned FileInfo describes the symbolic link. Lstat +// makes no attempt to follow the link. These semantics are identical to +// os.Lstat. +func (o osVFS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) } + +// Readlink returns the destination of the named symbolic link. These +// semantics are identical to os.Readlink. +func (o osVFS) Readlink(name string) (string, error) { return os.Readlink(name) } diff --git a/vendor/github.com/influxdata/influxdb/LICENSE b/vendor/github.com/influxdata/influxdb/LICENSE new file mode 100644 index 00000000..63cef79b --- /dev/null +++ b/vendor/github.com/influxdata/influxdb/LICENSE @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2013-2016 Errplane Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/vendor/github.com/influxdata/influxdb/LICENSE_OF_DEPENDENCIES.md b/vendor/github.com/influxdata/influxdb/LICENSE_OF_DEPENDENCIES.md new file mode 100644 index 00000000..14bbfa95 --- /dev/null +++ b/vendor/github.com/influxdata/influxdb/LICENSE_OF_DEPENDENCIES.md @@ -0,0 +1,23 @@ +# List +- bootstrap 3.3.5 [MIT LICENSE](https://github.com/twbs/bootstrap/blob/master/LICENSE) +- collectd.org [ISC LICENSE](https://github.com/collectd/go-collectd/blob/master/LICENSE) +- github.com/BurntSushi/toml [WTFPL LICENSE](https://github.com/BurntSushi/toml/blob/master/COPYING) +- github.com/bmizerany/pat [MIT LICENSE](https://github.com/bmizerany/pat#license) +- github.com/boltdb/bolt [MIT LICENSE](https://github.com/boltdb/bolt/blob/master/LICENSE) +- github.com/davecgh/go-spew/spew [ISC LICENSE](https://github.com/davecgh/go-spew/blob/master/LICENSE) +- github.com/dgrijalva/jwt-go [MIT LICENSE](https://github.com/dgrijalva/jwt-go/blob/master/LICENSE) +- github.com/dgryski/go-bits [MIT LICENSE](https://github.com/dgryski/go-bits/blob/master/LICENSE) +- github.com/dgryski/go-bitstream [MIT LICENSE](https://github.com/dgryski/go-bitstream/blob/master/LICENSE) +- github.com/gogo/protobuf/proto [BSD LICENSE](https://github.com/gogo/protobuf/blob/master/LICENSE) +- github.com/golang/snappy [BSD LICENSE](https://github.com/golang/snappy/blob/master/LICENSE) +- github.com/influxdata/usage-client [MIT LICENSE](https://github.com/influxdata/usage-client/blob/master/LICENSE.txt) +- github.com/jwilder/encoding [MIT LICENSE](https://github.com/jwilder/encoding/blob/master/LICENSE) +- github.com/kimor79/gollectd [BSD LICENSE](https://github.com/kimor79/gollectd/blob/master/LICENSE) +- github.com/paulbellamy/ratecounter [MIT LICENSE](https://github.com/paulbellamy/ratecounter/blob/master/LICENSE) +- github.com/peterh/liner [MIT LICENSE](https://github.com/peterh/liner/blob/master/COPYING) +- github.com/rakyll/statik [APACHE LICENSE](https://github.com/rakyll/statik/blob/master/LICENSE) +- github.com/retailnext/hllpp [BSD LICENSE](https://github.com/retailnext/hllpp/blob/master/LICENSE) +- glyphicons [LICENSE](http://glyphicons.com/license/) +- golang.org/x/crypto [BSD LICENSE](https://github.com/golang/crypto/blob/master/LICENSE) +- jquery 2.1.4 [MIT LICENSE](https://github.com/jquery/jquery/blob/master/LICENSE.txt) +- react 0.13.3 [BSD LICENSE](https://github.com/facebook/react/blob/master/LICENSE) diff --git a/vendor/github.com/influxdata/influxdb/models/consistency.go b/vendor/github.com/influxdata/influxdb/models/consistency.go new file mode 100644 index 00000000..97cdc51a --- /dev/null +++ b/vendor/github.com/influxdata/influxdb/models/consistency.go @@ -0,0 +1,46 @@ +package models + +import ( + "errors" + "strings" +) + +// ConsistencyLevel represent a required replication criteria before a write can +// be returned as successful +type ConsistencyLevel int + +const ( + // ConsistencyLevelAny allows for hinted hand off, potentially no write happened yet + ConsistencyLevelAny ConsistencyLevel = iota + + // ConsistencyLevelOne requires at least one data node acknowledged a write + ConsistencyLevelOne + + // ConsistencyLevelQuorum requires a quorum of data nodes to acknowledge a write + ConsistencyLevelQuorum + + // ConsistencyLevelAll requires all data nodes to acknowledge a write + ConsistencyLevelAll +) + +var ( + // ErrInvalidConsistencyLevel is returned when parsing the string version + // of a consistency level. 
+ ErrInvalidConsistencyLevel = errors.New("invalid consistency level") +) + +// ParseConsistencyLevel converts a consistency level string to the corresponding ConsistencyLevel const +func ParseConsistencyLevel(level string) (ConsistencyLevel, error) { + switch strings.ToLower(level) { + case "any": + return ConsistencyLevelAny, nil + case "one": + return ConsistencyLevelOne, nil + case "quorum": + return ConsistencyLevelQuorum, nil + case "all": + return ConsistencyLevelAll, nil + default: + return 0, ErrInvalidConsistencyLevel + } +} diff --git a/vendor/github.com/influxdata/influxdb/models/inline_fnv.go b/vendor/github.com/influxdata/influxdb/models/inline_fnv.go new file mode 100644 index 00000000..9391946d --- /dev/null +++ b/vendor/github.com/influxdata/influxdb/models/inline_fnv.go @@ -0,0 +1,27 @@ +package models + +// from stdlib hash/fnv/fnv.go +const ( + prime64 = 1099511628211 + offset64 = 14695981039346656037 +) + +// InlineFNV64a is an alloc-free port of the standard library's fnv64a. +type InlineFNV64a uint64 + +func NewInlineFNV64a() InlineFNV64a { + return offset64 +} + +func (s *InlineFNV64a) Write(data []byte) (int, error) { + hash := uint64(*s) + for _, c := range data { + hash ^= uint64(c) + hash *= prime64 + } + *s = InlineFNV64a(hash) + return len(data), nil +} +func (s *InlineFNV64a) Sum64() uint64 { + return uint64(*s) +} diff --git a/vendor/github.com/influxdata/influxdb/models/inline_strconv_parse.go b/vendor/github.com/influxdata/influxdb/models/inline_strconv_parse.go new file mode 100644 index 00000000..727ce358 --- /dev/null +++ b/vendor/github.com/influxdata/influxdb/models/inline_strconv_parse.go @@ -0,0 +1,38 @@ +package models + +import ( + "reflect" + "strconv" + "unsafe" +) + +// parseIntBytes is a zero-alloc wrapper around strconv.ParseInt. +func parseIntBytes(b []byte, base int, bitSize int) (i int64, err error) { + s := unsafeBytesToString(b) + return strconv.ParseInt(s, base, bitSize) +} + +// parseFloatBytes is a zero-alloc wrapper around strconv.ParseFloat. +func parseFloatBytes(b []byte, bitSize int) (float64, error) { + s := unsafeBytesToString(b) + return strconv.ParseFloat(s, bitSize) +} + +// parseBoolBytes is a zero-alloc wrapper around strconv.ParseBool. +func parseBoolBytes(b []byte) (bool, error) { + return strconv.ParseBool(unsafeBytesToString(b)) +} + +// unsafeBytesToString converts a []byte to a string without a heap allocation. +// +// It is unsafe, and is intended to prepare input to short-lived functions +// that require strings. 
+func unsafeBytesToString(in []byte) string { + src := *(*reflect.SliceHeader)(unsafe.Pointer(&in)) + dst := reflect.StringHeader{ + Data: src.Data, + Len: src.Len, + } + s := *(*string)(unsafe.Pointer(&dst)) + return s +} diff --git a/vendor/github.com/influxdata/influxdb/models/points.go b/vendor/github.com/influxdata/influxdb/models/points.go new file mode 100644 index 00000000..a4104b8a --- /dev/null +++ b/vendor/github.com/influxdata/influxdb/models/points.go @@ -0,0 +1,1916 @@ +package models + +import ( + "bytes" + "encoding/binary" + "errors" + "fmt" + "math" + "sort" + "strconv" + "strings" + "time" + + "github.com/influxdata/influxdb/pkg/escape" +) + +var ( + measurementEscapeCodes = map[byte][]byte{ + ',': []byte(`\,`), + ' ': []byte(`\ `), + } + + tagEscapeCodes = map[byte][]byte{ + ',': []byte(`\,`), + ' ': []byte(`\ `), + '=': []byte(`\=`), + } + + ErrPointMustHaveAField = errors.New("point without fields is unsupported") + ErrInvalidNumber = errors.New("invalid number") + ErrInvalidPoint = errors.New("point is invalid") + ErrMaxKeyLengthExceeded = errors.New("max key length exceeded") +) + +const ( + MaxKeyLength = 65535 +) + +// Point defines the values that will be written to the database +type Point interface { + Name() string + SetName(string) + + Tags() Tags + AddTag(key, value string) + SetTags(tags Tags) + + Fields() Fields + + Time() time.Time + SetTime(t time.Time) + UnixNano() int64 + + HashID() uint64 + Key() []byte + + Data() []byte + SetData(buf []byte) + + // String returns a string representation of the point, if there is a + // timestamp associated with the point then it will be specified with the default + // precision of nanoseconds + String() string + + // Bytes returns a []byte representation of the point similar to string. + MarshalBinary() ([]byte, error) + + // PrecisionString returns a string representation of the point, if there + // is a timestamp associated with the point then it will be specified in the + // given unit + PrecisionString(precision string) string + + // RoundedString returns a string representation of the point, if there + // is a timestamp associated with the point, then it will be rounded to the + // given duration + RoundedString(d time.Duration) string + + // Split will attempt to return multiple points with the same timestamp whose + // string representations are no longer than size. Points with a single field or + // a point without a timestamp may exceed the requested size. + Split(size int) []Point + + // Round will round the timestamp of the point to the given duration + Round(d time.Duration) + + // StringSize returns the length of the string that would be returned by String() + StringSize() int + + // AppendString appends the result of String() to the provided buffer and returns + // the result, potentially reducing string allocations + AppendString(buf []byte) []byte + + // FieldIterator retuns a FieldIterator that can be used to traverse the + // fields of a point without constructing the in-memory map + FieldIterator() FieldIterator +} + +type FieldType int + +const ( + Integer FieldType = iota + Float + Boolean + String + Empty +) + +type FieldIterator interface { + Next() bool + FieldKey() []byte + Type() FieldType + StringValue() string + IntegerValue() int64 + BooleanValue() bool + FloatValue() float64 + + Delete() + Reset() +} + +// Points represents a sortable list of points by timestamp. 
+type Points []Point + +func (a Points) Len() int { return len(a) } +func (a Points) Less(i, j int) bool { return a[i].Time().Before(a[j].Time()) } +func (a Points) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + +// point is the default implementation of Point. +type point struct { + time time.Time + + // text encoding of measurement and tags + // key must always be stored sorted by tags, if the original line was not sorted, + // we need to resort it + key []byte + + // text encoding of field data + fields []byte + + // text encoding of timestamp + ts []byte + + // binary encoded field data + data []byte + + // cached version of parsed fields from data + cachedFields map[string]interface{} + + // cached version of parsed name from key + cachedName string + + // cached version of parsed tags + cachedTags Tags + + it fieldIterator +} + +const ( + // the number of characters for the largest possible int64 (9223372036854775807) + maxInt64Digits = 19 + + // the number of characters for the smallest possible int64 (-9223372036854775808) + minInt64Digits = 20 + + // the number of characters required for the largest float64 before a range check + // would occur during parsing + maxFloat64Digits = 25 + + // the number of characters required for smallest float64 before a range check occur + // would occur during parsing + minFloat64Digits = 27 +) + +// ParsePoints returns a slice of Points from a text representation of a point +// with each point separated by newlines. If any points fail to parse, a non-nil error +// will be returned in addition to the points that parsed successfully. +func ParsePoints(buf []byte) ([]Point, error) { + return ParsePointsWithPrecision(buf, time.Now().UTC(), "n") +} + +// ParsePointsString is identical to ParsePoints but accepts a string +// buffer. +func ParsePointsString(buf string) ([]Point, error) { + return ParsePoints([]byte(buf)) +} + +// ParseKey returns the measurement name and tags from a point. +func ParseKey(buf []byte) (string, Tags, error) { + // Ignore the error because scanMeasurement returns "missing fields" which we ignore + // when just parsing a key + state, i, _ := scanMeasurement(buf, 0) + + var tags Tags + if state == tagKeyState { + tags = parseTags(buf) + // scanMeasurement returns the location of the comma if there are tags, strip that off + return string(buf[:i-1]), tags, nil + } + return string(buf[:i]), tags, nil +} + +// ParsePointsWithPrecision is similar to ParsePoints, but allows the +// caller to provide a precision for time. 
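Before the function body and the scanner helpers that follow, here is a minimal sketch of how these parsing entry points might be driven; the line-protocol payload is invented for illustration, and only `ParsePoints` and the `Point` accessors come from this package:

```go
package main

import (
	"fmt"
	"log"

	"github.com/influxdata/influxdb/models"
)

func main() {
	// A made-up line-protocol payload: measurement, tags, fields, timestamp.
	buf := []byte("cpu,host=serverA,region=us-west value=0.64,count=3i 1434055562000000000\n" +
		"mem,host=serverA used=2048i 1434055562000000000")

	pts, err := models.ParsePoints(buf)
	if err != nil {
		// Lines that fail to parse are reported here; valid points are
		// still returned alongside the error.
		log.Fatal(err)
	}
	for _, p := range pts {
		fmt.Println(p.Name(), p.Tags().Map(), p.Fields(), p.UnixNano())
	}
}
```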
+func ParsePointsWithPrecision(buf []byte, defaultTime time.Time, precision string) ([]Point, error) { + points := make([]Point, 0, bytes.Count(buf, []byte{'\n'})+1) + var ( + pos int + block []byte + failed []string + ) + for pos < len(buf) { + pos, block = scanLine(buf, pos) + pos++ + + if len(block) == 0 { + continue + } + + // lines which start with '#' are comments + start := skipWhitespace(block, 0) + + // If line is all whitespace, just skip it + if start >= len(block) { + continue + } + + if block[start] == '#' { + continue + } + + // strip the newline if one is present + if block[len(block)-1] == '\n' { + block = block[:len(block)-1] + } + + pt, err := parsePoint(block[start:], defaultTime, precision) + if err != nil { + failed = append(failed, fmt.Sprintf("unable to parse '%s': %v", string(block[start:len(block)]), err)) + } else { + points = append(points, pt) + } + + } + if len(failed) > 0 { + return points, fmt.Errorf("%s", strings.Join(failed, "\n")) + } + return points, nil + +} + +func parsePoint(buf []byte, defaultTime time.Time, precision string) (Point, error) { + // scan the first block which is measurement[,tag1=value1,tag2=value=2...] + pos, key, err := scanKey(buf, 0) + if err != nil { + return nil, err + } + + // measurement name is required + if len(key) == 0 { + return nil, fmt.Errorf("missing measurement") + } + + if len(key) > MaxKeyLength { + return nil, fmt.Errorf("max key length exceeded: %v > %v", len(key), MaxKeyLength) + } + + // scan the second block is which is field1=value1[,field2=value2,...] + pos, fields, err := scanFields(buf, pos) + if err != nil { + return nil, err + } + + // at least one field is required + if len(fields) == 0 { + return nil, fmt.Errorf("missing fields") + } + + // scan the last block which is an optional integer timestamp + pos, ts, err := scanTime(buf, pos) + if err != nil { + return nil, err + } + + pt := &point{ + key: key, + fields: fields, + ts: ts, + } + + if len(ts) == 0 { + pt.time = defaultTime + pt.SetPrecision(precision) + } else { + ts, err := parseIntBytes(ts, 10, 64) + if err != nil { + return nil, err + } + pt.time, err = SafeCalcTime(ts, precision) + if err != nil { + return nil, err + } + + // Determine if there are illegal non-whitespace characters after the + // timestamp block. + for pos < len(buf) { + if buf[pos] != ' ' { + return nil, ErrInvalidPoint + } + pos++ + } + } + return pt, nil +} + +// GetPrecisionMultiplier will return a multiplier for the precision specified +func GetPrecisionMultiplier(precision string) int64 { + d := time.Nanosecond + switch precision { + case "u": + d = time.Microsecond + case "ms": + d = time.Millisecond + case "s": + d = time.Second + case "m": + d = time.Minute + case "h": + d = time.Hour + } + return int64(d) +} + +// scanKey scans buf starting at i for the measurement and tag portion of the point. +// It returns the ending position and the byte slice of key within buf. If there +// are tags, they will be sorted if they are not already. +func scanKey(buf []byte, i int) (int, []byte, error) { + start := skipWhitespace(buf, i) + + i = start + + // Determines whether the tags are sort, assume they are + sorted := true + + // indices holds the indexes within buf of the start of each tag. 
For example, + // a buf of 'cpu,host=a,region=b,zone=c' would have indices slice of [4,11,20] + // which indicates that the first tag starts at buf[4], seconds at buf[11], and + // last at buf[20] + indices := make([]int, 100) + + // tracks how many commas we've seen so we know how many values are indices. + // Since indices is an arbitrarily large slice, + // we need to know how many values in the buffer are in use. + commas := 0 + + // First scan the Point's measurement. + state, i, err := scanMeasurement(buf, i) + if err != nil { + return i, buf[start:i], err + } + + // Optionally scan tags if needed. + if state == tagKeyState { + i, commas, indices, err = scanTags(buf, i, indices) + if err != nil { + return i, buf[start:i], err + } + } + + // Now we know where the key region is within buf, and the location of tags, we + // need to determine if duplicate tags exist and if the tags are sorted. This iterates + // over the list comparing each tag in the sequence with each other. + for j := 0; j < commas-1; j++ { + // get the left and right tags + _, left := scanTo(buf[indices[j]:indices[j+1]-1], 0, '=') + _, right := scanTo(buf[indices[j+1]:indices[j+2]-1], 0, '=') + + // If left is greater than right, the tags are not sorted. We do not have to + // continue because the short path no longer works. + // If the tags are equal, then there are duplicate tags, and we should abort. + // If the tags are not sorted, this pass may not find duplicate tags and we + // need to do a more exhaustive search later. + if cmp := bytes.Compare(left, right); cmp > 0 { + sorted = false + break + } else if cmp == 0 { + return i, buf[start:i], fmt.Errorf("duplicate tags") + } + } + + // If the tags are not sorted, then sort them. This sort is inline and + // uses the tag indices we created earlier. The actual buffer is not sorted, the + // indices are using the buffer for value comparison. After the indices are sorted, + // the buffer is reconstructed from the sorted indices. + if !sorted && commas > 0 { + // Get the measurement name for later + measurement := buf[start : indices[0]-1] + + // Sort the indices + indices := indices[:commas] + insertionSort(0, commas, buf, indices) + + // Create a new key using the measurement and sorted indices + b := make([]byte, len(buf[start:i])) + pos := copy(b, measurement) + for _, i := range indices { + b[pos] = ',' + pos++ + _, v := scanToSpaceOr(buf, i, ',') + pos += copy(b[pos:], v) + } + + // Check again for duplicate tags now that the tags are sorted. + for j := 0; j < commas-1; j++ { + // get the left and right tags + _, left := scanTo(buf[indices[j]:], 0, '=') + _, right := scanTo(buf[indices[j+1]:], 0, '=') + + // If the tags are equal, then there are duplicate tags, and we should abort. + // If the tags are not sorted, this pass may not find duplicate tags and we + // need to do a more exhaustive search later. + if bytes.Equal(left, right) { + return i, b, fmt.Errorf("duplicate tags") + } + } + + return i, b, nil + } + + return i, buf[start:i], nil +} + +// The following constants allow us to specify which state to move to +// next, when scanning sections of a Point. +const ( + tagKeyState = iota + tagValueState + fieldsState +) + +// scanMeasurement examines the measurement part of a Point, returning +// the next state to move to, and the current location in the buffer. +func scanMeasurement(buf []byte, i int) (int, int, error) { + // Check first byte of measurement, anything except a comma is fine. 
+ // It can't be a space, since whitespace is stripped prior to this + // function call. + if i >= len(buf) || buf[i] == ',' { + return -1, i, fmt.Errorf("missing measurement") + } + + for { + i++ + if i >= len(buf) { + // cpu + return -1, i, fmt.Errorf("missing fields") + } + + if buf[i-1] == '\\' { + // Skip character (it's escaped). + continue + } + + // Unescaped comma; move onto scanning the tags. + if buf[i] == ',' { + return tagKeyState, i + 1, nil + } + + // Unescaped space; move onto scanning the fields. + if buf[i] == ' ' { + // cpu value=1.0 + return fieldsState, i, nil + } + } +} + +// scanTags examines all the tags in a Point, keeping track of and +// returning the updated indices slice, number of commas and location +// in buf where to start examining the Point fields. +func scanTags(buf []byte, i int, indices []int) (int, int, []int, error) { + var ( + err error + commas int + state = tagKeyState + ) + + for { + switch state { + case tagKeyState: + // Grow our indices slice if we have too many tags. + if commas >= len(indices) { + newIndics := make([]int, cap(indices)*2) + copy(newIndics, indices) + indices = newIndics + } + indices[commas] = i + commas++ + + i, err = scanTagsKey(buf, i) + state = tagValueState // tag value always follows a tag key + case tagValueState: + state, i, err = scanTagsValue(buf, i) + case fieldsState: + indices[commas] = i + 1 + return i, commas, indices, nil + } + + if err != nil { + return i, commas, indices, err + } + } +} + +// scanTagsKey scans each character in a tag key. +func scanTagsKey(buf []byte, i int) (int, error) { + // First character of the key. + if i >= len(buf) || buf[i] == ' ' || buf[i] == ',' || buf[i] == '=' { + // cpu,{'', ' ', ',', '='} + return i, fmt.Errorf("missing tag key") + } + + // Examine each character in the tag key until we hit an unescaped + // equals (the tag value), or we hit an error (i.e., unescaped + // space or comma). + for { + i++ + + // Either we reached the end of the buffer or we hit an + // unescaped comma or space. + if i >= len(buf) || + ((buf[i] == ' ' || buf[i] == ',') && buf[i-1] != '\\') { + // cpu,tag{'', ' ', ','} + return i, fmt.Errorf("missing tag value") + } + + if buf[i] == '=' && buf[i-1] != '\\' { + // cpu,tag= + return i + 1, nil + } + } +} + +// scanTagsValue scans each character in a tag value. +func scanTagsValue(buf []byte, i int) (int, int, error) { + // Tag value cannot be empty. + if i >= len(buf) || buf[i] == ',' || buf[i] == ' ' { + // cpu,tag={',', ' '} + return -1, i, fmt.Errorf("missing tag value") + } + + // Examine each character in the tag value until we hit an unescaped + // comma (move onto next tag key), an unescaped space (move onto + // fields), or we error out. + for { + i++ + if i >= len(buf) { + // cpu,tag=value + return -1, i, fmt.Errorf("missing fields") + } + + // An unescaped equals sign is an invalid tag value. 
+ if buf[i] == '=' && buf[i-1] != '\\' { + // cpu,tag={'=', 'fo=o'} + return -1, i, fmt.Errorf("invalid tag format") + } + + if buf[i] == ',' && buf[i-1] != '\\' { + // cpu,tag=foo, + return tagKeyState, i + 1, nil + } + + // cpu,tag=foo value=1.0 + // cpu, tag=foo\= value=1.0 + if buf[i] == ' ' && buf[i-1] != '\\' { + return fieldsState, i, nil + } + } +} + +func insertionSort(l, r int, buf []byte, indices []int) { + for i := l + 1; i < r; i++ { + for j := i; j > l && less(buf, indices, j, j-1); j-- { + indices[j], indices[j-1] = indices[j-1], indices[j] + } + } +} + +func less(buf []byte, indices []int, i, j int) bool { + // This grabs the tag names for i & j, it ignores the values + _, a := scanTo(buf, indices[i], '=') + _, b := scanTo(buf, indices[j], '=') + return bytes.Compare(a, b) < 0 +} + +// scanFields scans buf, starting at i for the fields section of a point. It returns +// the ending position and the byte slice of the fields within buf +func scanFields(buf []byte, i int) (int, []byte, error) { + start := skipWhitespace(buf, i) + i = start + quoted := false + + // tracks how many '=' we've seen + equals := 0 + + // tracks how many commas we've seen + commas := 0 + + for { + // reached the end of buf? + if i >= len(buf) { + break + } + + // escaped characters? + if buf[i] == '\\' && i+1 < len(buf) { + i += 2 + continue + } + + // If the value is quoted, scan until we get to the end quote + // Only quote values in the field value since quotes are not significant + // in the field key + if buf[i] == '"' && equals > commas { + quoted = !quoted + i++ + continue + } + + // If we see an =, ensure that there is at least on char before and after it + if buf[i] == '=' && !quoted { + equals++ + + // check for "... =123" but allow "a\ =123" + if buf[i-1] == ' ' && buf[i-2] != '\\' { + return i, buf[start:i], fmt.Errorf("missing field key") + } + + // check for "...a=123,=456" but allow "a=123,a\,=456" + if buf[i-1] == ',' && buf[i-2] != '\\' { + return i, buf[start:i], fmt.Errorf("missing field key") + } + + // check for "... value=" + if i+1 >= len(buf) { + return i, buf[start:i], fmt.Errorf("missing field value") + } + + // check for "... value=,value2=..." + if buf[i+1] == ',' || buf[i+1] == ' ' { + return i, buf[start:i], fmt.Errorf("missing field value") + } + + if isNumeric(buf[i+1]) || buf[i+1] == '-' || buf[i+1] == 'N' || buf[i+1] == 'n' { + var err error + i, err = scanNumber(buf, i+1) + if err != nil { + return i, buf[start:i], err + } + continue + } + // If next byte is not a double-quote, the value must be a boolean + if buf[i+1] != '"' { + var err error + i, _, err = scanBoolean(buf, i+1) + if err != nil { + return i, buf[start:i], err + } + continue + } + } + + if buf[i] == ',' && !quoted { + commas++ + } + + // reached end of block? + if buf[i] == ' ' && !quoted { + break + } + i++ + } + + if quoted { + return i, buf[start:i], fmt.Errorf("unbalanced quotes") + } + + // check that all field sections had key and values (e.g. prevent "a=1,b" + if equals == 0 || commas != equals-1 { + return i, buf[start:i], fmt.Errorf("invalid field format") + } + + return i, buf[start:i], nil +} + +// scanTime scans buf, starting at i for the time section of a point. It +// returns the ending position and the byte slice of the timestamp within buf +// and and error if the timestamp is not in the correct numeric format. +func scanTime(buf []byte, i int) (int, []byte, error) { + start := skipWhitespace(buf, i) + i = start + + for { + // reached the end of buf? 
+ if i >= len(buf) { + break + } + + // Reached end of block or trailing whitespace? + if buf[i] == '\n' || buf[i] == ' ' { + break + } + + // Handle negative timestamps + if i == start && buf[i] == '-' { + i++ + continue + } + + // Timestamps should be integers, make sure they are so we don't need + // to actually parse the timestamp until needed. + if buf[i] < '0' || buf[i] > '9' { + return i, buf[start:i], fmt.Errorf("bad timestamp") + } + i++ + } + return i, buf[start:i], nil +} + +func isNumeric(b byte) bool { + return (b >= '0' && b <= '9') || b == '.' +} + +// scanNumber returns the end position within buf, start at i after +// scanning over buf for an integer, or float. It returns an +// error if a invalid number is scanned. +func scanNumber(buf []byte, i int) (int, error) { + start := i + var isInt bool + + // Is negative number? + if i < len(buf) && buf[i] == '-' { + i++ + // There must be more characters now, as just '-' is illegal. + if i == len(buf) { + return i, ErrInvalidNumber + } + } + + // how many decimal points we've see + decimal := false + + // indicates the number is float in scientific notation + scientific := false + + for { + if i >= len(buf) { + break + } + + if buf[i] == ',' || buf[i] == ' ' { + break + } + + if buf[i] == 'i' && i > start && !isInt { + isInt = true + i++ + continue + } + + if buf[i] == '.' { + // Can't have more than 1 decimal (e.g. 1.1.1 should fail) + if decimal { + return i, ErrInvalidNumber + } + decimal = true + } + + // `e` is valid for floats but not as the first char + if i > start && (buf[i] == 'e' || buf[i] == 'E') { + scientific = true + i++ + continue + } + + // + and - are only valid at this point if they follow an e (scientific notation) + if (buf[i] == '+' || buf[i] == '-') && (buf[i-1] == 'e' || buf[i-1] == 'E') { + i++ + continue + } + + // NaN is an unsupported value + if i+2 < len(buf) && (buf[i] == 'N' || buf[i] == 'n') { + return i, ErrInvalidNumber + } + + if !isNumeric(buf[i]) { + return i, ErrInvalidNumber + } + i++ + } + + if isInt && (decimal || scientific) { + return i, ErrInvalidNumber + } + + numericDigits := i - start + if isInt { + numericDigits-- + } + if decimal { + numericDigits-- + } + if buf[start] == '-' { + numericDigits-- + } + + if numericDigits == 0 { + return i, ErrInvalidNumber + } + + // It's more common that numbers will be within min/max range for their type but we need to prevent + // out or range numbers from being parsed successfully. This uses some simple heuristics to decide + // if we should parse the number to the actual type. It does not do it all the time because it incurs + // extra allocations and we end up converting the type again when writing points to disk. + if isInt { + // Make sure the last char is an 'i' for integers (e.g. 
9i10 is not valid) + if buf[i-1] != 'i' { + return i, ErrInvalidNumber + } + // Parse the int to check bounds the number of digits could be larger than the max range + // We subtract 1 from the index to remove the `i` from our tests + if len(buf[start:i-1]) >= maxInt64Digits || len(buf[start:i-1]) >= minInt64Digits { + if _, err := parseIntBytes(buf[start:i-1], 10, 64); err != nil { + return i, fmt.Errorf("unable to parse integer %s: %s", buf[start:i-1], err) + } + } + } else { + // Parse the float to check bounds if it's scientific or the number of digits could be larger than the max range + if scientific || len(buf[start:i]) >= maxFloat64Digits || len(buf[start:i]) >= minFloat64Digits { + if _, err := parseFloatBytes(buf[start:i], 10); err != nil { + return i, fmt.Errorf("invalid float") + } + } + } + + return i, nil +} + +// scanBoolean returns the end position within buf, start at i after +// scanning over buf for boolean. Valid values for a boolean are +// t, T, true, TRUE, f, F, false, FALSE. It returns an error if a invalid boolean +// is scanned. +func scanBoolean(buf []byte, i int) (int, []byte, error) { + start := i + + if i < len(buf) && (buf[i] != 't' && buf[i] != 'f' && buf[i] != 'T' && buf[i] != 'F') { + return i, buf[start:i], fmt.Errorf("invalid boolean") + } + + i++ + for { + if i >= len(buf) { + break + } + + if buf[i] == ',' || buf[i] == ' ' { + break + } + i++ + } + + // Single char bool (t, T, f, F) is ok + if i-start == 1 { + return i, buf[start:i], nil + } + + // length must be 4 for true or TRUE + if (buf[start] == 't' || buf[start] == 'T') && i-start != 4 { + return i, buf[start:i], fmt.Errorf("invalid boolean") + } + + // length must be 5 for false or FALSE + if (buf[start] == 'f' || buf[start] == 'F') && i-start != 5 { + return i, buf[start:i], fmt.Errorf("invalid boolean") + } + + // Otherwise + valid := false + switch buf[start] { + case 't': + valid = bytes.Equal(buf[start:i], []byte("true")) + case 'f': + valid = bytes.Equal(buf[start:i], []byte("false")) + case 'T': + valid = bytes.Equal(buf[start:i], []byte("TRUE")) || bytes.Equal(buf[start:i], []byte("True")) + case 'F': + valid = bytes.Equal(buf[start:i], []byte("FALSE")) || bytes.Equal(buf[start:i], []byte("False")) + } + + if !valid { + return i, buf[start:i], fmt.Errorf("invalid boolean") + } + + return i, buf[start:i], nil + +} + +// skipWhitespace returns the end position within buf, starting at i after +// scanning over spaces in tags +func skipWhitespace(buf []byte, i int) int { + for i < len(buf) { + if buf[i] != ' ' && buf[i] != '\t' && buf[i] != 0 { + break + } + i++ + } + return i +} + +// scanLine returns the end position in buf and the next line found within +// buf. +func scanLine(buf []byte, i int) (int, []byte) { + start := i + quoted := false + fields := false + + // tracks how many '=' and commas we've seen + // this duplicates some of the functionality in scanFields + equals := 0 + commas := 0 + for { + // reached the end of buf? 
+ if i >= len(buf) { + break + } + + // skip past escaped characters + if buf[i] == '\\' { + i += 2 + continue + } + + if buf[i] == ' ' { + fields = true + } + + // If we see a double quote, makes sure it is not escaped + if fields { + if !quoted && buf[i] == '=' { + i++ + equals++ + continue + } else if !quoted && buf[i] == ',' { + i++ + commas++ + continue + } else if buf[i] == '"' && equals > commas { + i++ + quoted = !quoted + continue + } + } + + if buf[i] == '\n' && !quoted { + break + } + + i++ + } + + return i, buf[start:i] +} + +// scanTo returns the end position in buf and the next consecutive block +// of bytes, starting from i and ending with stop byte, where stop byte +// has not been escaped. +// +// If there are leading spaces, they are skipped. +func scanTo(buf []byte, i int, stop byte) (int, []byte) { + start := i + for { + // reached the end of buf? + if i >= len(buf) { + break + } + + // Reached unescaped stop value? + if buf[i] == stop && (i == 0 || buf[i-1] != '\\') { + break + } + i++ + } + + return i, buf[start:i] +} + +// scanTo returns the end position in buf and the next consecutive block +// of bytes, starting from i and ending with stop byte. If there are leading +// spaces, they are skipped. +func scanToSpaceOr(buf []byte, i int, stop byte) (int, []byte) { + start := i + if buf[i] == stop || buf[i] == ' ' { + return i, buf[start:i] + } + + for { + i++ + if buf[i-1] == '\\' { + continue + } + + // reached the end of buf? + if i >= len(buf) { + return i, buf[start:i] + } + + // reached end of block? + if buf[i] == stop || buf[i] == ' ' { + return i, buf[start:i] + } + } +} + +func scanTagValue(buf []byte, i int) (int, []byte) { + start := i + for { + if i >= len(buf) { + break + } + + if buf[i] == ',' && buf[i-1] != '\\' { + break + } + i++ + } + return i, buf[start:i] +} + +func scanFieldValue(buf []byte, i int) (int, []byte) { + start := i + quoted := false + for i < len(buf) { + // Only escape char for a field value is a double-quote and backslash + if buf[i] == '\\' && i+1 < len(buf) && (buf[i+1] == '"' || buf[i+1] == '\\') { + i += 2 + continue + } + + // Quoted value? (e.g. 
string) + if buf[i] == '"' { + i++ + quoted = !quoted + continue + } + + if buf[i] == ',' && !quoted { + break + } + i++ + } + return i, buf[start:i] +} + +func escapeMeasurement(in []byte) []byte { + for b, esc := range measurementEscapeCodes { + in = bytes.Replace(in, []byte{b}, esc, -1) + } + return in +} + +func unescapeMeasurement(in []byte) []byte { + for b, esc := range measurementEscapeCodes { + in = bytes.Replace(in, esc, []byte{b}, -1) + } + return in +} + +func escapeTag(in []byte) []byte { + for b, esc := range tagEscapeCodes { + if bytes.IndexByte(in, b) != -1 { + in = bytes.Replace(in, []byte{b}, esc, -1) + } + } + return in +} + +func unescapeTag(in []byte) []byte { + if bytes.IndexByte(in, '\\') == -1 { + return in + } + + for b, esc := range tagEscapeCodes { + if bytes.IndexByte(in, b) != -1 { + in = bytes.Replace(in, esc, []byte{b}, -1) + } + } + return in +} + +// EscapeStringField returns a copy of in with any double quotes or +// backslashes with escaped values +func EscapeStringField(in string) string { + var out []byte + i := 0 + for { + if i >= len(in) { + break + } + // escape double-quotes + if in[i] == '\\' { + out = append(out, '\\') + out = append(out, '\\') + i++ + continue + } + // escape double-quotes + if in[i] == '"' { + out = append(out, '\\') + out = append(out, '"') + i++ + continue + } + out = append(out, in[i]) + i++ + + } + return string(out) +} + +// unescapeStringField returns a copy of in with any escaped double-quotes +// or backslashes unescaped +func unescapeStringField(in string) string { + if strings.IndexByte(in, '\\') == -1 { + return in + } + + var out []byte + i := 0 + for { + if i >= len(in) { + break + } + // unescape backslashes + if in[i] == '\\' && i+1 < len(in) && in[i+1] == '\\' { + out = append(out, '\\') + i += 2 + continue + } + // unescape double-quotes + if in[i] == '\\' && i+1 < len(in) && in[i+1] == '"' { + out = append(out, '"') + i += 2 + continue + } + out = append(out, in[i]) + i++ + + } + return string(out) +} + +// NewPoint returns a new point with the given measurement name, tags, fields and timestamp. If +// an unsupported field value (NaN) or out of range time is passed, this function returns an error. 
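A minimal sketch of how this constructor might be called follows; the measurement, tag, and field names are invented, and only `NewTags`, `Fields`, and `NewPoint` come from this package:

```go
package main

import (
	"fmt"
	"log"
	"time"

	"github.com/influxdata/influxdb/models"
)

func main() {
	tags := models.NewTags(map[string]string{"host": "serverA", "region": "us-west"})
	fields := models.Fields{
		"value": 0.64,     // stored as a float field
		"count": int64(3), // stored as an integer field ("3i")
	}

	pt, err := models.NewPoint("cpu", tags, fields, time.Now().UTC())
	if err != nil {
		// NewPoint rejects NaN values, empty field sets, over-long keys,
		// and out-of-range timestamps.
		log.Fatal(err)
	}
	fmt.Println(pt.String())
}
```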
+func NewPoint(name string, tags Tags, fields Fields, t time.Time) (Point, error) { + key, err := pointKey(name, tags, fields, t) + if err != nil { + return nil, err + } + + return &point{ + key: key, + time: t, + fields: fields.MarshalBinary(), + }, nil +} + +// pointKey checks some basic requirements for valid points, and returns the +// key, along with an possible error +func pointKey(measurement string, tags Tags, fields Fields, t time.Time) ([]byte, error) { + if len(fields) == 0 { + return nil, ErrPointMustHaveAField + } + + if !t.IsZero() { + if err := CheckTime(t); err != nil { + return nil, err + } + } + + for key, value := range fields { + switch value := value.(type) { + case float64: + // Ensure the caller validates and handles invalid field values + if math.IsNaN(value) { + return nil, fmt.Errorf("NaN is an unsupported value for field %s", key) + } + case float32: + // Ensure the caller validates and handles invalid field values + if math.IsNaN(float64(value)) { + return nil, fmt.Errorf("NaN is an unsupported value for field %s", key) + } + } + if len(key) == 0 { + return nil, fmt.Errorf("all fields must have non-empty names") + } + } + + key := MakeKey([]byte(measurement), tags) + if len(key) > MaxKeyLength { + return nil, fmt.Errorf("max key length exceeded: %v > %v", len(key), MaxKeyLength) + } + + return key, nil +} + +// NewPointFromBytes returns a new Point from a marshalled Point. +func NewPointFromBytes(b []byte) (Point, error) { + p := &point{} + if err := p.UnmarshalBinary(b); err != nil { + return nil, err + } + if len(p.Fields()) == 0 { + return nil, ErrPointMustHaveAField + } + return p, nil +} + +// MustNewPoint returns a new point with the given measurement name, tags, fields and timestamp. If +// an unsupported field value (NaN) is passed, this function panics. 
+func MustNewPoint(name string, tags Tags, fields Fields, time time.Time) Point { + pt, err := NewPoint(name, tags, fields, time) + if err != nil { + panic(err.Error()) + } + return pt +} + +func (p *point) Data() []byte { + return p.data +} + +func (p *point) SetData(b []byte) { + p.data = b +} + +func (p *point) Key() []byte { + return p.key +} + +func (p *point) name() []byte { + _, name := scanTo(p.key, 0, ',') + return name +} + +// Name return the measurement name for the point +func (p *point) Name() string { + if p.cachedName != "" { + return p.cachedName + } + p.cachedName = string(escape.Unescape(p.name())) + return p.cachedName +} + +// SetName updates the measurement name for the point +func (p *point) SetName(name string) { + p.cachedName = "" + p.key = MakeKey([]byte(name), p.Tags()) +} + +// Time return the timestamp for the point +func (p *point) Time() time.Time { + return p.time +} + +// SetTime updates the timestamp for the point +func (p *point) SetTime(t time.Time) { + p.time = t +} + +// Round implements Point.Round +func (p *point) Round(d time.Duration) { + p.time = p.time.Round(d) +} + +// Tags returns the tag set for the point +func (p *point) Tags() Tags { + if p.cachedTags != nil { + return p.cachedTags + } + p.cachedTags = parseTags(p.key) + return p.cachedTags +} + +func parseTags(buf []byte) Tags { + if len(buf) == 0 { + return nil + } + + pos, name := scanTo(buf, 0, ',') + + // it's an empty key, so there are no tags + if len(name) == 0 { + return nil + } + + tags := make(Tags, 0, bytes.Count(buf, []byte(","))) + hasEscape := bytes.IndexByte(buf, '\\') != -1 + + i := pos + 1 + var key, value []byte + for { + if i >= len(buf) { + break + } + i, key = scanTo(buf, i, '=') + i, value = scanTagValue(buf, i+1) + + if len(value) == 0 { + continue + } + + if hasEscape { + tags = append(tags, Tag{Key: unescapeTag(key), Value: unescapeTag(value)}) + } else { + tags = append(tags, Tag{Key: key, Value: value}) + } + + i++ + } + + return tags +} + +// MakeKey creates a key for a set of tags. +func MakeKey(name []byte, tags Tags) []byte { + // unescape the name and then re-escape it to avoid double escaping. + // The key should always be stored in escaped form. + return append(escapeMeasurement(unescapeMeasurement(name)), tags.HashKey()...) 
+} + +// SetTags replaces the tags for the point +func (p *point) SetTags(tags Tags) { + p.key = MakeKey([]byte(p.Name()), tags) + p.cachedTags = tags +} + +// AddTag adds or replaces a tag value for a point +func (p *point) AddTag(key, value string) { + tags := p.Tags() + tags = append(tags, Tag{Key: []byte(key), Value: []byte(value)}) + sort.Sort(tags) + p.cachedTags = tags + p.key = MakeKey([]byte(p.Name()), tags) +} + +// Fields returns the fields for the point +func (p *point) Fields() Fields { + if p.cachedFields != nil { + return p.cachedFields + } + p.cachedFields = p.unmarshalBinary() + return p.cachedFields +} + +// SetPrecision will round a time to the specified precision +func (p *point) SetPrecision(precision string) { + switch precision { + case "n": + case "u": + p.SetTime(p.Time().Truncate(time.Microsecond)) + case "ms": + p.SetTime(p.Time().Truncate(time.Millisecond)) + case "s": + p.SetTime(p.Time().Truncate(time.Second)) + case "m": + p.SetTime(p.Time().Truncate(time.Minute)) + case "h": + p.SetTime(p.Time().Truncate(time.Hour)) + } +} + +func (p *point) String() string { + if p.Time().IsZero() { + return string(p.Key()) + " " + string(p.fields) + } + return string(p.Key()) + " " + string(p.fields) + " " + strconv.FormatInt(p.UnixNano(), 10) +} + +// AppendString implements Point.AppendString +func (p *point) AppendString(buf []byte) []byte { + buf = append(buf, p.key...) + buf = append(buf, ' ') + buf = append(buf, p.fields...) + + if !p.time.IsZero() { + buf = append(buf, ' ') + buf = strconv.AppendInt(buf, p.UnixNano(), 10) + } + + return buf +} + +func (p *point) StringSize() int { + size := len(p.key) + len(p.fields) + 1 + + if !p.time.IsZero() { + digits := 1 // even "0" has one digit + t := p.UnixNano() + if t < 0 { + // account for negative sign, then negate + digits++ + t = -t + } + for t > 9 { // already accounted for one digit + digits++ + t /= 10 + } + size += digits + 1 // digits and a space + } + + return size +} + +func (p *point) MarshalBinary() ([]byte, error) { + tb, err := p.time.MarshalBinary() + if err != nil { + return nil, err + } + + b := make([]byte, 8+len(p.key)+len(p.fields)+len(tb)) + i := 0 + + binary.BigEndian.PutUint32(b[i:], uint32(len(p.key))) + i += 4 + + i += copy(b[i:], p.key) + + binary.BigEndian.PutUint32(b[i:i+4], uint32(len(p.fields))) + i += 4 + + i += copy(b[i:], p.fields) + + copy(b[i:], tb) + return b, nil +} + +func (p *point) UnmarshalBinary(b []byte) error { + var i int + keyLen := int(binary.BigEndian.Uint32(b[:4])) + i += int(4) + + p.key = b[i : i+keyLen] + i += keyLen + + fieldLen := int(binary.BigEndian.Uint32(b[i : i+4])) + i += int(4) + + p.fields = b[i : i+fieldLen] + i += fieldLen + + p.time = time.Now() + p.time.UnmarshalBinary(b[i:]) + return nil +} + +func (p *point) PrecisionString(precision string) string { + if p.Time().IsZero() { + return fmt.Sprintf("%s %s", p.Key(), string(p.fields)) + } + return fmt.Sprintf("%s %s %d", p.Key(), string(p.fields), + p.UnixNano()/GetPrecisionMultiplier(precision)) +} + +func (p *point) RoundedString(d time.Duration) string { + if p.Time().IsZero() { + return fmt.Sprintf("%s %s", p.Key(), string(p.fields)) + } + return fmt.Sprintf("%s %s %d", p.Key(), string(p.fields), + p.time.Round(d).UnixNano()) +} + +func (p *point) unmarshalBinary() Fields { + iter := p.FieldIterator() + fields := make(Fields, 8) + for iter.Next() { + if len(iter.FieldKey()) == 0 { + continue + } + switch iter.Type() { + case Float: + fields[string(iter.FieldKey())] = iter.FloatValue() + case Integer: + 
fields[string(iter.FieldKey())] = iter.IntegerValue() + case String: + fields[string(iter.FieldKey())] = iter.StringValue() + case Boolean: + fields[string(iter.FieldKey())] = iter.BooleanValue() + } + } + return fields +} + +func (p *point) HashID() uint64 { + h := NewInlineFNV64a() + h.Write(p.key) + sum := h.Sum64() + return sum +} + +func (p *point) UnixNano() int64 { + return p.Time().UnixNano() +} + +func (p *point) Split(size int) []Point { + if p.time.IsZero() || len(p.String()) <= size { + return []Point{p} + } + + // key string, timestamp string, spaces + size -= len(p.key) + len(strconv.FormatInt(p.time.UnixNano(), 10)) + 2 + + var points []Point + var start, cur int + + for cur < len(p.fields) { + end, _ := scanTo(p.fields, cur, '=') + end, _ = scanFieldValue(p.fields, end+1) + + if cur > start && end-start > size { + points = append(points, &point{ + key: p.key, + time: p.time, + fields: p.fields[start : cur-1], + }) + start = cur + } + + cur = end + 1 + } + + points = append(points, &point{ + key: p.key, + time: p.time, + fields: p.fields[start:], + }) + + return points +} + +// Tag represents a single key/value tag pair. +type Tag struct { + Key []byte + Value []byte +} + +// Tags represents a sorted list of tags. +type Tags []Tag + +// NewTags returns a new Tags from a map. +func NewTags(m map[string]string) Tags { + if len(m) == 0 { + return nil + } + a := make(Tags, 0, len(m)) + for k, v := range m { + a = append(a, Tag{Key: []byte(k), Value: []byte(v)}) + } + sort.Sort(a) + return a +} + +func (a Tags) Len() int { return len(a) } +func (a Tags) Less(i, j int) bool { return bytes.Compare(a[i].Key, a[j].Key) == -1 } +func (a Tags) Swap(i, j int) { a[i], a[j] = a[j], a[i] } + +// Get returns the value for a key. +func (a Tags) Get(key []byte) []byte { + // OPTIMIZE: Use sort.Search if tagset is large. + + for _, t := range a { + if bytes.Equal(t.Key, key) { + return t.Value + } + } + return nil +} + +// GetString returns the string value for a string key. +func (a Tags) GetString(key string) string { + return string(a.Get([]byte(key))) +} + +// Set sets the value for a key. +func (a *Tags) Set(key, value []byte) { + for _, t := range *a { + if bytes.Equal(t.Key, key) { + t.Value = value + return + } + } + *a = append(*a, Tag{Key: key, Value: value}) + sort.Sort(*a) +} + +// SetString sets the string value for a string key. +func (a *Tags) SetString(key, value string) { + a.Set([]byte(key), []byte(value)) +} + +// Delete removes a tag by key. +func (a *Tags) Delete(key []byte) { + for i, t := range *a { + if bytes.Equal(t.Key, key) { + copy((*a)[i:], (*a)[i+1:]) + (*a)[len(*a)-1] = Tag{} + *a = (*a)[:len(*a)-1] + return + } + } +} + +// Map returns a map representation of the tags. +func (a Tags) Map() map[string]string { + m := make(map[string]string, len(a)) + for _, t := range a { + m[string(t.Key)] = string(t.Value) + } + return m +} + +// Merge merges the tags combining the two. If both define a tag with the +// same key, the merged value overwrites the old value. +// A new map is returned. +func (a Tags) Merge(other map[string]string) Tags { + merged := make(map[string]string, len(a)+len(other)) + for _, t := range a { + merged[string(t.Key)] = string(t.Value) + } + for k, v := range other { + merged[k] = v + } + return NewTags(merged) +} + +// HashKey hashes all of a tag's keys. +func (a Tags) HashKey() []byte { + // Empty maps marshal to empty bytes. 
+ if len(a) == 0 { + return nil + } + + escaped := make(Tags, 0, len(a)) + for _, t := range a { + ek := escapeTag(t.Key) + ev := escapeTag(t.Value) + + if len(ev) > 0 { + escaped = append(escaped, Tag{Key: ek, Value: ev}) + } + } + + // Extract keys and determine final size. + sz := len(escaped) + (len(escaped) * 2) // separators + keys := make([][]byte, len(escaped)+1) + for i, t := range escaped { + keys[i] = t.Key + sz += len(t.Key) + len(t.Value) + } + keys = keys[:len(escaped)] + sort.Sort(byteSlices(keys)) + + // Generate marshaled bytes. + b := make([]byte, sz) + buf := b + idx := 0 + for i, k := range keys { + buf[idx] = ',' + idx++ + copy(buf[idx:idx+len(k)], k) + idx += len(k) + buf[idx] = '=' + idx++ + v := escaped[i].Value + copy(buf[idx:idx+len(v)], v) + idx += len(v) + } + return b[:idx] +} + +// Fields represents a mapping between a Point's field names and their +// values. +type Fields map[string]interface{} + +func parseNumber(val []byte) (interface{}, error) { + if val[len(val)-1] == 'i' { + val = val[:len(val)-1] + return parseIntBytes(val, 10, 64) + } + for i := 0; i < len(val); i++ { + // If there is a decimal or an N (NaN), I (Inf), parse as float + if val[i] == '.' || val[i] == 'N' || val[i] == 'n' || val[i] == 'I' || val[i] == 'i' || val[i] == 'e' { + return parseFloatBytes(val, 64) + } + if val[i] < '0' && val[i] > '9' { + return string(val), nil + } + } + return parseFloatBytes(val, 64) +} + +func (p *point) FieldIterator() FieldIterator { + p.Reset() + return p +} + +type fieldIterator struct { + start, end int + key, keybuf []byte + valueBuf []byte + fieldType FieldType +} + +func (p *point) Next() bool { + p.it.start = p.it.end + if p.it.start >= len(p.fields) { + return false + } + + p.it.end, p.it.key = scanTo(p.fields, p.it.start, '=') + if escape.IsEscaped(p.it.key) { + p.it.keybuf = escape.AppendUnescaped(p.it.keybuf[:0], p.it.key) + p.it.key = p.it.keybuf + } + + p.it.end, p.it.valueBuf = scanFieldValue(p.fields, p.it.end+1) + p.it.end++ + + if len(p.it.valueBuf) == 0 { + p.it.fieldType = Empty + return true + } + + c := p.it.valueBuf[0] + + if c == '"' { + p.it.fieldType = String + return true + } + + if strings.IndexByte(`0123456789-.nNiI`, c) >= 0 { + if p.it.valueBuf[len(p.it.valueBuf)-1] == 'i' { + p.it.fieldType = Integer + p.it.valueBuf = p.it.valueBuf[:len(p.it.valueBuf)-1] + } else { + p.it.fieldType = Float + } + return true + } + + // to keep the same behavior that currently exists, default to boolean + p.it.fieldType = Boolean + return true +} + +func (p *point) FieldKey() []byte { + return p.it.key +} + +func (p *point) Type() FieldType { + return p.it.fieldType +} + +func (p *point) StringValue() string { + return unescapeStringField(string(p.it.valueBuf[1 : len(p.it.valueBuf)-1])) +} + +func (p *point) IntegerValue() int64 { + n, err := parseIntBytes(p.it.valueBuf, 10, 64) + if err != nil { + panic(fmt.Sprintf("unable to parse integer value %q: %v", p.it.valueBuf, err)) + } + return n +} + +func (p *point) BooleanValue() bool { + b, err := parseBoolBytes(p.it.valueBuf) + if err != nil { + panic(fmt.Sprintf("unable to parse bool value %q: %v", p.it.valueBuf, err)) + } + return b +} + +func (p *point) FloatValue() float64 { + f, err := parseFloatBytes(p.it.valueBuf, 64) + if err != nil { + // panic because that's what the non-iterator code does + panic(fmt.Sprintf("unable to parse floating point value %q: %v", p.it.valueBuf, err)) + } + return f +} + +func (p *point) Delete() { + switch { + case p.it.end == p.it.start: + case p.it.end >= 
len(p.fields): + p.fields = p.fields[:p.it.start] + case p.it.start == 0: + p.fields = p.fields[p.it.end:] + default: + p.fields = append(p.fields[:p.it.start], p.fields[p.it.end:]...) + } + + p.it.end = p.it.start + p.it.key = nil + p.it.valueBuf = nil + p.it.fieldType = Empty +} + +func (p *point) Reset() { + p.it.fieldType = Empty + p.it.key = nil + p.it.valueBuf = nil + p.it.start = 0 + p.it.end = 0 +} + +// MarshalBinary encodes all the fields to their proper type and returns the binary +// represenation +// NOTE: uint64 is specifically not supported due to potential overflow when we decode +// again later to an int64 +// NOTE2: uint is accepted, and may be 64 bits, and is for some reason accepted... +func (p Fields) MarshalBinary() []byte { + var b []byte + keys := make([]string, 0, len(p)) + + for k := range p { + keys = append(keys, k) + } + + // Not really necessary, can probably be removed. + sort.Strings(keys) + + for i, k := range keys { + if i > 0 { + b = append(b, ',') + } + b = appendField(b, k, p[k]) + } + + return b +} + +func appendField(b []byte, k string, v interface{}) []byte { + b = append(b, []byte(escape.String(k))...) + b = append(b, '=') + + // check popular types first + switch v := v.(type) { + case float64: + b = strconv.AppendFloat(b, v, 'f', -1, 64) + case int64: + b = strconv.AppendInt(b, v, 10) + b = append(b, 'i') + case string: + b = append(b, '"') + b = append(b, []byte(EscapeStringField(v))...) + b = append(b, '"') + case bool: + b = strconv.AppendBool(b, v) + case int32: + b = strconv.AppendInt(b, int64(v), 10) + b = append(b, 'i') + case int16: + b = strconv.AppendInt(b, int64(v), 10) + b = append(b, 'i') + case int8: + b = strconv.AppendInt(b, int64(v), 10) + b = append(b, 'i') + case int: + b = strconv.AppendInt(b, int64(v), 10) + b = append(b, 'i') + case uint32: + b = strconv.AppendInt(b, int64(v), 10) + b = append(b, 'i') + case uint16: + b = strconv.AppendInt(b, int64(v), 10) + b = append(b, 'i') + case uint8: + b = strconv.AppendInt(b, int64(v), 10) + b = append(b, 'i') + // TODO: 'uint' should be considered just as "dangerous" as a uint64, + // perhaps the value should be checked and capped at MaxInt64? We could + // then include uint64 as an accepted value + case uint: + b = strconv.AppendInt(b, int64(v), 10) + b = append(b, 'i') + case float32: + b = strconv.AppendFloat(b, float64(v), 'f', -1, 32) + case []byte: + b = append(b, v...) + case nil: + // skip + default: + // Can't determine the type, so convert to string + b = append(b, '"') + b = append(b, []byte(EscapeStringField(fmt.Sprintf("%v", v)))...) + b = append(b, '"') + + } + + return b +} + +type byteSlices [][]byte + +func (a byteSlices) Len() int { return len(a) } +func (a byteSlices) Less(i, j int) bool { return bytes.Compare(a[i], a[j]) == -1 } +func (a byteSlices) Swap(i, j int) { a[i], a[j] = a[j], a[i] } diff --git a/vendor/github.com/influxdata/influxdb/models/rows.go b/vendor/github.com/influxdata/influxdb/models/rows.go new file mode 100644 index 00000000..528b3272 --- /dev/null +++ b/vendor/github.com/influxdata/influxdb/models/rows.go @@ -0,0 +1,58 @@ +package models + +import ( + "sort" +) + +// Row represents a single row returned from the execution of a statement. +type Row struct { + Name string `json:"name,omitempty"` + Tags map[string]string `json:"tags,omitempty"` + Columns []string `json:"columns,omitempty"` + Values [][]interface{} `json:"values,omitempty"` +} + +// SameSeries returns true if r contains values for the same series as o. 
+func (r *Row) SameSeries(o *Row) bool { + return r.tagsHash() == o.tagsHash() && r.Name == o.Name +} + +// tagsHash returns a hash of tag key/value pairs. +func (r *Row) tagsHash() uint64 { + h := NewInlineFNV64a() + keys := r.tagsKeys() + for _, k := range keys { + h.Write([]byte(k)) + h.Write([]byte(r.Tags[k])) + } + return h.Sum64() +} + +// tagKeys returns a sorted list of tag keys. +func (r *Row) tagsKeys() []string { + a := make([]string, 0, len(r.Tags)) + for k := range r.Tags { + a = append(a, k) + } + sort.Strings(a) + return a +} + +// Rows represents a collection of rows. Rows implements sort.Interface. +type Rows []*Row + +func (p Rows) Len() int { return len(p) } + +func (p Rows) Less(i, j int) bool { + // Sort by name first. + if p[i].Name != p[j].Name { + return p[i].Name < p[j].Name + } + + // Sort by tag set hash. Tags don't have a meaningful sort order so we + // just compute a hash and sort by that instead. This allows the tests + // to receive rows in a predictable order every time. + return p[i].tagsHash() < p[j].tagsHash() +} + +func (p Rows) Swap(i, j int) { p[i], p[j] = p[j], p[i] } diff --git a/vendor/github.com/influxdata/influxdb/models/statistic.go b/vendor/github.com/influxdata/influxdb/models/statistic.go new file mode 100644 index 00000000..33f80899 --- /dev/null +++ b/vendor/github.com/influxdata/influxdb/models/statistic.go @@ -0,0 +1,40 @@ +package models + +type Statistic struct { + Name string `json:"name"` + Tags map[string]string `json:"tags"` + Values map[string]interface{} `json:"values"` +} + +func NewStatistic(name string) Statistic { + return Statistic{ + Name: name, + Tags: make(map[string]string), + Values: make(map[string]interface{}), + } +} + +// StatisticTags is a map that can be merged with others without causing +// mutations to either map. +type StatisticTags map[string]string + +// Merge creates a new map containing the merged contents of tags and t. +// If both tags and the receiver map contain the same key, the value in tags +// is used in the resulting map. +// +// Merge always returns a usable map. +func (t StatisticTags) Merge(tags map[string]string) map[string]string { + // Add everything in tags to the result. + out := make(map[string]string, len(tags)) + for k, v := range tags { + out[k] = v + } + + // Only add values from t that don't appear in tags. + for k, v := range t { + if _, ok := tags[k]; !ok { + out[k] = v + } + } + return out +} diff --git a/vendor/github.com/influxdata/influxdb/models/time.go b/vendor/github.com/influxdata/influxdb/models/time.go new file mode 100644 index 00000000..e98f2cb3 --- /dev/null +++ b/vendor/github.com/influxdata/influxdb/models/time.go @@ -0,0 +1,74 @@ +package models + +// Helper time methods since parsing time can easily overflow and we only support a +// specific time range. + +import ( + "fmt" + "math" + "time" +) + +const ( + // MinNanoTime is the minumum time that can be represented. + // + // 1677-09-21 00:12:43.145224194 +0000 UTC + // + // The two lowest minimum integers are used as sentinel values. The + // minimum value needs to be used as a value lower than any other value for + // comparisons and another separate value is needed to act as a sentinel + // default value that is unusable by the user, but usable internally. + // Because these two values need to be used for a special purpose, we do + // not allow users to write points at these two times. + MinNanoTime = int64(math.MinInt64) + 2 + + // MaxNanoTime is the maximum time that can be represented. 
+ // + // 2262-04-11 23:47:16.854775806 +0000 UTC + // + // The highest time represented by a nanosecond needs to be used for an + // exclusive range in the shard group, so the maximum time needs to be one + // less than the possible maximum number of nanoseconds representable by an + // int64 so that we don't lose a point at that one time. + MaxNanoTime = int64(math.MaxInt64) - 1 +) + +var ( + minNanoTime = time.Unix(0, MinNanoTime).UTC() + maxNanoTime = time.Unix(0, MaxNanoTime).UTC() + + // ErrTimeOutOfRange gets returned when time is out of the representable range using int64 nanoseconds since the epoch. + ErrTimeOutOfRange = fmt.Errorf("time outside range %d - %d", MinNanoTime, MaxNanoTime) +) + +// SafeCalcTime safely calculates the time given. Will return error if the time is outside the +// supported range. +func SafeCalcTime(timestamp int64, precision string) (time.Time, error) { + mult := GetPrecisionMultiplier(precision) + if t, ok := safeSignedMult(timestamp, mult); ok { + tme := time.Unix(0, t).UTC() + return tme, CheckTime(tme) + } + + return time.Time{}, ErrTimeOutOfRange +} + +// CheckTime checks that a time is within the safe range. +func CheckTime(t time.Time) error { + if t.Before(minNanoTime) || t.After(maxNanoTime) { + return ErrTimeOutOfRange + } + return nil +} + +// Perform the multiplication and check to make sure it didn't overflow. +func safeSignedMult(a, b int64) (int64, bool) { + if a == 0 || b == 0 || a == 1 || b == 1 { + return a * b, true + } + if a == MinNanoTime || b == MaxNanoTime { + return 0, false + } + c := a * b + return c, c/b == a +} diff --git a/vendor/github.com/influxdata/influxdb/pkg/escape/bytes.go b/vendor/github.com/influxdata/influxdb/pkg/escape/bytes.go new file mode 100644 index 00000000..5b6ba15a --- /dev/null +++ b/vendor/github.com/influxdata/influxdb/pkg/escape/bytes.go @@ -0,0 +1,95 @@ +package escape + +import ( + "bytes" + "strings" +) + +func Bytes(in []byte) []byte { + for b, esc := range Codes { + in = bytes.Replace(in, []byte{b}, esc, -1) + } + return in +} + +const escapeChars = `," =` + +func IsEscaped(b []byte) bool { + for len(b) > 0 { + i := bytes.IndexByte(b, '\\') + if i < 0 { + return false + } + + if i+1 < len(b) && strings.IndexByte(escapeChars, b[i+1]) >= 0 { + return true + } + b = b[i+1:] + } + return false +} + +func AppendUnescaped(dst, src []byte) []byte { + var pos int + for len(src) > 0 { + next := bytes.IndexByte(src[pos:], '\\') + if next < 0 || pos+next+1 >= len(src) { + return append(dst, src...) + } + + if pos+next+1 < len(src) && strings.IndexByte(escapeChars, src[pos+next+1]) >= 0 { + if pos+next > 0 { + dst = append(dst, src[:pos+next]...) 
+ } + src = src[pos+next+1:] + pos = 0 + } else { + pos += next + 1 + } + } + + return dst +} + +func Unescape(in []byte) []byte { + if len(in) == 0 { + return nil + } + + if bytes.IndexByte(in, '\\') == -1 { + return in + } + + i := 0 + inLen := len(in) + var out []byte + + for { + if i >= inLen { + break + } + if in[i] == '\\' && i+1 < inLen { + switch in[i+1] { + case ',': + out = append(out, ',') + i += 2 + continue + case '"': + out = append(out, '"') + i += 2 + continue + case ' ': + out = append(out, ' ') + i += 2 + continue + case '=': + out = append(out, '=') + i += 2 + continue + } + } + out = append(out, in[i]) + i += 1 + } + return out +} diff --git a/vendor/github.com/influxdata/influxdb/pkg/escape/strings.go b/vendor/github.com/influxdata/influxdb/pkg/escape/strings.go new file mode 100644 index 00000000..d3911428 --- /dev/null +++ b/vendor/github.com/influxdata/influxdb/pkg/escape/strings.go @@ -0,0 +1,38 @@ +package escape + +import "strings" + +var ( + Codes = map[byte][]byte{ + ',': []byte(`\,`), + '"': []byte(`\"`), + ' ': []byte(`\ `), + '=': []byte(`\=`), + } + + codesStr = map[string]string{} +) + +func init() { + for k, v := range Codes { + codesStr[string(k)] = string(v) + } +} + +func UnescapeString(in string) string { + if strings.IndexByte(in, '\\') == -1 { + return in + } + + for b, esc := range codesStr { + in = strings.Replace(in, esc, b, -1) + } + return in +} + +func String(in string) string { + for b, esc := range codesStr { + in = strings.Replace(in, b, esc, -1) + } + return in +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/SPEC.md b/vendor/github.com/opencontainers/runc/libcontainer/SPEC.md index e5894c64..4363b6f9 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/SPEC.md +++ b/vendor/github.com/opencontainers/runc/libcontainer/SPEC.md @@ -154,6 +154,90 @@ that no processes or threads escape the cgroups. This sync is done via a pipe ( specified in the runtime section below ) that the container's init process will block waiting for the parent to finish setup. +### IntelRdt + +Intel platforms with new Xeon CPU support Intel Resource Director Technology +(RDT). Cache Allocation Technology (CAT) is a sub-feature of RDT, which +currently supports L3 cache resource allocation. + +This feature provides a way for the software to restrict cache allocation to a +defined 'subset' of L3 cache which may be overlapping with other 'subsets'. +The different subsets are identified by class of service (CLOS) and each CLOS +has a capacity bitmask (CBM). + +It can be used to handle L3 cache resource allocation for containers if +hardware and kernel support Intel RDT/CAT. + +In Linux 4.10 kernel or newer, the interface is defined and exposed via +"resource control" filesystem, which is a "cgroup-like" interface. + +Comparing with cgroups, it has similar process management lifecycle and +interfaces in a container. But unlike cgroups' hierarchy, it has single level +filesystem layout. + +Intel RDT "resource control" filesystem hierarchy: +``` +mount -t resctrl resctrl /sys/fs/resctrl +tree /sys/fs/resctrl +/sys/fs/resctrl/ +|-- info +| |-- L3 +| |-- cbm_mask +| |-- min_cbm_bits +| |-- num_closids +|-- cpus +|-- schemata +|-- tasks +|-- <container_id> + |-- cpus + |-- schemata + |-- tasks + +``` + +For runc, we can make use of `tasks` and `schemata` configuration for L3 cache +resource constraints. + +The file `tasks` has a list of tasks that belongs to this group (e.g., +"<container_id>" group).
Tasks can be added to a group by writing the task ID +to the "tasks" file (which will automatically remove them from the previous +group to which they belonged). New tasks created by fork(2) and clone(2) are +added to the same group as their parent. If a pid is not in any sub group, it +is in the root group. + +The file `schemata` has allocation masks/values for L3 cache on each socket, +which contains L3 cache id and capacity bitmask (CBM). +``` + Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..." +``` +For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`, +which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. + +The valid L3 cache CBM is a *contiguous* bit set, and the number of bits that can +be set is no more than the maximum CBM length. The maximum CBM length varies among +supported Intel Xeon platforms. In Intel RDT "resource control" filesystem +layout, the CBM in a group should be a subset of the CBM in root. The kernel will +check if it is valid when writing. e.g., 0xfffff in root indicates that the max CBM +length is 20 bits, which maps to the entire L3 cache capacity. Some valid CBM +values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc. + +For more information about the Intel RDT/CAT kernel interface: +https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt + +An example for runc: +``` +Consider a two-socket machine with two L3 caches where the default CBM is +0xfffff and the max CBM length is 20 bits. With this configuration, tasks +inside the container only have access to the "upper" 80% of L3 cache id 0 and +the "lower" 50% of L3 cache id 1: + +"linux": { + "intelRdt": { + "l3CacheSchema": "L3:0=ffff0;1=3ff" + } +} +``` + ### Security The standard set of Linux capabilities that are set in a container @@ -306,7 +390,7 @@ a container. | Exec | Execute a new process inside of the container ( requires setns ) | | Set | Setup configs of the container after it's created | -### Execute a new process inside of a running container. +### Execute a new process inside of a running container User can execute a new process inside of a running container. Any binaries to be executed must be accessible within the container's rootfs. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go index 82ed1a68..7fff0627 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go @@ -2,15 +2,10 @@ package apparmor -// #cgo LDFLAGS: -lapparmor -// #include <sys/apparmor.h> -// #include <stdlib.h> -import "C" import ( "fmt" "io/ioutil" "os" - "unsafe" ) // IsEnabled returns true if apparmor is enabled for the host. @@ -24,16 +19,36 @@ func IsEnabled() bool { return false } +func setprocattr(attr, value string) error { + // Under AppArmor you can only change your own attr, so use /proc/self/ + // instead of /proc/<tid>/ like libapparmor does + path := fmt.Sprintf("/proc/self/attr/%s", attr) + + f, err := os.OpenFile(path, os.O_WRONLY, 0) + if err != nil { + return err + } + defer f.Close() + + _, err = fmt.Fprintf(f, "%s", value) + return err +} + +// changeOnExec reimplements aa_change_onexec from libapparmor in Go +func changeOnExec(name string) error { + value := "exec " + name + if err := setprocattr("exec", value); err != nil { + return fmt.Errorf("apparmor failed to apply profile: %s", err) + } + return nil +} + +// ApplyProfile will apply the profile with the specified name to the process after // the next exec.
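To make the `schemata` format from the SPEC section above concrete, here is a small illustrative Go sketch (not part of the vendored code; `buildL3Schema` and the group path are hypothetical) that renders a cache-id-to-CBM map as an L3 schema line and writes it to a resctrl group:

```go
package main

import (
	"fmt"
	"io/ioutil"
	"sort"
	"strings"
)

// buildL3Schema renders a cache-id -> CBM map as a single schemata line in
// the "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>" format.
func buildL3Schema(cbms map[int]uint64) string {
	ids := make([]int, 0, len(cbms))
	for id := range cbms {
		ids = append(ids, id)
	}
	sort.Ints(ids)

	parts := make([]string, 0, len(ids))
	for _, id := range ids {
		parts = append(parts, fmt.Sprintf("%d=%x", id, cbms[id]))
	}
	return "L3:" + strings.Join(parts, ";")
}

func main() {
	// Upper 80% of cache id 0 and lower 50% of cache id 1 on a 20-bit CBM.
	schema := buildL3Schema(map[int]uint64{0: 0xffff0, 1: 0x3ff})
	fmt.Println(schema) // L3:0=ffff0;1=3ff

	// Writing the line to a group's schemata file applies the constraint
	// (the group path here is hypothetical).
	if err := ioutil.WriteFile("/sys/fs/resctrl/container_id/schemata", []byte(schema+"\n"), 0644); err != nil {
		fmt.Println("write schemata:", err)
	}
}
```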
func ApplyProfile(name string) error { if name == "" { return nil } - cName := C.CString(name) - defer C.free(unsafe.Pointer(cName)) - if _, err := C.aa_change_onexec(cName); err != nil { - return fmt.Errorf("apparmor failed to apply profile: %s", err) - } - return nil + + return changeOnExec(name) } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go index 22d82acb..43bdccf3 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go @@ -145,8 +145,17 @@ func (m *Manager) Apply(pid int) (err error) { m.Paths[sys.Name()] = p if err := sys.Apply(d); err != nil { + if os.IsPermission(err) && m.Cgroups.Path == "" { + // If we didn't set a cgroup path, then let's defer the error here + // until we know whether we have set limits or not. + // If we hadn't set limits, then it's ok that we couldn't join this cgroup, because + // it will have the same limits as its parent. + delete(m.Paths, sys.Name()) + continue + } return err } + } return nil } @@ -198,6 +207,10 @@ func (m *Manager) Set(container *configs.Config) error { for _, sys := range subsystems { path := paths[sys.Name()] if err := sys.Set(path, container.Cgroups); err != nil { + if path == "" { + // cgroup never applied + return fmt.Errorf("cannot set limits on the %s cgroup, as the container has not joined it", sys.Name()) + } return err } } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go index e70dfe3b..4b19f8a9 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go @@ -29,11 +29,15 @@ func (s *FreezerGroup) Apply(d *cgroupData) error { func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error { switch cgroup.Resources.Freezer { case configs.Frozen, configs.Thawed: - if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil { - return err - } - for { + // In case this loop does not exit because it doesn't get the expected + // state, let's write again this state, hoping it's going to be properly + // set this time. Otherwise, this loop could run infinitely, waiting for + // a state change that would never happen. 
+ if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil { + return err + } + state, err := readFile(path, "freezer.state") if err != nil { return err @@ -41,6 +45,7 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error { if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) { break } + time.Sleep(1 * time.Millisecond) } case configs.Undefined: diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go deleted file mode 100644 index b1efbfd9..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/rootless/rootless.go +++ /dev/null @@ -1,128 +0,0 @@ -// +build linux - -package rootless - -import ( - "fmt" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fs" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/configs/validate" -) - -// TODO: This is copied from libcontainer/cgroups/fs, which duplicates this code -// needlessly. We should probably export this list. - -var subsystems = []subsystem{ - &fs.CpusetGroup{}, - &fs.DevicesGroup{}, - &fs.MemoryGroup{}, - &fs.CpuGroup{}, - &fs.CpuacctGroup{}, - &fs.PidsGroup{}, - &fs.BlkioGroup{}, - &fs.HugetlbGroup{}, - &fs.NetClsGroup{}, - &fs.NetPrioGroup{}, - &fs.PerfEventGroup{}, - &fs.FreezerGroup{}, - &fs.NameGroup{GroupName: "name=systemd"}, -} - -type subsystem interface { - // Name returns the name of the subsystem. - Name() string - - // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. - GetStats(path string, stats *cgroups.Stats) error -} - -// The noop cgroup manager is used for rootless containers, because we currently -// cannot manage cgroups if we are in a rootless setup. This manager is chosen -// by factory if we are in rootless mode. We error out if any cgroup options are -// set in the config -- this may change in the future with upcoming kernel features -// like the cgroup namespace. - -type Manager struct { - Cgroups *configs.Cgroup - Paths map[string]string -} - -func (m *Manager) Apply(pid int) error { - // If there are no cgroup settings, there's nothing to do. - if m.Cgroups == nil { - return nil - } - - // We can't set paths. - // TODO(cyphar): Implement the case where the runner of a rootless container - // owns their own cgroup, which would allow us to set up a - // cgroup for each path. - if m.Cgroups.Paths != nil { - return fmt.Errorf("cannot change cgroup path in rootless container") - } - - // We load the paths into the manager. - paths := make(map[string]string) - for _, sys := range subsystems { - name := sys.Name() - - path, err := cgroups.GetOwnCgroupPath(name) - if err != nil { - // Ignore paths we couldn't resolve. - continue - } - - paths[name] = path - } - - m.Paths = paths - return nil -} - -func (m *Manager) GetPaths() map[string]string { - return m.Paths -} - -func (m *Manager) Set(container *configs.Config) error { - // We have to re-do the validation here, since someone might decide to - // update a rootless container. 
- return validate.New().Validate(container) -} - -func (m *Manager) GetPids() ([]int, error) { - dir, err := cgroups.GetOwnCgroupPath("devices") - if err != nil { - return nil, err - } - return cgroups.GetPids(dir) -} - -func (m *Manager) GetAllPids() ([]int, error) { - dir, err := cgroups.GetOwnCgroupPath("devices") - if err != nil { - return nil, err - } - return cgroups.GetAllPids(dir) -} - -func (m *Manager) GetStats() (*cgroups.Stats, error) { - // TODO(cyphar): We can make this work if we figure out a way to allow usage - // of cgroups with a rootless container. While this doesn't - // actually require write access to a cgroup directory, the - // statistics are not useful if they can be affected by - // non-container processes. - return nil, fmt.Errorf("cannot get cgroup stats in rootless container") -} - -func (m *Manager) Freeze(state configs.FreezerState) error { - // TODO(cyphar): We can make this work if we figure out a way to allow usage - // of cgroups with a rootless container. - return fmt.Errorf("cannot use freezer cgroup in rootless container") -} - -func (m *Manager) Destroy() error { - // We don't have to do anything here because we didn't do any setup. - return nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go index 7de9ae60..a65d8e44 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go @@ -1,4 +1,4 @@ -// +build !linux +// +build !linux static_build package systemd @@ -43,7 +43,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) { } func (m *Manager) Set(container *configs.Config) error { - return nil, fmt.Errorf("Systemd not supported") + return fmt.Errorf("Systemd not supported") } func (m *Manager) Freeze(state configs.FreezerState) error { diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go index de89ccbe..45bd3acc 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go @@ -1,4 +1,4 @@ -// +build linux +// +build linux,!static_build package systemd @@ -271,6 +271,13 @@ func (m *Manager) Apply(pid int) error { // cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd. if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 { cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod + // systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota + // (integer percentage of CPU) internally. This means that if a fractional percent of + // CPU is indicated by Resources.CpuQuota, we need to round up to the nearest + // 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect. 
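As a worked example of the rounding described in the comment above (the helper below is illustrative, not part of the patch): CpuQuota=15000 with CpuPeriod=100000 is exactly 15% of a CPU (150000 µs per second) and is left alone, while CpuQuota=15500 yields 155000 µs and is rounded up to 160000 µs so systemd's whole-percent CPUQuota does not drop the fractional percent:

```go
package main

import "fmt"

// roundCPUQuota mirrors the rounding logic in Apply: systemd stores CPUQuota
// as a whole percentage, so any fractional percent is rounded up to the next
// 10ms (10000 microseconds per CPU second).
func roundCPUQuota(cpuQuota, cpuPeriod uint64) uint64 {
	cpuQuotaPerSecUSec := cpuQuota * 1000000 / cpuPeriod
	if cpuQuotaPerSecUSec%10000 != 0 {
		cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
	}
	return cpuQuotaPerSecUSec
}

func main() {
	fmt.Println(roundCPUQuota(15000, 100000)) // 150000: already a whole percent
	fmt.Println(roundCPUQuota(15500, 100000)) // 160000: 155000 rounded up
}
```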
+ if cpuQuotaPerSecUSec%10000 != 0 { + cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000 + } properties = append(properties, newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec)) } @@ -288,10 +295,13 @@ func (m *Manager) Apply(pid int) error { } } - if _, err := theConn.StartTransientUnit(unitName, "replace", properties, nil); err != nil && !isUnitExists(err) { + statusChan := make(chan string) + if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err != nil && !isUnitExists(err) { return err } + <-statusChan + if err := joinCgroups(c, pid); err != nil { return err } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go deleted file mode 100644 index c7bdf1f6..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/compat_1.5_linux.go +++ /dev/null @@ -1,10 +0,0 @@ -// +build linux,!go1.5 - -package libcontainer - -import "syscall" - -// GidMappingsEnableSetgroups was added in Go 1.5, so do nothing when building -// with earlier versions -func enableSetgroups(sys *syscall.SysProcAttr) { -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go deleted file mode 100644 index 95e2830a..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go +++ /dev/null @@ -1,6 +0,0 @@ -// +build !windows,!linux,!freebsd - -package configs - -type Cgroup struct { -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go index 269fffff..3cae4fd8 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -187,6 +187,10 @@ type Config struct { // Rootless specifies whether the container is a rootless container. Rootless bool `json:"rootless"` + + // IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into + // to limit the resources (e.g., L3 cache) the container has available + IntelRdt *IntelRdt `json:"intel_rdt,omitempty"` } type Hooks struct { diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go index 4d348d21..e4f423c5 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go @@ -1,4 +1,4 @@ -// +build linux freebsd +// +build linux package configs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go new file mode 100644 index 00000000..36bd5f96 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go @@ -0,0 +1,7 @@ +package configs + +type IntelRdt struct { + // The schema for L3 cache id and capacity bitmask (CBM) + // Format: "L3:=;=;..." 
+ L3CacheSchema string `json:"l3_cache_schema,omitempty"` +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go index 0cebfaf8..7a9f33b7 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go @@ -21,13 +21,6 @@ func (v *ConfigValidator) rootless(config *configs.Config) error { if err := rootlessMount(config); err != nil { return err } - // Currently, cgroups cannot effectively be used in rootless containers. - // The new cgroup namespace doesn't really help us either because it doesn't - // have nice interactions with the user namespace (we're working with upstream - // to fix this). - if err := rootlessCgroup(config); err != nil { - return err - } // XXX: We currently can't verify the user config at all, because // configs.Config doesn't store the user-related configs. So this @@ -36,37 +29,27 @@ func (v *ConfigValidator) rootless(config *configs.Config) error { return nil } -func rootlessMappings(config *configs.Config) error { - rootuid, err := config.HostRootUID() - if err != nil { - return fmt.Errorf("failed to get root uid from uidMappings: %v", err) +func hasIDMapping(id int, mappings []configs.IDMap) bool { + for _, m := range mappings { + if id >= m.ContainerID && id < m.ContainerID+m.Size { + return true + } } + return false +} + +func rootlessMappings(config *configs.Config) error { if euid := geteuid(); euid != 0 { if !config.Namespaces.Contains(configs.NEWUSER) { return fmt.Errorf("rootless containers require user namespaces") } - if rootuid != euid { - return fmt.Errorf("rootless containers cannot map container root to a different host user") - } } - rootgid, err := config.HostRootGID() - if err != nil { - return fmt.Errorf("failed to get root gid from gidMappings: %v", err) + if len(config.UidMappings) == 0 { + return fmt.Errorf("rootless containers requires at least one UID mapping") } - - // Similar to the above test, we need to make sure that we aren't trying to - // map to a group ID that we don't have the right to be. - if rootgid != getegid() { - return fmt.Errorf("rootless containers cannot map container root to a different host group") - } - - // We can only map one user and group inside a container (our own). - if len(config.UidMappings) != 1 || config.UidMappings[0].Size != 1 { - return fmt.Errorf("rootless containers cannot map more than one user") - } - if len(config.GidMappings) != 1 || config.GidMappings[0].Size != 1 { - return fmt.Errorf("rootless containers cannot map more than one group") + if len(config.GidMappings) == 0 { + return fmt.Errorf("rootless containers requires at least one UID mapping") } return nil @@ -104,11 +87,28 @@ func rootlessMount(config *configs.Config) error { // Check that the options list doesn't contain any uid= or gid= entries // that don't resolve to root. for _, opt := range strings.Split(mount.Data, ",") { - if strings.HasPrefix(opt, "uid=") && opt != "uid=0" { - return fmt.Errorf("cannot specify uid= mount options in rootless containers where argument isn't 0") + if strings.HasPrefix(opt, "uid=") { + var uid int + n, err := fmt.Sscanf(opt, "uid=%d", &uid) + if n != 1 || err != nil { + // Ignore unknown mount options. 
+ continue + } + if !hasIDMapping(uid, config.UidMappings) { + return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers") + } } - if strings.HasPrefix(opt, "gid=") && opt != "gid=0" { - return fmt.Errorf("cannot specify gid= mount options in rootless containers where argument isn't 0") + + if strings.HasPrefix(opt, "gid=") { + var gid int + n, err := fmt.Sscanf(opt, "gid=%d", &gid) + if n != 1 || err != nil { + // Ignore unknown mount options. + continue + } + if !hasIDMapping(gid, config.GidMappings) { + return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers") + } } } } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go index 82843454..cbbba9a0 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/intelrdt" selinux "github.com/opencontainers/selinux/go-selinux" ) @@ -40,6 +41,9 @@ func (v *ConfigValidator) Validate(config *configs.Config) error { if err := v.sysctl(config); err != nil { return err } + if err := v.intelrdt(config); err != nil { + return err + } if config.Rootless { if err := v.rootless(config); err != nil { return err @@ -153,6 +157,19 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error { return nil } +func (v *ConfigValidator) intelrdt(config *configs.Config) error { + if config.IntelRdt != nil { + if !intelrdt.IsEnabled() { + return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled") + } + if config.IntelRdt.L3CacheSchema == "" { + return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty") + } + } + + return nil +} + func isSymbolicLink(path string) (bool, error) { fi, err := os.Lstat(path) if err != nil { diff --git a/vendor/github.com/opencontainers/runc/libcontainer/console.go b/vendor/github.com/opencontainers/runc/libcontainer/console.go deleted file mode 100644 index 917acc70..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/console.go +++ /dev/null @@ -1,17 +0,0 @@ -package libcontainer - -import ( - "io" - "os" -) - -// Console represents a pseudo TTY. -type Console interface { - io.ReadWriteCloser - - // Path returns the filesystem path to the slave side of the pty. - Path() string - - // Fd returns the fd for the master of the pty. - File() *os.File -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/console_freebsd.go b/vendor/github.com/opencontainers/runc/libcontainer/console_freebsd.go deleted file mode 100644 index b7166a31..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/console_freebsd.go +++ /dev/null @@ -1,13 +0,0 @@ -// +build freebsd - -package libcontainer - -import ( - "errors" -) - -// newConsole returns an initialized console that can be used within a container by copying bytes -// from the master side to the slave that is attached as the tty for the container's init process. 
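A usage sketch for the new `IntelRdt` config field and the `intelrdt` validation added above (the rootfs path and schema value are illustrative; whether RDT is actually enabled depends on the host):

```go
package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer/configs"
	"github.com/opencontainers/runc/libcontainer/configs/validate"
	"github.com/opencontainers/runc/libcontainer/intelrdt"
)

func main() {
	if !intelrdt.IsEnabled() {
		fmt.Println("Intel RDT/CAT is not supported or enabled on this host")
		return
	}
	// Attach an L3 cache schema to a container config; the validator rejects
	// configs that set IntelRdt without an l3CacheSchema.
	cfg := &configs.Config{
		Rootfs:   "/tmp/rootfs", // illustrative
		IntelRdt: &configs.IntelRdt{L3CacheSchema: "L3:0=ffff0;1=3ff"},
	}
	if err := validate.New().Validate(cfg); err != nil {
		fmt.Println("invalid config:", err)
	}
}
```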
-func newConsole() (Console, error) { - return nil, errors.New("libcontainer console is not supported on FreeBSD") -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go index f70de384..9997e93e 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go @@ -1,71 +1,14 @@ package libcontainer import ( - "fmt" "os" - "unsafe" "golang.org/x/sys/unix" ) -func ConsoleFromFile(f *os.File) Console { - return &linuxConsole{ - master: f, - } -} - -// newConsole returns an initialized console that can be used within a container by copying bytes -// from the master side to the slave that is attached as the tty for the container's init process. -func newConsole() (Console, error) { - master, err := os.OpenFile("/dev/ptmx", unix.O_RDWR|unix.O_NOCTTY|unix.O_CLOEXEC, 0) - if err != nil { - return nil, err - } - console, err := ptsname(master) - if err != nil { - return nil, err - } - if err := unlockpt(master); err != nil { - return nil, err - } - return &linuxConsole{ - slavePath: console, - master: master, - }, nil -} - -// linuxConsole is a linux pseudo TTY for use within a container. -type linuxConsole struct { - master *os.File - slavePath string -} - -func (c *linuxConsole) File() *os.File { - return c.master -} - -func (c *linuxConsole) Path() string { - return c.slavePath -} - -func (c *linuxConsole) Read(b []byte) (int, error) { - return c.master.Read(b) -} - -func (c *linuxConsole) Write(b []byte) (int, error) { - return c.master.Write(b) -} - -func (c *linuxConsole) Close() error { - if m := c.master; m != nil { - return m.Close() - } - return nil -} - // mount initializes the console inside the rootfs mounting with the specified mount label // and applying the correct ownership of the console. -func (c *linuxConsole) mount() error { +func mountConsole(slavePath string) error { oldMask := unix.Umask(0000) defer unix.Umask(oldMask) f, err := os.Create("/dev/console") @@ -75,17 +18,20 @@ func (c *linuxConsole) mount() error { if f != nil { f.Close() } - return unix.Mount(c.slavePath, "/dev/console", "bind", unix.MS_BIND, "") + return unix.Mount(slavePath, "/dev/console", "bind", unix.MS_BIND, "") } // dupStdio opens the slavePath for the console and dups the fds to the current // processes stdio, fd 0,1,2. -func (c *linuxConsole) dupStdio() error { - slave, err := c.open(unix.O_RDWR) +func dupStdio(slavePath string) error { + fd, err := unix.Open(slavePath, unix.O_RDWR, 0) if err != nil { - return err + return &os.PathError{ + Op: "open", + Path: slavePath, + Err: err, + } } - fd := int(slave.Fd()) for _, i := range []int{0, 1, 2} { if err := unix.Dup3(fd, i, 0); err != nil { return err @@ -93,60 +39,3 @@ func (c *linuxConsole) dupStdio() error { } return nil } - -// open is a clone of os.OpenFile without the O_CLOEXEC used to open the pty slave. -func (c *linuxConsole) open(flag int) (*os.File, error) { - r, e := unix.Open(c.slavePath, flag, 0) - if e != nil { - return nil, &os.PathError{ - Op: "open", - Path: c.slavePath, - Err: e, - } - } - return os.NewFile(uintptr(r), c.slavePath), nil -} - -func ioctl(fd uintptr, flag, data uintptr) error { - if _, _, err := unix.Syscall(unix.SYS_IOCTL, fd, flag, data); err != 0 { - return err - } - return nil -} - -// unlockpt unlocks the slave pseudoterminal device corresponding to the master pseudoterminal referred to by f. 
-// unlockpt should be called before opening the slave side of a pty. -func unlockpt(f *os.File) error { - var u int32 - return ioctl(f.Fd(), unix.TIOCSPTLCK, uintptr(unsafe.Pointer(&u))) -} - -// ptsname retrieves the name of the first available pts for the given master. -func ptsname(f *os.File) (string, error) { - n, err := unix.IoctlGetInt(int(f.Fd()), unix.TIOCGPTN) - if err != nil { - return "", err - } - return fmt.Sprintf("/dev/pts/%d", n), nil -} - -// SaneTerminal sets the necessary tty_ioctl(4)s to ensure that a pty pair -// created by us acts normally. In particular, a not-very-well-known default of -// Linux unix98 ptys is that they have +onlcr by default. While this isn't a -// problem for terminal emulators, because we relay data from the terminal we -// also relay that funky line discipline. -func SaneTerminal(terminal *os.File) error { - termios, err := unix.IoctlGetTermios(int(terminal.Fd()), unix.TCGETS) - if err != nil { - return fmt.Errorf("ioctl(tty, tcgets): %s", err.Error()) - } - - // Set -onlcr so we don't have to deal with \r. - termios.Oflag &^= unix.ONLCR - - if err := unix.IoctlSetTermios(int(terminal.Fd()), unix.TCSETS, termios); err != nil { - return fmt.Errorf("ioctl(tty, tcsets): %s", err.Error()) - } - - return nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/console_solaris.go b/vendor/github.com/opencontainers/runc/libcontainer/console_solaris.go deleted file mode 100644 index e5ca5459..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/console_solaris.go +++ /dev/null @@ -1,11 +0,0 @@ -package libcontainer - -import ( - "errors" -) - -// newConsole returns an initialized console that can be used within a container by copying bytes -// from the master side to the slave that is attached as the tty for the container's init process. -func newConsole() (Console, error) { - return nil, errors.New("libcontainer console is not supported on Solaris") -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/console_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/console_windows.go deleted file mode 100644 index c61e866a..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/console_windows.go +++ /dev/null @@ -1,30 +0,0 @@ -package libcontainer - -// newConsole returns an initialized console that can be used within a container -func newConsole() (Console, error) { - return &windowsConsole{}, nil -} - -// windowsConsole is a Windows pseudo TTY for use within a container. 
-type windowsConsole struct { -} - -func (c *windowsConsole) Fd() uintptr { - return 0 -} - -func (c *windowsConsole) Path() string { - return "" -} - -func (c *windowsConsole) Read(b []byte) (int, error) { - return 0, nil -} - -func (c *windowsConsole) Write(b []byte) (int, error) { - return 0, nil -} - -func (c *windowsConsole) Close() error { - return nil -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go index 9e1b74d7..1ac74b1b 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go @@ -21,6 +21,7 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/criurpc" + "github.com/opencontainers/runc/libcontainer/intelrdt" "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/utils" @@ -38,10 +39,14 @@ type linuxContainer struct { root string config *configs.Config cgroupManager cgroups.Manager + intelRdtManager intelrdt.Manager + initPath string initArgs []string initProcess parentProcess initProcessStartTime uint64 criuPath string + newuidmapPath string + newgidmapPath string m sync.Mutex criuVersion int state containerState @@ -67,6 +72,9 @@ type State struct { // Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore ExternalDescriptors []string `json:"external_descriptors,omitempty"` + + // Intel RDT "resource control" filesystem path + IntelRdtPath string `json:"intel_rdt_path"` } // Container is a libcontainer container object. @@ -163,6 +171,11 @@ func (c *linuxContainer) Stats() (*Stats, error) { if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { return stats, newSystemErrorWithCause(err, "getting container stats from cgroups") } + if c.intelRdtManager != nil { + if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil { + return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats") + } + } for _, iface := range c.config.Networks { switch iface.Type { case "veth": @@ -193,6 +206,15 @@ func (c *linuxContainer) Set(config configs.Config) error { } return err } + if c.intelRdtManager != nil { + if err := c.intelRdtManager.Set(&config); err != nil { + // Set configs back + if err2 := c.intelRdtManager.Set(c.config); err2 != nil { + logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2) + } + return err + } + } // After config setting succeed, update config and states c.config = &config _, err = c.updateState(nil) @@ -268,7 +290,7 @@ func (c *linuxContainer) start(process *Process, isInit bool) error { } if err := parent.start(); err != nil { // terminate the process to ensure that it properly is reaped. 
- if err := parent.terminate(); err != nil { + if err := ignoreTerminateErrors(parent.terminate()); err != nil { logrus.Warn(err) } return newSystemErrorWithCause(err, "starting container process") @@ -294,7 +316,7 @@ func (c *linuxContainer) start(process *Process, isInit bool) error { } for i, hook := range c.config.Hooks.Poststart { if err := hook.Run(s); err != nil { - if err := parent.terminate(); err != nil { + if err := ignoreTerminateErrors(parent.terminate()); err != nil { logrus.Warn(err) } return newSystemErrorWithCausef(err, "running poststart hook %d", i) @@ -392,7 +414,8 @@ func (c *linuxContainer) newParentProcess(p *Process, doInit bool) (parentProces } func (c *linuxContainer) commandTemplate(p *Process, childPipe *os.File) (*exec.Cmd, error) { - cmd := exec.Command(c.initArgs[0], c.initArgs[1:]...) + cmd := exec.Command(c.initPath, c.initArgs[1:]...) + cmd.Args[0] = c.initArgs[0] cmd.Stdin = p.Stdin cmd.Stdout = p.Stdout cmd.Stderr = p.Stderr @@ -434,15 +457,16 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, c return nil, err } return &initProcess{ - cmd: cmd, - childPipe: childPipe, - parentPipe: parentPipe, - manager: c.cgroupManager, - config: c.newInitConfig(p), - container: c, - process: p, - bootstrapData: data, - sharePidns: sharePidns, + cmd: cmd, + childPipe: childPipe, + parentPipe: parentPipe, + manager: c.cgroupManager, + intelRdtManager: c.intelRdtManager, + config: c.newInitConfig(p), + container: c, + process: p, + bootstrapData: data, + sharePidns: sharePidns, }, nil } @@ -461,6 +485,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, parentPipe, return &setnsProcess{ cmd: cmd, cgroupPaths: c.cgroupManager.GetPaths(), + intelRdtPath: state.IntelRdtPath, childPipe: childPipe, parentPipe: parentPipe, config: c.newInitConfig(p), @@ -499,6 +524,8 @@ func (c *linuxContainer) newInitConfig(process *Process) *initConfig { cfg.Rlimits = process.Rlimits } cfg.CreateConsole = process.ConsoleSocket != nil + cfg.ConsoleWidth = process.ConsoleWidth + cfg.ConsoleHeight = process.ConsoleHeight return cfg } @@ -600,9 +627,24 @@ func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc. logrus.Debugf("Feature check says: %s", criuFeatures) missingFeatures := false - if *criuFeat.MemTrack && !*criuFeatures.MemTrack { - missingFeatures = true - logrus.Debugf("CRIU does not support MemTrack") + // The outer if checks if the fields actually exist + if (criuFeat.MemTrack != nil) && + (criuFeatures.MemTrack != nil) { + // The inner if checks if they are set to true + if *criuFeat.MemTrack && !*criuFeatures.MemTrack { + missingFeatures = true + logrus.Debugf("CRIU does not support MemTrack") + } + } + + // This needs to be repeated for every new feature check. + // Is there a way to put this in a function. Reflection? 
+ if (criuFeat.LazyPages != nil) && + (criuFeatures.LazyPages != nil) { + if *criuFeat.LazyPages && !*criuFeatures.LazyPages { + missingFeatures = true + logrus.Debugf("CRIU does not support LazyPages") + } } if missingFeatures { @@ -632,9 +674,9 @@ func parseCriuVersion(path string) (int, error) { return 0, fmt.Errorf("Unable to parse the CRIU version: %s", path) } - n, err := fmt.Sscanf(string(version), "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2 + n, err := fmt.Sscanf(version, "GitID: v%d.%d.%d", &x, &y, &z) // 1.5.2 if err != nil { - n, err = fmt.Sscanf(string(version), "GitID: v%d.%d", &x, &y) // 1.6 + n, err = fmt.Sscanf(version, "GitID: v%d.%d", &x, &y) // 1.6 y++ } else { z++ @@ -758,6 +800,25 @@ func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error { } req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) } + return nil +} + +func waitForCriuLazyServer(r *os.File, status string) error { + + data := make([]byte, 1) + _, err := r.Read(data) + if err != nil { + return err + } + fd, err := os.OpenFile(status, os.O_TRUNC|os.O_WRONLY, os.ModeAppend) + if err != nil { + return err + } + _, err = fd.Write(data) + if err != nil { + return err + } + fd.Close() return nil } @@ -825,6 +886,7 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { EmptyNs: proto.Uint32(criuOpts.EmptyNs), OrphanPtsMaster: proto.Bool(true), AutoDedup: proto.Bool(criuOpts.AutoDedup), + LazyPages: proto.Bool(criuOpts.LazyPages), } fcg := c.cgroupManager.GetPaths()["freezer"] @@ -875,6 +937,24 @@ func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { Opts: &rpcOpts, } + if criuOpts.LazyPages { + // lazy migration requested; check if criu supports it + feat := criurpc.CriuFeatures{ + LazyPages: proto.Bool(true), + } + + if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil { + return err + } + + statusRead, statusWrite, err := os.Pipe() + if err != nil { + return err + } + rpcOpts.StatusFd = proto.Int32(int32(statusWrite.Fd())) + go waitForCriuLazyServer(statusRead, criuOpts.StatusFd) + } + //no need to dump these information in pre-dump if !criuOpts.PreDump { for _, m := range c.config.Mounts { @@ -1027,6 +1107,7 @@ func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { EmptyNs: proto.Uint32(criuOpts.EmptyNs), OrphanPtsMaster: proto.Bool(true), AutoDedup: proto.Bool(criuOpts.AutoDedup), + LazyPages: proto.Bool(criuOpts.LazyPages), }, } @@ -1404,7 +1485,7 @@ func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Proc defer master.Close() // While we can access console.master, using the API is a good idea. 
- if err := utils.SendFd(process.ConsoleSocket, master); err != nil { + if err := utils.SendFd(process.ConsoleSocket, master.Name(), master.Fd()); err != nil { return err } } @@ -1519,6 +1600,10 @@ func (c *linuxContainer) currentState() (*State, error) { startTime, _ = c.initProcess.startTime() externalDescriptors = c.initProcess.externalDescriptors() } + intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID()) + if err != nil { + intelRdtPath = "" + } state := &State{ BaseState: BaseState{ ID: c.ID(), @@ -1529,6 +1614,7 @@ func (c *linuxContainer) currentState() (*State, error) { }, Rootless: c.config.Rootless, CgroupPaths: c.cgroupManager.GetPaths(), + IntelRdtPath: intelRdtPath, NamespacePaths: make(map[configs.NamespaceType]string), ExternalDescriptors: externalDescriptors, } @@ -1627,6 +1713,12 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na if !joinExistingUser { // write uid mappings if len(c.config.UidMappings) > 0 { + if c.config.Rootless && c.newuidmapPath != "" { + r.AddData(&Bytemsg{ + Type: UidmapPathAttr, + Value: []byte(c.newuidmapPath), + }) + } b, err := encodeIDMapping(c.config.UidMappings) if err != nil { return nil, err @@ -1647,6 +1739,12 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na Type: GidmapAttr, Value: b, }) + if c.config.Rootless && c.newgidmapPath != "" { + r.AddData(&Bytemsg{ + Type: GidmapPathAttr, + Value: []byte(c.newgidmapPath), + }) + } // The following only applies if we are root. if !c.config.Rootless { // check if we have CAP_SETGID to setgroup properly @@ -1678,3 +1776,18 @@ func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Na return bytes.NewReader(r.Serialize()), nil } + +// ignoreTerminateErrors returns nil if the given err matches an error known +// to indicate that the terminate occurred successfully or err was nil, otherwise +// err is returned unaltered. +func ignoreTerminateErrors(err error) error { + if err == nil { + return nil + } + s := err.Error() + switch { + case strings.Contains(s, "process already finished"), strings.Contains(s, "Wait was already called"): + return nil + } + return err +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/container_solaris.go b/vendor/github.com/opencontainers/runc/libcontainer/container_solaris.go deleted file mode 100644 index bb84ff74..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/container_solaris.go +++ /dev/null @@ -1,20 +0,0 @@ -package libcontainer - -// State represents a running container's state -type State struct { - BaseState - - // Platform specific fields below here -} - -// A libcontainer container object. -// -// Each container is thread-safe within the same process. Since a container can -// be destroyed by a separate process, any function may return that the container -// was not found. -type Container interface { - BaseContainer - - // Methods below here are platform specific - -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/container_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/container_windows.go deleted file mode 100644 index bb84ff74..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/container_windows.go +++ /dev/null @@ -1,20 +0,0 @@ -package libcontainer - -// State represents a running container's state -type State struct { - BaseState - - // Platform specific fields below here -} - -// A libcontainer container object. -// -// Each container is thread-safe within the same process. 
Since a container can -// be destroyed by a separate process, any function may return that the container -// was not found. -type Container interface { - BaseContainer - - // Methods below here are platform specific - -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go index 8f142c9f..a2e344fc 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go @@ -23,7 +23,7 @@ type VethPairName struct { type CriuOpts struct { ImagesDirectory string // directory for storing image files WorkDirectory string // directory to cd and write logs/pidfiles/stats to - ParentImage string // direcotry for storing parent image files in pre-dump and dump + ParentImage string // directory for storing parent image files in pre-dump and dump LeaveRunning bool // leave container in running state after checkpoint TcpEstablished bool // checkpoint/restore established TCP connections ExternalUnixConnections bool // allow external unix connections @@ -35,4 +35,6 @@ type CriuOpts struct { ManageCgroupsMode cgMode // dump or restore cgroup mode EmptyNs uint32 // don't c/r properties for namespace from this mask AutoDedup bool // auto deduplication for incremental dumps + LazyPages bool // restore memory pages lazily using userfaultfd + StatusFd string // fd for feedback when lazy server is ready } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_windows.go deleted file mode 100644 index bc920770..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_windows.go +++ /dev/null @@ -1,6 +0,0 @@ -package libcontainer - -// TODO Windows: This can ultimately be entirely factored out as criu is -// a Unix concept not relevant on Windows. -type CriuOpts struct { -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go index 947bdea1..7d53d5e0 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go @@ -11,13 +11,13 @@ import ( "runtime/debug" "strconv" - "github.com/docker/docker/pkg/mount" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs" - "github.com/opencontainers/runc/libcontainer/cgroups/rootless" "github.com/opencontainers/runc/libcontainer/cgroups/systemd" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/configs/validate" + "github.com/opencontainers/runc/libcontainer/intelrdt" + "github.com/opencontainers/runc/libcontainer/mount" "github.com/opencontainers/runc/libcontainer/utils" "golang.org/x/sys/unix" @@ -72,15 +72,15 @@ func Cgroupfs(l *LinuxFactory) error { return nil } -// RootlessCgroups is an options func to configure a LinuxFactory to -// return containers that use the "rootless" cgroup manager, which will -// fail to do any operations not possible to do with an unprivileged user. -// It should only be used in conjunction with rootless containers. 
-func RootlessCgroups(l *LinuxFactory) error { - l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { - return &rootless.Manager{ - Cgroups: config, - Paths: paths, +// IntelRdtfs is an options func to configure a LinuxFactory to return +// containers that use the Intel RDT "resource control" filesystem to +// create and manage Intel Xeon platform shared resources (e.g., L3 cache). +func IntelRdtFs(l *LinuxFactory) error { + l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager { + return &intelrdt.IntelRdtManager{ + Config: config, + Id: id, + Path: path, } } return nil @@ -119,12 +119,16 @@ func New(root string, options ...func(*LinuxFactory) error) (Factory, error) { } l := &LinuxFactory{ Root: root, - InitArgs: []string{"/proc/self/exe", "init"}, + InitPath: "/proc/self/exe", + InitArgs: []string{os.Args[0], "init"}, Validator: validate.New(), CriuPath: "criu", } Cgroupfs(l) for _, opt := range options { + if opt == nil { + continue + } if err := opt(l); err != nil { return nil, err } @@ -137,6 +141,10 @@ type LinuxFactory struct { // Root directory for the factory to store state. Root string + // InitPath is the path for calling the init responsibilities for spawning + // a container. + InitPath string + // InitArgs are arguments for calling the init responsibilities for spawning // a container. InitArgs []string @@ -145,11 +153,19 @@ type LinuxFactory struct { // containers. CriuPath string + // New{u,g}uidmapPath is the path to the binaries used for mapping with + // rootless containers. + NewuidmapPath string + NewgidmapPath string + // Validator provides validation to container configurations. Validator validate.Validator // NewCgroupsManager returns an initialized cgroups manager for a single container. NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager + + // NewIntelRdtManager returns an initialized Intel RDT manager for a single container. + NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager } func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) { @@ -174,17 +190,20 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil { return nil, newGenericError(err, SystemError) } - if config.Rootless { - RootlessCgroups(l) - } c := &linuxContainer{ id: id, root: containerRoot, config: config, + initPath: l.InitPath, initArgs: l.InitArgs, criuPath: l.CriuPath, + newuidmapPath: l.NewuidmapPath, + newgidmapPath: l.NewgidmapPath, cgroupManager: l.NewCgroupsManager(config.Cgroups, nil), } + if intelrdt.IsEnabled() { + c.intelRdtManager = l.NewIntelRdtManager(config, id, "") + } c.state = &stoppedState{c: c} return c, nil } @@ -203,17 +222,16 @@ func (l *LinuxFactory) Load(id string) (Container, error) { processStartTime: state.InitProcessStartTime, fds: state.ExternalDescriptors, } - // We have to use the RootlessManager. 
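The factory options above compose as plain functional options passed to `New`; a minimal usage sketch (the state root and newuidmap path are illustrative):

```go
package main

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer"
)

func main() {
	// Cgroupfs is applied by default, IntelRdtFs wires up the resctrl
	// manager, and NewuidmapPath points the factory at an external
	// newuidmap binary for rootless setups.
	factory, err := libcontainer.New(
		"/run/runc", // illustrative state root
		libcontainer.Cgroupfs,
		libcontainer.IntelRdtFs,
		libcontainer.NewuidmapPath("/usr/bin/newuidmap"),
	)
	if err != nil {
		panic(err)
	}
	fmt.Printf("factory ready: %T\n", factory)
}
```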
- if state.Rootless { - RootlessCgroups(l) - } c := &linuxContainer{ initProcess: r, initProcessStartTime: state.InitProcessStartTime, id: id, config: &state.Config, + initPath: l.InitPath, initArgs: l.InitArgs, criuPath: l.CriuPath, + newuidmapPath: l.NewuidmapPath, + newgidmapPath: l.NewgidmapPath, cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths), root: containerRoot, created: state.Created, @@ -222,6 +240,9 @@ func (l *LinuxFactory) Load(id string) (Container, error) { if err := c.refreshState(); err != nil { return nil, err } + if intelrdt.IsEnabled() { + c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath) + } return c, nil } @@ -323,3 +344,21 @@ func (l *LinuxFactory) validateID(id string) error { return nil } + +// NewuidmapPath returns an option func to configure a LinuxFactory with the +// provided .. +func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error { + return func(l *LinuxFactory) error { + l.NewuidmapPath = newuidmapPath + return nil + } +} + +// NewgidmapPath returns an option func to configure a LinuxFactory with the +// provided .. +func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error { + return func(l *LinuxFactory) error { + l.NewgidmapPath = newgidmapPath + return nil + } +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go index 2020bb7a..2770be30 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go @@ -12,15 +12,16 @@ import ( "syscall" // only for Errno "unsafe" + "golang.org/x/sys/unix" + + "github.com/containerd/console" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/user" "github.com/opencontainers/runc/libcontainer/utils" - "github.com/sirupsen/logrus" "github.com/vishvananda/netlink" - "golang.org/x/sys/unix" ) type initType string @@ -61,6 +62,8 @@ type initConfig struct { ContainerId string `json:"containerid"` Rlimits []configs.Rlimit `json:"rlimits"` CreateConsole bool `json:"create_console"` + ConsoleWidth uint16 `json:"console_width"` + ConsoleHeight uint16 `json:"console_height"` Rootless bool `json:"rootless"` } @@ -170,29 +173,38 @@ func setupConsole(socket *os.File, config *initConfig, mount bool) error { // however, that setupUser (specifically fixStdioPermissions) *will* change // the UID owner of the console to be the user the process will run as (so // they can actually control their console). - console, err := newConsole() + + pty, slavePath, err := console.NewPty() if err != nil { return err } - // After we return from here, we don't need the console anymore. - defer console.Close() - linuxConsole, ok := console.(*linuxConsole) - if !ok { - return fmt.Errorf("failed to cast console to *linuxConsole") + if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 { + err = pty.Resize(console.WinSize{ + Height: config.ConsoleHeight, + Width: config.ConsoleWidth, + }) + + if err != nil { + return err + } } + + // After we return from here, we don't need the console anymore. + defer pty.Close() + // Mount the console inside our rootfs. if mount { - if err := linuxConsole.mount(); err != nil { + if err := mountConsole(slavePath); err != nil { return err } } // While we can access console.master, using the API is a good idea. 
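For reference, a small sketch of the `containerd/console` API that `setupConsole` now uses in place of the old `linuxConsole` type (the window size and error handling are illustrative):

```go
package main

import (
	"fmt"

	"github.com/containerd/console"
)

func main() {
	// NewPty allocates the pty pair the same way setupConsole does: the
	// master side is returned as a console.Console, the slave by path.
	pty, slavePath, err := console.NewPty()
	if err != nil {
		panic(err)
	}
	defer pty.Close()

	// Resize mirrors the ConsoleWidth/ConsoleHeight handling in initConfig.
	if err := pty.Resize(console.WinSize{Height: 24, Width: 80}); err != nil {
		panic(err)
	}

	fmt.Println("master:", pty.Name(), "slave:", slavePath)
}
```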
- if err := utils.SendFd(socket, linuxConsole.File()); err != nil { + if err := utils.SendFd(socket, pty.Name(), pty.Fd()); err != nil { return err } // Now, dup over all the things. - return linuxConsole.dupStdio() + return dupStdio(slavePath) } // syncParentReady sends to the given pipe a JSON payload which indicates that @@ -261,25 +273,27 @@ func setupUser(config *initConfig) error { } } + // Rather than just erroring out later in setuid(2) and setgid(2), check + // that the user is mapped here. + if _, err := config.Config.HostUID(execUser.Uid); err != nil { + return fmt.Errorf("cannot set uid to unmapped user in user namespace") + } + if _, err := config.Config.HostGID(execUser.Gid); err != nil { + return fmt.Errorf("cannot set gid to unmapped user in user namespace") + } + if config.Rootless { - if execUser.Uid != 0 { - return fmt.Errorf("cannot run as a non-root user in a rootless container") - } - - if execUser.Gid != 0 { - return fmt.Errorf("cannot run as a non-root group in a rootless container") - } - - // We cannot set any additional groups in a rootless container and thus we - // bail if the user asked us to do so. TODO: We currently can't do this - // earlier, but if libcontainer.Process.User was typesafe this might work. + // We cannot set any additional groups in a rootless container and thus + // we bail if the user asked us to do so. TODO: We currently can't do + // this check earlier, but if libcontainer.Process.User was typesafe + // this might work. if len(addGroups) > 0 { return fmt.Errorf("cannot set any additional groups in a rootless container") } } - // before we change to the container's user make sure that the processes STDIO - // is correctly owned by the user that we are switching to. + // Before we change to the container's user make sure that the processes + // STDIO is correctly owned by the user that we are switching to. if err := fixStdioPermissions(config, execUser); err != nil { return err } @@ -298,7 +312,6 @@ func setupUser(config *initConfig) error { if err := system.Setgid(execUser.Gid); err != nil { return err } - if err := system.Setuid(execUser.Uid); err != nil { return err } @@ -335,14 +348,6 @@ func fixStdioPermissions(config *initConfig, u *user.ExecUser) error { continue } - // Skip chown if s.Gid is actually an unmapped gid in the host. While - // this is a bit dodgy if it just so happens that the console _is_ - // owned by overflow_gid, there's no way for us to disambiguate this as - // a userspace program. - if _, err := config.Config.HostGID(int(s.Gid)); err != nil { - continue - } - // We only change the uid owner (as it is possible for the mount to // prefer a different gid, and there's no reason for us to change it). // The reason why we don't just leave the default uid=X mount setup is @@ -350,6 +355,15 @@ func fixStdioPermissions(config *initConfig, u *user.ExecUser) error { // this code, you couldn't effectively run as a non-root user inside a // container and also have a console set up. if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil { + // If we've hit an EINVAL then s.Gid isn't mapped in the user + // namespace. If we've hit an EPERM then the inode's current owner + // is not mapped in our user namespace (in particular, + // privileged_wrt_inode_uidgid() has failed). In either case, we + // are in a configuration where it's better for us to just not + // touch the stdio rather than bail at this point. 
+ if err == unix.EINVAL || err == unix.EPERM { + continue + } return err } } @@ -480,6 +494,16 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error { logrus.Warn(err) } + subreaper, err := system.GetSubreaper() + if err != nil { + // The error here means that PR_GET_CHILD_SUBREAPER is not + // supported because this code might run on a kernel older + // than 3.4. We don't want to throw an error in that case, + // and we simplify things, considering there is no subreaper + // set. + subreaper = 0 + } + for _, p := range procs { if s != unix.SIGKILL { if ok, err := isWaitable(p.Pid); err != nil { @@ -493,9 +517,16 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error { } } - if _, err := p.Wait(); err != nil { - if !isNoChildren(err) { - logrus.Warn("wait: ", err) + // In case a subreaper has been setup, this code must not + // wait for the process. Otherwise, we cannot be sure the + // current process will be reaped by the subreaper, while + // the subreaper might be waiting for this process in order + // to retrieve its exit code. + if subreaper == 0 { + if _, err := p.Wait(); err != nil { + if !isNoChildren(err) { + logrus.Warn("wait: ", err) + } } } } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go new file mode 100644 index 00000000..487c630a --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go @@ -0,0 +1,553 @@ +// +build linux + +package intelrdt + +import ( + "bufio" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + + "github.com/opencontainers/runc/libcontainer/configs" +) + +/* + * About Intel RDT/CAT feature: + * Intel platforms with new Xeon CPU support Resource Director Technology (RDT). + * Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3 + * Cache is the only resource that is supported in RDT. + * + * This feature provides a way for the software to restrict cache allocation to a + * defined 'subset' of L3 cache which may be overlapping with other 'subsets'. + * The different subsets are identified by class of service (CLOS) and each CLOS + * has a capacity bitmask (CBM). + * + * For more information about Intel RDT/CAT can be found in the section 17.17 + * of Intel Software Developer Manual. + * + * About Intel RDT/CAT kernel interface: + * In Linux 4.10 kernel or newer, the interface is defined and exposed via + * "resource control" filesystem, which is a "cgroup-like" interface. + * + * Comparing with cgroups, it has similar process management lifecycle and + * interfaces in a container. But unlike cgroups' hierarchy, it has single level + * filesystem layout. + * + * Intel RDT "resource control" filesystem hierarchy: + * mount -t resctrl resctrl /sys/fs/resctrl + * tree /sys/fs/resctrl + * /sys/fs/resctrl/ + * |-- info + * | |-- L3 + * | |-- cbm_mask + * | |-- min_cbm_bits + * | |-- num_closids + * |-- cpus + * |-- schemata + * |-- tasks + * |-- + * |-- cpus + * |-- schemata + * |-- tasks + * + * For runc, we can make use of `tasks` and `schemata` configuration for L3 cache + * resource constraints. + * + * The file `tasks` has a list of tasks that belongs to this group (e.g., + * " group). Tasks can be added to a group by writing the task ID + * to the "tasks" file (which will automatically remove them from the previous + * group to which they belonged). New tasks created by fork(2) and clone(2) are + * added to the same group as their parent. 
If a pid is not in any sub group, it is + * in root group. + * + * The file `schemata` has allocation bitmasks/values for L3 cache on each socket, + * which contains L3 cache id and capacity bitmask (CBM). + * Format: "L3:=;=;..." + * For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0` + * which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. + * + * The valid L3 cache CBM is a *contiguous bits set* and number of bits that can + * be set is less than the max bit. The max bits in the CBM is varied among + * supported Intel Xeon platforms. In Intel RDT "resource control" filesystem + * layout, the CBM in a group should be a subset of the CBM in root. Kernel will + * check if it is valid when writing. e.g., 0xfffff in root indicates the max bits + * of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM + * values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc. + * + * For more information about Intel RDT/CAT kernel interface: + * https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt + * + * An example for runc: + * Consider a two-socket machine with two L3 caches where the default CBM is + * 0xfffff and the max CBM length is 20 bits. With this configuration, tasks + * inside the container only have access to the "upper" 80% of L3 cache id 0 and + * the "lower" 50% L3 cache id 1: + * + * "linux": { + * "intelRdt": { + * "l3CacheSchema": "L3:0=ffff0;1=3ff" + * } + * } + */ + +type Manager interface { + // Applies Intel RDT configuration to the process with the specified pid + Apply(pid int) error + + // Returns statistics for Intel RDT + GetStats() (*Stats, error) + + // Destroys the Intel RDT 'container_id' group + Destroy() error + + // Returns Intel RDT path to save in a state file and to be able to + // restore the object later + GetPath() string + + // Set Intel RDT "resource control" filesystem as configured. + Set(container *configs.Config) error +} + +// This implements interface Manager +type IntelRdtManager struct { + mu sync.Mutex + Config *configs.Config + Id string + Path string +} + +const ( + IntelRdtTasks = "tasks" +) + +var ( + // The absolute root path of the Intel RDT "resource control" filesystem + intelRdtRoot string + intelRdtRootLock sync.Mutex + + // The flag to indicate if Intel RDT is supported + isEnabled bool +) + +type intelRdtData struct { + root string + config *configs.Config + pid int +} + +// Check if Intel RDT is enabled in init() +func init() { + // 1. Check if hardware and kernel support Intel RDT/CAT feature + // "cat_l3" flag is set if supported + isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo") + if !isFlagSet || err != nil { + isEnabled = false + return + } + + // 2. Check if Intel RDT "resource control" filesystem is mounted + // The user guarantees to mount the filesystem + isEnabled = isIntelRdtMounted() +} + +// Return the mount point path of Intel RDT "resource control" filesysem +func findIntelRdtMountpointDir() (string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + text := s.Text() + fields := strings.Split(text, " ") + // Safe as mountinfo encodes mountpoints with spaces as \040. 
+ index := strings.Index(text, " - ") + postSeparatorFields := strings.Fields(text[index+3:]) + numPostFields := len(postSeparatorFields) + + // This is an error as we can't detect if the mount is for "Intel RDT" + if numPostFields == 0 { + return "", fmt.Errorf("Found no fields post '-' in %q", text) + } + + if postSeparatorFields[0] == "resctrl" { + // Check that the mount is properly formated. + if numPostFields < 3 { + return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + + return fields[4], nil + } + } + if err := s.Err(); err != nil { + return "", err + } + + return "", NewNotFoundError("Intel RDT") +} + +// Gets the root path of Intel RDT "resource control" filesystem +func getIntelRdtRoot() (string, error) { + intelRdtRootLock.Lock() + defer intelRdtRootLock.Unlock() + + if intelRdtRoot != "" { + return intelRdtRoot, nil + } + + root, err := findIntelRdtMountpointDir() + if err != nil { + return "", err + } + + if _, err := os.Stat(root); err != nil { + return "", err + } + + intelRdtRoot = root + return intelRdtRoot, nil +} + +func isIntelRdtMounted() bool { + _, err := getIntelRdtRoot() + if err != nil { + return false + } + + return true +} + +func parseCpuInfoFile(path string) (bool, error) { + f, err := os.Open(path) + if err != nil { + return false, err + } + defer f.Close() + + s := bufio.NewScanner(f) + for s.Scan() { + if err := s.Err(); err != nil { + return false, err + } + + text := s.Text() + flags := strings.Split(text, " ") + + // "cat_l3" flag is set if Intel RDT/CAT is supported + for _, flag := range flags { + if flag == "cat_l3" { + return true, nil + } + } + } + return false, nil +} + +func parseUint(s string, base, bitSize int) (uint64, error) { + value, err := strconv.ParseUint(s, base, bitSize) + if err != nil { + intValue, intErr := strconv.ParseInt(s, base, bitSize) + // 1. Handle negative values greater than MinInt64 (and) + // 2. Handle negative values lesser than MinInt64 + if intErr == nil && intValue < 0 { + return 0, nil + } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 { + return 0, nil + } + + return value, err + } + + return value, nil +} + +// Gets a single uint64 value from the specified file. 
+func getIntelRdtParamUint(path, file string) (uint64, error) { + fileName := filepath.Join(path, file) + contents, err := ioutil.ReadFile(fileName) + if err != nil { + return 0, err + } + + res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64) + if err != nil { + return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName) + } + return res, nil +} + +// Gets a string value from the specified file +func getIntelRdtParamString(path, file string) (string, error) { + contents, err := ioutil.ReadFile(filepath.Join(path, file)) + if err != nil { + return "", err + } + + return strings.TrimSpace(string(contents)), nil +} + +func readTasksFile(dir string) ([]int, error) { + f, err := os.Open(filepath.Join(dir, IntelRdtTasks)) + if err != nil { + return nil, err + } + defer f.Close() + + var ( + s = bufio.NewScanner(f) + out = []int{} + ) + + for s.Scan() { + if t := s.Text(); t != "" { + pid, err := strconv.Atoi(t) + if err != nil { + return nil, err + } + out = append(out, pid) + } + } + return out, nil +} + +func writeFile(dir, file, data string) error { + if dir == "" { + return fmt.Errorf("no such directory for %s", file) + } + if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0700); err != nil { + return fmt.Errorf("failed to write %v to %v: %v", data, file, err) + } + return nil +} + +func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) { + rootPath, err := getIntelRdtRoot() + if err != nil { + return nil, err + } + return &intelRdtData{ + root: rootPath, + config: c, + pid: pid, + }, nil +} + +// Get the read-only L3 cache information +func getL3CacheInfo() (*L3CacheInfo, error) { + l3CacheInfo := &L3CacheInfo{} + + rootPath, err := getIntelRdtRoot() + if err != nil { + return l3CacheInfo, err + } + + path := filepath.Join(rootPath, "info", "L3") + cbmMask, err := getIntelRdtParamString(path, "cbm_mask") + if err != nil { + return l3CacheInfo, err + } + minCbmBits, err := getIntelRdtParamUint(path, "min_cbm_bits") + if err != nil { + return l3CacheInfo, err + } + numClosids, err := getIntelRdtParamUint(path, "num_closids") + if err != nil { + return l3CacheInfo, err + } + + l3CacheInfo.CbmMask = cbmMask + l3CacheInfo.MinCbmBits = minCbmBits + l3CacheInfo.NumClosids = numClosids + + return l3CacheInfo, nil +} + +// WriteIntelRdtTasks writes the specified pid into the "tasks" file +func WriteIntelRdtTasks(dir string, pid int) error { + if dir == "" { + return fmt.Errorf("no such directory for %s", IntelRdtTasks) + } + + // Dont attach any pid if -1 is specified as a pid + if pid != -1 { + if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil { + return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err) + } + } + return nil +} + +// Check if Intel RDT is enabled +func IsEnabled() bool { + return isEnabled +} + +// Get the 'container_id' path in Intel RDT "resource control" filesystem +func GetIntelRdtPath(id string) (string, error) { + rootPath, err := getIntelRdtRoot() + if err != nil { + return "", err + } + + path := filepath.Join(rootPath, id) + return path, nil +} + +// Applies Intel RDT configuration to the process with the specified pid +func (m *IntelRdtManager) Apply(pid int) (err error) { + // If intelRdt is not specified in config, we do nothing + if m.Config.IntelRdt == nil { + return nil + } + d, err := getIntelRdtData(m.Config, pid) + if err != nil && !IsNotFound(err) { + return err + } + + m.mu.Lock() + defer 
m.mu.Unlock() + path, err := d.join(m.Id) + if err != nil { + return err + } + + m.Path = path + return nil +} + +// Destroys the Intel RDT 'container_id' group +func (m *IntelRdtManager) Destroy() error { + m.mu.Lock() + defer m.mu.Unlock() + if err := os.RemoveAll(m.Path); err != nil { + return err + } + m.Path = "" + return nil +} + +// Returns Intel RDT path to save in a state file and to be able to +// restore the object later +func (m *IntelRdtManager) GetPath() string { + if m.Path == "" { + m.Path, _ = GetIntelRdtPath(m.Id) + } + return m.Path +} + +// Returns statistics for Intel RDT +func (m *IntelRdtManager) GetStats() (*Stats, error) { + // If intelRdt is not specified in config + if m.Config.IntelRdt == nil { + return nil, nil + } + + m.mu.Lock() + defer m.mu.Unlock() + stats := NewStats() + + // The read-only L3 cache information + l3CacheInfo, err := getL3CacheInfo() + if err != nil { + return nil, err + } + stats.L3CacheInfo = l3CacheInfo + + // The read-only L3 cache schema in root + rootPath, err := getIntelRdtRoot() + if err != nil { + return nil, err + } + tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata") + if err != nil { + return nil, err + } + // L3 cache schema is in the first line + schemaRootStrings := strings.Split(tmpRootStrings, "\n") + stats.L3CacheSchemaRoot = schemaRootStrings[0] + + // The L3 cache schema in 'container_id' group + tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata") + if err != nil { + return nil, err + } + // L3 cache schema is in the first line + schemaStrings := strings.Split(tmpStrings, "\n") + stats.L3CacheSchema = schemaStrings[0] + + return stats, nil +} + +// Set Intel RDT "resource control" filesystem as configured. +func (m *IntelRdtManager) Set(container *configs.Config) error { + path := m.GetPath() + + // About L3 cache schema file: + // The schema has allocation masks/values for L3 cache on each socket, + // which contains L3 cache id and capacity bitmask (CBM). + // Format: "L3:=;=;..." + // For example, on a two-socket machine, L3's schema line could be: + // L3:0=ff;1=c0 + // Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. + // + // About L3 cache CBM validity: + // The valid L3 cache CBM is a *contiguous bits set* and number of + // bits that can be set is less than the max bit. The max bits in the + // CBM is varied among supported Intel Xeon platforms. In Intel RDT + // "resource control" filesystem layout, the CBM in a group should + // be a subset of the CBM in root. Kernel will check if it is valid + // when writing. + // e.g., 0xfffff in root indicates the max bits of CBM is 20 bits, + // which mapping to entire L3 cache capacity. Some valid CBM values + // to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc. 
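A rough sketch of how the schemata handling described above is driven from a config. The configs.IntelRdt literal is inferred from the container.IntelRdt.L3CacheSchema access used by Set and should be treated as an assumption; the CBM values are the two-socket example from the comment, the id and pid are placeholders, and resctrl is assumed to be mounted already.

package rdtexample

import (
	"github.com/opencontainers/runc/libcontainer/configs"
	"github.com/opencontainers/runc/libcontainer/intelrdt"
)

// applyL3Schema creates (or joins) the resctrl group for a placeholder id,
// adds pid to its tasks file via Apply, then writes the L3 schema via Set.
// Illustrative only.
func applyL3Schema(pid int) error {
	cfg := &configs.Config{
		IntelRdt: &configs.IntelRdt{L3CacheSchema: "L3:0=ffff0;1=3ff"},
	}
	mgr := &intelrdt.IntelRdtManager{Config: cfg, Id: "container-1"}
	if err := mgr.Apply(pid); err != nil {
		return err
	}
	// Ends up writing "L3:0=ffff0;1=3ff" into <resctrl root>/container-1/schemata.
	return mgr.Set(cfg)
}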
+ if container.IntelRdt != nil { + l3CacheSchema := container.IntelRdt.L3CacheSchema + if l3CacheSchema != "" { + if err := writeFile(path, "schemata", l3CacheSchema); err != nil { + return err + } + } + } + + return nil +} + +func (raw *intelRdtData) join(id string) (string, error) { + path := filepath.Join(raw.root, id) + if err := os.MkdirAll(path, 0755); err != nil { + return "", err + } + + if err := WriteIntelRdtTasks(path, raw.pid); err != nil { + return "", err + } + return path, nil +} + +type NotFoundError struct { + ResourceControl string +} + +func (e *NotFoundError) Error() string { + return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl) +} + +func NewNotFoundError(res string) error { + return &NotFoundError{ + ResourceControl: res, + } +} + +func IsNotFound(err error) bool { + if err == nil { + return false + } + _, ok := err.(*NotFoundError) + return ok +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go new file mode 100644 index 00000000..095c0a38 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go @@ -0,0 +1,24 @@ +// +build linux + +package intelrdt + +type L3CacheInfo struct { + CbmMask string `json:"cbm_mask,omitempty"` + MinCbmBits uint64 `json:"min_cbm_bits,omitempty"` + NumClosids uint64 `json:"num_closids,omitempty"` +} + +type Stats struct { + // The read-only L3 cache information + L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"` + + // The read-only L3 cache schema in root + L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"` + + // The L3 cache schema in 'container_id' group + L3CacheSchema string `json:"l3_cache_schema,omitempty"` +} + +func NewStats() *Stats { + return &Stats{} +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go b/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go index 82ffa7a8..ce8b4e6b 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go @@ -29,7 +29,7 @@ func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error { return err } - res := strings.Split(string(dest), ";") + res := strings.Split(dest, ";") if len(res) < 5 { return fmt.Errorf("Destination buffer for key description is too small") } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go index 8829b71a..ab453cde 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go @@ -18,6 +18,8 @@ const ( SetgroupAttr uint16 = 27285 OomScoreAdjAttr uint16 = 27286 RootlessAttr uint16 = 27287 + UidmapPathAttr uint16 = 27288 + GidmapPathAttr uint16 = 27289 ) type Int32msg struct { diff --git a/vendor/github.com/opencontainers/runc/libcontainer/mount/mount.go b/vendor/github.com/opencontainers/runc/libcontainer/mount/mount.go new file mode 100644 index 00000000..e8965e08 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/mount/mount.go @@ -0,0 +1,23 @@ +package mount + +// GetMounts retrieves a list of mounts for the current running process. 
+func GetMounts() ([]*Info, error) { + return parseMountTable() +} + +// Mounted looks at /proc/self/mountinfo to determine of the specified +// mountpoint has been mounted +func Mounted(mountpoint string) (bool, error) { + entries, err := parseMountTable() + if err != nil { + return false, err + } + + // Search the table for the mountpoint + for _, e := range entries { + if e.Mountpoint == mountpoint { + return true, nil + } + } + return false, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/mount/mount_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/mount/mount_linux.go new file mode 100644 index 00000000..1e519192 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/mount/mount_linux.go @@ -0,0 +1,82 @@ +// +build linux + +package mount + +import ( + "bufio" + "fmt" + "io" + "os" + "strings" +) + +const ( + /* 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue + (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11) + + (1) mount ID: unique identifier of the mount (may be reused after umount) + (2) parent ID: ID of parent (or of self for the top of the mount tree) + (3) major:minor: value of st_dev for files on filesystem + (4) root: root of the mount within the filesystem + (5) mount point: mount point relative to the process's root + (6) mount options: per mount options + (7) optional fields: zero or more fields of the form "tag[:value]" + (8) separator: marks the end of the optional fields + (9) filesystem type: name of filesystem of the form "type[.subtype]" + (10) mount source: filesystem specific information or "none" + (11) super options: per super block options*/ + mountinfoFormat = "%d %d %d:%d %s %s %s %s" +) + +// Parse /proc/self/mountinfo because comparing Dev and ino does not work from +// bind mounts +func parseMountTable() ([]*Info, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return nil, err + } + defer f.Close() + + return parseInfoFile(f) +} + +func parseInfoFile(r io.Reader) ([]*Info, error) { + var ( + s = bufio.NewScanner(r) + out = []*Info{} + ) + + for s.Scan() { + if err := s.Err(); err != nil { + return nil, err + } + + var ( + p = &Info{} + text = s.Text() + optionalFields string + ) + + if _, err := fmt.Sscanf(text, mountinfoFormat, + &p.ID, &p.Parent, &p.Major, &p.Minor, + &p.Root, &p.Mountpoint, &p.Opts, &optionalFields); err != nil { + return nil, fmt.Errorf("Scanning '%s' failed: %s", text, err) + } + // Safe as mountinfo encodes mountpoints with spaces as \040. + index := strings.Index(text, " - ") + postSeparatorFields := strings.Fields(text[index+3:]) + if len(postSeparatorFields) < 3 { + return nil, fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + + if optionalFields != "-" { + p.Optional = optionalFields + } + + p.Fstype = postSeparatorFields[0] + p.Source = postSeparatorFields[1] + p.VfsOpts = strings.Join(postSeparatorFields[2:], " ") + out = append(out, p) + } + return out, nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/mount/mountinfo.go b/vendor/github.com/opencontainers/runc/libcontainer/mount/mountinfo.go new file mode 100644 index 00000000..e3fc3535 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/mount/mountinfo.go @@ -0,0 +1,40 @@ +package mount + +// Info reveals information about a particular mounted filesystem. This +// struct is populated from the content in the /proc//mountinfo file. 
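A small sketch of how the exported helpers above can be consumed; the resctrl filter is just an example of picking entries out of the parsed table, and the fields it reads are the Info fields defined below.

package mountexample

import (
	"fmt"

	"github.com/opencontainers/runc/libcontainer/mount"
)

// listResctrl walks the parsed mount table and reports any resctrl entries.
// Purely illustrative.
func listResctrl() error {
	entries, err := mount.GetMounts()
	if err != nil {
		return err
	}
	for _, e := range entries {
		if e.Fstype == "resctrl" {
			fmt.Printf("resctrl mounted at %s (opts: %s)\n", e.Mountpoint, e.Opts)
		}
	}
	return nil
}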
+type Info struct { + // ID is a unique identifier of the mount (may be reused after umount). + ID int + + // Parent indicates the ID of the mount parent (or of self for the top of the + // mount tree). + Parent int + + // Major indicates one half of the device ID which identifies the device class. + Major int + + // Minor indicates one half of the device ID which identifies a specific + // instance of device. + Minor int + + // Root of the mount within the filesystem. + Root string + + // Mountpoint indicates the mount point relative to the process's root. + Mountpoint string + + // Opts represents mount-specific options. + Opts string + + // Optional represents optional fields. + Optional string + + // Fstype indicates the type of filesystem, such as EXT3. + Fstype string + + // Source indicates filesystem specific information or "none". + Source string + + // VfsOpts represents per super block options. + VfsOpts string +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/process.go b/vendor/github.com/opencontainers/runc/libcontainer/process.go index f1ad0814..86bf7387 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/process.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/process.go @@ -47,6 +47,10 @@ type Process struct { // ExtraFiles specifies additional open files to be inherited by the container ExtraFiles []*os.File + // Initial sizings for the console + ConsoleWidth uint16 + ConsoleHeight uint16 + // Capabilities specify the capabilities to keep when executing the process inside the container // All capabilities not specified will be dropped from the processes capability mask Capabilities *configs.Capabilities diff --git a/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go index 50f9af57..149b1126 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go @@ -15,6 +15,7 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/intelrdt" "github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/utils" @@ -49,6 +50,7 @@ type setnsProcess struct { parentPipe *os.File childPipe *os.File cgroupPaths map[string]string + intelRdtPath string config *initConfig fds []string process *Process @@ -83,12 +85,20 @@ func (p *setnsProcess) start() (err error) { if err = p.execSetns(); err != nil { return newSystemErrorWithCause(err, "executing setns process") } - // We can't join cgroups if we're in a rootless container. 
- if !p.config.Rootless && len(p.cgroupPaths) > 0 { + if len(p.cgroupPaths) > 0 { if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil { return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) } } + if p.intelRdtPath != "" { + // if Intel RDT "resource control" filesystem path exists + _, err := os.Stat(p.intelRdtPath) + if err == nil { + if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil { + return newSystemErrorWithCausef(err, "adding pid %d to Intel RDT resource control filesystem", p.pid()) + } + } + } // set rlimits, this has to be done here because we lose permissions // to raise the limits once we enter a user-namespace if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { @@ -193,16 +203,17 @@ func (p *setnsProcess) setExternalDescriptors(newFds []string) { } type initProcess struct { - cmd *exec.Cmd - parentPipe *os.File - childPipe *os.File - config *initConfig - manager cgroups.Manager - container *linuxContainer - fds []string - process *Process - bootstrapData io.Reader - sharePidns bool + cmd *exec.Cmd + parentPipe *os.File + childPipe *os.File + config *initConfig + manager cgroups.Manager + intelRdtManager intelrdt.Manager + container *linuxContainer + fds []string + process *Process + bootstrapData io.Reader + sharePidns bool } func (p *initProcess) pid() int { @@ -261,12 +272,35 @@ func (p *initProcess) start() error { p.process.ops = nil return newSystemErrorWithCause(err, "starting init process command") } + // Do this before syncing with child so that no children can escape the + // cgroup. We don't need to worry about not doing this and not being root + // because we'd be using the rootless cgroup manager in that case. + if err := p.manager.Apply(p.pid()); err != nil { + return newSystemErrorWithCause(err, "applying cgroup configuration for process") + } + if p.intelRdtManager != nil { + if err := p.intelRdtManager.Apply(p.pid()); err != nil { + return newSystemErrorWithCause(err, "applying Intel RDT configuration for process") + } + } + defer func() { + if err != nil { + // TODO: should not be the responsibility to call here + p.manager.Destroy() + if p.intelRdtManager != nil { + p.intelRdtManager.Destroy() + } + } + }() + if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil { return newSystemErrorWithCause(err, "copying bootstrap data to pipe") } + if err := p.execSetns(); err != nil { return newSystemErrorWithCause(err, "running exec setns process for init") } + // Save the standard descriptor names before the container process // can potentially move them (e.g., via dup2()). If we don't do this now, // we won't know at checkpoint time which file descriptor to look up. @@ -275,18 +309,6 @@ func (p *initProcess) start() error { return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid()) } p.setExternalDescriptors(fds) - // Do this before syncing with child so that no children can escape the - // cgroup. We don't need to worry about not doing this and not being root - // because we'd be using the rootless cgroup manager in that case. 
- if err := p.manager.Apply(p.pid()); err != nil { - return newSystemErrorWithCause(err, "applying cgroup configuration for process") - } - defer func() { - if err != nil { - // TODO: should not be the responsibility to call here - p.manager.Destroy() - } - }() if err := p.createNetworkInterfaces(); err != nil { return newSystemErrorWithCause(err, "creating network interfaces") } @@ -312,6 +334,11 @@ func (p *initProcess) start() error { if err := p.manager.Set(p.config.Config); err != nil { return newSystemErrorWithCause(err, "setting cgroup config for ready process") } + if p.intelRdtManager != nil { + if err := p.intelRdtManager.Set(p.config.Config); err != nil { + return newSystemErrorWithCause(err, "setting Intel RDT config for ready process") + } + } if p.config.Config.Hooks != nil { s := configs.HookState{ @@ -337,6 +364,11 @@ func (p *initProcess) start() error { if err := p.manager.Set(p.config.Config); err != nil { return newSystemErrorWithCause(err, "setting cgroup config for procHooks process") } + if p.intelRdtManager != nil { + if err := p.intelRdtManager.Set(p.config.Config); err != nil { + return newSystemErrorWithCause(err, "setting Intel RDT config for procHooks process") + } + } if p.config.Config.Hooks != nil { s := configs.HookState{ Version: p.container.config.Version, diff --git a/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go index e2e734a8..eb9e0253 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go @@ -13,11 +13,11 @@ import ( "strings" "time" - "github.com/docker/docker/pkg/mount" - "github.com/docker/docker/pkg/symlink" + "github.com/cyphar/filepath-securejoin" "github.com/mrunalp/fileutils" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/configs" + "github.com/opencontainers/runc/libcontainer/mount" "github.com/opencontainers/runc/libcontainer/system" libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/selinux/go-selinux/label" @@ -40,7 +40,8 @@ func needsSetupDev(config *configs.Config) bool { // prepareRootfs sets up the devices, mount points, and filesystems for use // inside a new mount namespace. It doesn't set anything as ro. You must call // finalizeRootfs after this function to finish setting up the rootfs. -func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) { +func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) { + config := iConfig.Config if err := prepareRoot(config); err != nil { return newSystemErrorWithCause(err, "preparing rootfs") } @@ -80,6 +81,7 @@ func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) { // The hooks are run after the mounts are setup, but before we switch to the new // root, so that the old root is still available in the hooks for any mount // manipulations. + // Note that iConfig.Cwd is not guaranteed to exist here. if err := syncParentHooks(pipe); err != nil { return err } @@ -111,6 +113,14 @@ func prepareRootfs(pipe io.ReadWriter, config *configs.Config) (err error) { } } + if cwd := iConfig.Cwd; cwd != "" { + // Note that spec.Process.Cwd can contain unclean value like "../../../../foo/bar...". + // However, we are safe to call MkDirAll directly because we are in the jail here. 
+ if err := os.MkdirAll(cwd, 0755); err != nil { + return err + } + } + return nil } @@ -230,7 +240,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { // any previous mounts can invalidate the next mount's destination. // this can happen when a user specifies mounts within other mounts to cause breakouts or other // evil stuff to try to escape the container's rootfs. - if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil { + if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil { return err } if err := checkMountDestination(rootfs, dest); err != nil { @@ -318,7 +328,7 @@ func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error { // this can happen when a user specifies mounts within other mounts to cause breakouts or other // evil stuff to try to escape the container's rootfs. var err error - if dest, err = symlink.FollowSymlinkInScope(dest, rootfs); err != nil { + if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil { return err } if err := checkMountDestination(rootfs, dest); err != nil { @@ -668,9 +678,12 @@ func pivotRoot(rootfs string) error { return err } - // Make oldroot rprivate to make sure our unmounts don't propagate to the - // host (and thus bork the machine). - if err := unix.Mount("", ".", "", unix.MS_PRIVATE|unix.MS_REC, ""); err != nil { + // Make oldroot rslave to make sure our unmounts don't propagate to the + // host (and thus bork the machine). We don't use rprivate because this is + // known to cause issues due to races where we still have a reference to a + // mount while a process in the host namespace are trying to operate on + // something they think has no mounts (devicemapper in particular). + if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { return err } // Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd. @@ -733,7 +746,14 @@ func remountReadonly(m *configs.Mount) error { flags = m.Flags ) for i := 0; i < 5; i++ { - if err := unix.Mount("", dest, "", uintptr(flags|unix.MS_REMOUNT|unix.MS_RDONLY), ""); err != nil { + // There is a special case in the kernel for + // MS_REMOUNT | MS_BIND, which allows us to change only the + // flags even as an unprivileged user (i.e. user namespace) + // assuming we don't drop any security related flags (nodev, + // nosuid, etc.). So, let's use that case so that we can do + // this re-mount without failing in a userns. 
+ flags |= unix.MS_REMOUNT | unix.MS_BIND | unix.MS_RDONLY + if err := unix.Mount("", dest, "", uintptr(flags), ""); err != nil { switch err { case unix.EBUSY: time.Sleep(100 * time.Millisecond) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go index 2523cbf9..d99f3fe6 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go @@ -22,6 +22,11 @@ var ( actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM)) ) +const ( + // Linux system calls can have at most 6 arguments + syscallMaxArguments int = 6 +) + // Filters given syscalls in a container, preventing them from being used // Started in the container init process, and carried over to all child processes // Setns calls, however, require a separate invocation, as they are not children @@ -45,11 +50,11 @@ func InitSeccomp(config *configs.Seccomp) error { for _, arch := range config.Architectures { scmpArch, err := libseccomp.GetArchFromString(arch) if err != nil { - return err + return fmt.Errorf("error validating Seccomp architecture: %s", err) } if err := filter.AddArch(scmpArch); err != nil { - return err + return fmt.Errorf("error adding architecture to seccomp filter: %s", err) } } @@ -170,29 +175,55 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error { // Convert the call's action to the libseccomp equivalent callAct, err := getAction(call.Action) if err != nil { - return err + return fmt.Errorf("action in seccomp profile is invalid: %s", err) } // Unconditional match - just add the rule if len(call.Args) == 0 { if err = filter.AddRule(callNum, callAct); err != nil { - return err + return fmt.Errorf("error adding seccomp filter rule for syscall %s: %s", call.Name, err) } } else { - // Conditional match - convert the per-arg rules into library format + // If two or more arguments have the same condition, + // Revert to old behavior, adding each condition as a separate rule + argCounts := make([]uint, syscallMaxArguments) conditions := []libseccomp.ScmpCondition{} for _, cond := range call.Args { newCond, err := getCondition(cond) if err != nil { - return err + return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %s", call.Name, err) } + argCounts[cond.Index] += 1 + conditions = append(conditions, newCond) } - if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil { - return err + hasMultipleArgs := false + for _, count := range argCounts { + if count > 1 { + hasMultipleArgs = true + break + } + } + + if hasMultipleArgs { + // Revert to old behavior + // Add each condition attached to a separate rule + for _, cond := range conditions { + condArr := []libseccomp.ScmpCondition{cond} + + if err = filter.AddRuleConditional(callNum, callAct, condArr); err != nil { + return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err) + } + } + } else { + // No conditions share same argument + // Use new, proper behavior + if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil { + return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err) + } } } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go deleted file mode 100644 index c7bdb605..00000000 --- 
a/vendor/github.com/opencontainers/runc/libcontainer/setgroups_linux.go +++ /dev/null @@ -1,11 +0,0 @@ -// +build linux,go1.5 - -package libcontainer - -import "syscall" - -// Set the GidMappingsEnableSetgroups member to true, so the process's -// setgroups proc entry wont be set to 'deny' if GidMappings are set -func enableSetgroups(sys *syscall.SysProcAttr) { - sys.GidMappingsEnableSetgroups = true -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go index 35b84219..096c601e 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go @@ -47,7 +47,10 @@ func (l *linuxSetnsInit) Init() error { return err } } - if l.config.Config.Seccomp != nil { + // Without NoNewPrivileges seccomp is a privileged operation, so we need to + // do this before dropping capabilities; otherwise do it as late as possible + // just before execve so as few syscalls take place after it as possible. + if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges { if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { return err } @@ -61,5 +64,13 @@ func (l *linuxSetnsInit) Init() error { if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil { return err } + // Set seccomp as close to execve as possible, so as few syscalls take + // place afterward (reducing the amount of syscalls that users need to + // enable in their seccomp profiles). + if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges { + if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { + return newSystemErrorWithCause(err, "init seccomp") + } + } return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go index fbcf3a6a..8a544ed5 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go @@ -30,15 +30,15 @@ func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) { var newperms uint32 if l.config.Config.Namespaces.Contains(configs.NEWUSER) { - // with user ns we need 'other' search permissions + // With user ns we need 'other' search permissions. newperms = 0x8 } else { - // without user ns we need 'UID' search permissions + // Without user ns we need 'UID' search permissions. newperms = 0x80000 } - // create a unique per session container name that we can - // join in setns; however, other containers can also join it + // Create a unique per session container name that we can join in setns; + // However, other containers can also join it. return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms } @@ -46,12 +46,12 @@ func (l *linuxStandardInit) Init() error { if !l.config.Config.NoNewKeyring { ringname, keepperms, newperms := l.getSessionRingParams() - // do not inherit the parent's session keyring + // Do not inherit the parent's session keyring. sessKeyId, err := keys.JoinSessionKeyring(ringname) if err != nil { return err } - // make session keyring searcheable + // Make session keyring searcheable. 
if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil { return err } @@ -68,7 +68,7 @@ func (l *linuxStandardInit) Init() error { // prepareRootfs() can be executed only for a new mount namespace. if l.config.Config.Namespaces.Contains(configs.NEWNS) { - if err := prepareRootfs(l.pipe, l.config.Config); err != nil { + if err := prepareRootfs(l.pipe, l.config); err != nil { return err } } @@ -150,19 +150,20 @@ func (l *linuxStandardInit) Init() error { if err := pdeath.Restore(); err != nil { return err } - // compare the parent from the initial start of the init process and make sure that it did not change. - // if the parent changes that means it died and we were reparented to something else so we should - // just kill ourself and not cause problems for someone else. + // Compare the parent from the initial start of the init process and make + // sure that it did not change. if the parent changes that means it died + // and we were reparented to something else so we should just kill ourself + // and not cause problems for someone else. if unix.Getppid() != l.parentPid { return unix.Kill(unix.Getpid(), unix.SIGKILL) } - // check for the arg before waiting to make sure it exists and it is returned - // as a create time error. + // Check for the arg before waiting to make sure it exists and it is + // returned as a create time error. name, err := exec.LookPath(l.config.Args[0]) if err != nil { return err } - // close the pipe to signal that we have completed our init. + // Close the pipe to signal that we have completed our init. l.pipe.Close() // Wait for the FIFO to be opened on the other side before exec-ing the // user process. We open it through /proc/self/fd/$fd, because the fd that @@ -170,19 +171,26 @@ func (l *linuxStandardInit) Init() error { // re-open an O_PATH fd through /proc. fd, err := unix.Open(fmt.Sprintf("/proc/self/fd/%d", l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0) if err != nil { - return newSystemErrorWithCause(err, "openat exec fifo") + return newSystemErrorWithCause(err, "open exec fifo") } if _, err := unix.Write(fd, []byte("0")); err != nil { return newSystemErrorWithCause(err, "write 0 exec fifo") } + // Close the O_PATH fifofd fd before exec because the kernel resets + // dumpable in the wrong order. This has been fixed in newer kernels, but + // we keep this to ensure CVE-2016-9962 doesn't re-emerge on older kernels. + // N.B. the core issue itself (passing dirfds to the host filesystem) has + // since been resolved. + // https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318 + unix.Close(l.fifoFd) + // Set seccomp as close to execve as possible, so as few syscalls take + // place afterward (reducing the amount of syscalls that users need to + // enable in their seccomp profiles). 
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges { if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { return newSystemErrorWithCause(err, "init seccomp") } } - // close the statedir fd before exec because the kernel resets dumpable in the wrong order - // https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318 - unix.Close(l.fifoFd) if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil { return newSystemErrorWithCause(err, "exec user process") } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go index 44fa6b43..1f8c5e71 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go @@ -45,6 +45,11 @@ func destroy(c *linuxContainer) error { } } err := c.cgroupManager.Destroy() + if c.intelRdtManager != nil { + if ierr := c.intelRdtManager.Destroy(); err == nil { + err = ierr + } + } if rerr := os.RemoveAll(c.root); err == nil { err = rerr } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/stats_freebsd.go b/vendor/github.com/opencontainers/runc/libcontainer/stats_freebsd.go deleted file mode 100644 index f8d1d689..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/stats_freebsd.go +++ /dev/null @@ -1,5 +0,0 @@ -package libcontainer - -type Stats struct { - Interfaces []*NetworkInterface -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go index c629dc67..29fd641e 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go @@ -1,8 +1,10 @@ package libcontainer import "github.com/opencontainers/runc/libcontainer/cgroups" +import "github.com/opencontainers/runc/libcontainer/intelrdt" type Stats struct { - Interfaces []*NetworkInterface - CgroupStats *cgroups.Stats + Interfaces []*NetworkInterface + CgroupStats *cgroups.Stats + IntelRdtStats *intelrdt.Stats } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/stats_solaris.go b/vendor/github.com/opencontainers/runc/libcontainer/stats_solaris.go deleted file mode 100644 index da78c1c2..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/stats_solaris.go +++ /dev/null @@ -1,7 +0,0 @@ -package libcontainer - -// Solaris - TODO - -type Stats struct { - Interfaces []*NetworkInterface -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/stats_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/stats_windows.go deleted file mode 100644 index f8d1d689..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/stats_windows.go +++ /dev/null @@ -1,5 +0,0 @@ -package libcontainer - -type Stats struct { - Interfaces []*NetworkInterface -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go index 4837085a..5f124cd8 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go @@ -134,3 +134,14 @@ func RunningInUserNS() bool { func SetSubreaper(i int) error { return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0) } + +// GetSubreaper returns the subreaper setting for the calling process +func GetSubreaper() (int, error) { + var i uintptr + + if err := 
unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil { + return -1, err + } + + return int(i), nil +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_32.go similarity index 93% rename from vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go rename to vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_32.go index 31ff3deb..c5ca5d86 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_32.go @@ -1,4 +1,5 @@ -// +build linux,arm +// +build linux +// +build 386 arm package system diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go deleted file mode 100644 index 3f7235ed..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go +++ /dev/null @@ -1,25 +0,0 @@ -// +build linux,386 - -package system - -import ( - "golang.org/x/sys/unix" -) - -// Setuid sets the uid of the calling thread to the specified uid. -func Setuid(uid int) (err error) { - _, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0) - if e1 != 0 { - err = e1 - } - return -} - -// Setgid sets the gid of the calling thread to the specified gid. -func Setgid(gid int) (err error) { - _, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0) - if e1 != 0 { - err = e1 - } - return -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go index d7891a2f..11c3faaf 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go @@ -1,4 +1,5 @@ -// +build linux,arm64 linux,amd64 linux,ppc linux,ppc64 linux,ppc64le linux,s390x +// +build linux +// +build arm64 amd64 mips mipsle mips64 mips64le ppc ppc64 ppc64le s390x package system diff --git a/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go b/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go index b3a07cba..b8434f10 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/system/sysconfig.go @@ -1,4 +1,4 @@ -// +build cgo,linux cgo,freebsd +// +build cgo,linux package system diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unsupported.go deleted file mode 100644 index 4a8d00ac..00000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unsupported.go +++ /dev/null @@ -1,38 +0,0 @@ -// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris - -package user - -import ( - "io" - "syscall" -) - -func GetPasswdPath() (string, error) { - return "", ErrUnsupported -} - -func GetPasswd() (io.ReadCloser, error) { - return nil, ErrUnsupported -} - -func GetGroupPath() (string, error) { - return "", ErrUnsupported -} - -func GetGroup() (io.ReadCloser, error) { - return nil, ErrUnsupported -} - -// CurrentUser looks up the current user by their user id in /etc/passwd. 
If the -// user cannot be found (or there is no /etc/passwd file on the filesystem), -// then CurrentUser returns an error. -func CurrentUser() (User, error) { - return LookupUid(syscall.Getuid()) -} - -// CurrentGroup looks up the current user's group by their primary group id's -// entry in /etc/passwd. If the group cannot be found (or there is no -// /etc/group file on the filesystem), then CurrentGroup returns an error. -func CurrentGroup() (Group, error) { - return LookupGid(syscall.Getgid()) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go index 2cbb6491..c8a9364d 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go @@ -84,12 +84,10 @@ func RecvFd(socket *os.File) (*os.File, error) { // addition, the file.Name() of the given file will also be sent as // non-auxiliary data in the same payload (allowing to send contextual // information for a file descriptor). -func SendFd(socket, file *os.File) error { - name := []byte(file.Name()) +func SendFd(socket *os.File, name string, fd uintptr) error { if len(name) >= MaxNameLen { - return fmt.Errorf("sendfd: filename too long: %s", file.Name()) + return fmt.Errorf("sendfd: filename too long: %s", name) } - oob := unix.UnixRights(int(file.Fd())) - - return unix.Sendmsg(int(socket.Fd()), name, oob, nil, 0) + oob := unix.UnixRights(int(fd)) + return unix.Sendmsg(int(socket.Fd()), []byte(name), oob, nil, 0) }
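To make the new SendFd signature concrete, a minimal sketch of passing a pty master over a socketpair using SendFd and RecvFd from the same utils package. The socketpair plumbing and log output are illustrative only and not taken from this change.

package main

import (
	"log"
	"os"

	"github.com/containerd/console"
	"github.com/opencontainers/runc/libcontainer/utils"
	"golang.org/x/sys/unix"
)

func main() {
	// A connected pair of unix sockets standing in for the console socket
	// used by setupConsole.
	fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)
	if err != nil {
		log.Fatal(err)
	}
	parent := os.NewFile(uintptr(fds[0]), "parent")
	child := os.NewFile(uintptr(fds[1]), "child")
	defer parent.Close()
	defer child.Close()

	pty, _, err := console.NewPty()
	if err != nil {
		log.Fatal(err)
	}
	defer pty.Close()

	// New signature: the name travels as regular payload, the descriptor as
	// SCM_RIGHTS ancillary data.
	if err := utils.SendFd(child, pty.Name(), pty.Fd()); err != nil {
		log.Fatal(err)
	}
	master, err := utils.RecvFd(parent)
	if err != nil {
		log.Fatal(err)
	}
	defer master.Close()
	log.Printf("received %s as fd %d", master.Name(), master.Fd())
}

The receiving side gets a fresh *os.File whose name is the string sent alongside the descriptor, which is how setupConsole hands the pty master back to the parent process.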