cadvisor/integration/runner/runner.go
Vishnu kannan 38ea12a533 update scripts and docs to not mention godep for builds
Signed-off-by: Vishnu kannan <vishnuk@google.com>
2016-07-11 12:40:05 -07:00

297 lines
8.3 KiB
Go

// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"bufio"
"bytes"
"encoding/json"
"errors"
"flag"
"fmt"
"io/ioutil"
"net/http"
"os"
"os/exec"
"path"
"regexp"
"strconv"
"strings"
"sync"
"time"
"github.com/golang/glog"
cadvisorApi "github.com/google/cadvisor/info/v2"
)
// The machine running this program must be able to ssh into every host without a password.
// Usage:
//   go run ./integration/runner/runner.go --logtostderr --v 2 --ssh-options "<options for ssh>" <list of hosts>
// cadvisorBinary is the name of the cAdvisor executable that is built locally,
// copied to each host, and later killed by name on that host.
const cadvisorBinary = "cadvisor"

// testTimeout is the value passed to "go test --timeout" for the integration tests.
const testTimeout = 15 * time.Minute
// Command-line flags and package-level state for the runner.
var (
	// cadvisorTimeout bounds how long PushAndRunTests polls for a healthy cAdvisor.
	cadvisorTimeout = flag.Duration("cadvisor_timeout", 15*time.Second, "Time to wait for cAdvisor to come up on the remote host")
	// port is the port cAdvisor is started on, and the port the tests target.
	port = flag.Int("port", 8080, "Port in which to start cAdvisor in the remote host")
	// testRetryCount is the maximum number of retries after the first test run.
	testRetryCount = flag.Int("test-retry-count", 3, "Number of times to retry failed tests before failing.")
	// testRetryWhitelist is the path of the file read by initRetryWhitelist.
	testRetryWhitelist = flag.String("test-retry-whitelist", "", "Path to newline separated list of regexexp for test failures that should be retried. If empty, no tests are retried.")
	// sshOptions holds extra options that are prepended to ssh/scp invocations.
	sshOptions = flag.String("ssh-options", "", "Commandline options passed to ssh.")

	// retryRegex matches test failures that may be retried; nil means never retry.
	retryRegex *regexp.Regexp
)
// getAttributes fetches the attributes document served at
// http://<ipAddress>:<portStr>/api/v2.1/attributes and decodes it.
//
// It returns an error if the request fails, the status is not 200 OK, the body
// cannot be read, or the body is not valid JSON for cadvisorApi.Attributes.
// The caller uses the result to annotate test failures.
func getAttributes(ipAddress, portStr string) (*cadvisorApi.Attributes, error) {
	resp, err := http.Get(fmt.Sprintf("http://%s:%s/api/v2.1/attributes", ipAddress, portStr))
	if err != nil {
		return nil, fmt.Errorf("failed to get attributes - %v", err)
	}
	// Register the close before inspecting the status so that the body is
	// released on the non-200 early return as well.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("failed to get attributes. Status code - %v", resp.StatusCode)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("unable to read attributes response body - %v", err)
	}

	var attributes cadvisorApi.Attributes
	if err := json.Unmarshal(body, &attributes); err != nil {
		return nil, fmt.Errorf("failed to unmarshal attributes - %v", err)
	}
	return &attributes, nil
}
// RunCommand executes cmd with args and waits for it to finish.
// On failure the returned error carries the command, its arguments, the
// underlying error, and the combined stdout/stderr output; on success the
// output is discarded and nil is returned.
func RunCommand(cmd string, args ...string) error {
	command := exec.Command(cmd, args...)
	if out, runErr := command.CombinedOutput(); runErr != nil {
		return fmt.Errorf("command %q %q failed with error: %v and output: %s", cmd, args, runErr, out)
	}
	return nil
}
// RunSshCommand runs cmd (ssh or scp in this file) via RunCommand, with the
// options from the --ssh-options flag inserted before args.
//
// The flag value is split on any run of whitespace, so repeated, leading, or
// trailing spaces do not turn into empty-string arguments that the command
// would interpret as positional parameters.
func RunSshCommand(cmd string, args ...string) error {
	if opts := strings.Fields(*sshOptions); len(opts) > 0 {
		args = append(opts, args...)
	}
	return RunCommand(cmd, args...)
}
// PushAndRunTests copies the cAdvisor binary into testDir on host, starts it
// there, waits until its /healthz endpoint answers 200 OK, and then runs the
// integration tests against it, retrying whitelisted failures up to
// --test-retry-count times.
//
// Cleanup runs in deferred order (last registered first): on failure the
// remote log and host attributes are collected and folded into the returned
// error, then cAdvisor is killed, then testDir is removed from the host.
// Cleanup failures are only logged.
func PushAndRunTests(host, testDir string) (result error) {
	// Push binary.
	glog.Infof("Pushing cAdvisor binary to %q...", host)
	if err := RunSshCommand("ssh", host, "--", "mkdir", "-p", testDir); err != nil {
		return fmt.Errorf("failed to make remote testing directory: %v", err)
	}
	defer func() {
		if err := RunSshCommand("ssh", host, "--", "rm", "-rf", testDir); err != nil {
			glog.Errorf("Failed to cleanup test directory: %v", err)
		}
	}()
	if err := RunSshCommand("scp", "-r", cadvisorBinary, fmt.Sprintf("%s:%s", host, testDir)); err != nil {
		return fmt.Errorf("failed to copy binary: %v", err)
	}

	// Start cAdvisor.
	glog.Infof("Running cAdvisor on %q...", host)
	portStr := strconv.Itoa(*port)
	// Buffered so the goroutine can always deliver its error and exit, even
	// when nobody is receiving any more (e.g. after cAdvisor is killed below).
	errChan := make(chan error, 1)
	go func() {
		// Use a goroutine-local error: sharing a variable with the caller
		// would race with the retry loop below.
		err := RunSshCommand("ssh", host, "--", fmt.Sprintf("sudo GORACE='halt_on_error=1' %s --port %s --logtostderr --docker_env_metadata_whitelist=TEST_VAR &> %s/log.txt", path.Join(testDir, cadvisorBinary), portStr, testDir))
		if err != nil {
			errChan <- fmt.Errorf("error running cAdvisor: %v", err)
		}
	}()
	defer func() {
		if err := RunSshCommand("ssh", host, "--", "sudo", "pkill", cadvisorBinary); err != nil {
			glog.Errorf("Failed to cleanup: %v", err)
		}
	}()
	defer func() {
		if result == nil {
			return
		}
		// Copy logs from the host into a file unique to this call: hosts are
		// tested in parallel, so a fixed local name would be overwritten or
		// removed by another failing host.
		localLog, err := ioutil.TempFile("", "cadvisor-log-")
		if err != nil {
			result = fmt.Errorf("error creating local log file: %v for %v", err, result)
			return
		}
		localLog.Close()
		defer os.Remove(localLog.Name())
		if err := RunSshCommand("scp", fmt.Sprintf("%s:%s/log.txt", host, testDir), localLog.Name()); err != nil {
			result = fmt.Errorf("error fetching logs: %v for %v", err, result)
			return
		}
		logs, err := ioutil.ReadFile(localLog.Name())
		if err != nil {
			result = fmt.Errorf("error reading local log file: %v for %v", err, result)
			return
		}
		glog.Errorf("----------------------\nLogs from Host: %q\n%v\n", host, string(logs))

		// Get attributes for debugging purposes.
		attributes, err := getAttributes(host, portStr)
		if err != nil {
			glog.Errorf("Failed to read host attributes: %v", err)
		}
		result = fmt.Errorf("error on host %s: %v\n%+v", host, result, attributes)
	}()

	// Wait for cAdvisor to come up.
	endTime := time.Now().Add(*cadvisorTimeout)
	done := false
	for endTime.After(time.Now()) && !done {
		select {
		case err := <-errChan:
			// Quit early if there was an error.
			return err
		case <-time.After(500 * time.Millisecond):
			// Stop waiting when cAdvisor is healthy.
			resp, err := http.Get(fmt.Sprintf("http://%s:%s/healthz", host, portStr))
			if err == nil {
				done = resp.StatusCode == http.StatusOK
				// Release the connection on every poll.
				resp.Body.Close()
			}
		}
	}
	if !done {
		return fmt.Errorf("timed out waiting for cAdvisor to come up at host %q", host)
	}

	// Run the tests in a retry loop.
	glog.Infof("Running integration tests targeting %q...", host)
	var err error
	for i := 0; i <= *testRetryCount; i++ {
		// Check if this is a retry; err still holds the previous attempt's failure.
		if i > 0 {
			time.Sleep(time.Second * 15) // Wait 15 seconds before retrying
			glog.Warningf("Retrying (%d of %d) tests on host %s due to error %v", i, *testRetryCount, host, err)
		}
		// Run the command
		err = RunCommand("go", "test", "--timeout", testTimeout.String(), "github.com/google/cadvisor/integration/tests/...", "--host", host, "--port", portStr, "--ssh-options", *sshOptions)
		if err == nil {
			// On success, break out of retry loop
			break
		}
		// Only retry on test failures caused by these known flaky failure conditions
		if retryRegex == nil || !retryRegex.Match([]byte(err.Error())) {
			glog.Warningf("Skipping retry for tests on host %s because error is not whitelisted", host)
			break
		}
	}
	return err
}
// Run builds cAdvisor with build/build.sh, runs PushAndRunTests against every
// host given on the command line in parallel, and removes the built binary
// afterwards.
//
// It returns nil when every host passes. Otherwise it returns a single error
// whose message lists each host failure on its own "Error <n>: ..." line.
func Run() error {
	start := time.Now()
	// Log the elapsed time and flush in one deferred call, in that order, so
	// the timing line is flushed as well.
	defer func() {
		glog.Infof("Execution time %v", time.Since(start))
		glog.Flush()
	}()

	hosts := flag.Args()
	// All hosts share one directory name, made unique per runner process.
	testDir := fmt.Sprintf("/tmp/cadvisor-%d", os.Getpid())
	glog.Infof("Running integration tests on host(s) %q", strings.Join(hosts, ","))

	// Build cAdvisor.
	glog.Infof("Building cAdvisor...")
	if err := RunCommand("build/build.sh"); err != nil {
		return err
	}
	defer func() {
		if err := RunCommand("rm", cadvisorBinary); err != nil {
			glog.Error(err)
		}
	}()

	// Run test on all hosts in parallel.
	var (
		wg            sync.WaitGroup
		allErrorsLock sync.Mutex
		allErrors     []error
	)
	for _, host := range hosts {
		wg.Add(1)
		// host is passed as an argument so each goroutine gets its own copy.
		go func(host string) {
			defer wg.Done()
			if err := PushAndRunTests(host, testDir); err != nil {
				allErrorsLock.Lock()
				allErrors = append(allErrors, err)
				allErrorsLock.Unlock()
			}
		}(host)
	}
	wg.Wait()

	if len(allErrors) != 0 {
		var buffer bytes.Buffer
		for i, err := range allErrors {
			fmt.Fprintf(&buffer, "Error %d: %s\n", i, err.Error())
		}
		return errors.New(buffer.String())
	}
	glog.Infof("All tests pass!")
	return nil
}
// initRetryWhitelist initializes the whitelist of test failures that can be retried.
//
// It reads the file named by --test-retry-whitelist, treats every non-empty
// line as a regular expression, and stores their alternation in retryRegex.
// retryRegex is left nil (no retries) when the flag is empty or the file
// contains no patterns. An unreadable file or an invalid pattern is fatal.
func initRetryWhitelist() {
	if *testRetryWhitelist == "" {
		return
	}
	file, err := os.Open(*testRetryWhitelist)
	if err != nil {
		glog.Fatal(err)
	}
	defer file.Close()

	var retryStrings []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		if text := scanner.Text(); text != "" {
			retryStrings = append(retryStrings, text)
		}
	}
	if err := scanner.Err(); err != nil {
		glog.Fatal(err)
	}

	// Joining zero patterns gives the empty regexp, which matches every
	// string and would make every failure retryable. Keep retries disabled.
	if len(retryStrings) == 0 {
		return
	}
	compiled, err := regexp.Compile(strings.Join(retryStrings, "|"))
	if err != nil {
		// Report a bad user-supplied pattern as a normal fatal error, not a panic.
		glog.Fatalf("invalid retry whitelist %q: %v", *testRetryWhitelist, err)
	}
	retryRegex = compiled
}
// main parses the flags, requires at least one host argument, loads the retry
// whitelist, and runs the tests, exiting fatally on a usage error or test failure.
func main() {
	flag.Parse()

	// At least one positional argument (a host) is required.
	if flag.NArg() == 0 {
		glog.Fatalf("USAGE: runner <hosts to test>")
	}

	initRetryWhitelist()

	// Execute the test run and abort with its error, if any.
	if runErr := Run(); runErr != nil {
		glog.Fatal(runErr)
	}
}