// Copyright 2015 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package main import ( "bufio" "bytes" "encoding/json" "errors" "flag" "fmt" "io/ioutil" "net/http" "os" "os/exec" "path" "regexp" "strconv" "strings" "sync" "time" "github.com/google/cadvisor/integration/common" cadvisorApi "github.com/google/cadvisor/info/v2" "github.com/golang/glog" ) const cadvisorBinary = "cadvisor" var cadvisorTimeout = flag.Duration("cadvisor_timeout", 15*time.Second, "Time to wait for cAdvisor to come up on the remote host") var port = flag.Int("port", 8080, "Port in which to start cAdvisor in the remote host") var testRetryCount = flag.Int("test-retry-count", 3, "Number of times to retry failed tests before failing.") var testRetryWhitelist = flag.String("test-retry-whitelist", "", "Path to newline separated list of regexexp for test failures that should be retried. If empty, no tests are retried.") var retryRegex *regexp.Regexp func getAttributes(ipAddress, portStr string) (*cadvisorApi.Attributes, error) { // Get host attributes and log attributes if the tests fail. var attributes cadvisorApi.Attributes resp, err := http.Get(fmt.Sprintf("http://%s:%s/api/v2.1/attributes", ipAddress, portStr)) if err != nil { return nil, fmt.Errorf("failed to get attributes - %v", err) } if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("failed to get attributes. Status code - %v", resp.StatusCode) } defer resp.Body.Close() body, err := ioutil.ReadAll(resp.Body) if err != nil { return nil, fmt.Errorf("unable to read attributes response body - %v", err) } if err := json.Unmarshal(body, &attributes); err != nil { return nil, fmt.Errorf("failed to unmarshal attributes - %v", err) } return &attributes, nil } func RunCommand(cmd string, args ...string) error { output, err := exec.Command(cmd, args...).CombinedOutput() if err != nil { return fmt.Errorf("command %q %q failed with error: %v and output: %q", cmd, args, err, output) } return nil } func PushAndRunTests(host, testDir string) error { // Push binary. glog.Infof("Pushing cAdvisor binary to %q...", host) args := common.GetGCComputeArgs("ssh", host, "--", "mkdir", "-p", testDir) err := RunCommand("gcloud", args...) if err != nil { return fmt.Errorf("failed to make remote testing directory: %v", err) } defer func() { args := common.GetGCComputeArgs("ssh", host, "--", "rm", "-rf", testDir) err := RunCommand("gcloud", args...) if err != nil { glog.Errorf("Failed to cleanup test directory: %v", err) } }() args = common.GetGCComputeArgs("copy-files", cadvisorBinary, fmt.Sprintf("%s:%s", host, testDir)) err = RunCommand("gcloud", args...) if err != nil { return fmt.Errorf("failed to copy binary: %v", err) } // Start cAdvisor. glog.Infof("Running cAdvisor on %q...", host) portStr := strconv.Itoa(*port) errChan := make(chan error) go func() { args = common.GetGCComputeArgs("ssh", host, "--", fmt.Sprintf("sudo %s --port %s --logtostderr &> %s/log.txt", path.Join(testDir, cadvisorBinary), portStr, testDir)) err = RunCommand("gcloud", args...) if err != nil { errChan <- fmt.Errorf("error running cAdvisor: %v", err) } }() defer func() { args = common.GetGCComputeArgs("ssh", host, "--", "sudo", "pkill", cadvisorBinary) err := RunCommand("gcloud", args...) if err != nil { glog.Errorf("Failed to cleanup: %v", err) } }() ipAddress, err := common.GetGceIp(host) if err != nil { return fmt.Errorf("failed to get GCE IP: %v", err) } // Wait for cAdvisor to come up. endTime := time.Now().Add(*cadvisorTimeout) done := false for endTime.After(time.Now()) && !done { select { case err := <-errChan: // Quit early if there was an error. return err case <-time.After(500 * time.Millisecond): // Stop waiting when cAdvisor is healthy.. resp, err := http.Get(fmt.Sprintf("http://%s:%s/healthz", ipAddress, portStr)) if err == nil && resp.StatusCode == http.StatusOK { done = true break } } } if !done { return fmt.Errorf("timed out waiting for cAdvisor to come up at host %q", host) } // Get attributes for debugging purposes. attributes, err := getAttributes(ipAddress, portStr) if err != nil { return fmt.Errorf("%v - %q", err, host) } // Run the tests in a retry loop. glog.Infof("Running integration tests targeting %q...", host) for i := 0; i <= *testRetryCount; i++ { // Check if this is a retry if i > 0 { time.Sleep(time.Second * 15) // Wait 15 seconds before retrying glog.Warningf("Retrying (%d of %d) tests on host %s due to error %v", i, *testRetryCount, host, err) } // Run the command err = RunCommand("godep", "go", "test", "github.com/google/cadvisor/integration/tests/...", "--host", host, "--port", portStr) if err == nil { // On success, break out of retry loop break } // Only retry on test failures caused by these known flaky failure conditions if retryRegex == nil || !retryRegex.Match([]byte(err.Error())) { glog.Warningf("Skipping retry for tests on host %s because error is not whitelisted: %s", host, err.Error()) break } } if err != nil { // Copy logs from the host args = common.GetGCComputeArgs("copy-files", fmt.Sprintf("%s:%s/log.txt", host, testDir), "./") err = RunCommand("gcloud", args...) if err != nil { return fmt.Errorf("error fetching logs: %v", err) } defer os.Remove("./log.txt") logs, err := ioutil.ReadFile("./log.txt") if err != nil { return fmt.Errorf("error reading local log file: %v", err) } glog.Errorf("%v", string(logs)) err = fmt.Errorf("error on host %s: %v\n%+v", host, err, attributes) } return err } func Run() error { start := time.Now() defer func() { glog.Infof("Execution time %v", time.Since(start)) }() defer glog.Flush() hosts := flag.Args() testDir := fmt.Sprintf("/tmp/cadvisor-%d", os.Getpid()) glog.Infof("Running integration tests on host(s) %q", strings.Join(hosts, ",")) // Build cAdvisor. glog.Infof("Building cAdvisor...") err := RunCommand("godep", "go", "build", "github.com/google/cadvisor") if err != nil { return err } defer func() { err := RunCommand("rm", cadvisorBinary) if err != nil { glog.Error(err) } }() // Run test on all hosts in parallel. var wg sync.WaitGroup allErrors := make([]error, 0) var allErrorsLock sync.Mutex for _, host := range hosts { wg.Add(1) go func(host string) { defer wg.Done() err := PushAndRunTests(host, testDir) if err != nil { func() { allErrorsLock.Lock() defer allErrorsLock.Unlock() allErrors = append(allErrors, err) }() } }(host) } wg.Wait() if len(allErrors) != 0 { var buffer bytes.Buffer for i, err := range allErrors { buffer.WriteString(fmt.Sprintf("Error %d: ", i)) buffer.WriteString(err.Error()) buffer.WriteString("\n") } return errors.New(buffer.String()) } glog.Infof("All tests pass!") return nil } // initRetryWhitelist initializes the whitelist of test failures that can be retried. func initRetryWhitelist() { if *testRetryWhitelist == "" { return } file, err := os.Open(*testRetryWhitelist) if err != nil { glog.Fatal(err) } defer file.Close() retryStrings := []string{} scanner := bufio.NewScanner(file) for scanner.Scan() { text := scanner.Text() if text != "" { retryStrings = append(retryStrings, text) } } if err := scanner.Err(); err != nil { glog.Fatal(err) } retryRegex = regexp.MustCompile(strings.Join(retryStrings, "|")) } func main() { flag.Parse() // Check usage. if len(flag.Args()) == 0 { glog.Fatalf("USAGE: runner ") } initRetryWhitelist() // Run the tests. err := Run() if err != nil { glog.Fatal(err) } }