Handle the new Linux 5.0 kernel format for OOM messages
This commit is contained in:
parent
820136cdba
commit
f6f5fd21b4
@ -26,7 +26,9 @@ import (
|
||||
)
|
||||
|
||||
var (
	// legacyContainerRegexp matches the older "Task in X killed as a result of
	// limit of Y" message; group 1 is X and group 2 is Y.
	legacyContainerRegexp = regexp.MustCompile(`Task in (.*) killed as a result of limit of (.*)`)

	// Starting in 5.0 linux kernels, the OOM message changed to a single
	// "oom-kill:" summary line.
	//
	// Capture groups: 1 constraint, 2 nodemask, 3 cpuset, 4 mems_allowed,
	// 5 oom_memcg, 6 whatever follows a space inside the oom_memcg value,
	// 7 task_memcg, 8 task, 9 pid, 10 uid.
	//
	// Group 6 and its leading space are optional: summary lines whose
	// oom_memcg path contains no space must still match, while lines that do
	// carry a space (for example from a wrapped log) keep splitting at it. The
	// group is retained so the indices of groups 7-10 do not shift.
	containerRegexp = regexp.MustCompile(`oom-kill:constraint=(.*),nodemask=(.*),cpuset=(.*),mems_allowed=(.*),oom_memcg=(.*?)(?: (.*))?,task_memcg=(.*),task=(.*),pid=(.*),uid=(.*)`)

	// lastLineRegexp captures the pid (group 1) and the process name (group 2)
	// from a "Killed process" line.
	lastLineRegexp = regexp.MustCompile(`Killed process ([0-9]+) \((.+)\)`)

	// firstLineRegexp recognises the line that opens an OOM report.
	firstLineRegexp = regexp.MustCompile(`invoked oom-killer:`)
)
|
||||
@ -51,11 +53,14 @@ type OomInstance struct {
|
||||
// the absolute name of the container that was killed
|
||||
// due to the OOM.
|
||||
VictimContainerName string
|
||||
// the constraint that triggered the OOM. One of CONSTRAINT_NONE,
|
||||
// CONSTRAINT_CPUSET, CONSTRAINT_MEMORY_POLICY, CONSTRAINT_MEMCG
|
||||
Constraint string
|
||||
}
|
||||
|
||||
// gets the container name from a line and adds it to the oomInstance.
|
||||
func getContainerName(line string, currentOomInstance *OomInstance) error {
|
||||
parsedLine := containerRegexp.FindStringSubmatch(line)
|
||||
func getLegacyContainerName(line string, currentOomInstance *OomInstance) error {
|
||||
parsedLine := legacyContainerRegexp.FindStringSubmatch(line)
|
||||
if parsedLine == nil {
|
||||
return nil
|
||||
}
|
||||
@ -64,6 +69,25 @@ func getContainerName(line string, currentOomInstance *OomInstance) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// gets the container name from a line and adds it to the oomInstance.
|
||||
func getContainerName(line string, currentOomInstance *OomInstance) (bool, error) {
|
||||
parsedLine := containerRegexp.FindStringSubmatch(line)
|
||||
if parsedLine == nil {
|
||||
// Fall back to the legacy format if it isn't found here.
|
||||
return false, getLegacyContainerName(line, currentOomInstance)
|
||||
}
|
||||
currentOomInstance.ContainerName = parsedLine[7]
|
||||
currentOomInstance.VictimContainerName = parsedLine[5]
|
||||
currentOomInstance.Constraint = parsedLine[1]
|
||||
pid, err := strconv.Atoi(parsedLine[9])
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
currentOomInstance.Pid = pid
|
||||
currentOomInstance.ProcessName = parsedLine[8]
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// gets the pid, name, and date from a line and adds it to oomInstance
|
||||
func getProcessNamePid(line string, currentOomInstance *OomInstance) (bool, error) {
|
||||
reList := lastLineRegexp.FindStringSubmatch(line)
|
||||
@ -106,13 +130,15 @@ func (self *OomParser) StreamOoms(outStream chan<- *OomInstance) {
|
||||
TimeOfDeath: msg.Timestamp,
|
||||
}
|
||||
for msg := range kmsgEntries {
|
||||
err := getContainerName(msg.Message, oomCurrentInstance)
|
||||
finished, err := getContainerName(msg.Message, oomCurrentInstance)
|
||||
if err != nil {
|
||||
klog.Errorf("%v", err)
|
||||
}
|
||||
finished, err := getProcessNamePid(msg.Message, oomCurrentInstance)
|
||||
if err != nil {
|
||||
klog.Errorf("%v", err)
|
||||
if !finished {
|
||||
finished, err = getProcessNamePid(msg.Message, oomCurrentInstance)
|
||||
if err != nil {
|
||||
klog.Errorf("%v", err)
|
||||
}
|
||||
}
|
||||
if finished {
|
||||
oomCurrentInstance.TimeOfDeath = msg.Timestamp
|
||||
|
@ -23,23 +23,33 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
const startLine = "ruby invoked oom-killer: gfp_mask=0x201da, order=0, oom_score_adj=0"
|
||||
const endLine = "Killed process 19667 (evil-program2) total-vm:1460016kB, anon-rss:1414008kB, file-rss:4kB"
|
||||
const containerLine = "Task in /mem2 killed as a result of limit of /mem3"
|
||||
const (
|
||||
startLine = "ruby invoked oom-killer: gfp_mask=0x201da, order=0, oom_score_adj=0"
|
||||
endLine = "Killed process 19667 (evil-program2) total-vm:1460016kB, anon-rss:1414008kB, file-rss:4kB"
|
||||
legacyContainerLine = "Task in /mem2 killed as a result of limit of /mem3"
|
||||
containerLine = "oom-kill:constraint=CONSTRAINT_MEMCG,nodemask=(null),cpuset=ef807430361e6e82b45db92e2e9b6fbec98f419b12c591e655c1a725565e73a8,mems_allowed=0,oom_memcg=/kubepods/burstable/podfbdfe8e3-1c87-4ff2-907 c-b2ec8e25d012,task_memcg=/kubepods/burstable/podfbdfe8e3-1c87-4ff2-907c-b2ec8e25d012/ef807430361e6e82b45db92e2e9b6fbec98f419b12c591e655c1a725565e73a8,task=manager,pid=966,uid=0"
|
||||
)
|
||||
|
||||
func TestGetContainerName(t *testing.T) {
|
||||
func TestGetLegacyContainerName(t *testing.T) {
|
||||
currentOomInstance := new(OomInstance)
|
||||
err := getContainerName(startLine, currentOomInstance)
|
||||
finished, err := getContainerName(startLine, currentOomInstance)
|
||||
if err != nil {
|
||||
t.Errorf("bad line fed to getContainerName should yield no error, but had error %v", err)
|
||||
}
|
||||
if finished {
|
||||
t.Errorf("bad line fed to getContainerName should not result in a finished oom log, but it did")
|
||||
}
|
||||
if currentOomInstance.ContainerName != "" {
|
||||
t.Errorf("bad line fed to getContainerName yielded no container name but set it to %s", currentOomInstance.ContainerName)
|
||||
}
|
||||
err = getContainerName(containerLine, currentOomInstance)
|
||||
finished, err = getContainerName(legacyContainerLine, currentOomInstance)
|
||||
if err != nil {
|
||||
t.Errorf("container line fed to getContainerName should yield no error, but had error %v", err)
|
||||
}
|
||||
if finished {
|
||||
t.Errorf("getContainerName with the legacy log line should not result in a finished oom log, but it did")
|
||||
|
||||
}
|
||||
if currentOomInstance.ContainerName != "/mem2" {
|
||||
t.Errorf("getContainerName should have set containerName to /mem2, not %s", currentOomInstance.ContainerName)
|
||||
}
|
||||
@ -48,6 +58,43 @@ func TestGetContainerName(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetContainerName(t *testing.T) {
|
||||
currentOomInstance := new(OomInstance)
|
||||
finished, err := getContainerName(startLine, currentOomInstance)
|
||||
if err != nil {
|
||||
t.Errorf("bad line fed to getContainerName should yield no error, but had error %v", err)
|
||||
}
|
||||
if finished {
|
||||
t.Errorf("bad line fed to getContainerName should not result in a finished oom log, but it did")
|
||||
}
|
||||
if currentOomInstance.ContainerName != "" {
|
||||
t.Errorf("bad line fed to getContainerName yielded no container name but set it to %s", currentOomInstance.ContainerName)
|
||||
}
|
||||
finished, err = getContainerName(containerLine, currentOomInstance)
|
||||
if err != nil {
|
||||
t.Errorf("container line fed to getContainerName should yield no error, but had error %v", err)
|
||||
}
|
||||
if !finished {
|
||||
t.Errorf("getContainerName with the complete log line should result in a finished oom log, but it did not")
|
||||
|
||||
}
|
||||
if currentOomInstance.ContainerName != "/kubepods/burstable/podfbdfe8e3-1c87-4ff2-907c-b2ec8e25d012/ef807430361e6e82b45db92e2e9b6fbec98f419b12c591e655c1a725565e73a8" {
|
||||
t.Errorf("getContainerName should have set containerName to /kubepods/burstable/podfbdfe8e3-1c87-4ff2-907c-b2ec8e25d012/ef807430361e6e82b45db92e2e9b6fbec98f419b12c591e655c1a725565e73a8, not %s", currentOomInstance.ContainerName)
|
||||
}
|
||||
if currentOomInstance.VictimContainerName != "/kubepods/burstable/podfbdfe8e3-1c87-4ff2-907" {
|
||||
t.Errorf("getContainerName should have set victimContainerName to /kubepods/burstable/podfbdfe8e3-1c87-4ff2-907, not %s", currentOomInstance.VictimContainerName)
|
||||
}
|
||||
if currentOomInstance.Pid != 966 {
|
||||
t.Errorf("getContainerName should have set Pid to 966, not %d", currentOomInstance.Pid)
|
||||
}
|
||||
if currentOomInstance.ProcessName != "manager" {
|
||||
t.Errorf("getContainerName should have set ProcessName to manager, not %s", currentOomInstance.ProcessName)
|
||||
}
|
||||
if currentOomInstance.Constraint != "CONSTRAINT_MEMCG" {
|
||||
t.Errorf("getContainerName should have set ProcessName to CONSTRAINT_MEMCG, not %s", currentOomInstance.Constraint)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetProcessNamePid(t *testing.T) {
|
||||
currentOomInstance := new(OomInstance)
|
||||
couldParseLine, err := getProcessNamePid(startLine, currentOomInstance)
|
||||
|
Loading…
Reference in New Issue
Block a user