Open Bug 1361362 Opened 3 years ago Updated 9 months ago

Treeherder index missing lots of TH data

Categories

(Testing :: ActiveData, defect)

defect
Not set
normal

Tracking

(Not tracked)

People

(Reporter: ekyle, Assigned: ekyle)

References

Details

There is the issue of missing data in the historical record, mentioned in bug 1296634, but it seems there are other missing records too.

For example, why is job.id==84375661 not in ActiveData?

Hopefully this example will reveal why the historical record does not load when asked to.
Another example of missing data:

{
	"from":"treeherder",
	"edges":[
		"job.type.symbol",
		{
			"value":"action.start_time",
			"domain":{
				"type":"time",
				"min":"today-13week",
				"max":"today",
				"interval":"week"
			}
		}
	],
	"where":{"and":[
		{"eq":{"run.state":"completed"}},
		{"in":{"job.type.group.symbol":["tc-L10n","L10n"]}},
		{"gte":{"action.start_time":{"date":"today-2week"}}},
		{"regex":{"job.type.symbol":"N."}},
		{"eq":{"repo.branch.name":"mozilla-central"}}
	]},
	"select":[
		{"value":"action.duration","aggregate":"average"},
		{"aggregate":"count"}
	],
	"format":"cube"
}

vs

-- Per-week, per-platform run time of successful L10n jobs (symbols N1..N10)
-- in repository 1, over the trailing 16 months.
--
-- NOTE(review): `end_time - start_time` is kept exactly as written. If these
-- columns are DATETIME rather than epoch seconds, MySQL subtracts their numeric
-- YYYYMMDDhhmmss forms and TIMESTAMPDIFF(SECOND, start_time, end_time) would be
-- required instead -- confirm against the schema.
SELECT
    WEEKOFYEAR(job.submit_time) AS week,
    r.build_platform,
    SUM(CASE WHEN r.job_type_symbol = 'N1' THEN job.end_time - job.start_time END) AS N1,
    SUM(CASE WHEN r.job_type_symbol = 'N2' THEN job.end_time - job.start_time END) AS N2,
    SUM(CASE WHEN r.job_type_symbol = 'N3' THEN job.end_time - job.start_time END) AS N3,
    SUM(CASE WHEN r.job_type_symbol = 'N4' THEN job.end_time - job.start_time END) AS N4,
    SUM(CASE WHEN r.job_type_symbol = 'N5' THEN job.end_time - job.start_time END) AS N5,
    SUM(CASE WHEN r.job_type_symbol = 'N6' THEN job.end_time - job.start_time END) AS N6,
    SUM(CASE WHEN r.job_type_symbol = 'N7' THEN job.end_time - job.start_time END) AS N7,
    SUM(CASE WHEN r.job_type_symbol = 'N8' THEN job.end_time - job.start_time END) AS N8,
    SUM(CASE WHEN r.job_type_symbol = 'N9' THEN job.end_time - job.start_time END) AS N9,
    SUM(CASE WHEN r.job_type_symbol = 'N10' THEN job.end_time - job.start_time END) AS N10,
    COUNT(1) AS "count",
    SUM(job.end_time - job.start_time) AS total_duration,
    SUM(job.end_time - job.start_time) / COUNT(1) AS average_duration,
    MAX(job.end_time - job.start_time) AS max_duration,
    -- ISO year + week (e.g. 201718), appended last so existing column
    -- positions are unchanged. The window is longer than 12 months, so the
    -- bare week number alone would merge the same week of two different years.
    -- Mode 3 is the mode WEEKOFYEAR() uses.
    YEARWEEK(job.submit_time, 3) AS year_week
FROM job
-- The filters on job.* below reject unmatched rows anyway, so this was
-- never an outer join in effect; say so explicitly.
INNER JOIN reference_data_signatures AS r
    ON r.id = job.signature_id
WHERE
    job.repository_id = 1
    AND r.job_group_symbol IN ('tc-L10n', 'L10n')
    AND r.job_type_symbol IN ('N1', 'N2', 'N3', 'N4', 'N5', 'N6', 'N7', 'N8', 'N9', 'N10')
    AND job.result = 'success'
    -- Selects the same rows as
    --   DATE(job.submit_time) > SUBDATE(CURDATE(), INTERVAL 16 MONTH)
    -- but leaves the column bare so an index on submit_time can be used.
    AND job.submit_time >= SUBDATE(CURDATE(), INTERVAL 16 MONTH) + INTERVAL 1 DAY
GROUP BY
    -- No ASC/DESC here: that GROUP BY extension was removed in MySQL 8.0.13.
    r.build_platform,
    YEARWEEK(job.submit_time, 3),
    WEEKOFYEAR(job.submit_time)
ORDER BY
    YEARWEEK(job.submit_time, 3) DESC,
    r.build_platform
See Also: → 1496521

I had forgotten about this bug. I will check to see what's happening.

job.id == 272036789 is not in ActiveData

Past week in ActiveData:

{
	"from":"treeherder",
	"where":{"and":[
		{"eq":{"run.result":"success"}},
		{"in":{"job.type.group.symbol":["tc-L10n","L10n"]}},
		{"lt":{"action.request_time":{"date":"today-day"}}},
		{"gte":{"action.request_time":{"date":"today-8day"}}},
		{"eq":{"repo.branch.name":"mozilla-central"}}
	]},
	"select":"job.id",
	"sort":{"job.id":"desc"},
	"limit":4000
}

Use the returned job.id list to query the RO database:

-- Successful L10n jobs in repository 1, submitted in the window
-- [today - 8 days, today - 1 day), whose ids are NOT in the list below.
-- The list is the set of job.id values returned by the ActiveData query, so
-- every row returned here is a job the RO database has and ActiveData lacks.
SELECT
    job.id,
    job.submit_time
FROM job
-- The filters on job.* below reject unmatched rows anyway, so this was
-- never an outer join in effect; say so explicitly.
INNER JOIN reference_data_signatures AS r
    ON r.id = job.signature_id
WHERE
    job.repository_id = 1
    AND r.job_group_symbol IN ('tc-L10n', 'L10n')
    AND job.result = 'success'
    -- Half-open range on the bare column: selects the same rows as the
    -- DATE(job.submit_time) comparisons, but can use an index on submit_time.
    AND job.submit_time >= SUBDATE(CURDATE(), INTERVAL 8 DAY)
    AND job.submit_time < SUBDATE(CURDATE(), INTERVAL 1 DAY)
    AND job.id NOT IN (
		272100572,
		272100484,
		272100475,
<clip>
		271114570,
		271114564
)
ORDER BY job.id DESC
You need to log in before you can comment on or make changes to this bug.