Skip to content

Commit

Permalink
Make dependency-group-timeout check ignored until all tasks scheduled (
Browse files Browse the repository at this point in the history
  • Loading branch information
zuston authored Dec 9, 2021
1 parent d0bd0ca commit ce3f207
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,15 @@ public boolean isHealthy(Configuration tonyConf) {

@VisibleForTesting
protected String groupDependencyTimeout(Configuration tonyConf) {
/**
* precheck:
* Is group dependency checking timeout enabled?
* If not, directly return null.
*/
if (taskWithDependentGrpsIndex == null) {
taskWithDependentGrpsIndex = Utils.getJobTypeDependentGrps(tonyConf);
}

// groupDependencies is map, key: waiting role, value: pre-dependent groups and waiting timeout
if (taskWithDependentGrpsIndex == null || taskWithDependentGrpsIndex.isEmpty()) {
return null;
Expand All @@ -169,6 +175,15 @@ protected String groupDependencyTimeout(Configuration tonyConf) {
grpWithMembersIndex = Utils.getAllGroupJobTypes(tonyConf);
}

if (grpWithMembersIndex == null || grpWithMembersIndex.isEmpty()) {
return null;
}

if (!session.allTasksScheduled()) {
log.info("Group dependency timeout check will be ignored until all tasks scheduled.");
return null;
}

// memberInGroups is map. key: jobtype name, value: in which groups
if (taskInGrpsIndex == null) {
taskInGrpsIndex = getMemberInGroups(grpWithMembersIndex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,46 @@ public void testGrpDependentWithoutTimeout() {
);
}

/**
* Test case for partial tasks scheduled, but others are not.
* DependentGroup timeout should pass.
*/
@Test
public void testPartialTaskScheduledShouldPass() {
Configuration conf = new Configuration();
conf.addResource("tony-default.xml");
conf.set("tony.application.group.A", "chief");
conf.set("tony.application.dependency.worker.timeout.after.A", String.valueOf(60 * 240));
conf.set("tony.chief.instances", "1");
conf.set("tony.worker.instances", "4");
conf.set("tony.ps.instances", "2");

TonySession session = buildPartialTaskScheduledSession(conf);
MLGenericRuntime.AM am = (MLGenericRuntime.AM) runtime.getAMAdapter();
am.setTonySession(session);
Assert.assertNull(
am.groupDependencyTimeout(conf)
);
}

private TonySession buildPartialTaskScheduledSession(Configuration conf) {
TonySession session = new TonySession.Builder().setTonyConf(conf).build();

TonySession.TonyTask worker0 = session.buildTonyTask(Constants.WORKER_JOB_NAME, "0", "localhost");
TonySession.TonyTask worker1 = session.buildTonyTask(Constants.WORKER_JOB_NAME, "1", "localhost");
TonySession.TonyTask worker2 = session.buildTonyTask(Constants.WORKER_JOB_NAME, "2", "localhost");

worker0.setTaskInfo();
worker1.setTaskInfo();
worker2.setTaskInfo();

session.addTask(worker0);
session.addTask(worker1);
session.addTask(worker2);

return session;
}

private TonySession buildMockSession(Configuration tonyConf) {
TonySession session = new TonySession.Builder().setTonyConf(tonyConf).build();

Expand Down

0 comments on commit ce3f207

Please sign in to comment.