@@ -35,17 +35,13 @@ use ballista_core::serde::scheduler::ExecutorMetadata;
35
35
use datafusion_proto:: logical_plan:: AsLogicalPlan ;
36
36
use datafusion_proto:: physical_plan:: AsExecutionPlan ;
37
37
use log:: { debug, error, info, trace, warn} ;
38
- use std:: collections:: HashMap ;
39
38
use std:: net:: SocketAddr ;
40
39
41
40
use std:: ops:: Deref ;
42
41
43
- use crate :: cluster:: {
44
- bind_task_bias, bind_task_round_robin, unbind_prepare_failed_tasks,
45
- } ;
42
+ use crate :: cluster:: { bind_task_bias, bind_task_round_robin} ;
46
43
use crate :: config:: TaskDistributionPolicy ;
47
44
use crate :: scheduler_server:: event:: QueryStageSchedulerEvent ;
48
- use crate :: state:: execution_graph:: TaskDescription ;
49
45
use std:: time:: { SystemTime , UNIX_EPOCH } ;
50
46
use tonic:: { Request , Response , Status } ;
51
47
@@ -116,10 +112,10 @@ impl<T: 'static + AsLogicalPlan, U: 'static + AsExecutionPlan> SchedulerGrpc
116
112
let active_jobs = self . state . task_manager . get_running_job_cache ( ) ;
117
113
let schedulable_tasks = match self . state . config . task_distribution {
118
114
TaskDistributionPolicy :: Bias => {
119
- bind_task_bias ( available_slots, active_jobs. clone ( ) , |_| false ) . await
115
+ bind_task_bias ( available_slots, active_jobs, |_| false ) . await
120
116
}
121
117
TaskDistributionPolicy :: RoundRobin => {
122
- bind_task_round_robin ( available_slots, active_jobs. clone ( ) , |_| false ) . await
118
+ bind_task_round_robin ( available_slots, active_jobs, |_| false ) . await
123
119
}
124
120
TaskDistributionPolicy :: ConsistentHash { ..} => {
125
121
return Err ( Status :: unimplemented (
@@ -128,36 +124,19 @@ impl<T: 'static + AsLogicalPlan, U: 'static + AsExecutionPlan> SchedulerGrpc
128
124
} ;
129
125
130
126
let mut tasks = vec ! [ ] ;
131
- let mut prepare_failed_jobs = HashMap :: < String , Vec < TaskDescription > > :: new ( ) ;
132
127
for ( _, task) in schedulable_tasks {
133
128
let job_id = task. partition . job_id . clone ( ) ;
134
- if prepare_failed_jobs. contains_key ( & job_id) {
135
- prepare_failed_jobs. entry ( job_id) . or_default ( ) . push ( task) ;
136
- continue ;
137
- }
138
- match self
139
- . state
140
- . task_manager
141
- . prepare_task_definition ( task. clone ( ) )
142
- {
129
+ match self . state . task_manager . prepare_task_definition ( task) {
143
130
Ok ( task_definition) => tasks. push ( task_definition) ,
144
131
Err ( e) => {
145
132
error ! ( "Error preparing task definition: {:?}" , e) ;
146
- prepare_failed_jobs. entry ( job_id) . or_default ( ) . push ( task) ;
133
+ info ! ( "Cancel prepare task definition failed job: {}" , job_id) ;
134
+ if let Err ( err) = self . cancel_job ( job_id) . await {
135
+ error ! ( "Failed to cancel job {err:?}" ) ;
136
+ }
147
137
}
148
138
}
149
139
}
150
-
151
- unbind_prepare_failed_tasks ( active_jobs, & prepare_failed_jobs) . await ;
152
- for job_id in prepare_failed_jobs. into_keys ( ) {
153
- info ! ( "Cancel prepare task definition failed job: {}" , job_id) ;
154
- self . cancel_job ( job_id) . await . map_err ( |e| {
155
- let msg = format ! ( "Cancel job error due to {e:?}" ) ;
156
- error ! ( "{}" , msg) ;
157
- Status :: internal ( msg)
158
- } ) ?;
159
- }
160
-
161
140
Ok ( Response :: new ( PollWorkResult { tasks } ) )
162
141
} else {
163
142
warn ! ( "Received invalid executor poll_work request" ) ;
@@ -553,11 +532,21 @@ impl<T: 'static + AsLogicalPlan, U: 'static + AsExecutionPlan> SchedulerGrpc
553
532
) -> Result < Response < CancelJobResult > , Status > {
554
533
let job_id = request. into_inner ( ) . job_id ;
555
534
info ! ( "Received cancellation request for job {}" , job_id) ;
556
- self . cancel_job ( job_id) . await . map_err ( |e| {
557
- let msg = format ! ( "Cancel job error due to {e:?}" ) ;
558
- error ! ( "{}" , msg) ;
559
- Status :: internal ( msg)
560
- } ) ?;
535
+
536
+ self . query_stage_event_loop
537
+ . get_sender ( )
538
+ . map_err ( |e| {
539
+ let msg = format ! ( "Get query stage event loop error due to {e:?}" ) ;
540
+ error ! ( "{}" , msg) ;
541
+ Status :: internal ( msg)
542
+ } ) ?
543
+ . post_event ( QueryStageSchedulerEvent :: JobCancel ( job_id) )
544
+ . await
545
+ . map_err ( |e| {
546
+ let msg = format ! ( "Post to query stage event loop error due to {e:?}" ) ;
547
+ error ! ( "{}" , msg) ;
548
+ Status :: internal ( msg)
549
+ } ) ?;
561
550
Ok ( Response :: new ( CancelJobResult { cancelled : true } ) )
562
551
}
563
552
0 commit comments