Commit 790e5d9 (1 parent: 561025f)
1 file changed: 62 additions, 0 deletions.
1
+ import argparse
2
+ import os
3
+
4
+ from dance .utils import try_import
5
+
6
# Components of the sweep path handed to ``wandb.Api().sweep()`` below.
entity = "xzy11632"
project = "dance-dev"

# NOTE(review): presumably ``try_import`` returns the imported module (and some
# placeholder when the package is missing) -- confirm against dance.utils.
wandb = try_import("wandb")

parser = argparse.ArgumentParser(
    description="Delete the unfinished runs of a W&B sweep and resume the sweep.")
parser.add_argument("--sweep_id", type=str, default="c3yy5fd3",
                    help="ID of the sweep to clean up and resume.")
args = parser.parse_args()
sweep_id = args.sweep_id

# The path must be exactly "entity/project/sweep_id": no spaces around the
# slashes and no trailing whitespace.
sweep = wandb.Api(timeout=1000).sweep(f"{entity}/{project}/{sweep_id}")
14
+ import time
15
+
16
+
17
def _refresh_sweep_runs(sweep):
    """Ask ``sweep`` to re-fetch its data when it offers a ``load`` method."""
    # NOTE(review): wandb's public ``Sweep`` appears to cache its run list and
    # each run's state; ``load(force=True)`` is expected to re-query the
    # backend -- confirm against the installed wandb version.
    reload = getattr(sweep, "load", None)
    if not callable(reload):
        return
    try:
        reload(force=True)
    except Exception as e:
        # Best-effort: a failed refresh must not abort the clean-up itself.
        print(f"刷新 sweep 运行列表时出错: {e}")


def delete_unfinished_runs(sweep, max_attempts=3, check_interval=1):
    """Delete every run of ``sweep`` whose state is not ``'finished'``.

    Makes up to ``max_attempts`` passes. Each pass reads ``sweep.runs``, calls
    ``delete()`` on every run whose ``state`` is not ``'finished'``, waits
    ``check_interval`` seconds and then refreshes the sweep (when it provides
    ``load``) so that the next check does not look at a stale snapshot.
    Exceptions raised by an individual ``delete()`` are printed, not raised.

    Args:
        sweep: Object exposing a ``runs`` iterable whose items provide
            ``state``, ``id`` and ``delete()``; may optionally provide
            ``load(force=True)`` to re-fetch its runs.
        max_attempts: Maximum number of deletion passes.
        check_interval: Seconds to wait after each deletion pass.

    Returns:
        True if no unfinished run is left on the final check, False otherwise.
    """
    attempt = 0
    while attempt < max_attempts:
        # Check whether any unfinished runs are left.
        unfinished_runs = [run for run in sweep.runs if run.state != 'finished']

        if not unfinished_runs:
            print("所有运行都已完成或已删除")
            break

        print(f"第 {attempt + 1} 次尝试删除 {len(unfinished_runs)} 个未完成的运行")

        # Try to delete every unfinished run; one failure must not stop the rest.
        for run in unfinished_runs:
            try:
                run.delete()
            except Exception as e:
                print(f"删除运行 {run.id} 时出错: {str(e)}")

        # Wait, then refresh so the next check sees the result of the deletions
        # instead of the objects that were loaded before them.
        time.sleep(check_interval)
        _refresh_sweep_runs(sweep)
        attempt += 1

    # Final check.
    remaining_unfinished = [run for run in sweep.runs if run.state != 'finished']
    if remaining_unfinished:
        print(f"警告: 仍有 {len(remaining_unfinished)} 个运行未能成功删除")
        return False
    return True
54
+
55
+
56
# Example usage: delete the sweep's unfinished runs, report the outcome, then
# resume the sweep through the wandb CLI.
import subprocess

success = delete_unfinished_runs(sweep)
if success:
    print("所有未完成的运行已成功删除")
else:
    print("部分运行可能未能成功删除")

# Pass the command as an argument list instead of a shell string: the sweep ID
# comes from the command line and must not be interpreted by a shell. As with
# the previous os.system call, the CLI's exit status is not checked.
subprocess.run(["wandb", "sweep", "--resume", f"{entity}/{project}/{sweep_id}"], check=False)
You can’t perform that action at this time.
0 commit comments