Skip to content

Commit fd752b8

Browse files
authored
Add du to CLI (#207)
1 parent f59bd11 commit fd752b8

File tree

2 files changed

+221
-32
lines changed

2 files changed

+221
-32
lines changed

python/hdfs_native/cli.py

+173-30
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,29 @@ def _upload_file(
142142
client.rename(write_destination, remote_dst, overwrite=force)
143143

144144

145+
def _get_widths(parsed: list[dict]) -> dict[str, int]:
146+
widths: dict[str, int] = defaultdict(lambda: 0)
147+
148+
for file in parsed:
149+
for key, value in file.items():
150+
if isinstance(value, str):
151+
widths[key] = max(widths[key], len(value))
152+
153+
return widths
154+
155+
156+
def _human_size(num: int):
157+
if num < 1024:
158+
return str(num)
159+
160+
adjusted = num / 1024.0
161+
for unit in ("K", "M", "G", "T", "P", "E", "Z"):
162+
if abs(adjusted) < 1024.0:
163+
return f"{adjusted:.1f}{unit}"
164+
adjusted /= 1024.0
165+
return f"{adjusted:.1f}Y"
166+
167+
145168
def cat(args: Namespace):
146169
for src in args.src:
147170
client = _client_for_url(src)
@@ -191,6 +214,85 @@ def chown(args: Namespace):
191214
client.set_owner(path, owner, group)
192215

193216

217+
def du(args: Namespace):
    """Print size information for the paths matching ``args.path``.

    For every URL in ``args.path`` the matching paths are expanded with
    ``_glob_path``. With ``args.summary`` set, one row is produced per
    matched path; otherwise one row is produced per entry returned by
    ``client.list_status`` for the matched path. Each row shows the content
    summary's ``length`` and ``space_consumed`` followed by the path, plus
    the file and directory counts when ``args.file_count`` is set.

    All rows are collected first so that each column can be padded to the
    width of its widest value before anything is printed.

    Args:
        args: Parsed command-line arguments. Reads ``path``, ``summary``,
            ``human_readable``, ``file_count`` and ``verbose``.
    """
    rows: List[Dict[str, str]] = []

    if args.verbose:
        # The header is stored as an ordinary row so that it takes part in
        # the column width calculation like any data row.
        header = {
            "file_size": "File Size",
            "disk_size": "Disk Size",
            "path": "Path",
        }
        if args.file_count:
            header["file_count"] = "File Count"
            header["directory_count"] = "Directory Count"

        rows.append(header)

    # Pick the size formatter once instead of re-testing the flag per row.
    render_size = _human_size if args.human_readable else str

    for url in args.path:
        client = _client_for_url(url)
        # The prefix depends only on the URL, so compute it once per URL.
        prefix = _prefix_for_url(url)

        for matched in _glob_path(client, _path_for_url(url)):
            if args.summary:
                targets = [matched]
            else:
                # Lazily walk the listing so each content summary is fetched
                # as its entry is produced.
                targets = (status.path for status in client.list_status(matched))

            for target in targets:
                summary = client.get_content_summary(target)

                row = {
                    "file_size": render_size(summary.length),
                    "disk_size": render_size(summary.space_consumed),
                    "path": prefix + target,
                }

                if args.file_count:
                    row["file_count"] = str(summary.file_count)
                    row["directory_count"] = str(summary.directory_count)

                rows.append(row)

    widths = _get_widths(rows)

    def render_field(row: Dict[str, str], field: str, right_align: bool = False) -> str:
        """Pad one field of ``row`` to its column width."""
        value = row[field]
        # Fall back to the value's own length if the column has no width.
        width = widths.get(field, len(value))
        return value.rjust(width) if right_align else value.ljust(width)

    # (field, right_align) in output order; numeric columns are right-aligned.
    columns = [("file_size", True), ("disk_size", True), ("path", False)]
    if args.file_count:
        columns.append(("file_count", True))
        columns.append(("directory_count", True))

    for row in rows:
        print(" ".join(render_field(row, field, right) for field, right in columns))
294+
295+
194296
def get(args: Namespace):
195297
paths: List[Tuple[Client, str]] = []
196298

@@ -234,17 +336,6 @@ def get(args: Namespace):
234336

235337

236338
def ls(args: Namespace):
237-
def human_size(num: int):
238-
if num < 1024:
239-
return str(num)
240-
241-
adjusted = num / 1024.0
242-
for unit in ("K", "M", "G", "T", "P", "E", "Z"):
243-
if abs(adjusted) < 1024.0:
244-
return f"{adjusted:.1f} {unit}"
245-
adjusted /= 1024.0
246-
return f"{adjusted:.1f} Y"
247-
248339
def parse_status(status: FileStatus, prefix: str) -> Dict[str, Union[int, str]]:
249340
file_time = status.modification_time
250341
if args.access_time:
@@ -263,7 +354,7 @@ def parse_status(status: FileStatus, prefix: str) -> Dict[str, Union[int, str]]:
263354
mode = stat.filemode(permission)
264355

265356
if args.human_readable:
266-
length_string = human_size(status.length)
357+
length_string = _human_size(status.length)
267358
else:
268359
length_string = str(status.length)
269360

@@ -281,16 +372,6 @@ def parse_status(status: FileStatus, prefix: str) -> Dict[str, Union[int, str]]:
281372
"path": path,
282373
}
283374

284-
def get_widths(parsed: list[dict]) -> dict[str, int]:
285-
widths: dict[str, int] = defaultdict(lambda: 0)
286-
287-
for file in parsed:
288-
for key, value in file.items():
289-
if isinstance(value, str):
290-
widths[key] = max(widths[key], len(value))
291-
292-
return widths
293-
294375
def print_files(
295376
parsed: List[Dict[str, Union[int, str]]],
296377
widths: Optional[Dict[str, int]] = None,
@@ -346,7 +427,7 @@ def format(
346427
if not args.path_only:
347428
print(f"Found {len(parsed)} items")
348429

349-
widths = get_widths(parsed)
430+
widths = _get_widths(parsed)
350431
print_files(parsed, widths)
351432
else:
352433
print_files([parse_status(status, prefix)])
@@ -474,10 +555,6 @@ def touch(args: Namespace):
474555
for url in args.path:
475556
client = _client_for_url(url)
476557
for path in _glob_path(client, _path_for_url(url)):
477-
if args.access_time and args.modification_time:
478-
raise ValueError(
479-
"--access-time and --modification-time cannot both be passed"
480-
)
481558
timestamp = None
482559
if args.timestamp:
483560
timestamp = datetime.strptime(args.timestamp, r"%Y%m%d:%H%M%S")
@@ -516,6 +593,7 @@ def main(in_args: Optional[Sequence[str]] = None):
516593
"cat",
517594
help="Print the contents of a file",
518595
description="Print the contents of a file to stdout",
596+
add_help=False,
519597
)
520598
cat_parser.add_argument("src", nargs="+", help="File pattern to print")
521599
cat_parser.set_defaults(func=cat)
@@ -524,6 +602,7 @@ def main(in_args: Optional[Sequence[str]] = None):
524602
"chmod",
525603
help="Changes permissions of a file",
526604
description="Changes permissions of a file. Only octal permissions are supported.",
605+
add_help=False,
527606
)
528607
chmod_parser.add_argument(
529608
"-R",
@@ -542,6 +621,7 @@ def main(in_args: Optional[Sequence[str]] = None):
542621
"chown",
543622
help="Changes owner and group of a file",
544623
description="Changes owner and group of a file",
624+
add_help=False,
545625
)
546626
chown_parser.add_argument(
547627
"-R",
@@ -557,12 +637,50 @@ def main(in_args: Optional[Sequence[str]] = None):
557637
chown_parser.add_argument("path", nargs="+", help="File pattern to modify")
558638
chown_parser.set_defaults(func=chown)
559639

640+
du_parser = subparsers.add_parser(
641+
"du",
642+
help="Show the amount of space used by the files that match the specified file pattern",
643+
description="Show the amount of space used by the files that match the specified file pattern",
644+
add_help=False,
645+
)
646+
du_parser.add_argument(
647+
"-s",
648+
"--summary",
649+
action="store_true",
650+
default=False,
651+
help="Show the total size of matching directories instead of traversing their children",
652+
)
653+
du_parser.add_argument(
654+
"-h",
655+
"--human-readable",
656+
action="store_true",
657+
default=False,
658+
help="Format the size in a human-readable fashion",
659+
)
660+
du_parser.add_argument(
661+
"-f",
662+
"--file-count",
663+
action="store_true",
664+
default=False,
665+
help="Include the file count",
666+
)
667+
du_parser.add_argument(
668+
"-v",
669+
"--verbose",
670+
action="store_true",
671+
default=False,
672+
help="Include a header line",
673+
)
674+
du_parser.add_argument("path", nargs="+")
675+
du_parser.set_defaults(func=du)
676+
560677
get_parser = subparsers.add_parser(
561678
"get",
562679
aliases=["copyToLocal"],
563680
help="Copy files to a local destination",
564681
description="""Copy files matching a pattern to a local destination.
565682
When copying multiple files, the destination must be a directory""",
683+
add_help=False,
566684
)
567685
get_parser.add_argument(
568686
"-p",
@@ -601,6 +719,7 @@ def main(in_args: Optional[Sequence[str]] = None):
601719
help="List contents that match the specified patterns",
602720
description="""List contents that match the specified patterns. For a directory, list its
603721
direct children.""",
722+
add_help=False,
604723
)
605724
ls_parser.add_argument(
606725
"-C",
@@ -610,7 +729,7 @@ def main(in_args: Optional[Sequence[str]] = None):
610729
help="Display the path of files and directories only.",
611730
)
612731
ls_parser.add_argument(
613-
"-H",
732+
"-h",
614733
"--human-readable",
615734
action="store_true",
616735
default=False,
@@ -658,6 +777,7 @@ def main(in_args: Optional[Sequence[str]] = None):
658777
"mkdir",
659778
help="Create a directory",
660779
description="Create a directory in a specified path",
780+
add_help=False,
661781
)
662782
mkdir_parser.add_argument(
663783
"path",
@@ -677,6 +797,7 @@ def main(in_args: Optional[Sequence[str]] = None):
677797
help="Move files or directories",
678798
description="""Move a file or directory from <src> to <dst>. Must be part of the same name service.
679799
If multiple src are provided, dst must be a directory""",
800+
add_help=False,
680801
)
681802
mv_parser.add_argument("src", nargs="+", help="Files or directories to move")
682803
mv_parser.add_argument("dst", help="Target destination of file or directory")
@@ -688,6 +809,7 @@ def main(in_args: Optional[Sequence[str]] = None):
688809
help="Copy local files to a remote destination",
689810
description="""Copy files matching a pattern to a remote destination.
690811
When copying multiple files, the destination must be a directory""",
812+
add_help=False,
691813
)
692814
put_parser.add_argument(
693815
"-p",
@@ -732,6 +854,7 @@ def main(in_args: Optional[Sequence[str]] = None):
732854
"rm",
733855
help="Delete files",
734856
description="Delete all files matching the specified file patterns",
857+
add_help=False,
735858
)
736859
rm_parser.add_argument(
737860
"-f",
@@ -766,6 +889,7 @@ def main(in_args: Optional[Sequence[str]] = None):
766889
"rmdir",
767890
help="Delete an empty directory",
768891
description="Delete an empty directory",
892+
add_help=False,
769893
)
770894
rmdir_parser.add_argument(
771895
"dir",
@@ -780,15 +904,17 @@ def main(in_args: Optional[Sequence[str]] = None):
780904
description="""Updates the access and modification times of the file specified by the <path> to
781905
the current time. If the file does not exist, then a zero length file is created
782906
at <path> with current time as the timestamp of that <path>.""",
907+
add_help=False,
783908
)
784-
touch_parser.add_argument(
909+
touch_parser_time_group = touch_parser.add_mutually_exclusive_group()
910+
touch_parser_time_group.add_argument(
785911
"-a",
786912
"--access-time",
787913
action="store_true",
788914
default=False,
789915
help="Only change the access time",
790916
)
791-
touch_parser.add_argument(
917+
touch_parser_time_group.add_argument(
792918
"-m",
793919
"--modification-time",
794920
action="store_true",
@@ -813,6 +939,23 @@ def main(in_args: Optional[Sequence[str]] = None):
813939
)
814940
touch_parser.set_defaults(func=touch)
815941

942+
def show_help(args: Namespace):
943+
subparsers.choices[args.cmd].print_help()
944+
945+
subparser_keys = list(subparsers.choices.keys())
946+
947+
help_parser = subparsers.add_parser(
948+
"help",
949+
help="Display usage of a subcommand",
950+
description="Display usage of a subcommand",
951+
)
952+
help_parser.add_argument(
953+
"cmd",
954+
choices=subparser_keys,
955+
help="Command to show usage for",
956+
)
957+
help_parser.set_defaults(func=show_help)
958+
816959
args = parser.parse_args(in_args)
817960
args.func(args)
818961

0 commit comments

Comments
 (0)