|
6 | 6 | import stat
|
7 | 7 | import sys
|
8 | 8 | from argparse import ArgumentParser, Namespace
|
| 9 | +from collections import defaultdict |
9 | 10 | from concurrent.futures import ThreadPoolExecutor, as_completed
|
10 |
| -from typing import List, Optional, Sequence, Tuple |
| 11 | +from datetime import datetime |
| 12 | +from typing import Dict, List, Optional, Sequence, Tuple, Union |
11 | 13 | from urllib.parse import urlparse
|
12 | 14 |
|
13 | 15 | from hdfs_native import Client
|
14 |
| -from hdfs_native._internal import WriteOptions |
| 16 | +from hdfs_native._internal import FileStatus, WriteOptions |
| 17 | + |
| 18 | +__all__ = ["main"] |
15 | 19 |
|
16 | 20 |
|
@functools.cache
def _get_client(connection_url: Optional[str] = None):
    """Build a ``Client`` for *connection_url*, memoized per distinct argument.

    Because of ``functools.cache``, repeated calls with the same URL (or with
    no URL at all) get back the ``Client`` created by the first such call.
    """
    client = Client(connection_url)
    return client
|
20 | 24 |
|
21 | 25 |
|
| 26 | +def _prefix_for_url(url: str) -> str: |
| 27 | + parsed = urlparse(url) |
| 28 | + |
| 29 | + if parsed.scheme: |
| 30 | + prefix = f"{parsed.scheme}://{parsed.hostname}" |
| 31 | + if parsed.port: |
| 32 | + prefix += f":{parsed.port}" |
| 33 | + return prefix |
| 34 | + |
| 35 | + return "" |
| 36 | + |
| 37 | + |
22 | 38 | def _client_for_url(url: str) -> Client:
|
23 | 39 | parsed = urlparse(url)
|
24 | 40 |
|
@@ -217,6 +233,125 @@ def get(args: Namespace):
|
217 | 233 | f.result()
|
218 | 234 |
|
219 | 235 |
|
def ls(args: Namespace):
    """Print a listing for every path pattern in ``args.path``.

    Each URL is expanded with ``_glob_path``. A match that is a directory has
    its contents listed (recursively when ``args.recursive`` is set), preceded
    by a ``Found N items`` line unless ``args.path_only`` is set. A match that
    is a file is printed as a single entry.

    Args:
        args: Parsed command-line options. The attributes read here are
            ``path``, ``path_only``, ``human_readable``, ``recursive``,
            ``sort_time``, ``sort_size``, ``reverse`` and ``access_time``.
    """

    def human_size(num: int) -> str:
        """Render a byte count with a 1024-based unit suffix.

        Values below 1024 are returned as plain digits with no suffix.
        """
        if num < 1024:
            return str(num)

        adjusted = num / 1024.0
        for unit in ("K", "M", "G", "T", "P", "E", "Z"):
            if abs(adjusted) < 1024.0:
                return f"{adjusted:.1f} {unit}"
            adjusted /= 1024.0
        return f"{adjusted:.1f} Y"

    def parse_status(status: FileStatus, prefix: str) -> Dict[str, Union[int, str]]:
        """Convert one file status into the raw and display fields of a row.

        ``length`` and ``time`` are kept as numbers for sorting; the
        ``*_formatted`` variants and the other fields are display strings.
        """
        file_time = status.access_time if args.access_time else status.modification_time

        # The timestamp is divided by 1000 before conversion, i.e. it is
        # treated as milliseconds; the result is rendered in local time.
        file_time_string = datetime.fromtimestamp(file_time / 1000).strftime(
            r"%Y-%m-%d %H:%M"
        )

        # Add the file-type bits so stat.filemode() emits the leading "d" or
        # "-" in front of the permission characters.
        permission = status.permission
        permission |= stat.S_IFDIR if status.isdir else stat.S_IFREG
        mode = stat.filemode(permission)

        if args.human_readable:
            length_string = human_size(status.length)
        else:
            length_string = str(status.length)

        return {
            "mode": mode,
            # A falsy replication value is shown as "-".
            "replication": str(status.replication) if status.replication else "-",
            "owner": status.owner,
            "group": status.group,
            "length": status.length,
            "length_formatted": length_string,
            "time": file_time,
            "time_formatted": file_time_string,
            "path": prefix + status.path,
        }

    def get_widths(parsed: List[Dict[str, Union[int, str]]]) -> Dict[str, int]:
        """Return, per string-valued field, the longest value among *parsed*."""
        widths: Dict[str, int] = defaultdict(int)

        for entry in parsed:
            for key, value in entry.items():
                if isinstance(value, str):
                    widths[key] = max(widths[key], len(value))

        return widths

    def print_files(
        parsed: List[Dict[str, Union[int, str]]],
        widths: Optional[Dict[str, int]] = None,
    ):
        """Sort *parsed* as requested and print one line per entry.

        When *widths* is given, columns are padded to those widths so they
        line up; otherwise every field is printed at its natural width.
        """
        # Time takes precedence over size when both sort flags are given.
        # The default order is descending (newest / largest first);
        # ``args.reverse`` flips it.
        if args.sort_time:
            parsed = sorted(parsed, key=lambda x: x["time"], reverse=not args.reverse)
        elif args.sort_size:
            parsed = sorted(parsed, key=lambda x: x["length"], reverse=not args.reverse)

        def format_field(
            entry: Dict[str, Union[int, str]],
            field: str,
            right_align: bool = False,
        ) -> str:
            value = str(entry[field])

            width = len(value)
            if widths and field in widths:
                width = widths[field]

            if right_align:
                return f"{value:>{width}}"
            return f"{value:{width}}"

        for entry in parsed:
            if args.path_only:
                print(entry["path"])
            else:
                formatted_fields = [
                    format_field(entry, "mode"),
                    format_field(entry, "replication"),
                    format_field(entry, "owner"),
                    format_field(entry, "group"),
                    format_field(entry, "length_formatted", True),
                    format_field(entry, "time_formatted"),
                    # The path is the last column: padding it would only add
                    # trailing whitespace to every shorter line.
                    str(entry["path"]),
                ]
                print(" ".join(formatted_fields))

    for url in args.path:
        client = _client_for_url(url)
        # The prefix depends only on the URL, so compute it once per URL
        # rather than once per glob match.
        prefix = _prefix_for_url(url)

        for path in _glob_path(client, _path_for_url(url)):
            status = client.get_file_info(path)

            if status.isdir:
                parsed = [
                    parse_status(child, prefix)
                    for child in client.list_status(path, args.recursive)
                ]

                if not args.path_only:
                    print(f"Found {len(parsed)} items")

                print_files(parsed, get_widths(parsed))
            else:
                print_files([parse_status(status, prefix)])
| 353 | + |
| 354 | + |
220 | 355 | def mkdir(args: Namespace):
|
221 | 356 | create_parent = args.parent
|
222 | 357 |
|
@@ -427,6 +562,64 @@ def main(in_args: Optional[Sequence[str]] = None):
|
427 | 562 | )
|
428 | 563 | get_parser.set_defaults(func=get)
|
429 | 564 |
|
| 565 | + ls_parser = subparsers.add_parser( |
| 566 | + "ls", |
| 567 | + help="List contents that match the specified patterns", |
| 568 | + description="""List contents that match the specified patterns. For a directory, list its |
| 569 | + direct children.""", |
| 570 | + ) |
| 571 | + ls_parser.add_argument( |
| 572 | + "-C", |
| 573 | + "--path-only", |
| 574 | + action="store_true", |
| 575 | + default=False, |
| 576 | + help="Display the path of files and directories only.", |
| 577 | + ) |
| 578 | + ls_parser.add_argument( |
| 579 | + "-H", |
| 580 | + "--human-readable", |
| 581 | + action="store_true", |
| 582 | + default=False, |
| 583 | + help="Formats the sizes of files in a human-readable fashion rather than a number of bytes", |
| 584 | + ) |
| 585 | + ls_parser.add_argument( |
| 586 | + "-R", |
| 587 | + "--recursive", |
| 588 | + action="store_true", |
| 589 | + default=False, |
| 590 | + help="Recursively list the contents of directories", |
| 591 | + ) |
| 592 | + ls_parser.add_argument( |
| 593 | + "-t", |
| 594 | + "--sort-time", |
| 595 | + action="store_true", |
| 596 | + default=False, |
| 597 | + help="Sort files by modification time (most recent first)", |
| 598 | + ) |
| 599 | + ls_parser.add_argument( |
| 600 | + "-S", |
| 601 | + "--sort-size", |
| 602 | + action="store_true", |
| 603 | + default=False, |
| 604 | + help="Sort files by size (largest first)", |
| 605 | + ) |
| 606 | + ls_parser.add_argument( |
| 607 | + "-r", |
| 608 | + "--reverse", |
| 609 | + action="store_true", |
| 610 | + default=False, |
| 611 | + help="Reverse the order of the sort", |
| 612 | + ) |
| 613 | + ls_parser.add_argument( |
| 614 | + "-u", |
| 615 | + "--access-time", |
| 616 | + action="store_true", |
| 617 | + default=False, |
| 618 | + help="Use the last access time instead of modification time for display and sorting", |
| 619 | + ) |
| 620 | + ls_parser.add_argument("path", nargs="+", help="Path to display contents of") |
| 621 | + ls_parser.set_defaults(func=ls) |
| 622 | + |
430 | 623 | mkdir_parser = subparsers.add_parser(
|
431 | 624 | "mkdir",
|
432 | 625 | help="Create a directory",
|
|
0 commit comments