Add an `mkdir` subcommand to the hdfs_native CLI, reuse one Client per
connection URL through a cached factory, and route `mv` sources through a
placeholder glob-expansion helper.

The minidfs test fixture now sets HADOOP_CONF_DIR to target/test, and the CLI
tests pass unqualified paths (presumably relying on that configuration for the
default filesystem -- confirm against the MiniDFS setup).

Review note: `mkdir` hands only the path component of each argument to
`Client.mkdirs` (via `_path_for_url`), the same way `mv` resolves its sources,
so scheme-qualified URLs are not passed to the client verbatim.

diff --git a/python/conftest.py b/python/conftest.py
index c4d150c..bc2362f 100644
--- a/python/conftest.py
+++ b/python/conftest.py
@@ -1,3 +1,4 @@
+import os
 import subprocess
 import urllib
 import urllib.parse
@@ -32,6 +33,8 @@ def minidfs():
     output = child.stdout.readline().strip()
     assert output == "Ready!", output
 
+    os.environ["HADOOP_CONF_DIR"] = "target/test"
+
     yield "hdfs://127.0.0.1:9000"
 
     try:
diff --git a/python/hdfs_native/cli.py b/python/hdfs_native/cli.py
index 5c9a6d8..b931162 100644
--- a/python/hdfs_native/cli.py
+++ b/python/hdfs_native/cli.py
@@ -1,11 +1,18 @@
+import functools
 import os
 from argparse import ArgumentParser, Namespace
-from typing import Optional, Sequence
+from typing import List, Optional, Sequence
 from urllib.parse import urlparse
 
 from hdfs_native import Client
 
 
+@functools.cache
+def _get_client(connection_url: Optional[str] = None):
+    """Return the Client for ``connection_url``, building it once per distinct URL."""
+    return Client(connection_url)
+
+
 def _client_for_url(url: str) -> Client:
     parsed = urlparse(url)
 
@@ -13,13 +20,13 @@ def _client_for_url(url: str) -> Client:
         connection_url = f"{parsed.scheme}://{parsed.hostname}"
         if parsed.port:
             connection_url += f":{parsed.port}"
-        return Client(connection_url)
+        return _get_client(connection_url)
     elif parsed.hostname or parsed.port:
         raise ValueError(
             f"Cannot provide host or port without scheme: {parsed.hostname}"
         )
     else:
-        return Client()
+        return _get_client()
 
 
 def _verify_nameservices_match(url: str, *urls: str) -> None:
@@ -37,6 +44,21 @@ def _path_for_url(url: str) -> str:
     return urlparse(url).path
 
 
+def _glob_path(client: Client, glob: str) -> List[str]:
+    # TODO: Actually implement this, for now just pretend we have multiple results
+    return [glob]
+
+
+def mkdir(args: Namespace):
+    """Create each directory named in ``args.path``, honoring ``args.parent``."""
+    create_parent = args.parent
+
+    for url in args.path:
+        client = _client_for_url(url)
+        # Pass only the path component, mirroring how mv resolves its sources
+        client.mkdirs(_path_for_url(url), create_parent=create_parent)
+
+
 def mv(args: Namespace):
     _verify_nameservices_match(args.dst, *args.src)
 
@@ -49,12 +71,16 @@ def mv(args: Namespace):
     except FileNotFoundError:
         pass
 
-    if len(args.src) > 1 and not dst_isdir:
+    resolved_src = [
+        path for pattern in args.src for path in _glob_path(client, pattern)
+    ]
+
+    if len(resolved_src) > 1 and not dst_isdir:
         raise ValueError(
             "destination must be a directory if multiple sources are provided"
         )
 
-    for src in args.src:
+    for src in resolved_src:
         src_path = _path_for_url(src)
         if dst_isdir:
             target_path = os.path.join(dst_path, os.path.basename(src_path))
@@ -72,6 +98,24 @@ def main(in_args: Optional[Sequence[str]] = None):
 
     subparsers = parser.add_subparsers(title="Subcommands", required=True)
 
+    mkdir_parser = subparsers.add_parser(
+        "mkdir",
+        help="Create a directory",
+        description="Create a directory in a specified path",
+    )
+    mkdir_parser.add_argument(
+        "path",
+        nargs="+",
+        help="Path for the directory to create",
+    )
+    mkdir_parser.add_argument(
+        "-p",
+        "--parent",
+        action="store_true",
+        help="Create any missing parent directories",
+    )
+    mkdir_parser.set_defaults(func=mkdir)
+
     mv_parser = subparsers.add_parser(
         "mv",
         help="Move files or directories",
diff --git a/python/tests/test_cli.py b/python/tests/test_cli.py
index ee5c5fb..0047629 100644
--- a/python/tests/test_cli.py
+++ b/python/tests/test_cli.py
@@ -7,24 +7,33 @@
 def test_cli(minidfs: str):
     client = Client(minidfs)
 
-    def qualify(path: str) -> str:
-        return f"{minidfs}{path}"
+    # mkdir
+    cli_main(["mkdir", "/testdir"])
+    assert client.get_file_info("/testdir").isdir
+
+    with pytest.raises(RuntimeError):
+        cli_main(["mkdir", "/testdir/nested/dir"])
+
+    cli_main(["mkdir", "-p", "/testdir/nested/dir"])
+    assert client.get_file_info("/testdir/nested/dir").isdir
+
+    client.delete("/testdir", True)
 
     # mv
     client.create("/testfile").close()
     client.mkdirs("/testdir")
 
-    cli_main(["mv", qualify("/testfile"), qualify("/testfile2")])
+    cli_main(["mv", "/testfile", "/testfile2"])
 
     client.get_file_info("/testfile2")
 
     with pytest.raises(ValueError):
-        cli_main(["mv", qualify("/testfile2"), "hdfs://badnameservice/testfile"])
+        cli_main(["mv", "/testfile2", "hdfs://badnameservice/testfile"])
 
     with pytest.raises(RuntimeError):
-        cli_main(["mv", qualify("/testfile2"), qualify("/nonexistent/testfile")])
+        cli_main(["mv", "/testfile2", "/nonexistent/testfile"])
 
-    cli_main(["mv", qualify("/testfile2"), qualify("/testdir")])
+    cli_main(["mv", "/testfile2", "/testdir"])
 
     client.get_file_info("/testdir/testfile2")
 
@@ -32,11 +41,9 @@ def qualify(path: str) -> str:
     client.create("/testfile2").close()
 
     with pytest.raises(ValueError):
-        cli_main(
-            ["mv", qualify("/testfile1"), qualify("/testfile2"), qualify("/testfile3")]
-        )
+        cli_main(["mv", "/testfile1", "/testfile2", "/testfile3"])
 
-    cli_main(["mv", qualify("/testfile1"), qualify("/testfile2"), qualify("/testdir/")])
+    cli_main(["mv", "/testfile1", "/testfile2", "/testdir/"])
 
     client.get_file_info("/testdir/testfile1")
     client.get_file_info("/testdir/testfile2")