From 17eadd441659bbc3e17600d5a0388d4079a3fc03 Mon Sep 17 00:00:00 2001
From: Adam Binford <adamq43@gmail.com>
Date: Sun, 17 Mar 2024 18:12:25 -0400
Subject: [PATCH 1/2] Remove need for hadoop binary

---
 README.md                                     |  2 +-
 .../minidfs/src/main/java/main/Main.java      | 13 +++++++
 crates/hdfs-native/src/minidfs.rs             |  2 +
 crates/hdfs-native/tests/common/mod.rs        | 37 -------------------
 crates/hdfs-native/tests/test_integration.rs  | 10 +++--
 crates/hdfs-native/tests/test_read.rs         |  9 +++--
 crates/hdfs-native/tests/test_write.rs        |  9 +++--
 7 files changed, 35 insertions(+), 47 deletions(-)

diff --git a/README.md b/README.md
index 8a172c2..f4d873b 100644
--- a/README.md
+++ b/README.md
@@ -64,7 +64,7 @@ cargo build --features token,kerberos
 An object_store implementation for HDFS is provided in the [hdfs-native-object-store](./crates/hdfs-native-object-store/) crate.
 
 ## Running tests
-The tests are mostly integration tests that utilize a small Java application in `rust/mindifs/` that runs a custom `MiniDFSCluster`. To run the tests, you need to have Java, Maven, Hadoop binaries, and Kerberos tools available and on your path. Any Java version between 8 and 17 should work.
+The tests are mostly integration tests that utilize a small Java application in `crates/hdfs-native/minidfs/` that runs a custom `MiniDFSCluster`. To run the tests, you need to have Java, Maven, and Kerberos tools available and on your path. Any Java version between 8 and 17 should work.
 
 ```bash
 cargo test -p hdfs-native --features token,kerberos,intergation-test
diff --git a/crates/hdfs-native/minidfs/src/main/java/main/Main.java b/crates/hdfs-native/minidfs/src/main/java/main/Main.java
index 410577d..af6336e 100644
--- a/crates/hdfs-native/minidfs/src/main/java/main/Main.java
+++ b/crates/hdfs-native/minidfs/src/main/java/main/Main.java
@@ -9,6 +9,8 @@
 import java.util.Set;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
@@ -34,6 +36,8 @@
 
 public class Main {
 
+    static int TEST_FILE_INTS = 64 * 1024 * 1024;
+
     public static void main(String args[]) throws Exception {
         Set<String> flags = new HashSet<>();
         for (String arg : args) {
@@ -162,6 +166,15 @@ public static void main(String args[]) throws Exception {
 
         hdfsConf.writeXml(new FileOutputStream("target/test/core-site.xml"));
 
+        if (flags.contains("testfile")) {
+            FileSystem fs = FileSystem.get(hdfsConf);
+            FSDataOutputStream os = fs.create(new Path("/testfile"));
+            for (int i=0; i < TEST_FILE_INTS; i++) {
+                os.writeInt(i);
+            }
+            os.close();
+        }
+
         System.out.println("Ready!");
         if (flags.contains("security")) {
             System.out.println(kdc.getKrb5conf().toPath().toString());
diff --git a/crates/hdfs-native/src/minidfs.rs b/crates/hdfs-native/src/minidfs.rs
index 8bc0389..9a98092 100644
--- a/crates/hdfs-native/src/minidfs.rs
+++ b/crates/hdfs-native/src/minidfs.rs
@@ -9,6 +9,7 @@ use which::which;
 
 #[derive(PartialEq, Eq, Hash, Debug)]
 pub enum DfsFeatures {
+    TESTFILE,
     SECURITY,
     TOKEN,
     PRIVACY,
@@ -21,6 +22,7 @@ pub enum DfsFeatures {
 impl DfsFeatures {
     pub fn as_str(&self) -> &str {
         match self {
+            DfsFeatures::TESTFILE => "testfile",
             DfsFeatures::EC => "ec",
             DfsFeatures::HA => "ha",
             DfsFeatures::VIEWFS => "viewfs",
diff --git a/crates/hdfs-native/tests/common/mod.rs b/crates/hdfs-native/tests/common/mod.rs
index 047df63..48eba64 100644
--- a/crates/hdfs-native/tests/common/mod.rs
+++ b/crates/hdfs-native/tests/common/mod.rs
@@ -1,45 +1,8 @@
 #![allow(dead_code)]
 use bytes::Buf;
-use std::collections::HashSet;
-use std::io::{BufWriter, Write};
-use std::process::Command;
-use tempfile::NamedTempFile;
-use which::which;
-
-use hdfs_native::minidfs::{DfsFeatures, MiniDfs};
 
 pub const TEST_FILE_INTS: usize = 64 * 1024 * 1024;
 
-pub fn setup(features: &HashSet<DfsFeatures>) -> MiniDfs {
-    let hadoop_exc = which("hadoop").expect("Failed to find hadoop executable");
-
-    let dfs = MiniDfs::with_features(features);
-
-    let mut file = NamedTempFile::new_in("target/test").unwrap();
-    {
-        let mut writer = BufWriter::new(file.as_file_mut());
-        for i in 0..TEST_FILE_INTS as i32 {
-            let bytes = i.to_be_bytes();
-            writer.write_all(&bytes).unwrap();
-        }
-        writer.flush().unwrap();
-    }
-
-    let status = Command::new(hadoop_exc)
-        .args([
-            "fs",
-            "-copyFromLocal",
-            "-f",
-            file.path().to_str().unwrap(),
-            &format!("{}/testfile", dfs.url),
-        ])
-        .status()
-        .unwrap();
-    assert!(status.success());
-
-    dfs
-}
-
 pub fn assert_bufs_equal(buf1: &impl Buf, buf2: &impl Buf, message: Option<String>) {
     assert_eq!(buf1.chunk().len(), buf2.chunk().len());
 
diff --git a/crates/hdfs-native/tests/test_integration.rs b/crates/hdfs-native/tests/test_integration.rs
index 8dadbbd..15986eb 100644
--- a/crates/hdfs-native/tests/test_integration.rs
+++ b/crates/hdfs-native/tests/test_integration.rs
@@ -3,9 +3,13 @@ mod common;
 
 #[cfg(feature = "integration-test")]
 mod test {
-    use crate::common::{assert_bufs_equal, setup, TEST_FILE_INTS};
+    use crate::common::{assert_bufs_equal, TEST_FILE_INTS};
     use bytes::{BufMut, BytesMut};
-    use hdfs_native::{client::FileStatus, minidfs::DfsFeatures, Client, Result, WriteOptions};
+    use hdfs_native::{
+        client::FileStatus,
+        minidfs::{DfsFeatures, MiniDfs},
+        Client, Result, WriteOptions,
+    };
     use serial_test::serial;
     use std::collections::HashSet;
 
@@ -104,7 +108,7 @@ mod test {
     pub async fn test_with_features(features: &HashSet<DfsFeatures>) -> Result<()> {
         let _ = env_logger::builder().is_test(true).try_init();
 
-        let _dfs = setup(features);
+        let _dfs = MiniDfs::with_features(features);
         let client = Client::default();
 
         test_file_info(&client).await?;
diff --git a/crates/hdfs-native/tests/test_read.rs b/crates/hdfs-native/tests/test_read.rs
index fad48da..e9852a6 100644
--- a/crates/hdfs-native/tests/test_read.rs
+++ b/crates/hdfs-native/tests/test_read.rs
@@ -3,9 +3,12 @@ mod common;
 
 #[cfg(feature = "integration-test")]
 mod test {
-    use crate::common::{setup, TEST_FILE_INTS};
+    use crate::common::TEST_FILE_INTS;
     use bytes::Buf;
-    use hdfs_native::{minidfs::DfsFeatures, Client, Result};
+    use hdfs_native::{
+        minidfs::{DfsFeatures, MiniDfs},
+        Client, Result,
+    };
     use serial_test::serial;
     use std::collections::HashSet;
 
@@ -14,7 +17,7 @@ mod test {
     async fn test_read() -> Result<()> {
         let _ = env_logger::builder().is_test(true).try_init();
 
-        let _dfs = setup(&HashSet::from([DfsFeatures::HA]));
+        let _dfs = MiniDfs::with_features(&HashSet::from([DfsFeatures::HA, DfsFeatures::TESTFILE]));
         let client = Client::default();
 
         // Read the whole file
diff --git a/crates/hdfs-native/tests/test_write.rs b/crates/hdfs-native/tests/test_write.rs
index 93f1267..3886e78 100644
--- a/crates/hdfs-native/tests/test_write.rs
+++ b/crates/hdfs-native/tests/test_write.rs
@@ -3,9 +3,12 @@ mod common;
 
 #[cfg(feature = "integration-test")]
 mod test {
-    use crate::common::{assert_bufs_equal, setup};
+    use crate::common::assert_bufs_equal;
     use bytes::{BufMut, BytesMut};
-    use hdfs_native::{minidfs::DfsFeatures, Client, Result, WriteOptions};
+    use hdfs_native::{
+        minidfs::{DfsFeatures, MiniDfs},
+        Client, Result, WriteOptions,
+    };
     use serial_test::serial;
     use std::collections::HashSet;
 
@@ -14,7 +17,7 @@ mod test {
     async fn test_write() {
         let _ = env_logger::builder().is_test(true).try_init();
 
-        let _dfs = setup(&HashSet::from([DfsFeatures::HA]));
+        let _dfs = MiniDfs::with_features(&HashSet::from([DfsFeatures::HA]));
         let client = Client::default();
 
         test_create(&client).await.unwrap();

From 641ecdf8da684d789c618894fbea740124ff562e Mon Sep 17 00:00:00 2001
From: Adam Binford <adamq43@gmail.com>
Date: Mon, 18 Mar 2024 19:18:14 -0400
Subject: [PATCH 2/2] Add testfile feature for normal integration tests

---
 crates/hdfs-native/tests/test_integration.rs | 31 ++++++++++----------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/crates/hdfs-native/tests/test_integration.rs b/crates/hdfs-native/tests/test_integration.rs
index 15986eb..0ec6247 100644
--- a/crates/hdfs-native/tests/test_integration.rs
+++ b/crates/hdfs-native/tests/test_integration.rs
@@ -16,14 +16,14 @@ mod test {
     #[tokio::test]
     #[serial]
     async fn test_basic() {
-        test_with_features(&HashSet::new()).await.unwrap();
+        test_with_features(HashSet::new()).await.unwrap();
     }
 
     #[tokio::test]
     #[serial]
     #[cfg(feature = "kerberos")]
     async fn test_security_kerberos() {
-        test_with_features(&HashSet::from([DfsFeatures::SECURITY]))
+        test_with_features(HashSet::from([DfsFeatures::SECURITY]))
             .await
             .unwrap();
     }
@@ -32,7 +32,7 @@ mod test {
     #[serial]
     #[cfg(feature = "token")]
     async fn test_security_token() {
-        test_with_features(&HashSet::from([DfsFeatures::SECURITY, DfsFeatures::TOKEN]))
+        test_with_features(HashSet::from([DfsFeatures::SECURITY, DfsFeatures::TOKEN]))
             .await
             .unwrap();
     }
@@ -42,7 +42,7 @@ mod test {
     #[serial]
     #[cfg(feature = "token")]
     async fn test_privacy_token() {
-        test_with_features(&HashSet::from([
+        test_with_features(HashSet::from([
             DfsFeatures::SECURITY,
             DfsFeatures::TOKEN,
             DfsFeatures::PRIVACY,
@@ -55,18 +55,15 @@ mod test {
     #[serial]
     #[cfg(feature = "kerberos")]
     async fn test_privacy_kerberos() {
-        test_with_features(&HashSet::from([
-            DfsFeatures::SECURITY,
-            DfsFeatures::PRIVACY,
-        ]))
-        .await
-        .unwrap();
+        test_with_features(HashSet::from([DfsFeatures::SECURITY, DfsFeatures::PRIVACY]))
+            .await
+            .unwrap();
     }
 
     #[tokio::test]
     #[serial]
     async fn test_basic_ha() {
-        test_with_features(&HashSet::from([DfsFeatures::HA]))
+        test_with_features(HashSet::from([DfsFeatures::HA]))
             .await
             .unwrap();
     }
@@ -75,7 +72,7 @@ mod test {
     #[serial]
     #[cfg(feature = "kerberos")]
     async fn test_security_privacy_ha() {
-        test_with_features(&HashSet::from([
+        test_with_features(HashSet::from([
             DfsFeatures::SECURITY,
             DfsFeatures::PRIVACY,
             DfsFeatures::HA,
@@ -88,7 +85,7 @@ mod test {
     #[serial]
     #[cfg(feature = "token")]
     async fn test_security_token_ha() {
-        test_with_features(&HashSet::from([
+        test_with_features(HashSet::from([
             DfsFeatures::SECURITY,
             DfsFeatures::TOKEN,
             DfsFeatures::HA,
@@ -100,15 +97,17 @@ mod test {
     #[tokio::test]
     #[serial]
     async fn test_rbf() {
-        test_with_features(&HashSet::from([DfsFeatures::RBF]))
+        test_with_features(HashSet::from([DfsFeatures::RBF]))
             .await
             .unwrap();
     }
 
-    pub async fn test_with_features(features: &HashSet<DfsFeatures>) -> Result<()> {
+    pub async fn test_with_features(mut features: HashSet<DfsFeatures>) -> Result<()> {
         let _ = env_logger::builder().is_test(true).try_init();
 
-        let _dfs = MiniDfs::with_features(features);
+        features.insert(DfsFeatures::TESTFILE);
+
+        let _dfs = MiniDfs::with_features(&features);
         let client = Client::default();
 
         test_file_info(&client).await?;