Skip to content

Commit 137e366

Browse files
committed
feat: Handle duplicate dir names better
If we run `dust /usr/*/Trash`, we see several 'Trash' directories in the output but do not know which user they belong to. With this fix, if we see duplicate names in a directory, we will display the parent directory name as well.
1 parent a962b80 commit 137e366

File tree

6 files changed

+83
-2
lines changed

6 files changed

+83
-2
lines changed

src/display.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ fn clean_indentation_string(s: &str) -> String {
273273
is
274274
}
275275

276-
fn get_printable_name<P: AsRef<Path>>(dir_name: &P, short_paths: bool) -> String {
276+
pub fn get_printable_name<P: AsRef<Path>>(dir_name: &P, short_paths: bool) -> String {
277277
let dir_name = dir_name.as_ref();
278278
let printable_name = {
279279
if short_paths {

src/filter.rs

+65-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
use stfu8::encode_u8;
2+
3+
use crate::display::get_printable_name;
14
use crate::display_node::DisplayNode;
25
use crate::node::FileTime;
36
use crate::node::Node;
@@ -14,6 +17,7 @@ pub struct AggregateData {
1417
pub number_of_lines: usize,
1518
pub depth: usize,
1619
pub using_a_filter: bool,
20+
pub short_paths: bool,
1721
}
1822

1923
pub fn get_biggest(
@@ -40,13 +44,17 @@ pub fn get_biggest(
4044
} else {
4145
top_level_nodes.iter().map(|node| node.size).sum()
4246
};
47+
48+
let nodes = handle_duplicate_top_level_names(top_level_nodes, display_data.short_paths);
49+
4350
root = Node {
4451
name: PathBuf::from("(total)"),
4552
size,
46-
children: top_level_nodes,
53+
children: nodes,
4754
inode_device: None,
4855
depth: 0,
4956
};
57+
5058
// Always include the base nodes if we add a 'parent' (total) node
5159
heap = always_add_children(&display_data, &root, heap);
5260
} else {
@@ -74,6 +82,8 @@ pub fn fill_remaining_lines<'a>(
7482
let line = heap.pop();
7583
match line {
7684
Some(line) => {
85+
// If we are not doing only_file OR if we are doing
86+
// only_file and it has no children (ie is a file not a dir)
7787
if !display_data.only_file || line.children.is_empty() {
7888
allowed_nodes.insert(line.name.as_path(), line);
7989
}
@@ -161,3 +171,57 @@ fn build_display_node(mut new_children: Vec<DisplayNode>, current: &Node) -> Dis
161171
children: new_children,
162172
}
163173
}
174+
175+
fn names_have_dup(top_level_nodes: &Vec<Node>) -> bool {
176+
let mut stored = HashSet::new();
177+
for node in top_level_nodes {
178+
let name = get_printable_name(&node.name, true);
179+
if stored.contains(&name) {
180+
return true;
181+
}
182+
stored.insert(name);
183+
}
184+
false
185+
}
186+
187+
fn handle_duplicate_top_level_names(top_level_nodes: Vec<Node>, short_paths: bool) -> Vec<Node> {
188+
// If we have top level names that are the same - we need to tweak them:
189+
if short_paths && names_have_dup(&top_level_nodes) {
190+
let mut new_top_nodes = top_level_nodes.clone();
191+
let mut dir_walk_up_count = 0;
192+
193+
while names_have_dup(&new_top_nodes) && dir_walk_up_count < 10 {
194+
dir_walk_up_count += 1;
195+
let mut newer = vec![];
196+
197+
for node in new_top_nodes.iter() {
198+
let mut folders = node.name.iter().rev();
199+
// Get parent folder (if second time round get grandparent and so on)
200+
for _ in 0..dir_walk_up_count {
201+
folders.next();
202+
}
203+
match folders.next() {
204+
// Add (parent_name) to path of Node
205+
Some(data) => {
206+
let parent = encode_u8(data.as_encoded_bytes());
207+
let current_node = node.name.display();
208+
let n = Node {
209+
name: PathBuf::from(format!("{current_node}({parent})")),
210+
size: node.size,
211+
children: node.children.clone(),
212+
inode_device: node.inode_device,
213+
depth: node.depth,
214+
};
215+
newer.push(n)
216+
}
217+
// Node does not have a parent
218+
None => newer.push(node.clone()),
219+
}
220+
}
221+
new_top_nodes = newer;
222+
}
223+
new_top_nodes
224+
} else {
225+
top_level_nodes
226+
}
227+
}

src/main.rs

+1
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,7 @@ fn main() {
293293
number_of_lines,
294294
depth,
295295
using_a_filter: !filter_regexs.is_empty() || !invert_filter_regexs.is_empty(),
296+
short_paths: !config.get_full_paths(&options),
296297
};
297298
get_biggest(top_level_nodes, agg_data, &by_filetime, keep_collapsed)
298299
}

tests/test_dir_matching/andy/dup_name/hello

Whitespace-only changes.

tests/test_dir_matching/dave/dup_name/hello

Whitespace-only changes.

tests/test_flags.rs

+16
Original file line numberDiff line numberDiff line change
@@ -261,3 +261,19 @@ pub fn test_collapse() {
261261
assert!(output.contains("many"));
262262
assert!(!output.contains("hello_file"));
263263
}
264+
265+
#[test]
266+
pub fn test_handle_duplicate_names() {
267+
// Check that even if we run on a multiple directories with the same name
268+
// we still show the distinct parent dir in the output
269+
let output = build_command(vec![
270+
"tests/test_dir_matching/dave/dup_name",
271+
"tests/test_dir_matching/andy/dup_name",
272+
"ci",
273+
]);
274+
assert!(output.contains("andy"));
275+
assert!(output.contains("dave"));
276+
assert!(output.contains("ci"));
277+
assert!(output.contains("dup_name"));
278+
assert!(!output.contains("test_dir_matching"));
279+
}

0 commit comments

Comments
 (0)