Merge remote-tracking branch 'remotes/origin/issue_420_richness_estim…

…ators' Updates issue #420 # Conflicts were: # lib/Biodiverse/GUI/BasedataImport.pm # lib/Biodiverse/GUI/Exclusions.pm # lib/Biodiverse/GUI/Export.pm # lib/Biodiverse/GUI/GUIManager.pm # lib/Biodiverse/GUI/MatrixImport.pm # lib/Biodiverse/GUI/Overlays.pm # lib/Biodiverse/GUI/ParametersTable.pm # lib/Biodiverse/GUI/PhylogenyImport.pm # lib/Biodiverse/GUI/Popup.pm # lib/Biodiverse/GUI/ProgressDialog.pm # lib/Biodiverse/GUI/Tabs/Clustering.pm # lib/Biodiverse/GUI/Tabs/Labels.pm # lib/Biodiverse/GUI/Tabs/Outputs.pm # lib/Biodiverse/GUI/Tabs/Randomise.pm # lib/Biodiverse/GUI/Tabs/Spatial.pm # lib/Biodiverse/GUI/Tabs/SpatialMatrix.pm # lib/Biodiverse/GUI/YesNoCancel.pm # lib/Biodiverse/Indices.pm # lib/Biodiverse/Metadata/Indices.pm # lib/Biodiverse/SpatialConditions.pm # t/lib/Biodiverse/TestHelpers.pm
shawnlaffan · Apr 30, 2016 · 086cae6 · 086cae6
2 parents 9673aeb + c68f533
commit 086cae6
Show file tree

Hide file tree

Showing 20 changed files with 2,789 additions and 34 deletions.
diff --git a/bin/experiments/bench_explicit_return.pl b/bin/experiments/bench_explicit_return.pl
@@ -0,0 +1,28 @@
+
+use Benchmark qw {:all};
+use 5.016;
+
+$| = 1;
+
+
+cmpthese (
+    -3,
+    {
+        er => sub {rand(); return},
+        nr => sub {rand(); },
+    }
+);
+
+
+__END__
+
+#  results:
+50_000_000
+        Rate   er   nr
+er 20517029/s   -- -45%
+nr 37202381/s  81%   --
+
+-3
+         Rate   er   nr
+er 18205338/s   -- -52%
+nr 37690645/s 107%   --
diff --git a/bin/experiments/bench_get_params.pl b/bin/experiments/bench_get_params.pl
@@ -0,0 +1,47 @@
+
+use Benchmark qw {:all};
+use 5.016;
+use Data::Dumper;
+
+#use List::Util qw {:all};
+
+my @vals = 1 .. 20;
+my $self = {PARAMS => {label_hash1 => 5, @vals}};
+
+#my $n = 1000;
+#my @a1 = (0 .. $n);
+
+my $param = 'label_hash1';
+
+cmpthese (
+    3000000,
+    {
+        old1 => sub {old1 ($self, $param)},
+        new1 => sub {new1 ($self, $param)},
+        new2 => sub {new2 ($self, $param)},
+        new3 => sub {new3 ($self, $param)},
+    }
+);
+
+
+#say Dumper $self;
+
+sub old1 {  #  baseline: explicit exists() guard followed by an explicit return; args are ($self, $param)
+    return if ! exists $_[0]->{PARAMS}{$_[1]};  #  bare return when the key is absent
+    return $_[0]->{PARAMS}{$_[1]};
+}
+
+
+sub new1 {  #  variant: single lookup with an explicit return and no exists() guard
+    no autovivification;  #  lexically scoped pragma; requires the autovivification module to be installed
+    return $_[0]->{PARAMS}{$_[1]};
+}
+
+sub new2 {  #  variant: same lookup as new1 but relying on the implicit return of the last expression
+    no autovivification;  #  lexically scoped pragma; requires the autovivification module to be installed
+    $_[0]->{PARAMS}{$_[1]};
+}
+
+sub new3 {  #  variant: exists() guard and lookup combined in one implicitly returned expression
+    exists $_[0]->{PARAMS}{$_[1]} && $_[0]->{PARAMS}{$_[1]};  #  yields the false result of exists() when the key is absent
+}
diff --git a/bin/experiments/bench_hash_key_set.pl b/bin/experiments/bench_hash_key_set.pl
@@ -0,0 +1,105 @@
+
+use Benchmark qw {:all};
+use 5.016;
+use Data::Dumper;
+
+my @keys;
+for (0..16000) {
+    push @keys, "$_:$_";
+}
+
+my @keys_outer = @keys[0..257];
+
+
+$| = 1;
+
+cmpthese (
+    3,
+    {
+        none       => sub {no_set_keys()},
+        outer      => sub {set_keys_outer()},
+        outer_init => sub {set_keys_outer_init()},
+        inner      => sub {set_keys_inner()},
+    }
+);
+
+
+sub no_set_keys {  #  baseline: fill the two-level hash without pre-sizing either level
+    state $run_count;  #  persists across calls
+    $run_count ++;
+    say 'nsk ' . $run_count if !($run_count % 5);  #  progress marker on every 5th call
+    my %hash;
+    foreach my $key1 (@keys_outer) {  #  @keys_outer and @keys are file-level arrays
+        foreach my $key2 (@keys) {
+            $hash{$key1}{$key2}++;  #  inner hash is autovivified on first use
+        }
+    }
+
+}
+
+sub set_keys_outer {  #  variant: pre-size the outer hash only
+    state $run_count;  #  persists across calls
+    $run_count ++;
+    say 'sko ' . $run_count if !($run_count % 5);  #  progress marker on every 5th call
+
+    my %hash;
+    keys %hash = scalar @keys_outer;  #  lvalue keys: pre-allocate buckets for the outer hash
+    foreach my $key1 (@keys_outer) {  #  @keys_outer and @keys are file-level arrays
+        foreach my $key2 (@keys) {
+            $hash{$key1}{$key2}++;  #  inner hash is autovivified on first use
+        }
+    }
+}
+
+sub set_keys_outer_init {  #  variant: pre-size the outer hash and create each inner hash explicitly
+    state $run_count;  #  persists across calls
+    $run_count ++;
+    say 'skoi ' . $run_count if !($run_count % 5);  #  progress marker on every 5th call
+
+    my %hash;
+    keys %hash = scalar @keys_outer;  #  lvalue keys: pre-allocate buckets for the outer hash
+    foreach my $key1 (@keys_outer) {  #  @keys_outer and @keys are file-level arrays
+        $hash{$key1} //= {};  #  explicit initialisation instead of autovivification
+        foreach my $key2 (@keys) {
+            $hash{$key1}{$key2}++;
+        }
+    }
+}
+
+sub set_keys_inner {  #  variant: pre-size both the outer hash and every inner hash
+    state $run_count;  #  persists across calls
+    $run_count ++;
+    say 'ski ' . $run_count if !($run_count % 5);  #  progress marker on every 5th call
+
+    my %hash;
+    keys %hash = scalar @keys_outer;  #  lvalue keys: pre-allocate buckets for the outer hash
+    foreach my $key1 (@keys_outer) {  #  @keys_outer and @keys are file-level arrays
+        $hash{$key1} //= {};  #  explicit initialisation instead of autovivification
+        keys %{$hash{$key1}} = scalar @keys;  #  pre-allocate buckets for this inner hash
+        foreach my $key2 (@keys) {
+            $hash{$key1}{$key2}++;
+        }
+    }
+}
+
+__END__
+
+The differences are all in the noise.
+
+results on HPC with 5.20.0 using rand() as the keys:
+
+           s/iter outer_init      outer       none      inner
+outer_init   5.17         --        -0%        -0%        -2%
+outer        5.17         0%         --        -0%        -2%
+none         5.17         0%         0%         --        -2%
+inner        5.07         2%         2%         2%         --
+
+
+Small relative improvement when using "$_:$_" as the keys,
+but the absolute values are also far less than for rand() keys:
+
+           s/iter       none      outer outer_init      inner
+none         1.55         --        -1%        -2%        -5%
+outer        1.53         1%         --        -0%        -4%
+outer_init   1.52         2%         0%         --        -4%
+inner        1.47         6%         4%         4%         --
diff --git a/bin/experiments/bench_hash_slice_vs_for_and_last.pl b/bin/experiments/bench_hash_slice_vs_for_and_last.pl
@@ -0,0 +1,124 @@
+#  Benchmark two approaches which could be used to get a total tree path length
+#  It is actually to do with a hash slice vs for-last approach
+use 5.016;
+
+use Benchmark qw {:all};
+use List::Util qw /pairs pairkeys pairvalues pairmap/;
+use Test::More;
+
+#srand (2000);
+
+my $n = 200; #  depth of the paths
+my $m = 80;  #  number of paths
+my %path_arrays;  #  ordered key-value pairs
+my %path_hashes;  #  unordered key-value pairs
+my %len_hash;
+
+#  generate a set of paths 
+foreach my $i (0 .. $m) {
+    my $same_to = int (rand() * $n/4);
+    my @a;
+    @a = map {(1+$m)*$i*$n+$_.'_', 1} (0 .. $same_to);
+    push @a, map {$_, 1} ($same_to+1 .. $n);
+    $path_arrays{$i} = \@a;
+    #say join ' ', @a;
+    my %hash = @a;
+    $path_hashes{$i} = \%hash;
+
+    @len_hash{keys %hash} = values %hash;
+}
+
+
+my $sliced = slice (\%path_hashes);
+my $forled = for_last (\%path_arrays);
+my $slice2 = slice_mk2 (\%path_hashes);
+
+is_deeply ($forled, $sliced, 'slice results are the same');
+is_deeply ($forled, $slice2, 'slice2 results are the same');
+
+
+done_testing;
+
+say "Testing $m paths of depth $n";
+cmpthese (
+    -2,
+    {
+        sliced => sub {slice (\%path_hashes)},
+        slice2 => sub {slice_mk2 (\%path_hashes)},
+        forled => sub {for_last (\%path_arrays)},
+    }
+);
+
+
+
+sub slice {  #  merge every path hash into one hash using hash-slice assignment
+    my $paths = shift;  #  hash ref whose values are hash refs of key/value pairs
+
+    my %combined;
+
+    foreach my $path (values %$paths) {
+        @combined{keys %$path} = values %$path;  #  keys already present are simply overwritten
+    }
+
+    return \%combined;  #  returns a hash ref
+}
+
+#  assign values at end
+sub slice_mk2 {  #  collect the keys first, then fill in all values with one slice assignment
+    my $paths = shift;  #  hash ref whose values are hash refs of key/value pairs
+
+    my %combined;
+
+    foreach my $path (values %$paths) {
+        @combined{keys %$path} = undef;  #  only creates the keys; every value is left undef here
+    }
+
+    @combined{keys %combined} = @len_hash{keys %combined};  #  values come from the file-level %len_hash, not from $paths
+
+    return \%combined;  #  returns a hash ref
+}
+
+sub for_last {  #  merge ordered key/value lists, abandoning each list at the first key already seen
+    my $paths = shift;  #  hash ref whose values are array refs of flat key/value pairs
+
+
+    my @keys = keys %$paths;
+    my $first = shift @keys;  #  whichever key the hash happens to return first
+    my $first_list = $paths->{$first};
+
+    #  initialise
+    my %combined;
+    @combined{pairkeys @$first_list} = pairvalues @$first_list;  #  seed with the whole of the first list
+
+    foreach my $list (values %$paths) {  #  includes the first list again; it stops at its first pair
+        foreach my $pair (pairs @$list) {
+            my ($key, $val) = @$pair;
+            last if exists $combined{$key};  #  assumes the rest of the list is already in %combined (lists share their tails) - confirm for other inputs
+            $combined{$key} = $val;
+        }
+    }
+
+    return \%combined;  #  returns a hash ref
+}
+
+1;
+
+__END__
+
+Sample results below.
+Some runs have no meaningful difference from sliced to slice2,
+but slice2 is always faster (even if only 2%).
+Normally it is ~15% faster.  
+
+
+Testing 800 paths of depth 20
+         Rate forled sliced slice2
+forled 59.2/s     --   -69%   -73%
+sliced  192/s   225%     --   -13%
+slice2  222/s   275%    15%     --
+
+Testing 800 paths of depth 20
+         Rate forled sliced slice2
+forled 49.5/s     --   -74%   -77%
+sliced  194/s   292%     --   -12%
+slice2  219/s   342%    13%     --
diff --git a/bin/experiments/bench_or_vs_plus.pl b/bin/experiments/bench_or_vs_plus.pl
@@ -0,0 +1,52 @@
+use 5.010;
+use Test::More;
+use Benchmark qw {:all};
+#use List::Util qw {:all};
+
+my $a = 1;
+my $b = rand();
+my $c = rand();
+
+for my $val (0, rand(), 1, 2000) {
+    #  Looping over $a directly only aliases it inside this block, so the named
+    #  subs (which captured the file-level $a) always saw 1.  Assign it instead.
+    $a = $val;
+    my $result_or   = !!use_or();
+    my $result_or2  = !!use_or2();
+    my $result_plus = !!use_plus();
+    my $result_factored = !!factored();
+    is ($result_or, $result_or2,  'use_or2()');
+    is ($result_or, $result_plus, 'use_plus()');
+    is ($result_or, $result_factored, 'factored()');
+    say "$a: $result_or, $result_or2, $result_plus, $result_factored";
+
+    cmpthese (
+        5000000,
+        {
+            or   => sub {use_or ()},
+            or2  => sub {use_or2 ()},
+            plus => sub {use_plus ()},
+            factored => sub {factored ()},
+        }
+    );
+}
+
+done_testing();
+
+
+sub use_or {  #  variant: two || terms joined by low-precedence 'and'; reads the file-level $a, $b, $c
+    my $x = $a || $b and $a || $c;  #  'and' binds looser than '=': parses as (my $x = $a || $b) and ($a || $c)
+}
+
+sub use_or2 {  #  variant: parenthesised || terms joined by &&; reads the file-level $a, $b, $c
+    my $x = ($a || $b) && ($a || $c);  #  the whole expression is assigned to $x
+}
+
+sub factored {  #  variant: the common $a term factored out; reads the file-level $a, $b, $c
+    my $x = $a || ($b && $c);  #  logically equivalent to ($a || $b) && ($a || $c)
+}
+
+sub use_plus {  #  variant: numeric + in place of ||; reads the file-level $a, $b, $c
+    my $x = $a + $b and $a + $c;  #  'and' binds looser than '=': parses as (my $x = $a + $b) and ($a + $c)
+}
+