Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/origin/issue_420_richness_estim…
Browse files Browse the repository at this point in the history
…ators'

Updates issue #420

# Conflicts were:
#	lib/Biodiverse/GUI/BasedataImport.pm
#	lib/Biodiverse/GUI/Exclusions.pm
#	lib/Biodiverse/GUI/Export.pm
#	lib/Biodiverse/GUI/GUIManager.pm
#	lib/Biodiverse/GUI/MatrixImport.pm
#	lib/Biodiverse/GUI/Overlays.pm
#	lib/Biodiverse/GUI/ParametersTable.pm
#	lib/Biodiverse/GUI/PhylogenyImport.pm
#	lib/Biodiverse/GUI/Popup.pm
#	lib/Biodiverse/GUI/ProgressDialog.pm
#	lib/Biodiverse/GUI/Tabs/Clustering.pm
#	lib/Biodiverse/GUI/Tabs/Labels.pm
#	lib/Biodiverse/GUI/Tabs/Outputs.pm
#	lib/Biodiverse/GUI/Tabs/Randomise.pm
#	lib/Biodiverse/GUI/Tabs/Spatial.pm
#	lib/Biodiverse/GUI/Tabs/SpatialMatrix.pm
#	lib/Biodiverse/GUI/YesNoCancel.pm
#	lib/Biodiverse/Indices.pm
#	lib/Biodiverse/Metadata/Indices.pm
#	lib/Biodiverse/SpatialConditions.pm
#	t/lib/Biodiverse/TestHelpers.pm
  • Loading branch information
shawnlaffan committed Apr 30, 2016
2 parents 9673aeb + c68f533 commit 086cae6
Show file tree
Hide file tree
Showing 20 changed files with 2,789 additions and 34 deletions.
28 changes: 28 additions & 0 deletions bin/experiments/bench_explicit_return.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

use Benchmark qw {:all};
use 5.016;

# Flush STDOUT after every write so the report is not held in a buffer.
$| = 1;

# Candidate subs, keyed by the label shown in the report:
#   er - ends with an explicit bare return
#   nr - simply falls off the end of the sub
my %candidates = (
    er => sub {rand(); return},
    nr => sub {rand(); },
);

# A negative count asks Benchmark to run each candidate for at least that
# many CPU seconds instead of a fixed number of iterations.
cmpthese (-3, \%candidates);


__END__
# results:
50_000_000
Rate er nr
er 20517029/s -- -45%
nr 37202381/s 81% --
-3
Rate er nr
er 18205338/s -- -52%
nr 37690645/s 107% --
47 changes: 47 additions & 0 deletions bin/experiments/bench_get_params.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@

use Benchmark qw {:all};
use 5.016;
use Data::Dumper;

#use List::Util qw {:all};

# Stand-in object: a PARAMS hash holding the probed key plus ten filler
# key/value pairs taken from @vals.
my @vals = 1 .. 20;
my $self = {
    PARAMS => {
        label_hash1 => 5,
        @vals,
    },
};

#my $n = 1000;
#my @a1 = (0 .. $n);

# The parameter name every candidate is asked for.
my $param = 'label_hash1';

# One closure per accessor variant, keyed by the label shown in the report.
my %candidates = (
    old1 => sub {old1 ($self, $param)},
    new1 => sub {new1 ($self, $param)},
    new2 => sub {new2 ($self, $param)},
    new3 => sub {new3 ($self, $param)},
);

cmpthese (3000000, \%candidates);


#say Dumper $self;

# Baseline accessor: look up the parameter named by $_[1] in the PARAMS
# hash of the object in $_[0].  Arguments are read straight from @_.
# Returns nothing (bare return) when the key does not exist, otherwise
# the stored value.
sub old1 {
return if ! exists $_[0]->{PARAMS}{$_[1]};
return $_[0]->{PARAMS}{$_[1]};
}


# Variant 1: a single lookup with an explicit return and no exists() guard.
# The lexically scoped "no autovivification" pragma comes from the CPAN
# autovivification module; presumably it is here so the lookup cannot
# create a missing PARAMS hash - confirm against that module's docs.
# A missing key yields undef rather than the bare return used by old1.
sub new1 {
no autovivification;
return $_[0]->{PARAMS}{$_[1]};
}

# Variant 2: the same lookup as new1, but the value is returned implicitly
# as the last evaluated expression (no return keyword), so the two
# variants differ only in the explicit return.
sub new2 {
no autovivification;
$_[0]->{PARAMS}{$_[1]};
}

# Variant 3: exists() guard and lookup combined with a short-circuit &&,
# returned implicitly.  When the key is absent the result is the false
# value produced by exists(), not the bare return used by old1.
sub new3 {
exists $_[0]->{PARAMS}{$_[1]} && $_[0]->{PARAMS}{$_[1]};
}
105 changes: 105 additions & 0 deletions bin/experiments/bench_hash_key_set.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@

use Benchmark qw {:all};
use 5.016;
use Data::Dumper;

# Inner-hash keys: 16001 strings of the form "N:N".
my @keys = map "$_:$_", 0 .. 16000;

# Outer-hash keys: the first 258 of the inner keys.
my @keys_outer = @keys[0..257];

# Unbuffered STDOUT so the progress lines appear as soon as they are printed.
$| = 1;

# One closure per pre-sizing strategy, keyed by the label shown in the report.
my %candidates = (
    none       => sub {no_set_keys()},
    outer      => sub {set_keys_outer()},
    outer_init => sub {set_keys_outer_init()},
    inner      => sub {set_keys_inner()},
);

# Three timed runs of each candidate.
cmpthese (3, \%candidates);


# Baseline: fill a two-level hash (outer keys x inner keys) without
# pre-sizing either level.  Prints a progress line on every fifth call.
sub no_set_keys {
    state $calls = 0;
    $calls++;
    say 'nsk ' . $calls unless $calls % 5;

    my %counts;
    for my $outer (@keys_outer) {
        for my $inner (@keys) {
            $counts{$outer}{$inner}++;
        }
    }

}

# Fill the same two-level hash as the baseline, but pre-size the outer
# hash to the number of outer keys first.  Prints a progress line on
# every fifth call.
sub set_keys_outer {
    state $calls = 0;
    $calls++;
    say 'sko ' . $calls unless $calls % 5;

    my %counts;
    keys(%counts) = scalar @keys_outer;

    for my $outer (@keys_outer) {
        for my $inner (@keys) {
            $counts{$outer}{$inner}++;
        }
    }
}

# As set_keys_outer, but each inner hash is also created explicitly
# before it is filled instead of being autovivified by the first
# increment.  Prints a progress line on every fifth call.
sub set_keys_outer_init {
    state $calls = 0;
    $calls++;
    say 'skoi ' . $calls unless $calls % 5;

    my %counts;
    keys(%counts) = scalar @keys_outer;

    for my $outer (@keys_outer) {
        $counts{$outer} //= {};
        for my $inner (@keys) {
            $counts{$outer}{$inner}++;
        }
    }
}

# Pre-size both levels: the outer hash to the number of outer keys and
# every inner hash to the number of inner keys, then fill them.
# Prints a progress line on every fifth call.
sub set_keys_inner {
    state $calls = 0;
    $calls++;
    say 'ski ' . $calls unless $calls % 5;

    my %counts;
    keys(%counts) = scalar @keys_outer;

    for my $outer (@keys_outer) {
        $counts{$outer} //= {};
        keys(%{$counts{$outer}}) = scalar @keys;
        for my $inner (@keys) {
            $counts{$outer}{$inner}++;
        }
    }
}

__END__
The differences are all in the noise.
results on HPC with 5.20.0 using rand() as the keys:
s/iter outer_init outer none inner
outer_init 5.17 -- -0% -0% -2%
outer 5.17 0% -- -0% -2%
none 5.17 0% 0% -- -2%
inner 5.07 2% 2% 2% --
Small relative improvement when using "$_:$_" as the keys,
but the absolute values are also far less than for rand() keys:
s/iter none outer outer_init inner
none 1.55 -- -1% -2% -5%
outer 1.53 1% -- -0% -4%
outer_init 1.52 2% 0% -- -4%
inner 1.47 6% 4% 4% --
124 changes: 124 additions & 0 deletions bin/experiments/bench_hash_slice_vs_for_and_last.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Benchmark two approaches which could be used to get a total tree path length.
# It is really a comparison of a hash slice approach with a for-last approach.
use 5.016;

use Benchmark qw {:all};
use List::Util qw /pairs pairkeys pairvalues pairmap/;
use Test::More;

#srand (2000);

my $n = 200;      # depth of the paths
my $m = 80;       # number of paths
my %path_arrays;  # ordered key-value pairs
my %path_hashes;  # unordered key-value pairs
my %len_hash;     # every key seen in any path => its value

# Generate a set of paths.  Each one starts with a run of keys unique to
# that path (indices 0 .. $same_to, tagged with a trailing underscore)
# and ends with keys shared by all paths ($same_to+1 .. $n).
# Every value is 1.
for my $i (0 .. $m) {
    my $same_to = int (rand() * $n/4);

    # "+" and "." have equal precedence and associate to the left, so the
    # underscore is appended to the finished sum.
    my @pairs = (
        (map {(1+$m)*$i*$n+$_.'_', 1} (0 .. $same_to)),
        (map {$_, 1} ($same_to+1 .. $n)),
    );
    #say join ' ', @pairs;

    my %lookup = @pairs;

    $path_arrays{$i} = \@pairs;
    $path_hashes{$i} = \%lookup;

    @len_hash{keys %lookup} = values %lookup;
}


# Sanity check: all three implementations must build the same combined
# hash before their timings are worth comparing.
my $sliced = slice (\%path_hashes);
my $forled = for_last (\%path_arrays);
my $slice2 = slice_mk2 (\%path_hashes);

is_deeply ($forled, $sliced, 'slice results are the same');
is_deeply ($forled, $slice2, 'slice2 results are the same');


done_testing;

# The generation loop runs over 0 .. $m inclusive, so report the number of
# paths actually built rather than $m itself.
my $path_count = scalar keys %path_hashes;
say "Testing $path_count paths of depth $n";

# A negative count asks Benchmark to run each candidate for at least that
# many CPU seconds.
cmpthese (
    -2,
    {
        sliced => sub {slice (\%path_hashes)},
        slice2 => sub {slice_mk2 (\%path_hashes)},
        forled => sub {for_last (\%path_arrays)},
    }
);



# Merge a collection of path hashes into one hash using hash slices.
#
# Takes a hash ref whose values are hash refs (key => value pairs).
# Returns a new hash ref holding every pair from every path; when a key
# occurs in several paths, the value from the last path processed wins.
sub slice {
    my ($path_hashes) = @_;

    my %union;
    for my $path_hash (values %{$path_hashes}) {
        @union{keys %{$path_hash}} = values %{$path_hash};
    }

    return \%union;
}

# Variant of slice() that assigns the values at the end: first collect the
# union of all keys with placeholder (undef) values, then fill in every
# value with one slice from the file-level %len_hash lookup.
#
# Takes a hash ref whose values are hash refs; returns a new hash ref.
sub slice_mk2 {
    my ($path_hashes) = @_;

    my %union;
    for my $path_hash (values %{$path_hashes}) {
        @union{keys %{$path_hash}} = ();
    }

    my @all_keys = keys %union;
    @union{@all_keys} = @len_hash{@all_keys};

    return \%union;
}

# Merge a collection of ordered paths into one hash, stopping early on
# shared tails.
#
# Takes a hash ref whose values are array refs of ordered key/value pairs
# (key1, val1, key2, val2, ...).  Returns a new hash ref of key => value.
#
# Each path is walked from its start and abandoned at the first key that
# has already been collected, so the result is only complete when
# everything after an already-seen key has been seen too.
sub for_last {
    my $paths = shift;

    my ($first_list, @other_lists) = values %$paths;

    # Nothing to combine.
    return {} if !defined $first_list;

    # Seed the result with the whole of the first path in one slice.
    my %combined;
    @combined{pairkeys @$first_list} = pairvalues @$first_list;

    # The first path is already fully collected, so only walk the others.
    foreach my $list (@other_lists) {
        foreach my $pair (pairs @$list) {
            my ($key, $val) = @$pair;
            last if exists $combined{$key};
            $combined{$key} = $val;
        }
    }

    return \%combined;
}

1;

__END__
Sample results below.
Some runs have no meaningful difference from sliced to slice2,
but slice2 is always faster (even if only 2%).
Normally it is ~15% faster.
Testing 800 paths of depth 20
Rate forled sliced slice2
forled 59.2/s -- -69% -73%
sliced 192/s 225% -- -13%
slice2 222/s 275% 15% --
Testing 800 paths of depth 20
Rate forled sliced slice2
forled 49.5/s -- -74% -77%
sliced 194/s 292% -- -12%
slice2 219/s 342% 13% --
52 changes: 52 additions & 0 deletions bin/experiments/bench_or_vs_plus.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
use 5.010;
use Test::More;
use Benchmark qw {:all};
#use List::Util qw {:all};

# Operands read by the candidate subs defined further down this file.
# Those subs close over these file-scoped lexicals.
my $a = 1;
my $b = rand();
my $c = rand();

# Do not iterate with "for $a (...)": foreach only aliases its variable
# inside the loop, so the named subs would keep seeing the $a declared
# above (always 1) and every round would test and time the same operand.
# Assign each value to $a instead so the subs really see it.
for my $a_value (0, rand(), 1, 2000) {
    $a = $a_value;

    # Reduce each result to a plain boolean so the variants can be compared.
    my $result_or       = !!use_or();
    my $result_or2      = !!use_or2();
    my $result_plus     = !!use_plus();
    my $result_factored = !!factored();

    is ($result_or, $result_or2,      'use_or2()');
    is ($result_or, $result_plus,     'use_plus()');
    is ($result_or, $result_factored, 'factored()');

    say "$a: $result_or, $result_or2, $result_plus, $result_factored";

    cmpthese (
        5000000,
        {
            or       => sub {use_or ()},
            or2      => sub {use_or2 ()},
            plus     => sub {use_plus ()},
            factored => sub {factored ()},
        }
    );

}

done_testing();


# Candidate: ($a or $b) and ($a or $c), written with || and the
# low-precedence "and".  Because "and" binds more loosely than "=", $x
# receives only ($a || $b); the sub's return value is the result of the
# whole "and" expression.
sub use_or {
my $x = $a || $b and $a || $c;
}

# Candidate: the same condition as use_or, written with && and explicit
# parentheses, so the whole expression is assigned to $x and returned.
sub use_or2 {
my $x = ($a || $b) && ($a || $c);
}

# Candidate: the condition with the common operand factored out,
# ($a or $b) and ($a or $c)  ==  $a or ($b and $c).
sub factored {
my $x = $a || ($b && $c);
}

# Candidate: replace each || with numeric addition and test the sums for
# truth.  This has the same truth value as the || forms for the
# non-negative operands this script uses.  As in use_or, "and" binds more
# loosely than "=", so $x receives only ($a + $b).
sub use_plus {
my $x = $a + $b and $a + $c;
}

Loading

0 comments on commit 086cae6

Please sign in to comment.