Skip to content

Commit

Permalink
Add more tests for Chao1 calcs.
Browse files Browse the repository at this point in the history
As with the Chao2 calcs in 0e2fb57 we now check for cases (F1==0 && F2>0), (F1 > 0 && F2==0) and (F1==0 && F2==0).

Flushes out some errors with the variance calcs in the process.

Updates issue #420
  • Loading branch information
shawnlaffan committed Apr 25, 2016
1 parent 0e2fb57 commit d8c9731
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 10 deletions.
30 changes: 20 additions & 10 deletions lib/Biodiverse/Indices/RichnessEstimation.pm
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
package Biodiverse::Indices::RichnessEstimation;

use 5.016;

use strict;
use warnings;
use Carp;
Expand All @@ -9,6 +12,10 @@ our $VERSION = '1.99_001';

my $metadata_class = 'Biodiverse::Metadata::Indices';

use Readonly;

#Readonly my $z => 1.959964; # currently hard coded for 0.95
Readonly my $z => 1.96; # should increase precision at some point

sub get_metadata_calc_chao1 {
my %metadata = (
Expand Down Expand Up @@ -102,7 +109,7 @@ sub calc_chao1 {
else {
# if only one singleton and no doubletons then the estimate stays zero
$variance_uses_eq8 = 1;
$chao_formula = undef;
$chao_formula = 0;
}
}

Expand All @@ -112,22 +119,25 @@ sub calc_chao1 {

if ($variance_uses_eq7) {
$variance = $cn1 * ($f1 * ($f1 - 1)) / 2
+ $cn2 * ($f1 * (2 * $f1 - 1)) ** 2
- $cn2 * $f1 ** 4 / (4 * $chao);
$chao_formula = undef;
+ $cn2 * $f1 * (2 * $f1 - 1)**2 / 4
- $cn2 * $f1 ** 4 / 4 / $chao;
#$chao_formula = 0;
}
elsif ($variance_uses_eq8) {
my %sums;
foreach my $freq (values %$label_hash) {
$sums{$freq} ++;
}
my ($part1, $part2);
while (my ($i, $f) = each %sums) {
$part1 += $f * (exp -$i - exp (-2 * $i));
#while (my ($i, $f) = each %sums) {
foreach my $i (sort {$a <=> $b} keys %sums) {
my $f = $sums{$i};
say "$i $f";
$part1 += $f * (exp (-$i) - exp (-2 * $i));
$part2 += $i * exp (-$i) * $f;
}
$variance = $part1 - $part2 ** 2 / $n;
$chao_formula = undef;
$chao_formula = 0;
}

$variance = max (0, $variance);
Expand Down Expand Up @@ -339,7 +349,7 @@ sub _calc_chao_confidence_intervals {
my $K;
eval {
no warnings qw /numeric uninitialized/;
$K = exp (1.96 * sqrt (log (1 + $variance / $T ** 2)));
$K = exp ($z * sqrt (log (1 + $variance / $T ** 2)));
$lower = $richness + $T / $K;
$upper = $richness + $T * $K;
};
Expand All @@ -353,11 +363,11 @@ sub _calc_chao_confidence_intervals {
# set CIs to undefined if we only have singletons/uniques
if (! (scalar keys %sums == 1 && exists $sums{1})) {
while (my ($f, $count) = each %sums) {
$P += $count * exp -$f;
$P += $count * exp (-$f);
}
$P /= $richness;
my $part1 = $richness / (1 - $P);
my $part2 = 1.96 * sqrt ($variance) / (1 - $P);
my $part2 = $z * sqrt ($variance) / (1 - $P);
$lower = max ($richness, $part1 - $part2);
$upper = $part1 + $part2;
}
Expand Down
136 changes: 136 additions & 0 deletions t/24-Indices_richness_estimators.t
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,142 @@ sub test_indices_1col {
}


# chao2 differs when q1 or q2 are zero
sub test_chao1_F2_no_F1 {
my $bd = shift->clone;

my $focal_gp = 'Broad_Meadow_Brook';

# need to ensure there are no uniques - bump their sample counts
foreach my $label ($bd->get_labels) {
if ($bd->get_label_sample_count(element => $label) == 1) {
$bd->add_element (group => $focal_gp, label => $label, count => 20);
}
}

my @nbr_groups = grep {$_ ne $focal_gp} $bd->get_groups;

my $results2 = {
CHAO1 => 26, CHAO1_SE => 0.8749986,
CHAO1_F1_COUNT => 0, CHAO1_F2_COUNT => 5,
CHAO1_UNDETECTED => 0, CHAO1_VARIANCE => 0.7656226,
CHAO1_CI_LOWER => 26, CHAO1_CI_UPPER => 28.680569,
CHAO1_META => {
CHAO_FORMULA => 0,
CI_FORMULA => 14,
VARIANCE_FORMULA => 8,
},
};

my %expected_results = (2 => $results2);

run_indices_test1 (
calcs_to_test => [qw/
calc_chao1
/],
sort_array_lists => 1,
basedata_ref => $bd,
element_list1 => ['Broad_Meadow_Brook'],
element_list2 => \@nbr_groups,
expected_results => \%expected_results,
skip_nbr_counts => {1 => 1},
#generate_result_sets => 1,
descr_suffix => 'test_chao1_F2_no_F1',
);

return;
}

sub test_chao1_F1_no_F2 {
my $bd = shift->clone;

my $focal_gp = 'Broad_Meadow_Brook';

# need to ensure there are no doubles
foreach my $label ($bd->get_labels) {
if ($bd->get_label_sample_count(element => $label) == 2) {
$bd->add_element (group => $focal_gp, label => $label, count => 20);
}
}

my @nbr_groups = grep {$_ ne $focal_gp} $bd->get_groups;

my $results2 = {
CHAO1 => 31.986014, CHAO1_SE => 7.264041,
CHAO1_F1_COUNT => 4, CHAO1_F2_COUNT => 0,
CHAO1_UNDETECTED => 5.986014, CHAO1_VARIANCE => 52.766285,
CHAO1_CI_LOWER => 26.9273497, CHAO1_CI_UPPER => 64.6395356295,
CHAO1_META => {
CHAO_FORMULA => 2,
CI_FORMULA => 13,
VARIANCE_FORMULA => 7,
},
};

my %expected_results = (2 => $results2);

run_indices_test1 (
calcs_to_test => [qw/
calc_chao1
/],
sort_array_lists => 1,
basedata_ref => $bd,
element_list1 => ['Broad_Meadow_Brook'],
element_list2 => \@nbr_groups,
expected_results => \%expected_results,
skip_nbr_counts => {1 => 1},
#generate_result_sets => 1,
descr_suffix => 'test_chao1_F1_no_F2',
);

return;
}

sub test_chao1_no_F1_no_F2 {
my $bd = shift->clone;

my $focal_gp = 'Broad_Meadow_Brook';

# need to ensure there are no uniques or doubles - make them all occur everywhere
foreach my $label ($bd->get_labels) {
my $sc = $bd->get_label_sample_count (element => $label);
if ($sc == 1 || $sc == 2) {
$bd->add_element (group => $focal_gp, label => $label, count => 20);
}
}

my @nbr_groups = grep {$_ ne $focal_gp} $bd->get_groups;

my $results2 = {
CHAO1 => 26, CHAO1_SE => 0.43544849,
CHAO1_F1_COUNT => 0, CHAO1_F2_COUNT => 0,
CHAO1_UNDETECTED => 0, CHAO1_VARIANCE => 0.1896153894,
CHAO1_CI_LOWER => 26, CHAO1_CI_UPPER => 27.0602293,
CHAO1_META => {
CHAO_FORMULA => 0,
CI_FORMULA => 14,
VARIANCE_FORMULA => 8,
},
};

my %expected_results = (2 => $results2);

run_indices_test1 (
calcs_to_test => [qw/
calc_chao1
/],
sort_array_lists => 1,
basedata_ref => $bd,
element_list1 => ['Broad_Meadow_Brook'],
element_list2 => \@nbr_groups,
expected_results => \%expected_results,
skip_nbr_counts => {1 => 1},
#generate_result_sets => 1,
descr_suffix => 'test_chao1_no_F1_no_F2',
);

return;
}
# chao2 differs when q1 or q2 are zero
sub test_chao2_Q2_no_Q1 {
my $bd = shift->clone;
Expand Down

0 comments on commit d8c9731

Please sign in to comment.