Skip to content

Commit

Permalink
Simplify static_dict
Browse files Browse the repository at this point in the history
* Convert `IsMatch(dict, ...)` into a member method. The original `IsMatch` is marked as deprecated; we can probably delete it, but doing so is a breaking change that requires a major release. So for now, let's keep it and remove it when releasing the next breaking version.
* Same for `BrotliFindAllStaticDictionaryMatches` and a few more methods
  (they were not moved, though, to allow for easier review)
* Lots of internal simplifications, such as using direct comparisons, using bools, etc.
  • Loading branch information
nyurik committed May 27, 2024
1 parent 85196be commit aa0a2f8
Show file tree
Hide file tree
Showing 4 changed files with 497 additions and 566 deletions.
9 changes: 3 additions & 6 deletions src/enc/backward_references/hq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@ use crate::enc::command::{
use crate::enc::constants::{kCopyExtra, kInsExtra};
use crate::enc::encode;
use crate::enc::literal_cost::BrotliEstimateBitCostsForLiterals;
use crate::enc::static_dict::{
BrotliDictionary, BrotliFindAllStaticDictionaryMatches, FindMatchLengthWithLimit,
};
use crate::enc::static_dict::{BrotliDictionary, FindMatchLengthWithLimit};
use crate::enc::util::{floatX, FastLog2, FastLog2f64};

const BROTLI_WINDOW_GAP: usize = 16;
Expand Down Expand Up @@ -369,13 +367,12 @@ where
{
let minlen = max(4, best_len.wrapping_add(1));
if dictionary.is_some()
&& BrotliFindAllStaticDictionaryMatches(
dictionary.unwrap(),
&& dictionary.unwrap().find_all_matches(
&data[cur_ix_masked..],
minlen,
max_length,
&mut dict_matches[..],
) != 0
)
{
assert!(params.use_dictionary);
let maxlen = min(37, max_length);
Expand Down
148 changes: 69 additions & 79 deletions src/enc/backward_references/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,8 +432,7 @@ impl<T: SliceWrapperMut<u32> + SliceWrapper<u32> + BasicHashComputer> AnyHasher
}
}
if dictionary.is_some() && self.buckets_.USE_DICTIONARY() != 0 && !is_match_found {
is_match_found = SearchInStaticDictionary(
dictionary.unwrap(),
is_match_found = dictionary.unwrap().search_static_item(
dictionary_hash,
self,
&data[cur_ix_masked..],
Expand Down Expand Up @@ -821,8 +820,7 @@ impl<Alloc: alloc::Allocator<u16> + alloc::Allocator<u32>> AnyHasher for H9<Allo
}
if !is_match_found && dictionary.is_some() {
let (_, cur_data) = data.split_at(cur_ix_masked);
is_match_found = SearchInStaticDictionary(
dictionary.unwrap(),
is_match_found = dictionary.unwrap().search_static_item(
dictionary_hash,
self,
cur_data,
Expand Down Expand Up @@ -1752,8 +1750,7 @@ impl<

if !is_match_found && dictionary.is_some() {
let (_, cur_data) = data.split_at(cur_ix_masked);
is_match_found = SearchInStaticDictionary(
dictionary.unwrap(),
is_match_found = dictionary.unwrap().search_static_item(
dictionary_hash,
self,
cur_data,
Expand Down Expand Up @@ -1849,98 +1846,91 @@ fn Hash14(data: &[u8]) -> u32 {
h >> (32i32 - 14i32)
}

fn TestStaticDictionaryItem(
dictionary: &BrotliDictionary,
item: usize,
data: &[u8],
max_length: usize,
max_backward: usize,
max_distance: usize,
h9_opts: H9Opts,
out: &mut HasherSearchResult,
) -> i32 {
let backward: usize;

let len: usize = item & 0x1fusize;
let dist: usize = item >> 5;
let offset: usize =
(dictionary.offsets_by_length[len] as usize).wrapping_add(len.wrapping_mul(dist));
if len > max_length {
return 0i32;
}
let matchlen: usize = FindMatchLengthWithLimit(data, &dictionary.data[offset..], len);
if matchlen.wrapping_add(kCutoffTransformsCount as usize) <= len || matchlen == 0usize {
return 0i32;
}
{
let cut: u64 = len.wrapping_sub(matchlen) as u64;
let transform_id: usize =
impl BrotliDictionary {
fn test_static_item(
&self,
item: usize,
data: &[u8],
max_length: usize,
max_backward: usize,
max_distance: usize,
h9_opts: H9Opts,
out: &mut HasherSearchResult,
) -> bool {
let len = item & 0x1f;
let dist = item >> 5;
let offset = (self.offsets_by_length[len] as usize).wrapping_add(len.wrapping_mul(dist));
if len > max_length {
return false;
}
let matchlen: usize = FindMatchLengthWithLimit(data, &self.data[offset..], len);
if matchlen.wrapping_add(kCutoffTransformsCount as usize) <= len || matchlen == 0 {
return false;
}

let cut = len.wrapping_sub(matchlen) as u64;
let transform_id =
(cut << 2).wrapping_add(kCutoffTransforms >> cut.wrapping_mul(6) & 0x3f) as usize;
backward = max_backward
let backward = max_backward
.wrapping_add(dist)
.wrapping_add(1)
.wrapping_add(transform_id << dictionary.size_bits_by_length[len] as i32);
}
if backward > max_distance {
return 0i32;
}
let score: u64 = BackwardReferenceScore(matchlen, backward, h9_opts);
if score < out.score {
return 0i32;
.wrapping_add(transform_id << self.size_bits_by_length[len]);

if backward > max_distance {
return false;
}
let score = BackwardReferenceScore(matchlen, backward, h9_opts);
if score < out.score {
return false;
}
out.len = matchlen;
out.len_x_code = len ^ matchlen;
out.distance = backward;
out.score = score;
true
}
out.len = matchlen;
out.len_x_code = len ^ matchlen;
out.distance = backward;
out.score = score;
1i32
}

fn SearchInStaticDictionary<HasherType: AnyHasher>(
dictionary: &BrotliDictionary,
dictionary_hash: &[u16],
handle: &mut HasherType,
data: &[u8],
max_length: usize,
max_backward: usize,
max_distance: usize,
out: &mut HasherSearchResult,
shallow: bool,
) -> bool {
let mut key: usize;
let mut i: usize;
let mut is_match_found = false;
let opts = handle.Opts();
let xself: &mut Struct1 = handle.GetHasherCommon();
if xself.dict_num_matches < xself.dict_num_lookups >> 7 {
return false;
}
key = (Hash14(data) << 1) as usize; //FIXME: works for any kind of hasher??
i = 0usize;
while i < if shallow { 1 } else { 2 } {
{
let item: usize = dictionary_hash[key] as usize;
fn search_static_item<HasherType: AnyHasher>(
&self,
dictionary_hash: &[u16],
handle: &mut HasherType,
data: &[u8],
max_length: usize,
max_backward: usize,
max_distance: usize,
out: &mut HasherSearchResult,
shallow: bool,
) -> bool {
let mut is_match_found = false;
let opts = handle.Opts();
let xself = handle.GetHasherCommon();
if xself.dict_num_matches < xself.dict_num_lookups >> 7 {
return false;
}
let mut key = (Hash14(data) << 1) as usize; //FIXME: works for any kind of hasher??
let iterations = if shallow { 1 } else { 2 };
for _ in 0..iterations {
let item = dictionary_hash[key] as usize;
xself.dict_num_lookups = xself.dict_num_lookups.wrapping_add(1);
if item != 0usize {
let item_matches: i32 = TestStaticDictionaryItem(
dictionary,
if item != 0 {
if self.test_static_item(
item,
data,
max_length,
max_backward,
max_distance,
opts,
out,
);
if item_matches != 0 {
) {
xself.dict_num_matches = xself.dict_num_matches.wrapping_add(1);
is_match_found = true;
}
}
key += 1;
}
i = i.wrapping_add(1);
key = key.wrapping_add(1);

is_match_found
}
is_match_found
}

impl<Alloc: alloc::Allocator<u16> + alloc::Allocator<u32>> CloneWithAlloc<Alloc>
Expand Down
2 changes: 0 additions & 2 deletions src/enc/bit_cost.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use alloc::SliceWrapperMut;

use core::cmp::{max, min};

use super::super::alloc::SliceWrapper;
Expand All @@ -8,7 +7,6 @@ use super::util::{FastLog2, FastLog2u16};
use super::vectorization::Mem256i;
use crate::enc::floatX;


const BROTLI_REPEAT_ZERO_CODE_LENGTH: usize = 17;
const BROTLI_CODE_LENGTH_CODES: usize = BROTLI_REPEAT_ZERO_CODE_LENGTH + 1;

Expand Down
Loading

0 comments on commit aa0a2f8

Please sign in to comment.