Skip to content

Commit

Permalink
Remove data marker paths from release binaries (#5981)
Browse files Browse the repository at this point in the history
#4991 

I updated `tutorial_buffer.wasm` in every commit, so the sizes can be
seen there.

I'm only using a 4-byte tag, but apparently that's enough to be a
distinct marker (`markers_for_bin` test passes without false positives).
Going higher (i.e. to 12 bytes) immediately adds 5kB to the WASM file,
probably because the `DataMarkerInfo` cannot be passed in a register
anymore.
  • Loading branch information
robertbastian authored Jan 15, 2025
1 parent 23db3ff commit b75d20c
Show file tree
Hide file tree
Showing 54 changed files with 1,621 additions and 1,660 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 9 additions & 19 deletions components/icu/src/datagen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,42 +36,32 @@ macro_rules! cb {
use crate as icu;
let lookup =
[
("core/helloworld@1", Some(icu_provider::hello_world::HelloWorldV1Marker::INFO)),
(icu_provider::marker::data_marker_id!("core/helloworld@1").hashed().to_bytes(), Ok(icu_provider::hello_world::HelloWorldV1Marker::INFO)),
$(
($path, Some(<$marker>::INFO)),
(icu_provider::marker::data_marker_id!($path).hashed().to_bytes(), Ok(<$marker>::INFO)),
)+
$(
#[cfg(feature = "experimental")]
($epath, Some(<$emarker>::INFO)),
(icu_provider::marker::data_marker_id!($epath).hashed().to_bytes(), Ok(<$emarker>::INFO)),
#[cfg(not(feature = "experimental"))]
($epath, None),
(icu_provider::marker::data_marker_id!($epath).hashed().to_bytes(), Err($epath)),
)+

]
.into_iter()
.collect::<BTreeMap<_,_>>();
.collect::<BTreeMap<[u8; 4],Result<DataMarkerInfo, &'static str>>>();

use memchr::memmem::*;

const LEADING_TAG: &[u8] = icu_provider::leading_tag!().as_bytes();
const TRAILING_TAG: &[u8] = icu_provider::trailing_tag!().as_bytes();

let trailing_tag = Finder::new(TRAILING_TAG);
const LEADING_TAG: &[u8] = b"tdmh";

find_iter(bytes, LEADING_TAG)
.map(|tag_position| tag_position + LEADING_TAG.len())
.filter_map(|marker_start| bytes.get(marker_start..))
.filter_map(move |marker_fragment| {
trailing_tag
.find(marker_fragment)
.and_then(|end| marker_fragment.get(..end))
})
.map(core::str::from_utf8)
.filter_map(Result::ok)
.filter_map(|marker_start| bytes.get(marker_start..marker_start+4))
.filter_map(|p| {
match lookup.get(p) {
Some(Some(marker)) => Some(Ok(*marker)),
Some(None) => Some(Err(DataError::custom("This marker requires the `experimental` Cargo feature").with_display_context(p))),
Some(Ok(marker)) => Some(Ok(*marker)),
Some(Err(p)) => Some(Err(DataError::custom("This marker requires the `experimental` Cargo feature").with_display_context(p))),
None => None,
}
})
Expand Down
Binary file modified components/icu/tests/data/tutorial_buffer.wasm
Binary file not shown.
2 changes: 1 addition & 1 deletion components/properties/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ macro_rules! data_struct_generic {
}
impl icu_provider::DataMarker for $marker {
const INFO: icu_provider::DataMarkerInfo = {
let mut info = DataMarkerInfo::from_path(icu_provider::marker::data_marker_path!($path));
let mut info = DataMarkerInfo::from_id(icu_provider::marker::data_marker_id!($path));
info.is_singleton = true;
info
};
Expand Down
2 changes: 1 addition & 1 deletion provider/adapters/src/fixed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ use yoke::Yokeable;
/// # type DataStruct = <HelloWorldV1Marker as DynamicDataMarker>::DataStruct;
/// # }
/// # impl DataMarker for DummyMarker {
/// # const INFO: DataMarkerInfo = DataMarkerInfo::from_path(icu_provider::marker::data_marker_path!("dummy@1"));
/// # const INFO: DataMarkerInfo = DataMarkerInfo::from_id(icu_provider::marker::data_marker_id!("dummy@1"));
/// # }
/// // Requests for invalid markers get MissingDataMarker
/// assert!(matches!(
Expand Down
10 changes: 5 additions & 5 deletions provider/baked/src/export.rs
Original file line number Diff line number Diff line change
Expand Up @@ -744,9 +744,9 @@ impl DataExporter for BakedExporter {
#maybe_msrv
impl icu_provider::any::AnyProvider for $provider {
fn load_any(&self, marker: icu_provider::DataMarkerInfo, req: icu_provider::DataRequest) -> Result<icu_provider::AnyResponse, icu_provider::DataError> {
match marker.path.hashed() {
match marker.id.hashed() {
#(
h if h == <#marker_bakes as icu_provider::DataMarker>::INFO.path.hashed() =>
h if h == <#marker_bakes as icu_provider::DataMarker>::INFO.id.hashed() =>
icu_provider::DataProvider::<#marker_bakes>::load(self, req).map(icu_provider::DataResponse::wrap_into_any_response),
)*
_ => Err(icu_provider::DataErrorKind::MarkerNotFound.with_req(marker, req)),
Expand Down Expand Up @@ -781,12 +781,12 @@ impl DataExporter for BakedExporter {
macro_rules! cb {
($($marker:path = $path:literal,)+ #[experimental] $($emarker:path = $epath:literal,)+) => {
fn bake_marker(marker: DataMarkerInfo) -> databake::TokenStream {
if marker.path.as_str() == icu_provider::hello_world::HelloWorldV1Marker::INFO.path.as_str() {
if marker.id == icu_provider::hello_world::HelloWorldV1Marker::INFO.id {
return databake::quote!(icu_provider::hello_world::HelloWorldV1Marker);
}

$(
if marker.path.as_str() == $path {
if marker.id == icu_provider::marker::data_marker_id!($path) {
return stringify!($marker)
.replace("icu :: ", "icu_")
.parse()
Expand All @@ -795,7 +795,7 @@ macro_rules! cb {
)+

$(
if marker.path.as_str() == $epath {
if marker.id == icu_provider::marker::data_marker_id!($epath) {
return stringify!($emarker)
.replace("icu :: ", "icu_")
.parse()
Expand Down
4 changes: 2 additions & 2 deletions provider/baked/tests/data/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ macro_rules! impl_any_provider {
#[clippy::msrv = "1.81"]
impl icu_provider::any::AnyProvider for $provider {
fn load_any(&self, marker: icu_provider::DataMarkerInfo, req: icu_provider::DataRequest) -> Result<icu_provider::AnyResponse, icu_provider::DataError> {
match marker.path.hashed() {
h if h == <icu_provider::hello_world::HelloWorldV1Marker as icu_provider::DataMarker>::INFO.path.hashed() => icu_provider::DataProvider::<icu_provider::hello_world::HelloWorldV1Marker>::load(self, req).map(icu_provider::DataResponse::wrap_into_any_response),
match marker.id.hashed() {
h if h == <icu_provider::hello_world::HelloWorldV1Marker as icu_provider::DataMarker>::INFO.id.hashed() => icu_provider::DataProvider::<icu_provider::hello_world::HelloWorldV1Marker>::load(self, req).map(icu_provider::DataResponse::wrap_into_any_response),
_ => Err(icu_provider::DataErrorKind::MarkerNotFound.with_req(marker, req)),
}
}
Expand Down
8 changes: 4 additions & 4 deletions provider/blob/src/blob_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
use alloc::boxed::Box;
use alloc::collections::BTreeSet;
use core::fmt::Write;
use icu_provider::{marker::DataMarkerPathHash, prelude::*};
use icu_provider::{marker::DataMarkerIdHash, prelude::*};
use serde::Deserialize;
use writeable::Writeable;
use zerotrie::ZeroTrieSimpleAscii;
Expand Down Expand Up @@ -99,7 +99,7 @@ pub(crate) struct BlobSchemaV3<'data, LocaleVecFormat: VarZeroVecFormat> {
/// Map from marker hash to locale trie.
/// Weak invariant: should be sorted.
#[serde(borrow)]
pub markers: &'data ZeroSlice<DataMarkerPathHash>,
pub markers: &'data ZeroSlice<DataMarkerIdHash>,
/// Map from locale to buffer index.
/// Weak invariant: the `usize` values are valid indices into `self.buffers`
/// Weak invariant: there is at least one value for every integer in 0..self.buffers.len()
Expand All @@ -126,7 +126,7 @@ impl<'data, LocaleVecFormat: VarZeroVecFormat> BlobSchemaV3<'data, LocaleVecForm
pub fn load(&self, marker: DataMarkerInfo, req: DataRequest) -> Result<&'data [u8], DataError> {
let marker_index = self
.markers
.binary_search(&marker.path.hashed())
.binary_search(&marker.id.hashed())
.ok()
.ok_or_else(|| DataErrorKind::MarkerNotFound.with_req(marker, req))?;
if marker.is_singleton && !req.id.locale.is_default() {
Expand Down Expand Up @@ -169,7 +169,7 @@ impl<'data, LocaleVecFormat: VarZeroVecFormat> BlobSchemaV3<'data, LocaleVecForm
) -> Result<BTreeSet<DataIdentifierCow>, DataError> {
let marker_index = self
.markers
.binary_search(&marker.path.hashed())
.binary_search(&marker.id.hashed())
.ok()
.ok_or_else(|| DataErrorKind::MarkerNotFound.with_marker(marker))?;
let zerotrie = self
Expand Down
12 changes: 6 additions & 6 deletions provider/blob/src/export/blob_exporter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

use crate::blob_schema::*;
use icu_provider::export::*;
use icu_provider::{marker::DataMarkerPathHash, prelude::*};
use icu_provider::{marker::DataMarkerIdHash, prelude::*};
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::sync::Mutex;
use zerotrie::ZeroTrieSimpleAscii;
Expand All @@ -23,9 +23,9 @@ use postcard::ser_flavors::{AllocVec, Flavor};
pub struct BlobExporter<'w> {
/// Map of marker path hash -> locale byte string -> blob ID
#[allow(clippy::type_complexity)]
resources: Mutex<BTreeMap<DataMarkerPathHash, BTreeMap<Vec<u8>, usize>>>,
resources: Mutex<BTreeMap<DataMarkerIdHash, BTreeMap<Vec<u8>, usize>>>,
// All seen markers
all_markers: Mutex<BTreeSet<DataMarkerPathHash>>,
all_markers: Mutex<BTreeSet<DataMarkerIdHash>>,
/// Map from blob to blob ID
unique_resources: Mutex<HashMap<Vec<u8>, usize>>,
sink: Box<dyn std::io::Write + Sync + 'w>,
Expand Down Expand Up @@ -81,7 +81,7 @@ impl DataExporter for BlobExporter<'_> {
self.resources
.lock()
.expect("poison")
.entry(marker.path.hashed())
.entry(marker.id.hashed())
.or_default()
.entry({
let mut key = id.locale.to_string();
Expand All @@ -99,7 +99,7 @@ impl DataExporter for BlobExporter<'_> {
self.all_markers
.lock()
.expect("poison")
.insert(marker.path.hashed());
.insert(marker.id.hashed());
Ok(())
}

Expand Down Expand Up @@ -150,7 +150,7 @@ impl BlobExporter<'_> {
let all_markers = self.all_markers.lock().expect("poison");
let resources = self.resources.lock().expect("poison");

let markers: ZeroVec<DataMarkerPathHash> = all_markers.iter().copied().collect();
let markers: ZeroVec<DataMarkerIdHash> = all_markers.iter().copied().collect();

let locales_vec: Vec<Vec<u8>> = all_markers
.iter()
Expand Down
6 changes: 2 additions & 4 deletions provider/core/macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,11 @@ mod tests;
/// // Note: FooV1Marker implements `DynamicDataMarker` but not `DataMarker`.
/// // The other two implement `DataMarker`.
///
/// assert_eq!(BarV1Marker::INFO.path.as_str(), "demo/bar@1");
/// assert_eq!(
/// BarV1Marker::INFO.fallback_config.priority,
/// LocaleFallbackPriority::Language
/// );
///
/// assert_eq!(BazV1Marker::INFO.path.as_str(), "demo/baz@1");
/// assert_eq!(
/// BazV1Marker::INFO.fallback_config.priority,
/// LocaleFallbackPriority::Region
Expand Down Expand Up @@ -331,14 +329,14 @@ fn data_struct_impl(attr: DataStructArgs, input: DeriveInput) -> TokenStream2 {
quote! {icu_provider::fallback::LocaleFallbackPriority::default()}
};
let attributes_domain_setter = if let Some(attributes_domain_lit) = attributes_domain {
quote! { info.attributes_domain = #attributes_domain_lit; }
quote! { info = info.with_attributes_domain(#attributes_domain_lit); }
} else {
quote!()
};
result.extend(quote!(
impl icu_provider::DataMarker for #marker_name {
const INFO: icu_provider::DataMarkerInfo = {
let mut info = icu_provider::DataMarkerInfo::from_path(icu_provider::marker::data_marker_path!(#path_str));
let mut info = icu_provider::DataMarkerInfo::from_id(icu_provider::marker::data_marker_id!(#path_str));
info.is_singleton = #singleton;
info.fallback_config.priority = #fallback_by_expr;
#attributes_domain_setter
Expand Down
8 changes: 4 additions & 4 deletions provider/core/macros/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ fn test_data_marker() {
}
impl icu_provider::DataMarker for BarV1Marker {
const INFO: icu_provider::DataMarkerInfo = {
let mut info = icu_provider::DataMarkerInfo::from_path(icu_provider::marker::data_marker_path!("demo/bar@1"));
let mut info = icu_provider::DataMarkerInfo::from_id(icu_provider::marker::data_marker_id!("demo/bar@1"));
info.is_singleton = false;
info.fallback_config.priority = icu_provider::fallback::LocaleFallbackPriority::default();
info
Expand Down Expand Up @@ -100,7 +100,7 @@ fn test_multi_named_keyed_data_marker() {
}
impl icu_provider::DataMarker for BarV1Marker {
const INFO: icu_provider::DataMarkerInfo = {
let mut info = icu_provider::DataMarkerInfo::from_path(icu_provider::marker::data_marker_path!("demo/bar@1"));
let mut info = icu_provider::DataMarkerInfo::from_id(icu_provider::marker::data_marker_id!("demo/bar@1"));
info.is_singleton = false;
info.fallback_config.priority = icu_provider::fallback::LocaleFallbackPriority::default();
info
Expand All @@ -113,7 +113,7 @@ fn test_multi_named_keyed_data_marker() {
}
impl icu_provider::DataMarker for BazV1Marker {
const INFO: icu_provider::DataMarkerInfo = {
let mut info = icu_provider::DataMarkerInfo::from_path(icu_provider::marker::data_marker_path!("demo/baz@1"));
let mut info = icu_provider::DataMarkerInfo::from_id(icu_provider::marker::data_marker_id!("demo/baz@1"));
info.is_singleton = false;
info.fallback_config.priority = icu_provider::fallback::LocaleFallbackPriority::default();
info
Expand Down Expand Up @@ -149,7 +149,7 @@ fn test_attributes() {
}
impl icu_provider::DataMarker for BarV1Marker {
const INFO: icu_provider::DataMarkerInfo = {
let mut info = icu_provider::DataMarkerInfo::from_path(icu_provider::marker::data_marker_path!("demo/bar@1"));
let mut info = icu_provider::DataMarkerInfo::from_id(icu_provider::marker::data_marker_id!("demo/bar@1"));
info.is_singleton = true;
info.fallback_config.priority = icu_provider::fallback::LocaleFallbackPriority::Region;
info
Expand Down
2 changes: 1 addition & 1 deletion provider/core/src/data_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ mod test {

/// Key for HelloAlt, used for testing mismatched types
const HELLO_ALT_KEY: DataMarkerInfo =
DataMarkerInfo::from_path(crate::marker::data_marker_path!("core/helloalt1@1"));
DataMarkerInfo::from_id(crate::marker::data_marker_id!("core/helloalt1@1"));

/// A data struct serialization-compatible with HelloWorldV1 used for testing mismatched types
#[derive(
Expand Down
16 changes: 8 additions & 8 deletions provider/core/src/dynutil.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ pub use __impl_casting_upcast as impl_casting_upcast;
/// # type DataStruct = <HelloWorldV1Marker as DynamicDataMarker>::DataStruct;
/// # }
/// # impl DataMarker for DummyMarker {
/// # const INFO: DataMarkerInfo = DataMarkerInfo::from_path(icu_provider::marker::data_marker_path!("dummy@1"));
/// # const INFO: DataMarkerInfo = DataMarkerInfo::from_id(icu_provider::marker::data_marker_id!("dummy@1"));
/// # }
/// // MissingDataMarker error as the marker does not match:
/// assert_eq!(
Expand Down Expand Up @@ -174,7 +174,7 @@ pub use __impl_casting_upcast as impl_casting_upcast;
/// type DataStruct = <HelloWorldV1Marker as DynamicDataMarker>::DataStruct;
/// }
/// impl DataMarker for DummyMarker {
/// const INFO: DataMarkerInfo = DataMarkerInfo::from_path(icu_provider::marker::data_marker_path!("dummy@1"));
/// const INFO: DataMarkerInfo = DataMarkerInfo::from_id(icu_provider::marker::data_marker_id!("dummy@1"));
/// }
/// HelloWorldProvider.as_any_provider().load_any(DummyMarker::INFO, DataRequest {
/// id: DataIdentifierBorrowed::for_locale(&langid!("de").into()),
Expand Down Expand Up @@ -216,9 +216,9 @@ macro_rules! __impl_dynamic_data_provider {
$crate::DataResponse<$dyn_m>,
$crate::DataError,
> {
match marker.path.hashed() {
match marker.id.hashed() {
$(
h if h == $marker.path.hashed() => {
h if h == $marker.id.hashed() => {
let result: $crate::DataResponse<$struct_m> =
$crate::DynamicDataProvider::<$struct_m>::load_data(self, marker, req)?;
Ok($crate::DataResponse {
Expand Down Expand Up @@ -254,10 +254,10 @@ macro_rules! __impl_dynamic_data_provider {
$crate::DataResponse<$dyn_m>,
$crate::DataError,
> {
match marker.path.hashed() {
match marker.id.hashed() {
$(
$(#[$cfg])?
h if h == <$struct_m as $crate::DataMarker>::INFO.path.hashed() => {
h if h == <$struct_m as $crate::DataMarker>::INFO.id.hashed() => {
let result: $crate::DataResponse<$struct_m> =
$crate::DataProvider::load(self, req)?;
Ok($crate::DataResponse {
Expand All @@ -281,10 +281,10 @@ macro_rules! __impl_iterable_dynamic_data_provider {
($provider:ty, [ $($(#[$cfg:meta])? $struct_m:ty),+, ], $dyn_m:path) => {
impl $crate::IterableDynamicDataProvider<$dyn_m> for $provider {
fn iter_ids_for_marker(&self, marker: $crate::DataMarkerInfo) -> Result<alloc::collections::BTreeSet<$crate::DataIdentifierCow>, $crate::DataError> {
match marker.path.hashed() {
match marker.id.hashed() {
$(
$(#[$cfg])?
h if h == <$struct_m as $crate::DataMarker>::INFO.path.hashed() => {
h if h == <$struct_m as $crate::DataMarker>::INFO.id.hashed() => {
$crate::IterableDataProvider::<$struct_m>::iter_ids(self)
}
)+,
Expand Down
Loading

0 comments on commit b75d20c

Please sign in to comment.