Skip to content

Commit 39baa70

Browse files
committed
Merge branch 'master' into shruthi/feat/add-eap-items-results-consumers
2 parents dd2c91b + 82d3edc commit 39baa70

File tree

33 files changed

+586
-196
lines changed

33 files changed

+586
-196
lines changed

bin/load-integration-data

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#!/usr/bin/env python
2+
from sentry.runner import configure
3+
4+
configure()
5+
6+
import argparse
7+
8+
import click
9+
from django.core import serializers
10+
from django.db import IntegrityError, router
11+
12+
from sentry.integrations.models.integration import Integration
13+
from sentry.integrations.models.organization_integration import OrganizationIntegration
14+
from sentry.users.models.identity import Identity, IdentityProvider
15+
from sentry.utils.db import atomic_transaction
16+
17+
# The order in which models should be loaded to respect foreign key dependencies.
18+
MODEL_LOAD_ORDER = [
19+
"sentry.identityprovider",
20+
"sentry.integration",
21+
"sentry.identity", # Depends on sentry.identityprovider
22+
"sentry.organizationintegration", # Depends on sentry.integration (and sentry.organization)
23+
]
24+
25+
26+
def load_data(input_file, org_id):
    """
    Load serialized model data from a JSON file and save it to the database.

    Objects are saved in dependency order (see MODEL_LOAD_ORDER) so foreign
    keys resolve; PKs from the file are preserved.

    Args:
        input_file: Path to a JSON dump produced by the bin/save-integration-data script.
        org_id: Optional organization id (string). When provided and valid, every
            OrganizationIntegration loaded is re-pointed at this organization.
    """
    # Local import so the fix is self-contained in this function.
    from django.db import transaction

    click.echo(f"Reading serialized data from {input_file}...")
    with open(input_file) as f:
        serialized_data = f.read()

    if not serialized_data.strip() or serialized_data.strip() == "[]":
        click.echo("Input file is empty or contains no data. Nothing to load.")
        return

    click.echo("Deserializing objects...")
    try:
        deserialized_objects = list(serializers.deserialize("json", serialized_data))
    except Exception as e:
        click.echo(f"Error during deserialization: {e}")
        click.echo(
            "Please ensure the input file is a valid JSON dump created by the bin/save-integration-data script."
        )
        return

    if not deserialized_objects:
        click.echo("No objects were deserialized from the file.")
        return

    click.echo(f"Deserialized {len(deserialized_objects)} objects.")

    # Sort deserialized objects per MODEL_LOAD_ORDER so FK targets are saved
    # before the rows that reference them; unknown models sort last.
    def get_sort_key(d_obj):
        model_key = f"{d_obj.object._meta.app_label}.{d_obj.object._meta.model_name}"
        try:
            return MODEL_LOAD_ORDER.index(model_key)
        except ValueError:
            return len(MODEL_LOAD_ORDER)  # Put unknown models at the end

    sorted_deserialized_objects = sorted(deserialized_objects, key=get_sort_key)

    saved_count = 0
    skipped_count = 0
    error_count = 0

    parsed_org_id = None
    if org_id:
        try:
            parsed_org_id = int(org_id)
            click.echo(
                f"Will update OrganizationIntegration objects to organization_id: {parsed_org_id}"
            )
        except ValueError:
            click.echo(
                f"Warning: Invalid org_id '{org_id}'. It will be ignored. Please provide a valid integer."
            )

    click.echo("Attempting to save objects to the database...")
    with atomic_transaction(
        using=(
            router.db_for_write(Integration),
            router.db_for_write(OrganizationIntegration),
            router.db_for_write(Identity),
            router.db_for_write(IdentityProvider),
        )
    ):
        for deserialized_object in sorted_deserialized_objects:
            model_name = deserialized_object.object._meta.object_name
            pk = deserialized_object.object.pk

            # If org_id is provided, update OrganizationIntegration's organization_id
            if parsed_org_id is not None and isinstance(
                deserialized_object.object, OrganizationIntegration
            ):
                click.echo(
                    f" Updating organization_id for {model_name} (PK: {pk}) to {parsed_org_id}"
                )
                deserialized_object.object.organization_id = parsed_org_id

            try:
                # Wrap each save in its own savepoint. Without this, the first
                # IntegrityError aborts the enclosing transaction on PostgreSQL
                # ("current transaction is aborted"), making every subsequent
                # save fail and rolling back even the "Saved" objects at commit.
                with transaction.atomic(
                    using=router.db_for_write(type(deserialized_object.object))
                ):
                    # deserialized_object.save() saves the object and any
                    # many-to-many data, reusing the PK from the serialized data.
                    deserialized_object.save()
                saved_count += 1
                click.echo(f" Saved: {model_name} (PK: {pk})")
            except IntegrityError as e:
                # PK conflict, unique-constraint violation, or a non-existent
                # foreign key (e.g. a referenced User or Organization that
                # doesn't exist in the target DB).
                skipped_count += 1
                click.echo(f" Skipped: {model_name} (PK: {pk}) due to IntegrityError: {e}")
            except Exception as e:
                # Catch other potential errors during save; log and continue.
                # Re-raise here instead if a hard stop is preferred.
                error_count += 1
                click.echo(f" Error saving: {model_name} (PK: {pk}): {e}")

    click.echo("\nLoad process completed.")
    click.echo(f" Successfully saved: {saved_count} objects.")
    click.echo(f" Skipped (IntegrityError): {skipped_count} objects.")
    click.echo(f" Errors (Other): {error_count} objects.")
    if skipped_count > 0 or error_count > 0:
        click.echo(
            "Please check skipped/error messages. This might indicate that the target database was not clean,"
        )
        click.echo("or that required related objects (like Organizations or Users) were missing.")
134+
135+
136+
if __name__ == "__main__":
    # CLI entry point: collect arguments, then delegate to load_data.
    arg_parser = argparse.ArgumentParser(
        description="Load Sentry integration-related models from a JSON file into the database."
    )
    arg_parser.add_argument(
        "--input-file",
        required=True,
        help="Path to the input JSON file containing the data to load.",
    )
    arg_parser.add_argument(
        "--org-id",
        required=False,
        help="The organization ID to save integration data for.",
    )

    cli_args = arg_parser.parse_args()
    load_data(cli_args.input_file, cli_args.org_id)

bin/save-integration-data

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#!/usr/bin/env python
2+
from sentry.runner import configure
3+
4+
configure()
5+
6+
import argparse
7+
8+
import click
9+
from django.core import serializers
10+
11+
from sentry.integrations.models import Integration, OrganizationIntegration
12+
from sentry.users.models.identity import Identity, IdentityProvider
13+
14+
MODELS_TO_SERIALIZE = [
15+
IdentityProvider, # Has no FKs to other serialized models
16+
Integration, # Has no FKs to other serialized models
17+
Identity, # Depends on IdentityProvider
18+
OrganizationIntegration, # Depends on Integration
19+
]
20+
21+
22+
def save_data(output_file):
    """Serialize all configured integration-related models into a JSON file.

    Walks MODELS_TO_SERIALIZE in dependency order, gathers every row from each
    model, and writes one combined JSON document to ``output_file``.
    """
    collected = []
    click.echo("Collecting data from models...")
    for model_cls in MODELS_TO_SERIALIZE:
        label = f"{model_cls._meta.app_label}.{model_cls._meta.model_name}"
        click.echo(f" Fetching from {label}...")
        # Materialize the queryset up front (ordered by PK for stable output,
        # though the serializer may reorder).
        rows = list(model_cls.objects.order_by("pk").all())
        collected.extend(rows)
        click.echo(f" Found {len(rows)} objects.")

    if collected:
        click.echo(f"\nSerializing {len(collected)} objects in total...")
        payload = serializers.serialize("json", collected, indent=2)
    else:
        click.echo("No objects found to serialize.")
        payload = "[]"

    click.echo(f"Writing serialized data to {output_file}...")
    with open(output_file, "w") as f:
        f.write(payload)
    click.echo(f"Successfully saved data to {output_file}")
48+
49+
50+
if __name__ == "__main__":
    # CLI entry point: collect arguments, then delegate to save_data.
    arg_parser = argparse.ArgumentParser(
        description="Save Sentry integration-related models to a JSON file."
    )
    arg_parser.add_argument(
        "--output-file",
        required=True,
        help="Path to the output JSON file where data will be saved.",
    )

    cli_args = arg_parser.parse_args()
    save_data(cli_args.output_file)

rspack.config.ts

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -293,10 +293,32 @@ const appConfig: Configuration = {
293293
},
294294
plugins: [
295295
/**
296-
* Adds build time measurement instrumentation, which will be reported back
297-
* to sentry
296+
* Without this, webpack will chunk the locales but attempt to load them all
297+
* eagerly.
298298
*/
299-
// new SentryInstrumentation(),
299+
new rspack.IgnorePlugin({
300+
contextRegExp: /moment$/,
301+
resourceRegExp: /^\.\/locale$/,
302+
}),
303+
304+
/**
305+
* Restrict translation files that are pulled in through app/translations.jsx
306+
* and through moment/locale/* to only those which we create bundles for via
307+
* locale/catalogs.json.
308+
*
309+
* Without this, webpack will still output all of the unused locale files despite
310+
* the application never loading any of them.
311+
*/
312+
new rspack.ContextReplacementPlugin(
313+
/sentry-locale$/,
314+
path.join(__dirname, 'src', 'sentry', 'locale', path.sep),
315+
true,
316+
new RegExp(`(${supportedLocales.join('|')})/.*\\.po$`)
317+
),
318+
new rspack.ContextReplacementPlugin(
319+
/moment\/locale/,
320+
new RegExp(`(${supportedLanguages.join('|')})\\.js$`)
321+
),
300322

301323
/**
302324
* TODO(epurkhiser): Figure out if we still need these
@@ -332,25 +354,6 @@ const appConfig: Configuration = {
332354

333355
...(SHOULD_ADD_RSDOCTOR ? [new RsdoctorWebpackPlugin({})] : []),
334356

335-
/**
336-
* Restrict translation files that are pulled in through app/translations.jsx
337-
* and through moment/locale/* to only those which we create bundles for via
338-
* locale/catalogs.json.
339-
*
340-
* Without this, webpack will still output all of the unused locale files despite
341-
* the application never loading any of them.
342-
*/
343-
new rspack.ContextReplacementPlugin(
344-
/sentry-locale$/,
345-
path.join(__dirname, 'src', 'sentry', 'locale', path.sep),
346-
true,
347-
new RegExp(`(${supportedLocales.join('|')})/.*\\.po$`)
348-
),
349-
new rspack.ContextReplacementPlugin(
350-
/moment\/locale/,
351-
new RegExp(`(${supportedLanguages.join('|')})\\.js$`)
352-
),
353-
354357
/**
355358
* Copies file logo-sentry.svg to the dist/entrypoints directory so that it can be accessed by
356359
* the backend

src/sentry/grouping/enhancer/__init__.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@
4444
]
4545
LATEST_VERSION = VERSIONS[-1]
4646

47+
# A delimiter to insert between rulesets in the base64 representation of enhancements (by spec,
48+
# base64 strings never contain '#')
49+
BASE64_ENHANCEMENTS_DELIMITER = b"#"
50+
4751
VALID_PROFILING_MATCHER_PREFIXES = (
4852
"stack.abs_path",
4953
"path", # stack.abs_path alias
@@ -807,7 +811,19 @@ def _get_base64_bytes_from_rules(self, rules: list[EnhancementRule]) -> bytes:
807811
@cached_property
808812
def base64_string(self) -> str:
809813
"""A base64 string representation of the enhancements object"""
810-
base64_bytes = self._get_base64_bytes_from_rules(self.rules)
814+
rulesets = [self.rules]
815+
816+
if self.run_split_enhancements:
817+
rulesets.extend([self.classifier_rules, self.contributes_rules])
818+
819+
# Create a base64 bytestring for each set of rules, and join them with a character we know
820+
# can never appear in base64. We do it this way rather than combining all three sets of
821+
# rules into a single bytestring because the rust enhancer only knows how to deal with
822+
# bytestrings encoding data of the form `[version, bases, rules]` (not
823+
# `[version, bases, rules, rules, rules]`).
824+
base64_bytes = BASE64_ENHANCEMENTS_DELIMITER.join(
825+
self._get_base64_bytes_from_rules(ruleset) for ruleset in rulesets
826+
)
811827
base64_str = base64_bytes.decode("ascii")
812828
return base64_str
813829

@@ -845,13 +861,25 @@ def from_base64_string(
845861
with metrics.timer("grouping.enhancements.creation") as metrics_timer_tags:
846862
metrics_timer_tags.update({"source": "base64_string", "referrer": referrer})
847863

848-
bytes_str = (
864+
raw_bytes_str = (
849865
base64_string.encode("ascii", "ignore")
850866
if isinstance(base64_string, str)
851867
else base64_string
852868
)
853869

854-
unsplit_config = cls._get_config_from_base64_bytes(bytes_str)
870+
# Split the string to get encoded data for each set of rules: unsplit rules (i.e., rules
871+
# the way they're stored in project config), classifier rules, and contributes rules.
872+
# Older base64 strings - such as those stored in events created before rule-splitting was
873+
# introduced - will only have one part and thus will end up unchanged. (The delimiter is
874+
# chosen specifically to be a character which can't appear in base64.)
875+
bytes_strs = raw_bytes_str.split(BASE64_ENHANCEMENTS_DELIMITER)
876+
configs = [cls._get_config_from_base64_bytes(bytes_str) for bytes_str in bytes_strs]
877+
878+
unsplit_config = configs[0]
879+
split_configs = None
880+
881+
if len(configs) == 3:
882+
split_configs = (configs[1], configs[2])
855883

856884
version = unsplit_config.version
857885
bases = unsplit_config.bases
@@ -861,6 +889,7 @@ def from_base64_string(
861889
return cls(
862890
rules=unsplit_config.rules,
863891
rust_enhancements=unsplit_config.rust_enhancements,
892+
split_enhancement_configs=split_configs,
864893
version=version,
865894
bases=bases,
866895
)

src/sentry/options/defaults.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3113,6 +3113,13 @@
31133113
flags=FLAG_AUTOMATOR_MODIFIABLE,
31143114
)
31153115

3116+
register(
3117+
"taskworker.try_compress.profile_metrics.rollout",
3118+
default=0.0,
3119+
type=Float,
3120+
flags=FLAG_AUTOMATOR_MODIFIABLE,
3121+
)
3122+
31163123
# Taskbroker flags
31173124
register(
31183125
"taskworker.try_compress.profile_metrics.level",

0 commit comments

Comments
 (0)