From bfa574e87373322aadeb8b72fdb7adda77e9d240 Mon Sep 17 00:00:00 2001
From: "Moises Lopez - https://www.vauxoo.com/" <moylop260@vauxoo.com>
Date: Tue, 21 Jan 2025 22:06:20 -0600
Subject: [PATCH 1/4] [ADD] xml-id-position-first: Add check with autofix to
 detect when xml id is not the first attribute

Welcome to autofix checks for xml

Fix https://github.com/OCA/odoo-pre-commit-hooks/issues/26

Following the Odoo convention: [Place id attribute before model](https://www.odoo.com/documentation/18.0/contributing/development/coding_guidelines.html#xml-files)

[IMP] xml-redundant-module-name: Add autofix feature
---
 .../checks_odoo_module.py                     |   5 +-
 .../checks_odoo_module_xml.py                 | 103 +++++++++++++++++-
 tests/common.py                               |  17 +++
 tests/test_checks.py                          |   1 +
 4 files changed, 120 insertions(+), 6 deletions(-)

diff --git a/src/oca_pre_commit_hooks/checks_odoo_module.py b/src/oca_pre_commit_hooks/checks_odoo_module.py
index 8de53468..b2286d89 100755
--- a/src/oca_pre_commit_hooks/checks_odoo_module.py
+++ b/src/oca_pre_commit_hooks/checks_odoo_module.py
@@ -136,12 +136,15 @@ def check_xml(self):
         if not manifest_datas:
             return
         checks_obj = checks_odoo_module_xml.ChecksOdooModuleXML(
-            manifest_datas, self.odoo_addon_name, self.enable, self.disable
+            manifest_datas, self.odoo_addon_name, self.enable, self.disable, self.autofix
         )
         for check_meth in utils.getattr_checks(checks_obj):
             check_meth()
         self.checks_errors.extend(checks_obj.checks_errors)
 
+        if self.autofix:
+            checks_obj.perform_autofix()
+
     @utils.only_required_for_installable()
     def check_csv(self):
         manifest_datas = self.manifest_referenced_files[".csv"]
diff --git a/src/oca_pre_commit_hooks/checks_odoo_module_xml.py b/src/oca_pre_commit_hooks/checks_odoo_module_xml.py
index b42adfec..7f841095 100644
--- a/src/oca_pre_commit_hooks/checks_odoo_module_xml.py
+++ b/src/oca_pre_commit_hooks/checks_odoo_module_xml.py
@@ -1,5 +1,7 @@
 import os
 import re
+import shutil
+import tempfile
 from collections import defaultdict, namedtuple
 from typing import Dict, List
 
@@ -25,6 +27,21 @@ def _hasclass(context, *cls):
 FileElementPair = namedtuple("FileElementPair", ["filename", "element"])
 
 
+xmlid_reorder_re = re.compile(r'(<(\w+)\s+)([^>]*?)\b(id="[^"]*")([^>]*)')
+
+def xmlid_reorder(old_content):
+    def reorder(match):
+        match = xmlid_reorder_re.search(old_content)
+        start = match.group(1)  # Tag: <record or <menuitem
+        # tag_name = match.group(2)  # Tag name: record or menuitem
+        before_id = match.group(3)  # Attributes before id
+        id_attr = match.group(4)  # id attribute
+        after_id = match.group(5)  # Attributes after id
+        # Re-order using the id first
+        return f"{start}{id_attr} {before_id.strip()} {after_id.strip()}".rstrip()
+    return xmlid_reorder_re.sub(reorder, old_content)
+
+
 class ChecksOdooModuleXML(BaseChecker):
     xpath_deprecated_data = etree.XPath("/odoo[count(./*) < 2]/data|/openerp[count(./*) < 2]/data")
     xpath_oe_structure_woid = etree.XPath(
@@ -56,8 +73,8 @@ class ChecksOdooModuleXML(BaseChecker):
         f"/odoo//template//*[{qweb_deprecated_attrs}] | " f"/openerp//template//*[{qweb_deprecated_attrs}]"
     )
 
-    def __init__(self, manifest_datas, module_name, enable, disable):
-        super().__init__(enable, disable, module_name)
+    def __init__(self, manifest_datas, module_name, enable, disable, autofix):
+        super().__init__(enable, disable, module_name, autofix)
         self.manifest_datas = manifest_datas or []
         for manifest_data in self.manifest_datas:
             try:
@@ -68,6 +85,8 @@ def __init__(self, manifest_datas, module_name, enable, disable):
                             "node": node,
                             "file_error": None,
                             "disabled_checks": self._get_disabled_checks(node),
+                            "needs_autofix": False,
+                            "changes": [],
                         }
                     )
             except (FileNotFoundError, etree.XMLSyntaxError, UnicodeDecodeError) as xml_err:
@@ -76,6 +95,8 @@ def __init__(self, manifest_datas, module_name, enable, disable):
                         "node": etree.Element("__empty__"),
                         "file_error": str(xml_err).replace(manifest_data["filename"], ""),
                         "disabled_checks": set(),
+                        "needs_autofix": False,
+                        "changes": [],
                     }
                 )
 
@@ -92,6 +113,15 @@ def _get_disabled_checks(self, node):
                 print(f"{node.docinfo.URL}:{comment_node.sourceline} WARNING. DEPRECATED. Use oca-disable instead.")
         return all_checks_disabled
 
+    def read_line(self, filename, line):
+        """Return the content of the file only for the number of line
+        It avoid to load the whole file in memory
+        """
+        with open(filename, encoding="UTF-8") as f_content:
+            for current_line, content in enumerate(f_content, start=1):
+                if line == current_line:
+                    return content
+
     def getattr_checks(self, manifest_data, prefix):
         disable_node = manifest_data["disabled_checks"]
         yield from utils.getattr_checks(self, prefix, disable_node)
@@ -190,6 +220,29 @@ def check_xml_records(self):
                 extra_positions=[(field[0]["filename_short"], field[1].sourceline) for field in fields[1:]],
             )
 
+    def perform_autofix(self):
+        for manifest_data in self.manifest_datas:
+            if not manifest_data["needs_autofix"]:
+                continue
+            print(f"File changed {manifest_data['filename']}")
+            with (
+                tempfile.NamedTemporaryFile("w", delete=False, encoding="UTF-8") as xml_file_tmp,
+                open(manifest_data["filename"], encoding="UTF-8") as xml_file,
+            ):
+                for no_line, line in enumerate(xml_file, start=1):
+                    for change in manifest_data["changes"]:
+                        if change["sourceline"] != no_line:
+                            continue
+                        if line != change["old_content"]:
+                            # TODO: Improve the script to autofix more than one change in the same line
+                            print(
+                                "This file had more than one change over the same line, please, re-run the script again"
+                            )
+                            break
+                        line = change["new_content"]
+                    xml_file_tmp.write(line)
+            shutil.move(xml_file_tmp.name, manifest_data["filename"])
+
     @utils.only_required_for_checks("xml-syntax-error")
     def check_xml_syntax_error(self):
         """* Check xml-syntax-error
@@ -204,7 +257,7 @@ def check_xml_syntax_error(self):
                 line=1,
             )
 
-    @utils.only_required_for_checks("xml-redundant-module-name")
+    @utils.only_required_for_checks("xml-redundant-module-name", "xml-id-position-first")
     def visit_xml_record(self, manifest_data, record):
         """* Check xml-redundant-module-name
 
@@ -213,15 +266,44 @@ def visit_xml_record(self, manifest_data, record):
 
         The "module_a." is redundant it could be replaced to only
         `<record id="xmlid_name1" ...`
+
+        * Check xml-id-position-first
+
+        If the record id is not in the first position
+        `<record ... id="xmlid_name1"`
+
+        It should be the first
+        `<record id="xmlid_name1" ...`
         """
         # redundant_module_name
         record_id = record.get("id")
         if not record_id:
             return
+        first_attr = record.keys()[0]
+        if first_attr != "id" and self.is_message_enabled("xml-id-position-first", manifest_data["disabled_checks"]):
+            self.register_error(
+                code="xml-id-position-first",
+                message=f'The "id" attribute must be first',
+                info=f'Use `<record id="{record_id}" {first_attr}=...` instead',
+                filepath=manifest_data["filename_short"],
+                line=record.sourceline,
+            )
+            if self.autofix:
+                old_content = self.read_line(manifest_data["filename"], record.sourceline)
+                manifest_data["needs_autofix"] = True
+                new_content = xmlid_reorder(old_content)
+                manifest_data["changes"].append(
+                    {
+                        "sourceline": record.sourceline,
+                        "old_content": old_content,
+                        "new_content": new_content,
+                    }
+                )
 
         xmlid_module, xmlid_name = record_id.split(".") if "." in record_id else ["", record_id]
-        if xmlid_module == self.module_name:
-            # TODO: Add autofix option
+        if xmlid_module == self.module_name and self.is_message_enabled(
+            "xml-redundant-module-name", manifest_data["disabled_checks"]
+        ):
             self.register_error(
                 code="xml-redundant-module-name",
                 message=f'Redundant module name `<record id="{record_id}" />`',
@@ -229,6 +311,17 @@ def visit_xml_record(self, manifest_data, record):
                 filepath=manifest_data["filename_short"],
                 line=record.sourceline,
             )
+            if self.autofix:
+                manifest_data["needs_autofix"] = True
+                old_content = self.read_line(manifest_data["filename"], record.sourceline)
+                new_content = old_content.replace(f"{xmlid_module}.", "")
+                manifest_data["changes"].append(
+                    {
+                        "sourceline": record.sourceline,
+                        "old_content": old_content,
+                        "new_content": new_content,
+                    }
+                )
 
     @utils.only_required_for_checks("xml-view-dangerous-replace-low-priority", "xml-deprecated-tree-attribute")
     def visit_xml_record_view(self, manifest_data, record):
diff --git a/tests/common.py b/tests/common.py
index 5f6344d6..d247160c 100644
--- a/tests/common.py
+++ b/tests/common.py
@@ -257,3 +257,20 @@ def test_checks_as_string(self):
         all_check_errors = self.checks_run(self.file_paths, no_exit=True, no_verbose=False)
         for check_error in all_check_errors:
             self.assertTrue(str(check_error).count(check_error.code) == 1)
+
+    def test_checks_autofix(self):
+        # TODO: Create a new tmp folder in oder to avoid generating changes in the original files
+        all_check_errors = self.checks_run(
+            self.file_paths,
+            no_exit=True,
+            no_verbose=False,
+            enable="xml-id-position-first,xml-redundant-module-name",
+            autofix=True,
+        )
+        real_errors = self.get_count_code_errors(all_check_errors)
+        expected_errors = {
+            key: val
+            for key, val in self.expected_errors.items()
+            if key in ["xml-redundant-module-name", "xml-id-position-first"]
+        }
+        assertDictEqual(self, real_errors, expected_errors)
diff --git a/tests/test_checks.py b/tests/test_checks.py
index 545ad758..1cdff504 100644
--- a/tests/test_checks.py
+++ b/tests/test_checks.py
@@ -29,6 +29,7 @@
     "xml-deprecated-tree-attribute": 3,
     "xml-duplicate-fields": 3,
     "xml-duplicate-record-id": 2,
+    "xml-id-position-first": 2,
     "xml-not-valid-char-link": 2,
     "xml-redundant-module-name": 1,
     "xml-syntax-error": 2,

From 11f4572573961abb5d7e64fa6e95a56b1dc52d7a Mon Sep 17 00:00:00 2001
From: "Moises Lopez - https://www.vauxoo.com/" <moylop260@vauxoo.com>
Date: Mon, 27 Jan 2025 11:47:11 -0600
Subject: [PATCH 2/4] checkpoint

---
 .../checks_odoo_module_xml.py                 | 383 +++++++++++++-----
 1 file changed, 288 insertions(+), 95 deletions(-)

diff --git a/src/oca_pre_commit_hooks/checks_odoo_module_xml.py b/src/oca_pre_commit_hooks/checks_odoo_module_xml.py
index 7f841095..a81ddacc 100644
--- a/src/oca_pre_commit_hooks/checks_odoo_module_xml.py
+++ b/src/oca_pre_commit_hooks/checks_odoo_module_xml.py
@@ -13,6 +13,124 @@
 DFTL_MIN_PRIORITY = 99
 DFLT_DEPRECATED_TREE_ATTRS = ["colors", "fonts", "string"]
 
+import difflib
+import re
+
+
+# def generate_regex_from_diff(original, modified):
+#     # Paso 1: Encuentra diferencias
+#     diff = list(difflib.ndiff(original.split(), modified.split()))
+
+#     # Paso 2: Identificar los tokens que cambiaron
+#     removed = [token[2:] for token in diff if token.startswith('- ')]
+#     added = [token[2:] for token in diff if token.startswith('+ ')]
+
+#     # Paso 3: Construir patrón dinámico basado en las diferencias
+#     # Asumimos que el formato básico del XML es consistente
+#     if len(removed) == len(added):
+#         # Crear el patrón regex para identificar la línea original
+#         pattern_parts = []
+#         for token in original.split():
+#             if token in removed:
+#                 # Usa grupos de captura para los valores que cambian
+#                 pattern_parts.append(r'([^"]+)')
+#             else:
+#                 pattern_parts.append(re.escape(token))
+#         pattern = r'\s+'.join(pattern_parts)
+
+#         # Crear la cadena de reemplazo dinámica
+#         replacement_parts = []
+#         group_index = 1
+#         for token in modified.split():
+#             if token in added:
+#                 # Reemplaza por el grupo capturado correspondiente
+#                 replacement_parts.append(rf'\{group_index}')
+#                 group_index += 1
+#             else:
+#                 replacement_parts.append(token)
+#         replacement = ' '.join(replacement_parts)
+
+#         return pattern, replacement
+#     else:
+#         raise ValueError("El diff no tiene cambios coincidentes en longitud.")
+
+# def generate_regex_from_diff(original, modified):
+#     # Paso 1: Encuentra diferencias
+#     diff = list(difflib.ndiff(original.split(), modified.split()))
+
+#     # Paso 2: Identificar los tokens que cambiaron
+#     removed = [token[2:] for token in diff if token.startswith('- ')]
+#     added = [token[2:] for token in diff if token.startswith('+ ')]
+
+#     # Paso 3: Construir patrón dinámico basado en las diferencias
+#     if len(removed) == len(added):
+#         # Crear el patrón regex para identificar la línea original
+#         pattern_parts = []
+#         for token in original.split():
+#             if token in removed:
+#                 # Usa grupos de captura para los valores que cambian
+#                 pattern_parts.append(r'([^">]+)')
+#             else:
+#                 pattern_parts.append(re.escape(token))
+#         # Permitir cualquier espacio o salto de línea entre tokens
+#         pattern = r'\s*'.join(pattern_parts)
+
+#         # Crear la cadena de reemplazo dinámica
+#         replacement_parts = []
+#         group_index = 1
+#         for token in modified.split():
+#             if token in added:
+#                 # Reemplaza por el grupo capturado correspondiente
+#                 replacement_parts.append(rf'\{group_index}')
+#                 group_index += 1
+#             else:
+#                 replacement_parts.append(token)
+#         replacement = ' '.join(replacement_parts)
+
+#         return pattern, replacement
+#     else:
+#         raise ValueError("El diff no tiene cambios coincidentes en longitud.")
+
+def generate_regex_from_diff(original, modified):
+    # Paso 1: Encuentra diferencias
+    diff = list(difflib.ndiff(original.split(), modified.split()))
+
+    # Paso 2: Identificar los tokens que cambiaron
+    removed = [token[2:] for token in diff if token.startswith('- ')]
+    added = [token[2:] for token in diff if token.startswith('+ ')]
+
+    # Paso 3: Construir patrón dinámico basado en las diferencias
+    if len(removed) == len(added):
+        # Crear el patrón regex para identificar la línea original
+        pattern_parts = []
+        capture_index = 1
+        capture_map = {}
+        
+        for token in original.split():
+            if token in removed:
+                # Usa grupos de captura para los valores que cambian
+                pattern_parts.append(r'([^">]+)')
+                capture_map[token] = f'\\{capture_index}'
+                capture_index += 1
+            else:
+                pattern_parts.append(re.escape(token))
+        # Permitir cualquier espacio o salto de línea entre tokens
+        pattern = r'\s*'.join(pattern_parts)
+
+        # Crear la cadena de reemplazo dinámica basada en los grupos
+        replacement_parts = []
+        for token in modified.split():
+            if token in added:
+                # Reorganizar usando los grupos capturados
+                replacement_parts.append(capture_map.get(token, token))
+            else:
+                replacement_parts.append(token)
+        replacement = ' '.join(replacement_parts)
+
+        return pattern, replacement
+    else:
+        raise ValueError("El diff no tiene cambios coincidentes en longitud.")
+
 
 # Same as Odoo: https://github.com/odoo/odoo/commit/9cefa76988ff94c3d590c6631b604755114d0297
 def _hasclass(context, *cls):
@@ -29,7 +147,9 @@ def _hasclass(context, *cls):
 
 xmlid_reorder_re = re.compile(r'(<(\w+)\s+)([^>]*?)\b(id="[^"]*")([^>]*)')
 
+
 def xmlid_reorder(old_content):
+    # TODO: Support multiline <record\naction="hola"\nid="name"
     def reorder(match):
         match = xmlid_reorder_re.search(old_content)
         start = match.group(1)  # Tag: <record or <menuitem
@@ -39,6 +159,7 @@ def reorder(match):
         after_id = match.group(5)  # Attributes after id
         # Re-order using the id first
         return f"{start}{id_attr} {before_id.strip()} {after_id.strip()}".rstrip()
+
     return xmlid_reorder_re.sub(reorder, old_content)
 
 
@@ -59,6 +180,7 @@ class ChecksOdooModuleXML(BaseChecker):
     xpath_comment = etree.XPath("//comment()")
     xpath_openerp = etree.XPath("/openerp")
     xpath_xpath = etree.XPath("//xpath")
+    xpath_menuitem = etree.XPath("/odoo//menuitem | /openerp//menuitem")
 
     tree_deprecate_attrs = {"string", "colors", "fonts"}
     xpath_tree_deprecated = etree.XPath(f'.//tree[{"|".join(f"@{a}" for a in tree_deprecate_attrs)}]')
@@ -80,9 +202,12 @@ def __init__(self, manifest_datas, module_name, enable, disable, autofix):
             try:
                 with open(manifest_data["filename"], "rb") as f_xml:
                     node = etree.parse(f_xml)
+                    f_xml.seek(0)
+                    node_original = etree.parse(f_xml)
                     manifest_data.update(
                         {
                             "node": node,
+                            "node_original": node_original,
                             "file_error": None,
                             "disabled_checks": self._get_disabled_checks(node),
                             "needs_autofix": False,
@@ -90,6 +215,7 @@ def __init__(self, manifest_datas, module_name, enable, disable, autofix):
                         }
                     )
             except (FileNotFoundError, etree.XMLSyntaxError, UnicodeDecodeError) as xml_err:
+                print(f"xml error {str(xml_err)}")
                 manifest_data.update(
                     {
                         "node": etree.Element("__empty__"),
@@ -113,14 +239,18 @@ def _get_disabled_checks(self, node):
                 print(f"{node.docinfo.URL}:{comment_node.sourceline} WARNING. DEPRECATED. Use oca-disable instead.")
         return all_checks_disabled
 
-    def read_line(self, filename, line):
+    def read_content_start_end(self, filename, num_line_start, num_line_end):
         """Return the content of the file only for the number of line
         It avoid to load the whole file in memory
         """
+        content = ""
         with open(filename, encoding="UTF-8") as f_content:
-            for current_line, content in enumerate(f_content, start=1):
-                if line == current_line:
-                    return content
+            for num_current_line, line in enumerate(f_content, start=1):
+                if num_line_start <= num_current_line <= num_line_end:
+                    content += line
+                if num_current_line > num_line_end:
+                    break
+        return content
 
     def getattr_checks(self, manifest_data, prefix):
         disable_node = manifest_data["disabled_checks"]
@@ -147,46 +277,16 @@ def _is_replaced_field(cls, view):
 
     # Not set only_required_for_checks because of the calls to visit_xml_record... methods
     def check_xml_records(self):
-        """* Check xml-record-missing-id
-        Generated when a <record> tag has no id.
-
-        * Check xml-duplicate-record-id
-
-        If a module has duplicated record_id AKA xml_ids
-        file1.xml
-            <record id="xmlid_name1"
-        file2.xml
-            <record id="xmlid_name1"
-
+        """
         * Check xml-duplicate-fields in all record nodes
             <record id="xmlid_name1"...
                 <field name="field_name1"...
                 <field name="field_name1"...
         """
-        xmlids_section: Dict[str, List[FileElementPair]] = defaultdict(list)
+        self.xmlids_section: Dict[str, List[FileElementPair]] = defaultdict(list)
         xml_fields = defaultdict(list)
         for manifest_data in self.manifest_datas:
             for record in self.xpath_record(manifest_data["node"]):
-                record_id = record.get("id")
-
-                if not record_id and self.is_message_enabled(
-                    "xml-record-missing-id", manifest_data["disabled_checks"]
-                ):
-                    self.register_error(
-                        code="xml-record-missing-id",
-                        message="Record has no id, add a unique one to create a new record, use an existing one to update it",
-                        filepath=manifest_data["filename_short"],
-                        line=record.sourceline,
-                    )
-
-                if self.is_message_enabled("xml-duplicate-record-id", manifest_data["disabled_checks"]):
-                    # xmlids_duplicated
-                    xmlid_key = (
-                        f"{manifest_data['data_section']}/{record_id}"
-                        f"_noupdate_{record.getparent().get('noupdate', '0')}"
-                    )
-                    xmlids_section[xmlid_key].append(FileElementPair(manifest_data["filename_short"], record))
-
                 # fields_duplicated
                 if self.is_message_enabled("xml-duplicate-fields", manifest_data["disabled_checks"]):
                     for field in self.xpath_record_fields_wname(record):
@@ -196,8 +296,13 @@ def check_xml_records(self):
                 for meth in self.getattr_checks(manifest_data, "visit_xml_record"):
                     meth(manifest_data, record)
 
+            for record in self.xpath_menuitem(manifest_data["node"]):
+                # call "visit_xml_menuitem_*" methods to re-use the same node xpath loop
+                for meth in self.getattr_checks(manifest_data, "visit_xml_menuitem"):
+                    meth(manifest_data, record)
+
         # xmlids_duplicated (empty dict if check is not enabled)
-        for __, records in xmlids_section.items():
+        for __, records in self.xmlids_section.items():
             if len(records) < 2:
                 continue
             self.register_error(
@@ -220,28 +325,64 @@ def check_xml_records(self):
                 extra_positions=[(field[0]["filename_short"], field[1].sourceline) for field in fields[1:]],
             )
 
+    # def perform_autofix(self):
+    #     # Using STR REGEX STYLE
+    #     for manifest_data in self.manifest_datas:
+    #         if not manifest_data["needs_autofix"]:
+    #             continue
+    #         print(f"File changed {manifest_data['filename']}")
+    #         with (
+    #             tempfile.NamedTemporaryFile("w", delete=False, encoding="UTF-8") as xml_file_tmp,
+    #             open(manifest_data["filename"], encoding="UTF-8") as xml_file,
+    #         ):
+    #             for no_line, line in enumerate(xml_file, start=1):
+    #                 for change in manifest_data["changes"]:
+    #                     if change["sourceline"] != no_line:
+    #                         continue
+    #                     if line != change["old_content"]:
+    #                         # TODO: Improve the script to autofix more than one change in the same line
+    #                         print(
+    #                             "This file had more than one change over the same line, please, re-run the script again"
+    #                         )
+    #                         break
+    #                     line = change["new_content"]
+    #                 xml_file_tmp.write(line)
+    #         shutil.move(xml_file_tmp.name, manifest_data["filename"])
+
     def perform_autofix(self):
+        # Using LXML style
+        # The problem is that it is removing all the newlines for multi-attributes multi-line cases
         for manifest_data in self.manifest_datas:
             if not manifest_data["needs_autofix"]:
                 continue
             print(f"File changed {manifest_data['filename']}")
-            with (
-                tempfile.NamedTemporaryFile("w", delete=False, encoding="UTF-8") as xml_file_tmp,
-                open(manifest_data["filename"], encoding="UTF-8") as xml_file,
-            ):
-                for no_line, line in enumerate(xml_file, start=1):
-                    for change in manifest_data["changes"]:
-                        if change["sourceline"] != no_line:
-                            continue
-                        if line != change["old_content"]:
-                            # TODO: Improve the script to autofix more than one change in the same line
-                            print(
-                                "This file had more than one change over the same line, please, re-run the script again"
-                            )
-                            break
-                        line = change["new_content"]
-                    xml_file_tmp.write(line)
-            shutil.move(xml_file_tmp.name, manifest_data["filename"])
+
+            from difflib import unified_diff
+            # node_str_modified = etree.tostring(manifest_data["node"]).decode("UTF-8")
+            # node_str_original = etree.tostring(manifest_data["node_original"]).decode("UTF-8")
+            # diff = unified_diff(
+            #     node_str_original.splitlines(),  # Divide en líneas el nodo original
+            #     node_str_modified.splitlines(),  # Divide en líneas el nodo modificado
+            #     lineterm="",  # Para no agregar '\n' extra
+            #     fromfile="original",  # Etiqueta para el diff
+            #     tofile="modified",    # Etiqueta para el diff
+            # )
+            # print("\n".join(diff))
+            for node_original, node_modified in zip(manifest_data["node_original"].iter(), manifest_data["node"].iter()):
+                if node_modified.tag not in ("menuitem", "record"):
+                    continue
+                diff = unified_diff(
+                    etree.tostring(node_original).decode("UTF-8").strip(" \n").splitlines(),
+                    etree.tostring(node_modified).decode("UTF-8").strip(" \n").splitlines(),
+                    lineterm="",
+                    fromfile="original",
+                    tofile="modified",
+                )
+                if "view_sbd_line_out_pivot" in node_modified.attrib.get("id"):
+                    import ipdb;ipdb.set_trace()
+                print("\n".join(diff))
+                pattern, replacement = generate_regex_from_diff(etree.tostring(node_original).decode("UTF-8"), etree.tostring(node_modified).decode("UTF-8"))
+
 
     @utils.only_required_for_checks("xml-syntax-error")
     def check_xml_syntax_error(self):
@@ -257,9 +398,29 @@ def check_xml_syntax_error(self):
                 line=1,
             )
 
-    @utils.only_required_for_checks("xml-redundant-module-name", "xml-id-position-first")
+    @utils.only_required_for_checks(
+        "xml-redundant-module-name", "xml-id-position-first", "xml-record-missing-id", "xml-duplicate-record-id"
+    )
+    def visit_xml_menuitem(self, manifest_data, record):
+        # Currently record has the same checks, maybe in the future it will not be like this, but for now we can re-use the same method
+        self.visit_xml_record(manifest_data, record)
+
+    @utils.only_required_for_checks(
+        "xml-redundant-module-name", "xml-id-position-first", "xml-record-missing-id", "xml-duplicate-record-id"
+    )
     def visit_xml_record(self, manifest_data, record):
-        """* Check xml-redundant-module-name
+        """* Check xml-record-missing-id
+        Generated when a <record> tag has no id.
+
+        * Check xml-duplicate-record-id
+
+        If a module has duplicated record_id AKA xml_ids
+        file1.xml
+            <record id="xmlid_name1"
+        file2.xml
+            <record id="xmlid_name1"
+
+        * Check xml-redundant-module-name
 
         If the module is called "module_a" and the xmlid is
         `<record id="module_a.xmlid_name1" ...`
@@ -275,53 +436,85 @@ def visit_xml_record(self, manifest_data, record):
         It should be the first
         `<record id="xmlid_name1" ...`
         """
-        # redundant_module_name
         record_id = record.get("id")
-        if not record_id:
-            return
-        first_attr = record.keys()[0]
-        if first_attr != "id" and self.is_message_enabled("xml-id-position-first", manifest_data["disabled_checks"]):
+
+        if not record_id and self.is_message_enabled("xml-record-missing-id", manifest_data["disabled_checks"]):
             self.register_error(
-                code="xml-id-position-first",
-                message=f'The "id" attribute must be first',
-                info=f'Use `<record id="{record_id}" {first_attr}=...` instead',
+                code="xml-record-missing-id",
+                message="Record has no id, add a unique one to create a new record, use an existing one to update it",
                 filepath=manifest_data["filename_short"],
                 line=record.sourceline,
             )
-            if self.autofix:
-                old_content = self.read_line(manifest_data["filename"], record.sourceline)
-                manifest_data["needs_autofix"] = True
-                new_content = xmlid_reorder(old_content)
-                manifest_data["changes"].append(
-                    {
-                        "sourceline": record.sourceline,
-                        "old_content": old_content,
-                        "new_content": new_content,
-                    }
-                )
 
-        xmlid_module, xmlid_name = record_id.split(".") if "." in record_id else ["", record_id]
-        if xmlid_module == self.module_name and self.is_message_enabled(
-            "xml-redundant-module-name", manifest_data["disabled_checks"]
-        ):
-            self.register_error(
-                code="xml-redundant-module-name",
-                message=f'Redundant module name `<record id="{record_id}" />`',
-                info=f'Use `<record id="{xmlid_name}" />` instead',
-                filepath=manifest_data["filename_short"],
-                line=record.sourceline,
+        if self.is_message_enabled("xml-duplicate-record-id", manifest_data["disabled_checks"]):
+            # xmlids_duplicated
+            xmlid_key = (
+                f"{manifest_data['data_section']}/{record_id}" f"_noupdate_{record.getparent().get('noupdate', '0')}"
             )
-            if self.autofix:
-                manifest_data["needs_autofix"] = True
-                old_content = self.read_line(manifest_data["filename"], record.sourceline)
-                new_content = old_content.replace(f"{xmlid_module}.", "")
-                manifest_data["changes"].append(
-                    {
-                        "sourceline": record.sourceline,
-                        "old_content": old_content,
-                        "new_content": new_content,
-                    }
+            self.xmlids_section[xmlid_key].append(FileElementPair(manifest_data["filename_short"], record))
+        if record_id:
+            first_attr = record.keys()[0]
+            if first_attr != "id" and self.is_message_enabled(
+                "xml-id-position-first", manifest_data["disabled_checks"]
+            ):
+                self.register_error(
+                    code="xml-id-position-first",
+                    message=f'The "id" attribute must be first',
+                    info=f'Use `<{record.tag} id="{record_id}" {first_attr}=...` instead',
+                    filepath=manifest_data["filename_short"],
+                    line=record.sourceline,
+                )
+                if self.autofix:
+                    manifest_data["needs_autofix"] = True
+                    # if manifest_data["filename"].endswith("sbd_endorsement/wizards/change_credit_balance_views.xml"):
+                    #     import ipdb;ipdb.set_trace()
+
+                    # LXML STYLE
+                    record.attrib.pop("id")
+                    attributes = dict(record.attrib)
+                    record.attrib.clear()
+                    record.attrib.update({"id": record_id, **attributes})
+
+                    # STR REGEX STYLE
+                    # old_content = self.read_content_start_end(
+                    #     manifest_data["filename"], record.getprevious().sourceline + 1, record.sourceline
+                    # )
+                    # manifest_data["needs_autofix"] = True
+                    # new_content = xmlid_reorder(old_content)
+                    # manifest_data["changes"].append(
+                    #     {
+                    #         "sourceline": record.sourceline,
+                    #         "old_content": old_content,
+                    #         "new_content": new_content,
+                    #     }
+                    # )
+
+            xmlid_module, xmlid_name = record_id.split(".") if "." in record_id else ["", record_id]
+            if xmlid_module == self.module_name and self.is_message_enabled(
+                "xml-redundant-module-name", manifest_data["disabled_checks"]
+            ):
+                self.register_error(
+                    code="xml-redundant-module-name",
+                    message=f'Redundant module name `<{record.tag} id="{record_id}"`',
+                    info=f'Use `<{record.tag} id="{xmlid_name}"` instead',
+                    filepath=manifest_data["filename_short"],
+                    line=record.sourceline,
                 )
+                if self.autofix:
+                    manifest_data["needs_autofix"] = True
+                    # LXML STYLE
+                    record.attrib["id"] = record.attrib["id"].replace(f"{xmlid_module}.", "")
+
+                    # STR REGEX STYLE
+                    # old_content = self.read_line(manifest_data["filename"], record.sourceline)
+                    # new_content = old_content.replace(f"{xmlid_module}.", "")
+                    # manifest_data["changes"].append(
+                    #     {
+                    #         "sourceline": record.sourceline,
+                    #         "old_content": old_content,
+                    #         "new_content": new_content,
+                    #     }
+                    # )
 
     @utils.only_required_for_checks("xml-view-dangerous-replace-low-priority", "xml-deprecated-tree-attribute")
     def visit_xml_record_view(self, manifest_data, record):

From dcb736c7c49319407e72466b84755d8f0a4cc72d Mon Sep 17 00:00:00 2001
From: "Moises Lopez - https://www.vauxoo.com/" <moylop260@vauxoo.com>
Date: Mon, 27 Jan 2025 16:42:18 -0600
Subject: [PATCH 3/4] checkpoint-2

---
 src/oca_pre_commit_hooks/checks_odoo_module_xml.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/oca_pre_commit_hooks/checks_odoo_module_xml.py b/src/oca_pre_commit_hooks/checks_odoo_module_xml.py
index a81ddacc..c4419c6b 100644
--- a/src/oca_pre_commit_hooks/checks_odoo_module_xml.py
+++ b/src/oca_pre_commit_hooks/checks_odoo_module_xml.py
@@ -368,6 +368,8 @@ def perform_autofix(self):
             #     tofile="modified",    # Etiqueta para el diff
             # )
             # print("\n".join(diff))
+            with open(manifest_data["filename"], encoding="UTF-8") as xml_file:
+                xml_content = xml_file.read()
             for node_original, node_modified in zip(manifest_data["node_original"].iter(), manifest_data["node"].iter()):
                 if node_modified.tag not in ("menuitem", "record"):
                     continue
@@ -378,11 +380,19 @@ def perform_autofix(self):
                     fromfile="original",
                     tofile="modified",
                 )
+                diff_str = "\n".join(diff)
+                if not diff_str:
+                    continue
+                print(diff_str)
+                if "foreclosed_assets/views/foreclosed_asset_appraisal_views.xml" in manifest_data["filename"]:
+                    import ipdb;ipdb.set_trace()
                 if "view_sbd_line_out_pivot" in node_modified.attrib.get("id"):
                     import ipdb;ipdb.set_trace()
-                print("\n".join(diff))
                 pattern, replacement = generate_regex_from_diff(etree.tostring(node_original).decode("UTF-8"), etree.tostring(node_modified).decode("UTF-8"))
-
+                xml_content = re.sub(pattern, replacement, xml_content)
+            with tempfile.NamedTemporaryFile("w", delete=False, encoding="UTF-8") as xml_file_tmp:
+                xml_file_tmp.write(xml_content)
+            shutil.move(xml_file_tmp.name, manifest_data["filename"])
 
     @utils.only_required_for_checks("xml-syntax-error")
     def check_xml_syntax_error(self):

From 03cb316ddcee53df8622ca0eb097cd57ca290767 Mon Sep 17 00:00:00 2001
From: "Moises Lopez - https://www.vauxoo.com/" <moylop260@vauxoo.com>
Date: Tue, 28 Jan 2025 13:26:07 -0600
Subject: [PATCH 4/4] checkpoint

---
 .../checks_odoo_module_xml.py                 | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/oca_pre_commit_hooks/checks_odoo_module_xml.py b/src/oca_pre_commit_hooks/checks_odoo_module_xml.py
index c4419c6b..90cb446e 100644
--- a/src/oca_pre_commit_hooks/checks_odoo_module_xml.py
+++ b/src/oca_pre_commit_hooks/checks_odoo_module_xml.py
@@ -16,7 +16,6 @@
 import difflib
 import re
 
-
 # def generate_regex_from_diff(original, modified):
 #     # Paso 1: Encuentra diferencias
 #     diff = list(difflib.ndiff(original.split(), modified.split()))
@@ -91,13 +90,14 @@
 #     else:
 #         raise ValueError("El diff no tiene cambios coincidentes en longitud.")
 
+
 def generate_regex_from_diff(original, modified):
     # Paso 1: Encuentra diferencias
     diff = list(difflib.ndiff(original.split(), modified.split()))
 
     # Paso 2: Identificar los tokens que cambiaron
-    removed = [token[2:] for token in diff if token.startswith('- ')]
-    added = [token[2:] for token in diff if token.startswith('+ ')]
+    removed = [token[2:] for token in diff if token.startswith("- ")]
+    added = [token[2:] for token in diff if token.startswith("+ ")]
 
     # Paso 3: Construir patrón dinámico basado en las diferencias
     if len(removed) == len(added):
@@ -105,17 +105,17 @@ def generate_regex_from_diff(original, modified):
         pattern_parts = []
         capture_index = 1
         capture_map = {}
-        
+
         for token in original.split():
             if token in removed:
                 # Usa grupos de captura para los valores que cambian
                 pattern_parts.append(r'([^">]+)')
-                capture_map[token] = f'\\{capture_index}'
+                capture_map[token] = f"\\{capture_index}"
                 capture_index += 1
             else:
                 pattern_parts.append(re.escape(token))
         # Permitir cualquier espacio o salto de línea entre tokens
-        pattern = r'\s*'.join(pattern_parts)
+        pattern = r"\s*".join(pattern_parts)
 
         # Crear la cadena de reemplazo dinámica basada en los grupos
         replacement_parts = []
@@ -125,7 +125,7 @@ def generate_regex_from_diff(original, modified):
                 replacement_parts.append(capture_map.get(token, token))
             else:
                 replacement_parts.append(token)
-        replacement = ' '.join(replacement_parts)
+        replacement = " ".join(replacement_parts)
 
         return pattern, replacement
     else:
@@ -145,7 +145,7 @@ def _hasclass(context, *cls):
 FileElementPair = namedtuple("FileElementPair", ["filename", "element"])
 
 
-xmlid_reorder_re = re.compile(r'(<(\w+)\s+)([^>]*?)\b(id="[^"]*")([^>]*)')
+xmlid_reorder_re = re.compile(r'(<(\w+)\s+)([^>]*?)\b(id="[^"]*")([^>]*)', flags=re.DOTALL)
 
 
 def xmlid_reorder(old_content):
@@ -158,7 +158,7 @@ def reorder(match):
         id_attr = match.group(4)  # id attribute
         after_id = match.group(5)  # Attributes after id
         # Re-order using the id first
-        return f"{start}{id_attr} {before_id.strip()} {after_id.strip()}".rstrip()
+        return f"{start}{id_attr} {before_id} {after_id}".rstrip()
 
     return xmlid_reorder_re.sub(reorder, old_content)
 
@@ -351,13 +351,14 @@ def check_xml_records(self):
 
     def perform_autofix(self):
         # Using LXML style
-        # The problem is that it is removing all the newlines for multi-attributes multi-line cases
+        # The problem is that it is removing all the newlines for multi-attributes multi-line cases
         for manifest_data in self.manifest_datas:
             if not manifest_data["needs_autofix"]:
                 continue
             print(f"File changed {manifest_data['filename']}")
 
             from difflib import unified_diff
+
             # node_str_modified = etree.tostring(manifest_data["node"]).decode("UTF-8")
             # node_str_original = etree.tostring(manifest_data["node_original"]).decode("UTF-8")
             # diff = unified_diff(
@@ -370,7 +371,9 @@ def perform_autofix(self):
             # print("\n".join(diff))
             with open(manifest_data["filename"], encoding="UTF-8") as xml_file:
                 xml_content = xml_file.read()
-            for node_original, node_modified in zip(manifest_data["node_original"].iter(), manifest_data["node"].iter()):
+            for node_original, node_modified in zip(
+                manifest_data["node_original"].iter(), manifest_data["node"].iter()
+            ):
                 if node_modified.tag not in ("menuitem", "record"):
                     continue
                 diff = unified_diff(
@@ -384,11 +387,10 @@ def perform_autofix(self):
                 if not diff_str:
                     continue
                 print(diff_str)
-                if "foreclosed_assets/views/foreclosed_asset_appraisal_views.xml" in manifest_data["filename"]:
-                    import ipdb;ipdb.set_trace()
-                if "view_sbd_line_out_pivot" in node_modified.attrib.get("id"):
-                    import ipdb;ipdb.set_trace()
-                pattern, replacement = generate_regex_from_diff(etree.tostring(node_original).decode("UTF-8"), etree.tostring(node_modified).decode("UTF-8"))
+
+                pattern, replacement = generate_regex_from_diff(
+                    etree.tostring(node_original).decode("UTF-8"), etree.tostring(node_modified).decode("UTF-8")
+                )
                 xml_content = re.sub(pattern, replacement, xml_content)
             with tempfile.NamedTemporaryFile("w", delete=False, encoding="UTF-8") as xml_file_tmp:
                 xml_file_tmp.write(xml_content)
@@ -476,8 +478,6 @@ def visit_xml_record(self, manifest_data, record):
                 )
                 if self.autofix:
                     manifest_data["needs_autofix"] = True
-                    # if manifest_data["filename"].endswith("sbd_endorsement/wizards/change_credit_balance_views.xml"):
-                    #     import ipdb;ipdb.set_trace()
 
                     # LXML STYLE
                     record.attrib.pop("id")