diff --git a/CommentsCleaner.py b/CommentsCleaner.py new file mode 100644 index 0000000..4856d09 --- /dev/null +++ b/CommentsCleaner.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from __future__ import (unicode_literals, division, absolute_import, + print_function) + + +__copyright__ = '2020, un_pogaz <>' +__docformat__ = 'restructuredtext en' + +import sys, os, re +import calibre_plugins.comments_cleaner.config as cfg + +reFlag = re.MULTILINE + re.DOTALL; + +try: + reFlag = re.ASCII + re.MULTILINE + re.DOTALL; +except : + reFlag = re.MULTILINE + re.DOTALL; + pass; # calibre 5 // re.ASCII for Python3 only + + +def RegexSimple(pattern, repl, string): + return re.sub(pattern, repl, string, 0, reFlag); + +def RegexSearch(pattern, string): + return re.search(pattern, string, reFlag); + +def RegexLoop(pattern, repl, string): + + while RegexSearch(pattern, string): + string = RegexSimple(pattern, repl, string); + + return string; + + +def CleanBasic(text): + + text = RegexLoop(r'( | )', '\u202F', text); + text = RegexLoop(r'( | )', '\u00A0', text); + + # line + text = text.replace('\r\n', '\n').replace('\r', '\n'); + text = RegexLoop(r'( |\t|\n\n)+\n', '\n', text); + + text = RegexLoop(r'\s<(p|div|h\d|li|ul|ol|blockquote)', r'<\1', text); + text = RegexLoop(r'><(p|div|h\d|li|ul|ol|blockquote)', r'>\n<\1', text); + + # entity + text = RegexLoop("&", "&", text); + text = RegexLoop("<", "<", text); + text = RegexLoop(">", ">", text); + + text = RegexLoop("( | )", r'\u00A0', text); + + text = RegexLoop("(—|—)", "—", text); + text = RegexLoop("(–|–)", "–", text); + text = RegexLoop("(«|«)", "«", text); + text = RegexLoop("(»|»)", "»", text); + text = RegexLoop("(…|…)", "…", text); + text = RegexLoop("(’|’)", "’", text); + + # inline vide + innerSpace = r"<(i|b|em|strong)[^>]*>\s+"; + innerEmpty = r"<(i|b|em|strong)[^>]*>"; + outerSpace = r"\s+<\1.*?>"; + outerEmpty = r"<\1.*?>"; + + while (RegexSearch(innerSpace, text) or + RegexSearch(innerEmpty, text) or + RegexSearch(outerSpace, text) or + RegexSearch(outerEmpty, text)): + + text = RegexLoop(innerSpace, r' ', text); + text = RegexLoop(innerEmpty, r'', text); + + text = RegexLoop(outerSpace, r' ', text); + text = RegexLoop(outerEmpty, r'', text); + + # double espace et tab dans paragraphe + text = RegexLoop(r'(<(p|h\d).*?>.*?)(\t| {2,})', r'\1 ', text); + # tab pour l'indentation + text = RegexLoop(r'^( *)\t(\s*<)', r'\1 \2', text); + + + # attribut style + text = RegexLoop(r'style="([^"]*);\s+;([^"]*)"', r'style="\1;\2"', text); + text = RegexLoop(r'style="([^"]*)(;|:)\s{2,}([^"]*)"', r'style="\1\2 \3"', text); + text = RegexLoop(r'style="([^"]*)\s+(;|:)([^"]*)"', r'style="\1\2\3"', text); + + text = RegexLoop(r'style="([^"]*);\s*"', r'style="\1"', text); + text = RegexLoop(r'style="\s*"', r'', text); + + #strip span + text = RegexLoop(r'(.*?)', r'\1', text); + + # remplace les triple point invalide + text = RegexSimple(r'\.\s*\.\s*\.', r'…', text); + + # xml format + text = RegexLoop(r'<([^<>]+)\s{2,}([^<>]+)>', r'<\1 \2>', text); + text = RegexLoop(r'\s+(|/|\?)\s*>', r'\1>', text); + text = RegexLoop(r'<\s*(|/|!|\?)\s+', r'<\1', text); + + return text; + + +def CleanHTML(library_config, text): + text = CleanBasic(text); + + if library_config[cfg.KEY_KEEP_URL] == 'none': + text = RegexLoop(r'(.*?)', r'\1', text); + + + # uniformise les attribut style + text = RegexLoop(r'style="([^"]*[^";])"', r'style="\1;"', text); + + text = RegexLoop(r'(]*>||]*>||]*>|)', r'', text); + text = RegexLoop(r'<(img|meta|link)[^>]*>', r'', text); + + text = RegexLoop(r'(id|class)=".*?"', r'', text); + text = RegexLoop(r'<(div|p|li|h1|h2|h3|h4|h5|h6)[^>]*>\s+', r'', text); + text = RegexLoop(r'<(b|h)r[^>]+>', r'<\1r>', text); + text = RegexLoop(r'<(b|h)r>\s+', r'<\1r>', text); + text = RegexLoop(r'\s+<(b|h)r>', r'<\1r>', text); + + text = RegexLoop(r'<(div|p|li|h1|h2|h3|h4|h5|h6)(([^>]*))>
', "<\1\2>\u00A0", text); + text = RegexLoop(r'
', r'', text); + + atr_tbl = [ + r'(background-color)', + r'(color)', + r'(text-indent|letter-spacing|white-space|word-spacing|word-wrap|overflow)', + r'(margin|padding|border|box-sizing|outline|orphans|widows|float|display|visibility|text-rendering)', + r'(page-break|clear|cursor|text-autospace|transition|tab-stops|zoom)', + r'(background|opacity|text-shadow|list-style-position)', + r'(position|top|bottom|left|right)', + r'(max-|z-|)(width|height|index)', + r'-{0,2}(mso-|moz-|webkit-|qt-)', + r'(font-family|font-variant|font-stretch|font-size|line-height)' + ]; + + for atr in atr_tbl: + text = RegexLoop(r'style="([^"]*)'+ atr +'[^:]*:[^;]*;([^"]*)"', r'style="\1\3"', text); + + # font-weight + text = RegexLoop(r'style="([^"]*)font-weight\s*:\s*(normal|inherit|initial)\s*;([^"]*)"', r'style="\1\3"', text); + text = RegexLoop(r'style="([^"]*)font-weight\s*:\s*(?P\d)[1-9]\d(?:\.\d+)?\s*;([^"]*)"', r'style="\1 font-weight: \g00;\3"', text); + text = RegexLoop(r'style="([^"]*)font-weight\s*:\s*(bold)\s*;([^"]*)"', r'style="\1font-weight: 600\3"', text); + text = RegexLoop(r'style="([^"]*)font-weight\s*:\s*(\d){4,}(?:\.\d+)?\s*;([^"]*)"', r'style="\1font-weight: 900;\3"', text); + text = RegexLoop(r'style="([^"]*)font-weight\s*:\s*(\d){1,2}(?:\.\d+)?\s*;([^"]*)"', r'style="\1font-weight: 100;\3"', text); + + # font-style + text = RegexLoop(r'style="([^"]*)font-style\s*:\s*(normal|inherit|initial)\s*;([^"]*)"', r'style="\1\3"', text); + text = RegexLoop(r'style="([^"]*)font-style\s*:\s*(oblique(?:\s+\d+deg))\s*;([^"]*)"', r'style="\1font-style: italic;\3"', text); + + + # align + text = RegexLoop(r'<(p|div)([^=]*=[^>]*)\s*align="([^"]*)"', r'<\1 align="\3"\2', text); + + # align / empty|all + if ((library_config[cfg.KEY_FORCE_JUSTIFY] == 'empty') or + (library_config[cfg.KEY_FORCE_JUSTIFY] == 'all')): + # align for all + text = text.replace(']*align="[^"]*")', r'<\1\2', text); + text = RegexLoop(r'\s*\n]*)style="([^"]*)text-align\s*:\s*(center|right)\s*;([^"]*)"', r'align="\3"\1style="\2\4"', text); + if (library_config[cfg.KEY_FORCE_JUSTIFY] == 'all'): + text = RegexLoop(r'align="(left|center|right)"', r'align="justify"', text); + + # del text-align + text = RegexLoop(r'style="([^"]*)text-align\s*:\s*([^;]*)\s*;([^"]*)"', r'style="\1\3"', text); + # del align for
  • + text = RegexLoop(r'<(ol|ul|li)([^>]*)align="[^"]*"', r'<\1\2', text); + + # align / none + if ((library_config[cfg.KEY_FORCE_JUSTIFY] == 'none')): + # align left + text = text.replace(']*align="[^"]*")', r'<\1\2', text); + + # align center or right or justify + text = RegexLoop(r'align="[^"]*"([^>]*)style="([^"]*)text-align\s*:\s*(center|right|justify)\s*;([^"]*)"', r'align="\3"\1style="\2\4"', text); + # del text-align + text = RegexLoop(r'style="([^"]*)text-align\s*:\s*([^;]*)\s*;([^"]*)"', r'style="\1\3"', text); + # del align for
  • + text = RegexLoop(r'<(ol|ul|li)([^>]*)align="[^"]*"', r'<\1\2', text); + + text = RegexLoop(r'align="left"', r'', text); + text = RegexLoop(r'\s*\n(.*?)', r'\1', text); + text = RegexLoop(r'style="\s+([^"]*)"', r'style="\1"', text); + text = RegexLoop(r'style="([^"]*)\s+"', r'style="\1"', text); + + # + + text = CleanBasic(text) + return text; + + +def main(): + print("I reached main when I should not have\n"); + return -1; + +if __name__ == "__main__": + sys.exit(main()) diff --git a/README.md b/README.md index 9f86615..7a14350 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,3 @@ -# Comments-Cleaner +# Comments Cleaner ## Highly experimental Calibre plugin for remove the scraps CSS in HTML comments diff --git a/common_utils.py b/common_utils.py new file mode 100644 index 0000000..edcd677 --- /dev/null +++ b/common_utils.py @@ -0,0 +1,310 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from __future__ import (unicode_literals, division, absolute_import, + print_function) +import six + +__license__ = 'GPL v3' +__copyright__ = '2011, Grant Drake ' +__docformat__ = 'restructuredtext en' + +import os, time + +try: + from PyQt5 import QtWidgets as QtGui + from PyQt5.Qt import (Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout, + QTableWidgetItem, QFont, QLineEdit, QComboBox, + QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime, + QRegExpValidator, QRegExp, QTextEdit, + QListWidget, QAbstractItemView) +except ImportError: + from PyQt4 import QtGui + from PyQt4.Qt import (Qt, QIcon, QPixmap, QLabel, QDialog, QHBoxLayout, + QTableWidgetItem, QFont, QLineEdit, QComboBox, + QVBoxLayout, QDialogButtonBox, QStyledItemDelegate, QDateTime, + QRegExpValidator, QRegExp, QTextEdit, + QListWidget, QAbstractItemView) + +from calibre.constants import iswindows, DEBUG +from calibre.gui2 import gprefs, error_dialog, UNDEFINED_QDATETIME, info_dialog +from calibre.gui2.actions import menu_action_unique_name +from calibre.gui2.complete2 import EditWithComplete +from calibre.gui2.keyboard import ShortcutConfig +from calibre.gui2.widgets import EnLineEdit +from calibre.utils.config import config_dir, tweaks +from calibre.utils.date import now, format_date, qt_to_dt, UNDEFINED_DATE +from calibre.utils.icu import sort_key +from calibre import prints + +try: + from calibre.gui2 import QVariant + del QVariant +except ImportError: + is_qt4 = False + convert_qvariant = lambda x: x +else: + is_qt4 = True + + def convert_qvariant(x): + vt = x.type() + if vt == x.String: + return six.text_type(x.toString()) + if vt == x.List: + return [convert_qvariant(i) for i in x.toList()] + return x.toPyObject() + +# Global definition of our plugin name. Used for common functions that require this. +plugin_name = None +# Global definition of our plugin resources. Used to share between the xxxAction and xxxBase +# classes if you need any zip images to be displayed on the configuration dialog. +plugin_icon_resources = {} + +BASE_TIME = None +def debug_print(*args): + global BASE_TIME + if BASE_TIME is None: + BASE_TIME = time.time() + if DEBUG: + prints('DEBUG CommentsCleaner: %6.1f'%(time.time()-BASE_TIME), *args) + +def set_plugin_icon_resources(name, resources): + ''' + Set our global store of plugin name and icon resources for sharing between + the InterfaceAction class which reads them and the ConfigWidget + if needed for use on the customization dialog for this plugin. + ''' + global plugin_icon_resources, plugin_name + plugin_name = name + plugin_icon_resources = resources + + +def get_icon(icon_name): + ''' + Retrieve a QIcon for the named image from the zip file if it exists, + or if not then from Calibre's image cache. + ''' + if icon_name: + pixmap = get_pixmap(icon_name) + if pixmap is None: + # Look in Calibre's cache for the icon + return QIcon(I(icon_name)) + else: + return QIcon(pixmap) + return QIcon() + + +def get_pixmap(icon_name): + ''' + Retrieve a QPixmap for the named image + Any icons belonging to the plugin must be prefixed with 'images/' + ''' + global plugin_icon_resources, plugin_name + + if not icon_name.startswith('images/'): + # We know this is definitely not an icon belonging to this plugin + pixmap = QPixmap() + pixmap.load(I(icon_name)) + return pixmap + + # Check to see whether the icon exists as a Calibre resource + # This will enable skinning if the user stores icons within a folder like: + # ...\AppData\Roaming\calibre\resources\images\Plugin Name\ + if plugin_name: + local_images_dir = get_local_images_dir(plugin_name) + local_image_path = os.path.join(local_images_dir, icon_name.replace('images/', '')) + if os.path.exists(local_image_path): + pixmap = QPixmap() + pixmap.load(local_image_path) + return pixmap + + # As we did not find an icon elsewhere, look within our zip resources + if icon_name in plugin_icon_resources: + pixmap = QPixmap() + pixmap.loadFromData(plugin_icon_resources[icon_name]) + return pixmap + return None + + +def get_local_images_dir(subfolder=None): + ''' + Returns a path to the user's local resources/images folder + If a subfolder name parameter is specified, appends this to the path + ''' + images_dir = os.path.join(config_dir, 'resources/images') + if subfolder: + images_dir = os.path.join(images_dir, subfolder) + if iswindows: + images_dir = os.path.normpath(images_dir) + return images_dir + +def get_library_uuid(db): + try: + library_uuid = db.library_id + except: + library_uuid = '' + return library_uuid + +def create_menu_action_unique(ia, parent_menu, menu_text, image=None, tooltip=None, + shortcut=None, triggered=None, is_checked=None, shortcut_name=None, + unique_name=None, favourites_menu_unique_name=None,submenu=None,enabled=True): + ''' + Create a menu action with the specified criteria and action, using the new + InterfaceAction.create_menu_action() function which ensures that regardless of + whether a shortcut is specified it will appear in Preferences->Keyboard + ''' + orig_shortcut = shortcut + kb = ia.gui.keyboard + if unique_name is None: + unique_name = menu_text + if not shortcut == False: + full_unique_name = menu_action_unique_name(ia, unique_name) + if full_unique_name in kb.shortcuts: + shortcut = False + else: + if shortcut is not None and not shortcut == False: + if len(shortcut) == 0: + shortcut = None + else: + shortcut = _(shortcut) + + if shortcut_name is None: + shortcut_name = menu_text.replace('&','') + + ac = ia.create_menu_action(parent_menu, unique_name, menu_text, icon=None, shortcut=shortcut, + description=tooltip, triggered=triggered, shortcut_name=shortcut_name) + if shortcut == False and not orig_shortcut == False: + if ac.calibre_shortcut_unique_name in ia.gui.keyboard.shortcuts: + kb.replace_action(ac.calibre_shortcut_unique_name, ac) + if image: + ac.setIcon(get_icon(image)) + if is_checked is not None: + ac.setCheckable(True) + if is_checked: + ac.setChecked(True) + + if submenu: + ac.setMenu (submenu) + + if (not enabled): + ac.setEnabled (False) + else: + ac.setEnabled (True) + return ac + +class ImageTitleLayout(QHBoxLayout): + ''' + A reusable layout widget displaying an image followed by a title + ''' + def __init__(self, parent, icon_name, title): + QHBoxLayout.__init__(self) + self.title_image_label = QLabel(parent) + self.update_title_icon(icon_name) + self.addWidget(self.title_image_label) + + title_font = QFont() + title_font.setPointSize(16) + shelf_label = QLabel(title, parent) + shelf_label.setFont(title_font) + self.addWidget(shelf_label) + self.insertStretch(-1) + + def update_title_icon(self, icon_name): + debug_print ("Icon: ", icon_name) + pixmap = get_pixmap(icon_name) + if pixmap is None: + error_dialog(self.parent(), _('Restart required'), + _('Title image not found - you must restart Calibre before using this plugin!'), show=True) + else: + self.title_image_label.setPixmap(pixmap) + self.title_image_label.setMaximumSize(32, 32) + self.title_image_label.setScaledContents(True) + +class SizePersistedDialog(QDialog): + ''' + This dialog is a base class for any dialogs that want their size/position + restored when they are next opened. + ''' + def __init__(self, parent, unique_pref_name): + QDialog.__init__(self, parent) + self.unique_pref_name = unique_pref_name + self.geom = gprefs.get(unique_pref_name, None) + self.finished.connect(self.dialog_closing) + + def resize_dialog(self): + if self.geom is None: + self.resize(self.sizeHint()) + else: + self.restoreGeometry(self.geom) + + def dialog_closing(self, result): + geom = bytearray(self.saveGeometry()) + gprefs[self.unique_pref_name] = geom + self.persist_custom_prefs() + + def persist_custom_prefs(self): + ''' + Invoked when the dialog is closing. Override this function to call + save_custom_pref() if you have a setting you want persisted that you can + retrieve in your __init__() using load_custom_pref() when next opened + ''' + pass + + def load_custom_pref(self, name, default=None): + return gprefs.get(self.unique_pref_name+':'+name, default) + + def save_custom_pref(self, name, value): + gprefs[self.unique_pref_name+':'+name] = value + +class KeyValueComboBox(QComboBox): + + def __init__(self, parent, values, selected_key): + QComboBox.__init__(self, parent) + self.values = values + self.populate_combo(selected_key) + + def populate_combo(self, selected_key): + self.clear() + selected_idx = idx = -1 + for key, value in six.iteritems(self.values): + idx = idx + 1 + self.addItem(value) + if key == selected_key: + selected_idx = idx + self.setCurrentIndex(selected_idx) + + def selected_key(self): + for key, value in six.iteritems(self.values): + if value == six.text_type(self.currentText()).strip(): + return key + +class KeyboardConfigDialog(SizePersistedDialog): + ''' + This dialog is used to allow editing of keyboard shortcuts. + ''' + def __init__(self, gui, group_name): + SizePersistedDialog.__init__(self, gui, _('Keyboard shortcut dialog')) + self.gui = gui + self.setWindowTitle(_('Keyboard shortcuts')) + layout = QVBoxLayout(self) + self.setLayout(layout) + + self.keyboard_widget = ShortcutConfig(self) + layout.addWidget(self.keyboard_widget) + self.group_name = group_name + + button_box = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel) + button_box.accepted.connect(self.commit) + button_box.rejected.connect(self.reject) + layout.addWidget(button_box) + + # Cause our dialog size to be restored from prefs or created on first usage + self.resize_dialog() + self.initialize() + + def initialize(self): + self.keyboard_widget.initialize(self.gui.keyboard) + self.keyboard_widget.highlight_group(self.group_name) + + def commit(self): + self.keyboard_widget.commit() + self.accept() diff --git a/config.py b/config.py new file mode 100644 index 0000000..70dc2d6 --- /dev/null +++ b/config.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2011, Grant Drake ' +__docformat__ = 'restructuredtext en' + +from collections import OrderedDict +from PyQt5.Qt import QWidget, QGridLayout, QLabel, QPushButton, QGroupBox, QVBoxLayout +from calibre.utils.config import JSONConfig + +from calibre_plugins.comments_cleaner.common_utils import (KeyValueComboBox, KeyboardConfigDialog, ImageTitleLayout, debug_print, get_library_uuid) + +import copy, os + +PREFS_NAMESPACE = 'CommentCleanerPlugin' +PREFS_KEY_SETTINGS = 'settings' + +PLUGIN_ICONS = ['images/plugin.png'] + +STORE_NAME = 'Options' +KEY_KEEP_URL = 'keep' +KEY_FORCE_JUSTIFY = 'empty' + +SHOW_URL = OrderedDict([('keep', _('Keep URL')), + ('none', _('Delete URL'))]) + +FORCE_JUSTIFY = OrderedDict([('all', _('Forced justification (ecrase "center" and "right")')), + ('empty', _('Justification for indeterminate text')), + ('none', _('No change'))]) + +DEFAULT_LIBRARY_VALUES = { + KEY_KEEP_URL: 'keep', + KEY_FORCE_JUSTIFY: 'empty' +} + +def get_library_config(db): + library_id = get_library_uuid(db) + library_config = None + + if library_config is None: + library_config = db.prefs.get_namespaced(PREFS_NAMESPACE, PREFS_KEY_SETTINGS, + copy.deepcopy(DEFAULT_LIBRARY_VALUES)) + return library_config + +def set_library_config(db, library_config): + db.prefs.set_namespaced(PREFS_NAMESPACE, PREFS_KEY_SETTINGS, library_config) + +class ConfigWidget(QWidget): + + def __init__(self, plugin_action): + QWidget.__init__(self) + self.plugin_action = plugin_action + layout = QVBoxLayout(self) + self.setLayout(layout) + + library_config = get_library_config(self.plugin_action.gui.current_db) + + title_layout = ImageTitleLayout(self, 'images/plugin.png', _('Comments Cleaner Options')) + layout.addLayout(title_layout) + + # --- options --- + options_group_box = QGroupBox(_(' '), self) + layout.addWidget(options_group_box) + options_group_box_layout = QGridLayout() + options_group_box.setLayout(options_group_box_layout) + + options_group_box_layout.addWidget(QLabel(_('URL:'), self), 1, 1) + post_show3 = library_config.get(KEY_KEEP_URL, DEFAULT_LIBRARY_VALUES[KEY_KEEP_URL]) + self.showCombo1 = KeyValueComboBox(self, SHOW_URL, post_show3) + options_group_box_layout.addWidget(self.showCombo1, 2, 1) + + options_group_box_layout.addWidget(QLabel(_('Justification:'), self), 3, 1) + post_show3 = library_config.get(KEY_FORCE_JUSTIFY, DEFAULT_LIBRARY_VALUES[KEY_FORCE_JUSTIFY]) + self.showCombo2 = KeyValueComboBox(self, FORCE_JUSTIFY, post_show3) + options_group_box_layout.addWidget(self.showCombo2, 4, 1) + + # --- Keyboard shortcuts --- + keyboard_shortcuts_button = QPushButton(_('Keyboard shortcuts...'), self) + keyboard_shortcuts_button.setToolTip(_( + 'Edit the keyboard shortcuts associated with this plugin')) + keyboard_shortcuts_button.clicked.connect(self.edit_shortcuts) + layout.addWidget(keyboard_shortcuts_button) + layout.addStretch(1) + + def save_settings(self): + db = self.plugin_action.gui.current_db + library_config = get_library_config(db) + + library_config[KEY_KEEP_URL] = self.showCombo1.selected_key() + library_config[KEY_FORCE_JUSTIFY] = self.showCombo2.selected_key() + + set_library_config(db, library_config) + + def edit_shortcuts(self): + self.save_settings () + self.plugin_action.build_menus () + d = KeyboardConfigDialog(self.plugin_action.gui, self.plugin_action.action_spec[0]) + if d.exec_() == d.Accepted: + self.plugin_action.gui.keyboard.finalize() diff --git a/plugin-import-name-comments_cleaner.txt b/plugin-import-name-comments_cleaner.txt new file mode 100644 index 0000000..0519ecb --- /dev/null +++ b/plugin-import-name-comments_cleaner.txt @@ -0,0 +1 @@ + \ No newline at end of file