Skip to content

Commit 5f9e360

Browse files
author
Quan
authored
Merge pull request #132 from NekoAria/2.0
修正标签处理中 `bbcode` 的处理逻辑，补上对 `url` 标签的处理；修正 `<a>` 标签处理的逻辑
2 parents dd5207f + 6c28c88 commit 5f9e360

File tree

1 file changed

+19
-9
lines changed

1 file changed

+19
-9
lines changed

src/plugins/ELF_RSS2/RSS/rss_parsing.py

+19 -9
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from io import BytesIO
1919
from pathlib import Path
2020
from typing import Dict, Any
21+
from html import unescape as html_unescape
2122

2223
import emoji
2324
import feedparser
@@ -659,13 +660,22 @@ async def handle_html_tag(html) -> str:
659660
rss_str = re.sub(
660661
r"(\[url=.+?])?\[img].+?\[/img](\[/url])?", "", rss_str, flags=re.I
661662
)
662-
rss_str = re.sub(r"\[align=.+?]|\[/align]", "", rss_str, flags=re.I)
663-
rss_str = re.sub(r"\[backcolor=.+?]|\[/backcolor]", "", rss_str, flags=re.I)
664-
rss_str = re.sub(r"\[color=.+?]|\[/color]", "", rss_str, flags=re.I)
665-
rss_str = re.sub(r"\[font=.+?]|\[/font]", "", rss_str, flags=re.I)
666-
rss_str = re.sub(r"\[size=.+?]|\[/size]", "", rss_str, flags=re.I)
667-
rss_str = re.sub(r"\[table=.+?]|\[/table]", "", rss_str, flags=re.I)
668-
rss_str = re.sub(r"\[/?(b|u|tr|td)]", "", rss_str, flags=re.I)
663+
bbcode_tags = [
664+
"align",
665+
"backcolor",
666+
"color",
667+
"font",
668+
"size",
669+
"table",
670+
"url",
671+
"b",
672+
"u",
673+
"tr",
674+
"td",
675+
]
676+
for i in bbcode_tags:
677+
rss_str = re.sub(rf"\[{i}=.+?]", "", rss_str, flags=re.I)
678+
rss_str = re.sub(rf"\[/?{i}]", "", rss_str, flags=re.I)
669679

670680
# 去掉结尾被截断的信息
671681
rss_str = re.sub(
@@ -699,8 +709,8 @@ async def handle_html_tag(html) -> str:
699709

700710
# <a> 标签处理
701711
for a in new_html("a").items():
702-
a_str = re.search(r"<a.+?</a>", str(a))[0]
703-
if str(a.text()) != a.attr("href"):
712+
a_str = re.search(r"<a.+?</a>", html_unescape(str(a)))[0]
713+
if a.text() and str(a.text()) != a.attr("href"):
704714
rss_str = rss_str.replace(a_str, f" {a.text()}: {a.attr('href')}\n")
705715
else:
706716
rss_str = rss_str.replace(a_str, f" {a.attr('href')}\n")

0 commit comments

Comments
 (0)