|
18 | 18 | from io import BytesIO
|
19 | 19 | from pathlib import Path
|
20 | 20 | from typing import Dict, Any
|
| 21 | +from html import unescape as html_unescape |
21 | 22 |
|
22 | 23 | import emoji
|
23 | 24 | import feedparser
|
@@ -659,13 +660,22 @@ async def handle_html_tag(html) -> str:
|
659 | 660 | rss_str = re.sub(
|
660 | 661 | r"(\[url=.+?])?\[img].+?\[/img](\[/url])?", "", rss_str, flags=re.I
|
661 | 662 | )
|
662 |
| - rss_str = re.sub(r"\[align=.+?]|\[/align]", "", rss_str, flags=re.I) |
663 |
| - rss_str = re.sub(r"\[backcolor=.+?]|\[/backcolor]", "", rss_str, flags=re.I) |
664 |
| - rss_str = re.sub(r"\[color=.+?]|\[/color]", "", rss_str, flags=re.I) |
665 |
| - rss_str = re.sub(r"\[font=.+?]|\[/font]", "", rss_str, flags=re.I) |
666 |
| - rss_str = re.sub(r"\[size=.+?]|\[/size]", "", rss_str, flags=re.I) |
667 |
| - rss_str = re.sub(r"\[table=.+?]|\[/table]", "", rss_str, flags=re.I) |
668 |
| - rss_str = re.sub(r"\[/?(b|u|tr|td)]", "", rss_str, flags=re.I) |
| 663 | + bbcode_tags = [ |
| 664 | + "align", |
| 665 | + "backcolor", |
| 666 | + "color", |
| 667 | + "font", |
| 668 | + "size", |
| 669 | + "table", |
| 670 | + "url", |
| 671 | + "b", |
| 672 | + "u", |
| 673 | + "tr", |
| 674 | + "td", |
| 675 | + ] |
| 676 | + for i in bbcode_tags: |
| 677 | + rss_str = re.sub(rf"\[{i}=.+?]", "", rss_str, flags=re.I) |
| 678 | + rss_str = re.sub(rf"\[/?{i}]", "", rss_str, flags=re.I) |
669 | 679 |
|
670 | 680 | # 去掉结尾被截断的信息
|
671 | 681 | rss_str = re.sub(
|
@@ -699,8 +709,8 @@ async def handle_html_tag(html) -> str:
|
699 | 709 |
|
700 | 710 | # <a> 标签处理
|
701 | 711 | for a in new_html("a").items():
|
702 |
| - a_str = re.search(r"<a.+?</a>", str(a))[0] |
703 |
| - if str(a.text()) != a.attr("href"): |
| 712 | + a_str = re.search(r"<a.+?</a>", html_unescape(str(a)))[0] |
| 713 | + if a.text() and str(a.text()) != a.attr("href"): |
704 | 714 | rss_str = rss_str.replace(a_str, f" {a.text()}: {a.attr('href')}\n")
|
705 | 715 | else:
|
706 | 716 | rss_str = rss_str.replace(a_str, f" {a.attr('href')}\n")
|
|
0 commit comments