-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrss_generator.py
309 lines (254 loc) · 10.1 KB
/
rss_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
import argparse
import os
import time
import xml.etree.ElementTree as ET
from datetime import date, datetime, timezone
from email.utils import format_datetime

import markdown
import requests
import yaml
from retry import retry
from sh import ffprobe, ErrorReturnCode
# Test-mode switch: True only when the RSS_GENERATOR_TEST_MODE environment
# variable is set to "true" (compared case-insensitively); it defaults to off.
# When on, the HTTP and ffprobe helpers in this module return canned data
# instead of contacting the network or spawning ffprobe.
TEST_MODE = os.environ.get('RSS_GENERATOR_TEST_MODE', 'false').lower() == 'true'
# Canned ffprobe output in "flat" key=value form, returned by the ffprobe
# helper in test mode. Only the "streams.stream.0.duration=" line is parsed
# by get_file_info; with this sample it yields a duration of 3541 seconds.
MOCK_FFPROBE_OUTPUT = """streams.stream.0.index=0
streams.stream.0.codec_name="aac"
streams.stream.0.codec_long_name="AAC (Advanced Audio Coding)"
streams.stream.0.profile="LC"
streams.stream.0.codec_type="audio"
streams.stream.0.codec_tag_string="mp4a"
streams.stream.0.codec_tag="0x6134706d"
streams.stream.0.sample_fmt="fltp"
streams.stream.0.sample_rate="44100"
streams.stream.0.channels=2
streams.stream.0.channel_layout="stereo"
streams.stream.0.bits_per_sample=0
streams.stream.0.initial_padding=0
streams.stream.0.id="0x1"
streams.stream.0.r_frame_rate="0/0"
streams.stream.0.avg_frame_rate="0/0"
streams.stream.0.time_base="1/44100"
streams.stream.0.start_pts=0
streams.stream.0.start_time="0.000000"
streams.stream.0.duration_ts=156170240
streams.stream.0.duration="3541.275283"
streams.stream.0.bit_rate="107301"
streams.stream.0.max_bit_rate="N/A"
streams.stream.0.bits_per_raw_sample="N/A"
streams.stream.0.nb_frames="152510"
streams.stream.0.nb_read_frames="N/A"
streams.stream.0.nb_read_packets="N/A"
streams.stream.0.extradata_size=2
streams.stream.0.disposition.default=1"""
# Mock HTTP response for testing
class MockResponse:
    """Minimal stand-in for an HTTP response, used when TEST_MODE is on.

    It exposes only the two attributes this module reads from a real
    response: ``url`` and ``headers``.
    """

    # Fixed header values reported for every mocked URL.
    _CANNED_HEADERS = (
        ("content-length", "12345678"),
        ("content-type", "audio/mpeg"),
    )

    def __init__(self, url):
        """Remember ``url`` and attach a fresh copy of the canned headers."""
        # Build a new dict per instance so callers mutating one response's
        # headers cannot affect another.
        self.headers = dict(self._CANNED_HEADERS)
        self.url = url
def read_podcast_config(yaml_file_path):
    """Load the podcast configuration from a YAML file.

    Args:
        yaml_file_path: Path of the YAML file, opened as UTF-8 text.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's contents.
    """
    with open(yaml_file_path, mode="r", encoding="utf-8") as config_stream:
        parsed_config = yaml.safe_load(config_stream)
    return parsed_config
def convert_iso_to_rfc2822(iso_date):
    """Convert an ISO 8601 date into an RFC 2822 date string.

    Args:
        iso_date: Either an ISO 8601 string such as
            ``"2023-01-15T10:00:00+00:00"`` (a trailing ``Z`` is accepted as
            UTC), or an already-parsed ``datetime`` / ``date`` object. YAML
            loaders can hand back such objects for unquoted timestamps, so
            they are accepted instead of failing inside ``fromisoformat``.

    Returns:
        The value formatted by ``email.utils.format_datetime``, for example
        ``"Sun, 15 Jan 2023 10:00:00 +0000"``. Inputs without a UTC offset
        are rendered with the ``-0000`` zone.

    Raises:
        ValueError: If a string argument is not a valid ISO 8601 date.
    """
    if isinstance(iso_date, datetime):
        date_obj = iso_date
    elif isinstance(iso_date, date):
        # A bare date maps to midnight, matching what fromisoformat returns
        # for a date-only string.
        date_obj = datetime(iso_date.year, iso_date.month, iso_date.day)
    else:
        iso_text = iso_date.strip()
        # fromisoformat only understands the "Z" suffix on newer Python
        # versions; spell the offset out so every version parses it.
        if iso_text.endswith(("Z", "z")):
            iso_text = f"{iso_text[:-1]}+00:00"
        date_obj = datetime.fromisoformat(iso_text)
    return format_datetime(date_obj)
@retry(tries=5, delay=2, backoff=2, logger=None)
def _make_http_request(url, timeout=30):
    """Issue a HEAD request for ``url``, following redirects.

    The ``retry`` decorator re-invokes this function when it raises, using
    the tries/delay/backoff values given above.

    Args:
        url: Address of the media file to inspect.
        timeout: Seconds passed to ``requests.head`` as its ``timeout``.
            Without one, a server that never answers would block forever and
            the retry logic would never get a chance to run.

    Returns:
        A ``MockResponse`` when TEST_MODE is on, otherwise the response
        object returned by ``requests.head``. The HTTP status is not
        checked here.
    """
    if TEST_MODE:
        return MockResponse(url)
    return requests.head(url, allow_redirects=True, timeout=timeout)
def _run_ffprobe_with_retry(url, max_retries=5, delay=2):
    """
    Probe ``url`` with ffprobe, retrying by hand on ErrorReturnCode.

    In TEST_MODE the canned MOCK_FFPROBE_OUTPUT is returned immediately.
    Otherwise ffprobe is invoked up to ``max_retries`` times; after each
    failure except the last, the function sleeps for the current wait and
    then doubles it. When every attempt fails an empty string is returned.
    If ``max_retries`` is not positive no attempt is made and the function
    falls through, returning None.
    """
    if TEST_MODE:
        return MOCK_FFPROBE_OUTPUT

    probe_args = (
        "-hide_banner",
        "-v",
        "quiet",
        "-show_streams",
        "-print_format",
        "flat",
        url,
    )
    wait_seconds = delay
    attempt = 0
    while attempt < max_retries:
        attempt += 1
        try:
            return ffprobe(*probe_args)
        except ErrorReturnCode:
            if attempt < max_retries:
                print(f"ffprobe failed (attempt {attempt}/{max_retries}), retrying in {wait_seconds} seconds...")
                time.sleep(wait_seconds)
                # Exponential backoff: wait twice as long next time.
                wait_seconds *= 2
                continue
            print(f"Failed to run ffprobe after {max_retries} attempts for URL: {url}")
            # Out of attempts: signal failure with an empty string.
            return ""
def _parse_ffprobe_duration(probe):
    """Extract stream 0's duration, in whole seconds, from flat ffprobe output.

    Returns None when the output is empty, has no
    ``streams.stream.0.duration=`` line, or that line's value is not a
    finite number (for example ``"N/A"``).
    """
    if not probe:
        return None
    duration_key = "streams.stream.0.duration="
    for line in probe.splitlines():
        if not line.startswith(duration_key):
            continue
        raw_value = line[len(duration_key):].strip().strip('"')
        try:
            # Fractional seconds are dropped, not rounded.
            return int(float(raw_value))
        except (ValueError, OverflowError):
            # Non-numeric or non-finite value: treat the duration as unknown
            # instead of aborting the whole feed.
            return None
    return None


def get_file_info(url):
    """Collect size, MIME type and duration for the media file at ``url``.

    Args:
        url: Address of the media file.

    Returns:
        A dict with three keys:
        ``"content-length"`` and ``"content-type"`` hold the matching
        response header values (None when the header is absent), and
        ``"duration"`` holds the length in whole seconds, or None when
        ffprobe produced no usable duration.
    """
    # Make HTTP request with retry logic
    response = _make_http_request(url)
    # ffprobe is pointed at response.url rather than url so that it reads
    # the file the redirects ended at.
    probe = _run_ffprobe_with_retry(response.url)
    return {
        "content-length": response.headers.get("content-length"),
        "content-type": response.headers.get("content-type"),
        "duration": _parse_ffprobe_duration(probe),
    }
def format_description(description, byte_limit=4000):
    """
    Convert a Markdown description to HTML wrapped in a CDATA marker.

    Args:
        description: Markdown source text.
        byte_limit: Maximum size of the returned string, measured in UTF-8
            bytes and including the CDATA wrapper. Defaults to 4000.

    Returns:
        ``"<![CDATA[" + html + "]]>"``. When that would exceed
        ``byte_limit`` the HTML part is shortened so the whole string fits
        and the closing ``]]>`` is kept. If ``byte_limit`` is smaller than
        the wrapper itself, the bare wrapper is returned.
    """
    html_description = markdown.markdown(description)
    opening, closing = "<![CDATA[", "]]>"
    wrapped_description = f"{opening}{html_description}{closing}"
    if len(wrapped_description.encode("utf-8")) <= byte_limit:
        return wrapped_description

    # The limit is in bytes, so cut the encoded form rather than the str:
    # slicing characters would let multi-byte text overshoot the limit.
    # Both wrapper parts are ASCII, so their len() equals their byte size.
    html_budget = max(byte_limit - len(opening) - len(closing), 0)
    # errors="ignore" drops a multi-byte character split by the cut.
    truncated_html = html_description.encode("utf-8")[:html_budget].decode(
        "utf-8", errors="ignore"
    )
    # Note: the cut is not HTML-aware and may still land inside a tag.
    return f"{opening}{truncated_html}{closing}"
def _is_already_published(publication_date):
    """Return True when the ISO 8601 ``publication_date`` lies in the past.

    A timestamp without a UTC offset is interpreted as UTC. Timestamps that
    carry an offset are compared as-is, which avoids the TypeError Python
    raises when naive and aware datetimes are compared.
    """
    published_at = datetime.fromisoformat(publication_date)
    if published_at.utcoffset() is None:
        published_at = published_at.replace(tzinfo=timezone.utc)
    return published_at < datetime.now(timezone.utc)


def _append_episode_item(channel, episode, metadata, file_info, global_explicit):
    """Append one ``<item>`` describing ``episode`` to ``channel``."""
    item = ET.SubElement(channel, "item")
    ET.SubElement(item, "pubDate").text = convert_iso_to_rfc2822(
        episode["publication_date"]
    )
    ET.SubElement(item, "title").text = episode["title"]
    ET.SubElement(item, "description").text = format_description(
        episode["description"]
    )
    ET.SubElement(item, "guid").text = episode["asset_url"]

    # Attribute values must be strings: a None would make serialization
    # fail, and str(None) would publish the literal text "None". Fall back
    # to neutral values when the server did not send the headers.
    content_length = file_info["content-length"]
    ET.SubElement(
        item,
        "enclosure",
        url=episode["asset_url"],
        type=file_info["content-type"] or "application/octet-stream",
        length="0" if content_length is None else str(content_length),
    )

    # Apply global itunes:explicit setting to each episode
    ET.SubElement(item, "itunes:explicit").text = global_explicit

    # Only advertise a duration that was actually measured.
    if file_info["duration"] is not None:
        ET.SubElement(item, "itunes:duration").text = str(file_info["duration"])

    # Optional iTunes-specific tags
    if episode.get("episode") is not None:
        ET.SubElement(item, "itunes:episode").text = str(episode["episode"])
    if episode.get("season") is not None:
        ET.SubElement(item, "itunes:season").text = str(episode["season"])
    if episode.get("episode_type") is not None:
        ET.SubElement(item, "itunes:episodeType").text = episode["episode_type"]

    # Add link if available, if not, use global
    ET.SubElement(item, "link").text = episode.get("link", metadata["link"])

    # Episode artwork wins over the channel artwork; both are optional, so
    # the channel value is looked up with .get() and the tag is skipped when
    # neither is configured.
    itunes_image_url = episode.get("itunes_image", metadata.get("itunes_image"))
    if itunes_image_url is not None:
        ET.SubElement(item, "itunes:image").set("href", itunes_image_url)


def generate_rss(config, output_file_path):
    """Build the podcast RSS feed from ``config`` and write it to disk.

    Args:
        config: Parsed configuration with a ``"metadata"`` mapping and an
            ``"episodes"`` list. Metadata must contain ``title``,
            ``description``, ``link``, ``rss_feed_url``, ``itunes_email``
            and ``itunes_author``; ``language``, ``itunes_explicit``,
            ``itunes_category`` and ``itunes_image`` are optional. Each
            episode needs ``title``, ``description``, ``publication_date``
            and ``asset_url``.
        output_file_path: Destination of the XML document, written as UTF-8
            with an XML declaration.

    Raises:
        KeyError: If a required metadata or episode key is missing.

    Episodes whose publication date has not been reached yet are skipped.
    Progress is reported with ``print``.
    """
    ET.register_namespace("itunes", "http://www.itunes.com/dtds/podcast-1.0.dtd")
    ET.register_namespace("atom", "http://www.w3.org/2005/Atom")

    metadata = config["metadata"]
    # Global itunes:explicit setting
    global_explicit = "yes" if metadata.get("itunes_explicit", False) else "no"

    # Tags below use literal "itunes:"/"atom:" prefixes, so the matching
    # xmlns declarations are set by hand on the root element.
    rss = ET.Element(
        "rss",
        version="2.0",
        attrib={
            "xmlns:itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd",
            "xmlns:atom": "http://www.w3.org/2005/Atom",
        },
    )

    # Metadata
    channel = ET.SubElement(rss, "channel")
    ET.SubElement(channel, "title").text = metadata["title"]
    ET.SubElement(channel, "description").text = format_description(
        metadata["description"]
    )
    ET.SubElement(channel, "language").text = metadata.get("language", "en-us")
    ET.SubElement(channel, "link").text = metadata["link"]
    ET.SubElement(
        channel, "generator"
    ).text = (
        "Podcast RSS Generator (https://github.com/vpetersson/podcast-rss-generator)"
    )
    ET.SubElement(
        channel,
        "atom:link",
        href=metadata["rss_feed_url"],
        rel="self",
        type="application/rss+xml",
    )

    # Adds explicit tag
    ET.SubElement(channel, "itunes:explicit").text = global_explicit

    # Add itunes:owner and itunes:email tags
    itunes_owner = ET.SubElement(channel, "itunes:owner")
    ET.SubElement(itunes_owner, "itunes:email").text = metadata["itunes_email"]

    # Add itunes:author tag
    ET.SubElement(channel, "itunes:author").text = metadata["itunes_author"]

    # Duplicate description to itunes summary (raw text, not converted)
    ET.SubElement(channel, "itunes:summary").text = metadata["description"]

    # Add itunes:category tag; the category is carried in a "text" attribute
    if "itunes_category" in metadata:
        ET.SubElement(channel, "itunes:category", text=metadata["itunes_category"])

    if "itunes_image" in metadata:
        ET.SubElement(channel, "itunes:image").set("href", metadata["itunes_image"])

    # Episodes; an empty "episodes:" key loads as None, treat it as no episodes
    for episode in config["episodes"] or []:
        print(f"Processing episode {episode['title']}...")

        # Don't pre-publish episodes
        if not _is_already_published(episode["publication_date"]):
            print(
                f"Skipping episode {episode['title']} as it's not scheduled to be released until {episode['publication_date']}."
            )
            continue

        file_info = get_file_info(episode["asset_url"])
        _append_episode_item(channel, episode, metadata, file_info, global_explicit)

    tree = ET.ElementTree(rss)
    tree.write(output_file_path, encoding="UTF-8", xml_declaration=True)
def main():
    """Command-line entry point: read the YAML config and write the feed.

    Accepts ``--input-file`` (default ``podcast_config.yaml``) and
    ``--output-file`` (default ``podcast_feed.xml``), echoes both paths,
    then generates the RSS document.
    """
    cli = argparse.ArgumentParser(description="Process some parameters.")
    option_specs = (
        ("--input-file", "podcast_config.yaml", "Input YAML file"),
        ("--output-file", "podcast_feed.xml", "Output XML file"),
    )
    for flag, default_value, help_text in option_specs:
        cli.add_argument(flag, type=str, default=default_value, help=help_text)

    # Parse arguments from the command line
    options = cli.parse_args()
    source_path = options.input_file
    target_path = options.output_file
    print(f"Input file: {source_path}, Output file: {target_path}")

    generate_rss(read_podcast_config(source_path), target_path)


if __name__ == "__main__":
    main()