"""
|
||
|
|
Podcast backup script
|
||
|
|
|
||
|
|
Parses an RSS feed from SOURCE_FILE and download all items to
|
||
|
|
DESTINATION_PATH. Downloads are done in parallel, for
|
||
|
|
PARALLEL_COUNT downloads at the time.
|
||
|
|
|
||
|
|
How to use
|
||
|
|
----------
|
||
|
|
|
||
|
|
1. Set DESTINATION_PATH. Make sure the folder exists on your
|
||
|
|
file system.
|
||
|
|
2. Save the source file (RSS or Atom) on your computer and
|
||
|
|
update SOURCE_FILE if needed.
|
||
|
|
3. Alter PARALLEL_COUNT for your needs. Higher number will
|
||
|
|
decrease total time for this script to be done, but will
|
||
|
|
increase net traffic.
|
||
|
|
4. Run script in a python intepreter, 3.7 is recommended.
|
||
|
|
|
||
|
|
This script was written by Anders Ytterström in October 2019.
|
||
|
|
If you find it useful, buy him a 🍺.
|
||
|
|
"""
import os
import queue
import threading
import xml.etree.ElementTree as ET
from urllib.request import urlretrieve

# Raw strings so the Windows-path backslashes are not read as escapes.
DESTINATION_PATH = r"D:\Asmodean\podcasts\inbox"
SOURCE_FILE = r"D:\Kod\gists\src.xml"
PARALLEL_COUNT = 3


def download_file(url, target):
    print(f"Downloading {target} <- {url}")
    urlretrieve(url, os.path.join(DESTINATION_PATH, f"{target}.mp3"))


def get_urls():
    tree = ET.parse(SOURCE_FILE)
    root = tree.getroot()

    def f(item):
        url = item.find("enclosure").attrib["url"]
        filename = slugify(item.find("title").text)
        return (url, filename)

    return map(f, root.findall("./channel/item"))
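# Note that get_urls() returns a lazy map iterator of (url, filename)
# tuples, e.g. ("https://example.com/ep1.mp3", "episode-1-hello") for
# the made-up item sketched at the top of this file.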


def slugify(text):
    return (
        text.lower()
        .replace(" ", "-")
        .replace(":", "")
        .replace("/", "-av-")
        .replace("?", "")
    )
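# For example (an invented title, not from a real feed):
#   slugify("Avsnitt 12: Kod/Konst?") == "avsnitt-12-kod-av-konst"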


def do_work(item):
    download_file(*item)


if __name__ == "__main__":

    def worker():
        while True:
            item = q.get()
            if item is None:  # None is the shutdown sentinel pushed below
                break
            do_work(item)
            q.task_done()

    q = queue.Queue()
    threads = []
    for i in range(PARALLEL_COUNT):
        t = threading.Thread(target=worker)
        t.start()
        threads.append(t)

    source = get_urls()
    for item in source:
        q.put(item)

    # block until all tasks are done
    q.join()

    # stop workers
    for i in range(PARALLEL_COUNT):
        q.put(None)
    for t in threads:
        t.join()