Commit 29596930 authored by Bernhard Geier

new generic script to download from BRs "Live" player web page

parent 1e7e6b1d
# BR Downloader

Download your favourite radio shows from Bayerischer Rundfunk!

For people who missed a show, Bayerischer Rundfunk provides recordings on its web page. But only for a short time, and only within a player, without a convenient download button. That's why I wrote this Python 3 script.

This is a Python 3 command line script to download shows from any channel on Bayerischer Rundfunk's "Live" web site.
The shows are saved as MP3 files and get tagged with all available information, including chapter markers.
### Requirements

Python 3 with the modules "pydub", "mutagen", "beautifulsoup4" and "requests".

(On Debian/Ubuntu: `sudo apt install python3 python3-pydub python3-mutagen python3-requests python3-bs4`)
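The script itself also imports "python-dateutil" and "pytz" and uses BeautifulSoup's "lxml" parser, and pydub needs ffmpeg to decode the audio segments. If any of these are missing, `sudo apt install python3-dateutil python3-tz python3-lxml ffmpeg` (or the corresponding pip packages) should cover them.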
### Usage

```./br-download.py <Channel> <Show> <TargetDirectory>```

* `Channel` can be something like "Bayern2", "BR-Klassik", "BR24", "Puls", ...
* `Show` is the show's title as displayed in BR's "Live" player (https://www.br.de/radio/live/)
* `TargetDirectory` is the directory you want the MP3 files to be saved in

The script searches Bayerischer Rundfunk's web site and downloads all currently available episodes of the given show into the target directory.
Each episode's metadata gets stored in the downloaded MP3 file's ID3 tag.
If a playlist is available for an episode, it gets written into the ID3 tag's "Comment" field.
Episodes already downloaded get skipped, so this script is well suited for cron jobs (see the crontab example below).
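To check what ended up in a downloaded file, you can dump its tags with mutagen, for example (a minimal sketch; the file name is just an example):

```
from mutagen.id3 import ID3

# print all ID3 frames, including the chapter markers (CHAP) and the comment field (COMM)
tags = ID3("Bayern 2 Zündfunk 2021-01-15 19_05.mp3")
print(tags.pprint())
```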
**Example:**

```./br-download.py bayern2 "IQ - Wissenschaft und Forschung" "/data/aufnahmen"```

This would download all available "IQ - Wissenschaft und Forschung" episodes from Bayern 2 and save them with full ID3 tags in the "/data/aufnahmen" directory.
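Since existing files are skipped, you can simply run the script periodically, for example via a crontab entry like this (the path to the script is just an example):

```0 * * * * /usr/local/bin/br-download.py bayern2 "Zündfunk" /data/aufnahmen```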
### Limitations
* As of January 2021, Bayerischer Rundfunk only offers the last 5 hours of its program as recordings, not the last 7 days.
* Timestamps are way off: shows start earlier or later than expected, and chapter markers are wrong. As it's the same on Bayerischer Rundfunk's "Live" web page, it's most likely their fault.
### See also
If you want to listen to the downloaded shows with your podcast player: https://github.com/citronalco/mp3-to-rss2feed creates an RSS2 feed from MP3 files.
#!/usr/bin/env python3
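# br-download.py (the generic downloader described in the README above):
# finds all available recordings of a show in BR's "Live" player and saves them as tagged MP3 files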
import requests
import sys
from math import floor, ceil
import os.path
import re
from datetime import datetime, date, timedelta
from dateutil.parser import parse
import pytz
from mutagen.id3 import ID3,ID3NoHeaderError,TRSN,TPE1,TALB,TRCK,TIT2,COMM,TYER,TDAT,TIME,TLEN,CTOC,CHAP,WOAS,WORS,APIC,CTOCFlags
from urllib.request import urlopen
from bs4 import BeautifulSoup
import json
from io import BytesIO
from pydub import AudioSegment
import argparse
parser = argparse.ArgumentParser(
description = "Find all availabe recordings of a show in Bayerischer Runfunk's player, download them as MP3 files and save the shows' metadata in the ID3 tags.",
)
parser.add_argument("Channel", help="The channel's name (e.g. \"Bayern2\", \"BR-Klassik\", \"Puls\")")
parser.add_argument("ShowTitle", help="The show's title (e.g. \"Zündfunk\")")
parser.add_argument("Directory", help="The directory to save the files in (e.g. \"Downloads/Zündfunk Recordings\")")
args = parser.parse_args()
CHANNEL = args.Channel
SHOW = args.ShowTitle
DESTDIR = args.Directory
if not os.path.isdir(DESTDIR):
print("Directory %s does not exist!" % DESTDIR, file=sys.stderr)
sys.exit(1)
baseUrl="https://www.br.de/radio/live/%s/programm/" % CHANNEL.lower()
# Fetch program information of the current day and fetch M3U8 data
day = date.today()
try:
html = requests.get(baseUrl + '/' + day.strftime("%Y-%m-%d") + '/', timeout=5).text
# extract JSON data embedded into HTML page
soup = BeautifulSoup(html, 'lxml')
jsonData = json.loads(soup.find('script', id='__NEXT_DATA__').encode_contents())
# get M3U8 with paths to media streams
streamsM3U8url = jsonData['props']['pageProps']['stationData']['audioBroadcastService']['sophoraLivestreamDocuments'][0]['streamingUrl']
streamsM3U8 = requests.get(streamsM3U8url).text
# retrieve all media stream paths from M3U8
streams = re.findall(r'^(?!#)(.*)\n', streamsM3U8, re.MULTILINE)
# get M3U8 with TS paths from media stream (streams are sorted by bitrate, last one has the highest)
tsBaseUrl = re.sub(r'([^\/]+?)\/?$','', streamsM3U8url)
tsM3U8 = requests.get(tsBaseUrl + streams[-1]).text
except:
print("Error: Could fetch download program information from %s" % baseUrl + '/' + day.strftime("%Y-%m-%d") + '/', file=sys.stderr)
exit(1)
# retrieve information about TS stream from M3U8
tsData = {
# sequence number of the first TS snippet
'mediaSequence': int(re.search(r'^#EXT-X-MEDIA-SEQUENCE:\s*(\d+)$', tsM3U8, re.MULTILINE).group(1)),
# duration of each TS snippet
'targetDuration': int(re.search(r'^#EXT-X-TARGETDURATION:\s*(\d+)$', tsM3U8, re.MULTILINE).group(1)),
# datetime of oldest TS snippet
'programDateTime': parse(re.search(r'^#EXT-X-PROGRAM-DATE-TIME:\s*(.+)$', tsM3U8, re.MULTILINE).group(1)),
# URLs to all TS snippets
'segments': list(map(lambda x: tsBaseUrl + x, re.findall(r'^(?!#)(.*)\n', tsM3U8, re.MULTILINE)))
}
# search for broadcasts of requested show
foundBroadcasts = []
while True:
# loop broadcasts from new to old
for broadcast in reversed(jsonData['props']['pageProps']['stationDayProgramData']['audioBroadcastService']['epg']):
# stop on any broadcast too dated
if parse(broadcast['broadcastEvent']['start']) < tsData['programDateTime']:
break
# skip broadcasts not having ended yet
if parse(broadcast['broadcastEvent']['end']) > datetime.now(tz=pytz.timezone('Europe/Berlin')):
continue
match = re.search(r'^\s*' + re.escape(SHOW) + r'\s*$', broadcast['broadcastEvent']['trackingInfos']['pageVars']['topline'], flags=re.IGNORECASE)
if match:
foundBroadcasts.append(broadcast['broadcastEvent'])
else:
# no "break" happened above? -> get data of previous day and continue searching!
day = day - timedelta(days = 1)
html = requests.get(baseUrl + '/' + day.strftime("%Y-%m-%d") + '/', timeout=5).text
soup = BeautifulSoup(html, 'lxml')
jsonData = json.loads(soup.find('script', id='__NEXT_DATA__').encode_contents())
continue
# broadcasts are too dated already ("break" happened above), don't go further in the past
break
# download broadcasts, from old to new
for broadcast in reversed(foundBroadcasts):
broadcastStartDT = parse(broadcast['start'])
broadcastEndDT = parse(broadcast['end'])
# build filename from channel, show title and broadcast datetime, while escaping "bad" characters
filename = os.path.join(
DESTDIR,
re.sub(
r'[^\w\s\-\.\[\]]', '_',
broadcast['trackingInfos']['pageVars']['broadcast_service'] + ' ' + broadcast['trackingInfos']['pageVars']['topline'] + ' ' + broadcastStartDT.astimezone(pytz.timezone('Europe/Berlin')).strftime("%Y-%m-%d %H:%M")
) + ".mp3"
)
# skip broadcast if file already exists
if os.path.isfile(filename) and os.path.getsize(filename)>0:
print("%s already exists, skipping." % filename, flush=True)
continue
# calculate TS snippets for this broadcast
ts_first = floor( (broadcastStartDT - tsData['programDateTime']).total_seconds() / tsData['targetDuration'])
ts_last = ceil( (broadcastEndDT - tsData['programDateTime']).total_seconds() / tsData['targetDuration'])
# download all TS segments and convert them to MP3
print("Downloading %s ..." % filename, end=" ", flush=True)
try:
sound = AudioSegment.empty()
for i in range(ts_first, ts_last):
sound += AudioSegment.from_file(BytesIO(urlopen(tsData['segments'][i]).read()))
sound.export(filename, format="mp3")
except:
print("failed.", flush=True)
continue
else:
print("done.", flush=True)
# ID3: remove all tags
try:
tags = ID3(filename)
tags.delete()
except ID3NoHeaderError:
tags = ID3()
# ID3: save as much information as possible in the ID3 tags
tags.add(TRSN(text=[broadcast['trackingInfos']['pageVars']['broadcast_service']]))
tags.add(TPE1(text=[broadcast['trackingInfos']['pageVars']['broadcast_service']]))
tags.add(TALB(text=[ " - ".join(list(dict.fromkeys([ broadcast['trackingInfos']['pageVars']['topline'], broadcast['trackingInfos']['pageVars']['title'] ])))]))
tags.add(TRCK(text=['1/1']))
tags.add(TIT2(text=[broadcastStartDT.astimezone(pytz.timezone('Europe/Berlin')).strftime("%Y-%m-%d %H:%M")]))
tags.add(COMM(lang="deu", desc="desc", text=[ broadcast['publicationOf']['description'] ]))
tags.add(TYER(text=[broadcastStartDT.astimezone(pytz.timezone('Europe/Berlin')).strftime("%Y")]))
tags.add(TDAT(text=[broadcastStartDT.astimezone(pytz.timezone('Europe/Berlin')).strftime("%d%m")]))
tags.add(TIME(text=[broadcastStartDT.astimezone(pytz.timezone('Europe/Berlin')).strftime("%H%M")]))
tags.add(TLEN(text=[int((broadcastEndDT - broadcastStartDT).total_seconds() * 1000)]))
tags.add(WOAS(url=broadcast['publicationOf']['canonicalUrl']))
tags.add(WORS(url=baseUrl))
# ID3: chapters
chapterNr = 0
for chapter in broadcast['items']:
chapterStartDT = parse(chapter['start'])
if 'duration' in chapter and chapter['duration'] is not None:
chapterEndDT = chapterStartDT + timedelta(seconds = chapter['duration'])
else:
chapterEndDT = broadcastEndDT
artists = []
for i in [ 'performer', 'author' ]:
if i in chapter and chapter[i] is not None and len(chapter[i])>0:
artists.append(chapter[i])
titles = []
for i in [ 'title' ]:
if i in chapter and chapter[i] is not None and len(chapter[i])>0:
titles.append(chapter[i])
tags.add(CHAP(
element_id = chapterNr,
start_time = floor((chapterStartDT - broadcastStartDT).total_seconds() * 1000),
end_time = ceil((chapterEndDT - broadcastStartDT).total_seconds() * 1000),
sub_frames = [TIT2(text=[ " - ".join([" ".join(artists), " ".join(titles) ])])]
))
chapterNr += 1
tocList = ",".join([ str(i) for i in range(0,chapterNr) ])
tags.add(CTOC(
element_id = "toc",
flags = CTOCFlags.TOP_LEVEL | CTOCFlags.ORDERED,
child_element_ids = [tocList],
sub_frames = [TIT2(text=["Table Of Contents"])]
))
# ID3: cover image
response = requests.get(broadcast['publicationOf']['defaultTeaserImage']['url'])
if response.status_code == 200:
tags.add(APIC(mime=response.headers['content-type'], desc="Front Cover", data=response.content))
# save ID3 tags
tags.save(filename,v2_version=3)
exit()
#!/usr/bin/env python3
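# Older, show-specific downloader for "Nachtmix": downloads the recordings offered on the show's web page as MP3 files and tags them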
import requests
import sys
import urllib.parse
import urllib.request
import os.path
import re
from datetime import datetime, date
import time
from mutagen.id3 import ID3,ID3NoHeaderError,TRSN,TPE1,TALB,TRCK,TIT2,COMM,TYER,TDAT,TIME,TLEN,WOAS,WORS,TLAN,APIC
import shutil
from tempfile import NamedTemporaryFile
import lxml.etree
from bs4 import BeautifulSoup
#import pprint
baseUrl = "https://www.br.de/radio/bayern2/sendungen/nachtmix/index.html"
playlistsBaseUrl = "https://www.br.de/radio/bayern2/sendungen/nachtmix/playlisten/index.html"
minimalEpisodeDuration_ms = 45 * 60 * 1000
showTitle = "Nachtmix"
def download(url: str, attempts=4):
tmpfile = NamedTemporaryFile(delete=False)
for attempt in range (1,attempts+1):
try:
if attempt > 1:
time.sleep(3)
#urllib.request.urlretrieve(url, tmpfile.name)
stream = urllib.request.urlopen(url)
shutil.copyfileobj(stream, tmpfile)
return tmpfile.name
except:
pass
return None
def time2seconds(timestr: str):
# return duration of HH:MM:SS in seconds
parts = re.split(":", timestr)
return int(parts[0])*3600+int(parts[1])*60+int(parts[2])
def safe_text_get(l: list, idx: int, default=None):
# return text attribute of list item, or default value if it does not exist
try:
return l[idx].text
except IndexError:
return default
def get_playlist_as_text(dt: datetime):
try:
# get website with calender entries with all available playlists
html = requests.get(playlistsBaseUrl, timeout=5).text
soup = BeautifulSoup(html, 'lxml')
# select day
dayLink = soup.find('a', class_=re.compile('^playlisten.+'), href=re.compile('.+_date\-'+dt.strftime("%Y")+'\-'+dt.strftime("%m")+'\-'+dt.strftime("%d")+'_.+\.html$'))['href']
dayUrl = urllib.parse.urljoin(playlistsBaseUrl, dayLink)
# follow link to playlist
html = requests.get(dayUrl, timeout=5).text
soup = BeautifulSoup(html, 'lxml')
plsLink = soup.find('a', class_=re.compile("^playlist(\-"+showTitle.lower()+")?\-\d+$"), href=re.compile('.+playlist(\-'+showTitle.lower()+')?\-\d+.html$'))['href']
plsUrl = urllib.parse.urljoin(playlistsBaseUrl, plsLink)
# read playlist
html = requests.get(plsUrl, timeout=5).text
soup = BeautifulSoup(html, 'lxml')
playlistEntries = []
for entry in soup.select('div.detail_content > p.copytext'):
playlistEntries.append(" - ".join(entry.find_all(text=True)))
return(" | ".join(playlistEntries))
except:
return None
if len(sys.argv) != 2:
print("Usage:", file=sys.stderr)
print("%s <DownloadDir>\n" % sys.argv[0], file=sys.stderr)
print("Example:", file=sys.stderr)
print("%s 'Downloads/%s Recordings'\n" % (sys.argv[0], showTitle), file=sys.stderr)
sys.exit(1)
DESTDIR = sys.argv[1]
if not os.path.isdir(DESTDIR):
print("Directory %s does not exist!" % DESTDIR, file=sys.stderr)
sys.exit(1)
html = requests.get(baseUrl, timeout=5).text
soup = BeautifulSoup(html, 'lxml')
# extract Json URL
jsonUrl = None
for className in soup.find('div', id='program_stage')['class']:
match = re.match('.*jsonUrl:\s*[\'\"](.+?)[\'\"]',className)
if match:
jsonUrl = match.group(1)
# jsonUrl is relative, make it absolute
jsonUrl = urllib.parse.urljoin(baseUrl, jsonUrl)
if jsonUrl is None:
print ("ERROR: Could not find JSON file containing the broadcasts", file=sys.stderr)
sys.exit(1)
# fetch Json
broadcastJson = requests.get(jsonUrl, timeout=5).json()
# a "channelBroadcast" is a episode of a radio show
for bc in broadcastJson['channelBroadcasts']:
if not bc['broadcastStartDate'] or not bc['broadcastEndDate']:
# show's in the future, skip it
continue
# the link to the episode's web page is in the "broadcastHtml" attribute - embedded in HTML
bcSoup = BeautifulSoup(bc['broadcastHtml'], 'lxml')
episodeUrl = bcSoup.find('div', class_='broadcast').find('a', href=True)['href']
episodeUrl = urllib.parse.urljoin(baseUrl, episodeUrl)
episodePage = requests.get(episodeUrl, timeout=5).text
episodePageSoup = BeautifulSoup(episodePage, 'lxml')
# the episode's web page either contains a player, links to websites with a player, or nothing of interest.
# we collect a list of URLs of all those sites
candidates = [ episodeUrl ]
for url in list(link['href'] for link in episodePageSoup.find_all('a',class_=re.compile('link_audio'), href=True)):
candidates.append(urllib.parse.urljoin(baseUrl, url))
# on each of these pages try to find the player link (<a id="avPlayer_...) and extract the dataURL from the "onclick" parameter
# dataURL points to an XML resource. Fetch them all!
xmls = []
for url in candidates:
page = requests.get(url, timeout=5).text
pageSoup = BeautifulSoup(page, 'lxml')
for player in pageSoup.find_all('a', id=re.compile('^avPlayer'), onclick=True):
match = re.match('^.*dataURL:\s*[\'\"](.+?)[\'\"]',player['onclick'])
if match:
dataUrl = match.group(1)
dataUrl = urllib.parse.urljoin(baseUrl, dataUrl)
# dataURL is the URL of an XML file with metadata for the media
#xmls.append(lxml.etree.parse(dataUrl))
# lxml does not support HTTPS
xmls.append(lxml.etree.parse(urllib.request.urlopen(dataUrl)))
# if nothing was found: continue with next episode
if len(xmls) == 0:
continue
# Figure out best matching XML
## sort XMLs according to audio length, longest first
xmls = sorted(xmls, key=lambda x: time2seconds(x.xpath('./audio/duration')[0].text), reverse=True)
# extract metadata from XML with longest audio
XMLmeta = {
'topline': safe_text_get(xmls[0].xpath("./audio/topline"),0),
'title': re.sub("^Jetzt nachhören: ","", safe_text_get(xmls[0].xpath("./audio/title"),0)),
'shareTitle': safe_text_get(xmls[0].xpath("./audio/shareTitle"),0),
'duration': safe_text_get(xmls[0].xpath("./audio/duration"),0),
'channel': safe_text_get(xmls[0].xpath("./audio/channel"),0,"BAYERN 2"),
'broadcast': safe_text_get(xmls[0].xpath("./audio/broadcast"),0),
'broadcastDate': safe_text_get(xmls[0].xpath("./audio/broadcastDate"),0,date.today().strftime("%d.%m.%Y")),
'author': safe_text_get(xmls[0].xpath("./audio/author"),0),
'desc': safe_text_get(xmls[0].xpath("./audio/desc"),0),
'permalink': safe_text_get(xmls[0].xpath("./audio/permalink"),0),
'homepageUrl': safe_text_get(xmls[0].xpath("./audio/homepageUrl"),0,"https://www.br.de/index.html"),
'imageUrl': "https://br.de" + safe_text_get(xmls[0].xpath("./audio/teaserImage/variants/variant[@name='image512']/url"),0),
'agf_c9': safe_text_get(xmls[0].xpath("./audio/agf-tracking/c9"),0),
}
# pprint.PrettyPrinter(indent=4).pprint(XMLmeta)
# continue
# our own metadata
meta = {
'downloadUrl': None,
'broadcastDate_dt': None,
'filename': None,
'filepath': None,
'duration_ms': time2seconds(XMLmeta['duration']) * 1000,
'playlist_text': None,
}
## Filter out some episodes
# Skip this episode if it is shorter than defined minimal duration
if meta['duration_ms'] < minimalEpisodeDuration_ms:
continue
# Skip this episode if "Broadcast" is not matching the show's title
if XMLmeta['broadcast'].lower() != showTitle.lower():
continue
# build filename
filename = XMLmeta['broadcast'] + " " + '-'.join(reversed(XMLmeta['broadcastDate'].split('.'))) + " - " + XMLmeta['title'][0:80] + ".mp3"
# in filename replace bad characters
meta['filename'] = re.sub(r'[^\w\s\-\.\[\]]','_', filename)
# filename with path
meta['filepath'] = os.path.join(DESTDIR, meta['filename'])
# continue with next episode if file already exists
if os.path.isfile(meta['filepath']) and os.path.getsize(meta['filepath'])>0:
print("%s already exists, skipping." % meta['filename'], flush=True)
continue
## Populate values in "meta" dict
# agf_c9 looks like "Zündfunk_Zündfunk_27.08.2020_19:05" or "Zündfunk_Zündfunk Generator_30.08.2020_22:05" or "Nachtmix_Nachtmix_27.08.2020_23:05"
# so it can be used to extract the episode's exact broadcast time
try:
parts = XMLmeta['agf_c9'].split('_')
meta['broadcastDate_dt'] = datetime.strptime(parts[2] + " " + parts[3], "%d.%m.%Y %H:%M")
except:
meta['broadcastDate_dt'] = datetime.strptime(XMLmeta['broadcastDate'], "%d.%m.%Y")
# from the XML with the longest audio, get all MP3 audio tracks ("assets")
mp3Assets = xmls[0].xpath("./audio/assets/asset/codecAudio[contains(.,'mp3') or contains(.,'MP3')]/..")
# from all MP3 audio tracks select the one with the highest bitrate...
highestBitrateMp3Asset = sorted(mp3Assets, key=lambda x: int(x.xpath('./bitrateAudio')[0].text), reverse=True)[0]
# ...and get its downloadURL
meta['downloadUrl'] = "https:" + highestBitrateMp3Asset.xpath("./downloadUrl")[0].text
# download file in temporary dir
print("Downloading %s..." % meta['filename'], end=" ", flush=True)
tmpFile = download(meta['downloadUrl'])
if tmpFile is None:
print ("failed.", flush=True)
print ("ERROR: Could not download %s" % url, file=sys.stderr)
sys.exit(1)
# get playlist
playlist_text = get_playlist_as_text(meta['broadcastDate_dt'])
if playlist_text:
meta['playlist_text'] = "PLAYLIST: " + playlist_text
# set ID3 tag
try:
tag = ID3(tmpFile)
tag.delete()
except ID3NoHeaderError:
tag = ID3()
tag.add(TRSN(text=[XMLmeta['channel']]))
tag.add(TPE1(text=[XMLmeta['channel']]))
tag.add(TALB(text=[XMLmeta['broadcast']]))
tag.add(TRCK(text=["1/1"]))
#tag.add(TIT2(text=[meta['broadcastDate_dt'].strftime("%Y-%m-%d") + ": "+XMLmeta['title']]))
tag.add(TIT2(text=[XMLmeta['title']]))
tag.add(COMM(lang="deu", desc="desc", text=[ " /// ".join(filter(None, [XMLmeta['desc'], meta['playlist_text']]))]))
tag.add(TYER(text=[meta['broadcastDate_dt'].strftime("%Y")]))
tag.add(TDAT(text=[meta['broadcastDate_dt'].strftime("%d%m")]))
tag.add(TIME(text=[meta['broadcastDate_dt'].strftime("%H%M")]))
tag.add(TLEN(text=[meta['duration_ms']]))
tag.add(WOAS(url=XMLmeta['permalink']))
tag.add(WORS(url=XMLmeta['homepageUrl']))
tag.add(TLAN(text=["deu"]))
# add cover image
if XMLmeta['imageUrl'] is not None:
try:
response = requests.get(XMLmeta['imageUrl'], timeout=5)
if response.status_code == 200:
imageData = response.content
imageMime = response.headers['content-type']
if imageData is not None and imageMime is not None:
tag.add(APIC(mime=imageMime, desc="Front Cover", data=imageData))
except:
pass
# save ID3 tag
tag.save(tmpFile,v2_version=3)
# done
shutil.move(tmpFile, meta['filepath'])
os.chmod(meta['filepath'], 0o644)
print("done.", flush=True)
#!/usr/bin/env python3
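# Older, show-specific downloader for "Zündfunk"; apart from the URLs and the show title it works the same way as the Nachtmix script above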
import requests
import sys
import urllib.parse
import urllib.request
import os.path
import re
from datetime import datetime, date
import time
from mutagen.id3 import ID3,ID3NoHeaderError,TRSN,TPE1,TALB,TRCK,TIT2,COMM,TYER,TDAT,TIME,TLEN,WOAS,WORS,TLAN,APIC
import shutil
from tempfile import NamedTemporaryFile
import lxml.etree
from bs4 import BeautifulSoup
#import pprint
baseUrl="https://www.br.de/radio/bayern2/sendungen/zuendfunk/programm-nachhoeren/index.html"
playlistsBaseUrl="https://www.br.de/radio/bayern2/sendungen/zuendfunk/pop-platten/playlisten/index.html"
minimalEpisodeDuration_ms = 45 * 60 * 1000
showTitle = "Zündfunk"
def download(url: str, attempts=4):
tmpfile = NamedTemporaryFile(delete=False)
for attempt in range (1,attempts+1):
try:
if attempt > 1:
time.sleep(3)
#urllib.request.urlretrieve(url, tmpfile.name)
stream = urllib.request.urlopen(url)
shutil.copyfileobj(stream, tmpfile)
return tmpfile.name
except:
pass
return None
def time2seconds(timestr: str):
# return duration of HH:MM:SS in seconds
parts = re.split(":", timestr)
return int(parts[0])*3600+int(parts[1])*60+int(parts[2])
def safe_text_get(l: list, idx: int, default=None):
# return text attribute of list item, or default value if it does not exist
try:
return l[idx].text
except IndexError:
return default
def get_playlist_as_text(dt: datetime):