Commit 1089f42c authored by Bernhard Geier

clean up code and variables

parent 6f740f6b
......@@ -84,6 +84,11 @@ def download(url: str, file_path: str, attempts=4):
response = requests.get(searchUrl % urllib.parse.quote_plus(SHOW), timeout=5)
result = response.json()
stationInfo = {
'name': 'FM4',
'website': 'http://fm4.orf.at',
}
# for each search result fetch linked data
for hit in result['hits']:
# only care about "Broadcast" and skip everything else
......@@ -93,117 +98,166 @@ for hit in result['hits']:
# get json of matching broadcast
broadcastJson = requests.get(hit['data']['href'], timeout=5).json()
# extract show name. skip if results not containing the show's name in the title
# dictionary to collect show data
showInfo = {
'name': None,
'start_dt': None,
'end_dt': None,
'description': None,
'website': None,
'image_data': None,
'image_mime': None,
}
# extract show name. skip if result does not contain the show's name in the title
# SHOW is a literal show name (it is also used as the search term), so escape it before embedding it in the pattern
match = re.search(r'^\s*(.*?' + re.escape(SHOW) + r'.*?)\s*$', broadcastJson['title'], flags=re.IGNORECASE)
if not match:
continue
showName = match.group(1)
showInfo['name'] = match.group(1)
# extract start and end datetime
showStart = datetime.fromtimestamp(broadcastJson['start']/1000)
showEnd = datetime.fromtimestamp(broadcastJson['end']/1000)
showInfo['start_dt'] = datetime.fromtimestamp(broadcastJson['start']/1000)
showInfo['end_dt'] = datetime.fromtimestamp(broadcastJson['end']/1000)
# build show description
showDescription = strip_html(broadcastJson['description'])
if showDescription is None:
showDescription = strip_html(broadcastJson['subtitle'])
if showDescription is None:
showDescription = strip_html(broadcastJson['pressRelease'])
if showDescription is None:
showDescription = showStart.strftime("%Y-%m-%d %H:%M")
showInfo['description'] = strip_html(broadcastJson['description']) or \
strip_html(broadcastJson['subtitle']) or \
strip_html(broadcastJson['pressRelease']) or \
showInfo['start_dt'].strftime("%Y-%m-%d %H:%M")
# link to show's website
showInfo['website'] = broadcastJson['url']
# most shows have one part in the stream, some shows (e.g. Morning Show) are split into multiple stream parts
# download them, sorted by start time
streams = sorted(broadcastJson['streams'], key=lambda x: x['start'])
# get show's cover image
# try the image versions from index 2 down to 0 and keep the first one that downloads successfully
for i in range(2, -1, -1):
    try:
        response = requests.get(broadcastJson['images'][0]['versions'][i]['path'], timeout=5)
        if response.status_code == 200:
            # read the mime type first so image_data and image_mime are only ever set together
            image_mime = response.headers['content-type']
            showInfo['image_data'] = response.content
            showInfo['image_mime'] = image_mime
            break
    except (requests.RequestException, LookupError, TypeError):
        # the cover image is optional: a missing entry or a failed request just means trying the next version
        continue
for streamNr in range(0, len(streams)):
tagTitle = showStart.strftime("%Y-%m-%d %H:%M")
if len(streams)>1:
tagTitle += " [" + str(streamNr+1) + "/" + str(len(streams)) + "]"
filename = re.sub('[^\w\s\-\.\[\]]','_', showName + " " + tagTitle)
match = re.search('^FM4 ',filename)
# most shows consist of a single file, but some shows (e.g. Morning Show) are split into multiple parts
# download them, sorted by start time
streamParts = sorted(broadcastJson['streams'], key=lambda x: x['start'])
for streamPartNr in range(0, len(streamParts)):
# read the part's data from the sorted list (streamParts) so that part numbers follow start time;
# indexing broadcastJson['streams'] directly would ignore the sort
partInfo = {
    'url': streamParts[streamPartNr]['loopStreamId'],
    'start_at_ms': streamParts[streamPartNr]['start'],
    'end_at_ms': streamParts[streamPartNr]['end'],
    'duration_ms': streamParts[streamPartNr]['end'] - streamParts[streamPartNr]['start'],
    'title': None,
    'filename': None,
    'filepath': None,
    'chapters': [],
}
# if show has more than 1 part: append current_part/total_parts to title
partInfo['title'] = showInfo['start_dt'].strftime("%Y-%m-%d %H:%M")
if len(streamParts)>1:
partInfo['title'] += " [" + str(streamPartNr+1) + "/" + str(len(streamParts)) + "]"
# build filename
partInfo['filename'] = re.sub('[^\w\s\-\.\[\]]','_', showInfo['name'] + " " + partInfo['title'])
# prepend station name to filename
match = re.search('^'+stationInfo['name']+' ', partInfo['filename'])
if not match:
filename = "FM4 "+filename
filename+=".mp3"
partInfo['filename'] = stationInfo['name'] + ' ' + partInfo['filename']
partInfo['filename'] += ".mp3"
# filepath
partInfo['filepath'] = os.path.join(DESTDIR, partInfo['filename'])
filepath = os.path.join(DESTDIR, filename)
if os.path.isfile(filepath) and os.path.getsize(filepath)>0:
print("%s already exists, skipping." % filepath, flush=True)
# skip file if it already exists
if os.path.isfile(partInfo['filepath']) and os.path.getsize(partInfo['filepath'])>0:
print("%s already exists, skipping." % partInfo['filepath'], flush=True)
continue
print("%s downloading..." % filepath, end=" ", flush=True)
if not download(shoutcastBaseUrl % broadcastJson['streams'][streamNr]['loopStreamId'], filepath+".part"):
# download file
print("%s downloading..." % partInfo['filepath'], end=" ", flush=True)
if not download(shoutcastBaseUrl % partInfo['url'], partInfo['filepath']+".part"):
print("failed.", flush=True)
continue
# set ID3 tag
try:
tags = ID3(filepath+".part")
tags.delete()
except ID3NoHeaderError:
tags = ID3()
tags.add(TRSN(text=["FM4"]))
tags.add(TPE1(text=["FM4"]))
tags.add(TALB(text=[showName]))
tags.add(TRCK(text=[str(streamNr+1) + "/" + str(len(streams))]))
tags.add(TIT2(text=[tagTitle]))
tags.add(COMM(lang="deu", desc="desc", text=[showDescription]))
tags.add(TYER(text=[showStart.strftime("%Y")]))
tags.add(TDAT(text=[showStart.strftime("%d%m")]))
tags.add(TIME(text=[showStart.strftime("%H%M")]))
tags.add(TLEN(text=[broadcastJson['streams'][streamNr]['end'] - broadcastJson['streams'][streamNr]['start']]))
tags.add(WOAS(url=broadcastJson['url']))
tags.add(WORS(url="http://fm4.orf.at"))
# set chapter information according to show's "items"
# https://mutagen.readthedocs.io/en/latest/user/id3.html
chapters = []
chapterNr = 0
for item in sorted(broadcastJson['items'], key=lambda x: x['start']):
if item['entity'] == "BroadcastItem":
if item['end'] <= broadcastJson['streams'][streamNr]['start']:
# skip items that end before the current stream part starts and stop at the first item that starts after it ends
if item['end'] <= partInfo['start_at_ms']:
continue
if item['start'] >= broadcastJson['streams'][streamNr]['end']:
if item['start'] >= partInfo['end_at_ms']:
break
chapterNr+=1
chapterTitle = []
chapterInfo = {
"id": "ch"+str(chapterNr),
"title": None,
"start_ms": None,
"end_ms": None,
}
# build chapter title
chapterTitles = []
for key in [ "interpreter", "title", "description" ]:
if key in item.keys():
if item[key] is not None:
chapterTitle.append(strip_html(item[key]))
chapterTitles.append(strip_html(item[key]))
chapterInfo['title'] = " / ".join(chapterTitles)
# For multipart shows sometimes chapters start in the previous part
# As in ID3 chapter start times must not be negative set chapter start to 0 in that case
chapterStart = item['start'] - broadcastJson['streams'][streamNr]['start']
if chapterStart < 0:
chapterStart = 0
# for multipart shows sometimes chapters start in the previous part, so the start time is negative
# In ID3 chapter start times must be >=0, so we set chapter start to 0 in that case
chapterInfo['start_ms'] = item['start'] - partInfo['start_at_ms']
if chapterInfo['start_ms'] < 0:
chapterInfo['start_ms'] = 0
chapterEnd = item['end'] - broadcastJson['streams'][streamNr]['start'] # FIXME: chapters (and shows?) seem to be 1s too long
if chapterEnd > broadcastJson['streams'][streamNr]['end'] - broadcastJson['streams'][streamNr]['start']:
chapterEnd = broadcastJson['streams'][streamNr]['end'] - broadcastJson['streams'][streamNr]['start']
chapterInfo['end_ms'] = item['end'] - partInfo['start_at_ms'] # FIXME: chapters (and shows?) seem to be 1s too long
if chapterInfo['end_ms'] > partInfo['duration_ms']:
chapterInfo['end_ms'] = partInfo['duration_ms']
chapters.append({
"id": "ch"+str(chapterNr),
"title": " / ".join(chapterTitle),
"startTime": chapterStart,
"endTime": chapterEnd,
})
for c in chapters:
partInfo['chapters'].append(chapterInfo)
# set ID3 tags
try:
tags = ID3(partInfo['filepath']+".part")
tags.delete()
except ID3NoHeaderError:
tags = ID3()
tags.add(TRSN(text=[stationInfo['name']]))
tags.add(TPE1(text=[stationInfo['name']]))
tags.add(TALB(text=[showInfo['name']]))
tags.add(TRCK(text=[str(streamPartNr+1) + "/" + str(len(streamParts))]))
tags.add(TIT2(text=[partInfo['title']]))
tags.add(COMM(lang="deu", desc="desc", text=[showInfo['description']]))
tags.add(TYER(text=[showInfo['start_dt'].strftime("%Y")]))
tags.add(TDAT(text=[showInfo['start_dt'].strftime("%d%m")]))
tags.add(TIME(text=[showInfo['start_dt'].strftime("%H%M")]))
tags.add(TLEN(text=[partInfo['duration_ms']]))
tags.add(WOAS(url=showInfo['website']))
tags.add(WORS(url=stationInfo['website']))
for chapter in partInfo['chapters']:
tags.add(CHAP(
element_id = c["id"],
start_time = c["startTime"],
end_time = c["endTime"],
sub_frames = [TIT2(text=[c["title"]])]
element_id = chapter["id"],
start_time = chapter["start_ms"],
end_time = chapter["end_ms"],
sub_frames = [TIT2(text=[chapter["title"]])]
))
tocList = ",".join([ c["id"] for c in chapters ])
tocList = ",".join([ chapter["id"] for chapter in partInfo['chapters'] ])
tags.add(CTOC(
element_id = "toc",
flags = CTOCFlags.TOP_LEVEL | CTOCFlags.ORDERED,
......@@ -211,21 +265,14 @@ for hit in result['hits']:
sub_frames = [TIT2(text=["Table Of Contents"])]
))
if showInfo['image_mime'] is not None and showInfo['image_data'] is not None:
tags.add(APIC(mime=showInfo['image_mime'], desc="Front Cover", data=showInfo['image_data']))
# cover image
for i in range(2,-1,-1):
try:
response = requests.get(broadcastJson['images'][0]['versions'][i]['path'])
if response.status_code == 200:
tags.add(APIC(mime=response.headers['content-type'], desc="Front Cover", data=response.content))
break
except:
continue
# save ID3 tags
tags.save(filepath+".part",v2_version=3)
tags.save(partInfo['filepath']+".part",v2_version=3)
# done
os.rename(filepath+".part", filepath)
# the target path is stored in partInfo; showInfo has no 'filepath' key
os.rename(partInfo['filepath']+".part", partInfo['filepath'])
print("done.", flush=True)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment