Commit a231970a authored by Bernhard Geier
Browse files

use python3 for indextool

parent cff9d145
#!/usr/bin/env python #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
...@@ -31,7 +31,7 @@ def get_metadata(basedir): ...@@ -31,7 +31,7 @@ def get_metadata(basedir):
if 'metadata.opf' in files: if 'metadata.opf' in files:
path = '/'.join(root.split('/')[-2:]) path = '/'.join(root.split('/')[-2:])
filename = get_ebook_file(files) filename = get_ebook_file(files)
extension = os.path.splitext(filename)[1].lower()[1:] extension = os.path.splitext(filename)[1].lower()[1:]
cover = '' cover = ''
if 'cover.jpg' in files: if 'cover.jpg' in files:
cover = 'cover.jpg' cover = 'cover.jpg'
...@@ -45,24 +45,24 @@ def parse_metadata(metadata): ...@@ -45,24 +45,24 @@ def parse_metadata(metadata):
root = x.getroot() root = x.getroot()
def get_field(matcher): def get_field(matcher):
matches = [] matches = []
for match in root.findall('./opf:metadata/dc:%s' % matcher, namespaces=namespaces): for match in root.findall('./opf:metadata/dc:%s' % matcher, namespaces=namespaces):
matches.append(match.text) matches.append(match.text)
return matches return matches
def get_meta_field(matcher): def get_meta_field(matcher):
matches = [] matches = []
for match in root.findall("./opf:metadata/opf:meta[@name='%s']" % matcher, namespaces=namespaces): for match in root.findall("./opf:metadata/opf:meta[@name='%s']" % matcher, namespaces=namespaces):
matches.append(match.get("content")) matches.append(match.get("content"))
return matches return matches
def get_identifiers(): def get_identifiers():
matches = [] matches = []
for match in root.findall("./opf:metadata/dc:identifier[@opf:scheme]", namespaces=namespaces): for match in root.findall("./opf:metadata/dc:identifier[@opf:scheme]", namespaces=namespaces):
identifier_type = match.get('{http://www.idpf.org/2007/opf}scheme') identifier_type = match.get('{http://www.idpf.org/2007/opf}scheme')
if identifier_type == 'calibre': if identifier_type == 'calibre':
continue continue
matches.append(identifier_type + ':' + match.text) matches.append(identifier_type + ':' + match.text)
return matches return matches
...@@ -79,19 +79,19 @@ def parse_metadata(metadata): ...@@ -79,19 +79,19 @@ def parse_metadata(metadata):
# description may contain html, we remove that # description may contain html, we remove that
description = get_field('description') description = get_field('description')
if description: if description:
soup = BeautifulSoup(description[0],"lxml") soup = BeautifulSoup(description[0],"lxml")
for i in soup (['script','style']): # remove script and style for i in soup (['script','style']): # remove script and style
i.extract() i.extract()
description = soup.get_text() # get text description = soup.get_text() # get text
lines = (line.strip() for line in description.splitlines()) # break into lines and remove leading and trailing space on each lines = (line.strip() for line in description.splitlines()) # break into lines and remove leading and trailing space on each
chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) # break multi-headlines into a line each chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) # break multi-headlines into a line each
description = '\n'.join(chunk for chunk in chunks if chunk) # drop blank lines description = '\n'.join(chunk for chunk in chunks if chunk) # drop blank lines
identifier = get_identifiers() identifier = get_identifiers()
language = get_field('language') language = get_field('language')
date = get_field('date') date = get_field('date')
if (date[0]): if (date[0]):
date="%sZ" % parse(date[0]).astimezone(tzutc()).isoformat() date="%sZ" % parse(date[0]).astimezone(tzutc()).isoformat()
publisher = get_field('publisher') publisher = get_field('publisher')
author_sort = get_meta_field('calibre:author_sort') author_sort = get_meta_field('calibre:author_sort')
...@@ -121,10 +121,10 @@ def parse_metadata(metadata): ...@@ -121,10 +121,10 @@ def parse_metadata(metadata):
'date' : date, 'date' : date,
'year': date[:4], 'year': date[:4],
'publisher': publisher, 'publisher': publisher,
'author_sort': author_sort, 'author_sort': author_sort,
'title_sort': title_sort, 'title_sort': title_sort,
} }
...@@ -154,6 +154,7 @@ if __name__ == '__main__': ...@@ -154,6 +154,7 @@ if __name__ == '__main__':
if not ebook_data: if not ebook_data:
print("Unable to find metadata in %s." % metadata_file) print("Unable to find metadata in %s." % metadata_file)
continue continue
ebook_data.update({'path': path, ebook_data.update({'path': path,
'coverfile': cover, 'coverfile': cover,
'filename': filename, 'filename': filename,
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment