From 264b23e1a42378d52f8774a07c1d906cd1cff96c Mon Sep 17 00:00:00 2001 From: kennell Date: Sun, 18 Oct 2015 19:56:22 +0200 Subject: [PATCH 1/4] adds thumbnail support for ZDF Mediathek extractor --- youtube_dl/extractor/zdf.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index 98f15177b..f376025e1 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -70,6 +70,23 @@ def extract_from_xml_url(ie, video_id, xml_url): '_available': is_available, } + def xml_to_thumbnails(fnode): + thumbnails = list() + for node in fnode: + width_x_height = node.attrib['key'] + thumbnail = { + 'url': node.text, + 'width': int(width_x_height.split('x')[0]), + 'height': int(width_x_height.split('x')[1]) + } + thumbnails.append(thumbnail) + return thumbnails + + + thumbnail_nodes = doc.findall('.//teaserimages/teaserimage') + thumbnails = xml_to_thumbnails(thumbnail_nodes) + thumbnail = thumbnails[-1]['url'] + format_nodes = doc.findall('.//formitaeten/formitaet') formats = list(filter( lambda f: f['_available'], @@ -81,6 +98,8 @@ def extract_from_xml_url(ie, video_id, xml_url): 'title': title, 'description': description, 'duration': duration, + 'thumbnail': thumbnail, + 'thumbnails': thumbnails, 'uploader': uploader, 'uploader_id': uploader_id, 'upload_date': upload_date, From 8cc83d301dd0e8029aff804e362860d36e3d7e7a Mon Sep 17 00:00:00 2001 From: kennell Date: Sun, 18 Oct 2015 20:47:42 +0200 Subject: [PATCH 2/4] use int_or_none, check if attrib exists, remove thumbnail --- youtube_dl/extractor/zdf.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index f376025e1..d41c4e712 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -73,19 +73,17 @@ def extract_from_xml_url(ie, video_id, xml_url): def xml_to_thumbnails(fnode): thumbnails = list() for node in fnode: - width_x_height = node.attrib['key'] - thumbnail = { - 'url': node.text, - 'width': int(width_x_height.split('x')[0]), - 'height': int(width_x_height.split('x')[1]) - } + thumbnail = {'url': node.text} + if 'key' in node.attrib: + width_x_height = node.attrib['key'] + thumbnail['width'] = int_or_none(width_x_height.split('x')[0]) + thumbnail['height'] = int_or_none(width_x_height.split('x')[1]) thumbnails.append(thumbnail) return thumbnails thumbnail_nodes = doc.findall('.//teaserimages/teaserimage') thumbnails = xml_to_thumbnails(thumbnail_nodes) - thumbnail = thumbnails[-1]['url'] format_nodes = doc.findall('.//formitaeten/formitaet') formats = list(filter( @@ -98,7 +96,6 @@ def extract_from_xml_url(ie, video_id, xml_url): 'title': title, 'description': description, 'duration': duration, - 'thumbnail': thumbnail, 'thumbnails': thumbnails, 'uploader': uploader, 'uploader_id': uploader_id, From b243340f0ce311443a15a2dfd4356a9504e18c04 Mon Sep 17 00:00:00 2001 From: kennell Date: Sun, 18 Oct 2015 21:07:52 +0200 Subject: [PATCH 3/4] check if key attrib matches resolution pattern --- youtube_dl/extractor/zdf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index d41c4e712..ed385450c 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -75,9 +75,9 @@ def extract_from_xml_url(ie, video_id, xml_url): for node in fnode: thumbnail = {'url': node.text} if 'key' in node.attrib: - width_x_height = node.attrib['key'] - thumbnail['width'] = int_or_none(width_x_height.split('x')[0]) - thumbnail['height'] = int_or_none(width_x_height.split('x')[1]) + if re.match("^[0-9]+x[0-9]+$", node.attrib['key']): + thumbnail['width'] = int_or_none(node.attrib['key'].split('x')[0]) + thumbnail['height'] = int_or_none(node.attrib['key'].split('x')[1]) thumbnails.append(thumbnail) return thumbnails From b7cedb16043c60d4032b206a83539acbd39f994f Mon Sep 17 00:00:00 2001 From: kennell Date: Sun, 18 Oct 2015 21:25:26 +0200 Subject: [PATCH 4/4] simplify thumbnail dict building --- youtube_dl/extractor/zdf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py index ed385450c..c2b196504 100644 --- a/youtube_dl/extractor/zdf.py +++ b/youtube_dl/extractor/zdf.py @@ -75,9 +75,10 @@ def extract_from_xml_url(ie, video_id, xml_url): for node in fnode: thumbnail = {'url': node.text} if 'key' in node.attrib: - if re.match("^[0-9]+x[0-9]+$", node.attrib['key']): - thumbnail['width'] = int_or_none(node.attrib['key'].split('x')[0]) - thumbnail['height'] = int_or_none(node.attrib['key'].split('x')[1]) + m = re.match('^([0-9]+)x([0-9]+)$', node.attrib['key']) + if m: + thumbnail['width'] = int(m.group(1)) + thumbnail['height'] = int(m.group(2)) thumbnails.append(thumbnail) return thumbnails