summary refs log tree commit diff
diff options
context:
space:
mode:
author: Odd Stråbø <oddstr13@openshell.no> 2017-01-14 02:36:04 +0100
committer: Sergey M․ <dstftw@gmail.com> 2017-01-31 23:10:40 +0700
commit: 8fd65faece98139def3a6538e98053bebd400263 (patch)
tree: 7c4f4837aad5b276d0a7d09c368125def5a2054e
parent: d7e215b42dcaf71298a7e1dc953cf93523b3da81 (diff)
download: youtube-dl-8fd65faece98139def3a6538e98053bebd400263.tar.gz
youtube-dl-8fd65faece98139def3a6538e98053bebd400263.tar.xz
youtube-dl-8fd65faece98139def3a6538e98053bebd400263.zip
[NRKTV] Added NRKTVSeriesIE
[NRKTV] Added season and episode number to metadata.

[NRKTV] Added category to metadata.

[NRKTV] Added tests to NRKTVSeries.

[NRKTV] Fixed whitespace issues (flake8).
-rw-r--r-- youtube_dl/extractor/extractors.py 1
-rw-r--r-- youtube_dl/extractor/nrk.py 49
2 files changed, 50 insertions, 0 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 2590b5e1b..06e6d4620 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -668,6 +668,7 @@ from .nrk import (
     NRKTVIE,
     NRKTVDirekteIE,
     NRKTVEpisodesIE,
+    NRKTVSeriesIE,
 )
 from .ntvde import NTVDeIE
 from .ntvru import NTVRuIE
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index ea7be005a..26604f84f 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -128,6 +128,18 @@ class NRKBaseIE(InfoExtractor):
         series = conviva.get('seriesName') or data.get('seriesTitle')
         episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
 
+        season_number = None
+        episode_number = None
+        if data.get('mediaElementType') == 'Episode':
+            _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
+                data.get('relativeOriginUrl', '')
+            EPISODENUM_RE = [
+                r'/s(?P<season>\d+)e(?P<episode>\d+)\.',
+                r'/sesong-(?P<season>\d+)/episode-(?P<episode>\d+)',
+            ]
+            season_number = int_or_none(self._search_regex(EPISODENUM_RE, _season_episode, "S##E##", fatal=False, group='season'))
+            episode_number = int_or_none(self._search_regex(EPISODENUM_RE, _season_episode, "S##E##", fatal=False, group='episode'))
+
         thumbnails = None
         images = data.get('images')
         if images and isinstance(images, dict):
@@ -140,11 +152,15 @@ class NRKBaseIE(InfoExtractor):
                 } for image in web_images if image.get('imageUrl')]
 
         description = data.get('description')
+        category = data.get('mediaAnalytics', {}).get('category')
 
         common_info = {
             'description': description,
             'series': series,
             'episode': episode,
+            'season_number': season_number,
+            'episode_number': episode_number,
+            'categories': [category] if category else None,
             'age_limit': parse_age_limit(data.get('legalAge')),
             'thumbnails': thumbnails,
         }
@@ -360,6 +376,39 @@ class NRKTVEpisodesIE(NRKPlaylistBaseIE):
             r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
 
 
+class NRKTVSeriesIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+)/?'
+    _ITEM_RE = r'data-season=["\'](?P<id>\d+)["\']'
+    _TESTS = [{
+        'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
+        'playlist_count': 1,
+    }, {
+        'url': 'https://tv.nrk.no/serie/saving-the-human-race',
+        'playlist_count': 1,
+    }, {
+        'url': 'https://tv.nrk.no/serie/postmann-pat',
+        'playlist_count': 3,
+    }, {
+        'url': 'https://tv.nrk.no/serie/groenn-glede',
+        'playlist_count': 9,
+    }]
+
+    def _real_extract(self, url):
+        series_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, series_id)
+
+        entries = [
+            self.url_result('https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
+                series=series_id,
+                season=season_id
+            ))
+            for season_id in re.findall(self._ITEM_RE, webpage)
+        ]
+
+        return self.playlist_result(entries)
+
+
 class NRKSkoleIE(InfoExtractor):
     IE_DESC = 'NRK Skole'
     _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'