about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2017-02-19 03:53:23 +0700
committer Sergey M <dstftw@gmail.com> 2017-02-19 05:10:08 +0800
commit 4248dad92bd87650c791194276296b148f668e68 (patch)
tree e4c26e41802f4119b67830a61976c39c494b377e
parent 0a840f584c3f1fedb6957c05587dec697143f2d5 (diff)
download youtube-dl-4248dad92bd87650c791194276296b148f668e68.tar.gz
youtube-dl-4248dad92bd87650c791194276296b148f668e68.tar.xz
youtube-dl-4248dad92bd87650c791194276296b148f668e68.zip
Improve geo bypass mechanism
* Rename options so that they share a common prefix with --geo-verification-proxy
* Introduce _GEO_COUNTRIES for extractors
* Implement faking IP right away for sites with known geo restriction
-rw-r--r-- youtube_dl/extractor/common.py 57
-rw-r--r-- youtube_dl/extractor/dramafever.py 3
-rw-r--r-- youtube_dl/extractor/go.py 3
-rw-r--r-- youtube_dl/extractor/itv.py 4
-rw-r--r-- youtube_dl/extractor/nrk.py 4
-rw-r--r-- youtube_dl/extractor/ondemandkorea.py 3
-rw-r--r-- youtube_dl/extractor/pbs.py 5
-rw-r--r-- youtube_dl/extractor/srgssr.py 6
-rw-r--r-- youtube_dl/extractor/svt.py 4
-rw-r--r-- youtube_dl/extractor/vbox7.py 3
-rw-r--r-- youtube_dl/extractor/vgtv.py 5
-rw-r--r-- youtube_dl/extractor/viki.py 2
-rw-r--r-- youtube_dl/utils.py 2
13 files changed, 71 insertions, 30 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 6eb6a25b8..272da74b6 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -323,10 +323,15 @@ class InfoExtractor(object):
     _real_extract() methods and define a _VALID_URL regexp.
     Probably, they should also be added to the list of extractors.
 
-    _BYPASS_GEO attribute may be set to False in order to disable
+    _GEO_BYPASS attribute may be set to False in order to disable
     geo restriction bypass mechanisms for a particular extractor.
     Though it won't disable explicit geo restriction bypass based on
-    country code provided with geo_bypass_country.
+    country code provided with geo_bypass_country. (experimental)
+
+    _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
+    countries for this extractor. One of these countries will be used by
+    geo restriction bypass mechanism right away in order to bypass
+    geo restriction, of course, if the mechanism is not disabled. (experimental)
 
     Finally, the _WORKING attribute should be set to False for broken IEs
     in order to warn the users and skip the tests.
@@ -335,7 +340,8 @@ class InfoExtractor(object):
     _ready = False
     _downloader = None
     _x_forwarded_for_ip = None
-    _BYPASS_GEO = True
+    _GEO_BYPASS = True
+    _GEO_COUNTRIES = None
     _WORKING = True
 
     def __init__(self, downloader=None):
@@ -370,13 +376,27 @@ class InfoExtractor(object):
 
     def initialize(self):
         """Initializes an instance (authentication, etc)."""
+        self.__initialize_geo_bypass()
+        if not self._ready:
+            self._real_initialize()
+            self._ready = True
+
+    def __initialize_geo_bypass(self):
         if not self._x_forwarded_for_ip:
             country_code = self._downloader.params.get('geo_bypass_country', None)
+            # If there is no explicit country for geo bypass specified and
+            # the extractor is known to be geo restricted let's fake IP
+            # as X-Forwarded-For right away.
+            if (not country_code and
+                    self._GEO_BYPASS and
+                    self._downloader.params.get('geo_bypass', True) and
+                    self._GEO_COUNTRIES):
+                country_code = random.choice(self._GEO_COUNTRIES)
             if country_code:
                 self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
-        if not self._ready:
-            self._real_initialize()
-            self._ready = True
+                if self._downloader.params.get('verbose', False):
+                    self._downloader.to_stdout(
+                        '[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
 
     def extract(self, url):
         """Extracts URL information and returns it in list of dicts."""
@@ -389,16 +409,8 @@ class InfoExtractor(object):
                         ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
                     return ie_result
                 except GeoRestrictedError as e:
-                    if (not self._downloader.params.get('geo_bypass_country', None) and
-                            self._BYPASS_GEO and
-                            self._downloader.params.get('geo_bypass', True) and
-                            not self._x_forwarded_for_ip and
-                            e.countries):
-                        self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
-                        if self._x_forwarded_for_ip:
-                            self.report_warning(
-                                'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
-                            continue
+                    if self.__maybe_fake_ip_and_retry(e.countries):
+                        continue
                     raise
         except ExtractorError:
             raise
@@ -407,6 +419,19 @@ class InfoExtractor(object):
         except (KeyError, StopIteration) as e:
             raise ExtractorError('An extractor error has occurred.', cause=e)
 
+    def __maybe_fake_ip_and_retry(self, countries):
+        if (not self._downloader.params.get('geo_bypass_country', None) and
+                self._GEO_BYPASS and
+                self._downloader.params.get('geo_bypass', True) and
+                not self._x_forwarded_for_ip and
+                countries):
+            self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries))
+            if self._x_forwarded_for_ip:
+                self.report_warning(
+                    'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
+                return True
+        return False
+
     def set_downloader(self, downloader):
         """Sets the downloader for this IE."""
         self._downloader = downloader
diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py
index 755db806a..e7abc8889 100644
--- a/youtube_dl/extractor/dramafever.py
+++ b/youtube_dl/extractor/dramafever.py
@@ -20,6 +20,7 @@ from ..utils import (
 class DramaFeverBaseIE(AMPIE):
     _LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
     _NETRC_MACHINE = 'dramafever'
+    _GEO_COUNTRIES = ['US', 'CA']
 
     _CONSUMER_SECRET = 'DA59dtVXYLxajktV'
 
@@ -118,7 +119,7 @@ class DramaFeverIE(DramaFeverBaseIE):
             if isinstance(e.cause, compat_HTTPError):
                 self.raise_geo_restricted(
                     msg='Currently unavailable in your country',
-                    countries=['US', 'CA'])
+                    countries=self._GEO_COUNTRIES)
             raise
 
         series_id, episode_number = video_id.split('.')
diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py
index ec902c670..b205bfc7c 100644
--- a/youtube_dl/extractor/go.py
+++ b/youtube_dl/extractor/go.py
@@ -37,6 +37,7 @@ class GoIE(AdobePassIE):
         }
     }
     _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
+    _GEO_COUNTRIES = ['US']
     _TESTS = [{
         'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
         'info_dict': {
@@ -104,7 +105,7 @@ class GoIE(AdobePassIE):
                         for error in errors:
                             if error.get('code') == 1002:
                                 self.raise_geo_restricted(
-                                    error['message'], countries=['US'])
+                                    error['message'], countries=self._GEO_COUNTRIES)
                         error_message = ', '.join([error['message'] for error in errors])
                         raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
                     asset_url += '?' + entitlement['uplynkData']['sessionKey']
diff --git a/youtube_dl/extractor/itv.py b/youtube_dl/extractor/itv.py
index aabde15f3..021c6b278 100644
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@@ -24,6 +24,7 @@ from ..utils import (
 
 class ITVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
+    _GEO_COUNTRIES = ['GB']
     _TEST = {
         'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
         'info_dict': {
@@ -101,7 +102,8 @@ class ITVIE(InfoExtractor):
             fault_code = xpath_text(resp_env, './/faultcode')
             fault_string = xpath_text(resp_env, './/faultstring')
             if fault_code == 'InvalidGeoRegion':
-                self.raise_geo_restricted(msg=fault_string, countries=['GB'])
+                self.raise_geo_restricted(
+                    msg=fault_string, countries=self._GEO_COUNTRIES)
             raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
         title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
         video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index 78ece33e1..13af9ed1f 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -14,6 +14,7 @@ from ..utils import (
 
 
 class NRKBaseIE(InfoExtractor):
+    _GEO_COUNTRIES = ['NO']
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -93,7 +94,8 @@ class NRKBaseIE(InfoExtractor):
             # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
             if 'IsGeoBlocked' in message_type:
                 self.raise_geo_restricted(
-                    msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO'])
+                    msg=MESSAGES.get('ProgramIsGeoBlocked'),
+                    countries=self._GEO_COUNTRIES)
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, MESSAGES.get(
                     message_type, message_type)),
diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dl/extractor/ondemandkorea.py
index 0c85d549e..df1ce3c1d 100644
--- a/youtube_dl/extractor/ondemandkorea.py
+++ b/youtube_dl/extractor/ondemandkorea.py
@@ -10,6 +10,7 @@ from ..utils import (
 
 class OnDemandKoreaIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
+    _GEO_COUNTRIES = ['US', 'CA']
     _TEST = {
         'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
         'info_dict': {
@@ -36,7 +37,7 @@ class OnDemandKoreaIE(InfoExtractor):
         if 'msg_block_01.png' in webpage:
             self.raise_geo_restricted(
                 msg='This content is not available in your region',
-                countries=['US', 'CA'])
+                countries=self._GEO_COUNTRIES)
 
         if 'This video is only available to ODK PLUS members.' in webpage:
             raise ExtractorError(
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dl/extractor/pbs.py
index 64f47bae3..3e51b4dd7 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -193,6 +193,8 @@ class PBSIE(InfoExtractor):
         )
     ''' % '|'.join(list(zip(*_STATIONS))[0])
 
+    _GEO_COUNTRIES = ['US']
+
     _TESTS = [
         {
             'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
@@ -492,7 +494,8 @@ class PBSIE(InfoExtractor):
                 message = self._ERRORS.get(
                     redirect_info['http_code'], redirect_info['message'])
                 if redirect_info['http_code'] == 403:
-                    self.raise_geo_restricted(msg=message, countries=['US'])
+                    self.raise_geo_restricted(
+                        msg=message, countries=self._GEO_COUNTRIES)
                 raise ExtractorError(
                     '%s said: %s' % (self.IE_NAME, message), expected=True)
 
diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py
index a35a0a538..bb73eb1d5 100644
--- a/youtube_dl/extractor/srgssr.py
+++ b/youtube_dl/extractor/srgssr.py
@@ -14,7 +14,8 @@ from ..utils import (
 
 class SRGSSRIE(InfoExtractor):
     _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
-    _BYPASS_GEO = False
+    _GEO_BYPASS = False
+    _GEO_COUNTRIES = ['CH']
 
     _ERRORS = {
         'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
@@ -43,7 +44,8 @@ class SRGSSRIE(InfoExtractor):
         if media_data.get('block') and media_data['block'] in self._ERRORS:
             message = self._ERRORS[media_data['block']]
             if media_data['block'] == 'GEOBLOCK':
-                self.raise_geo_restricted(msg=message, countries=['CH'])
+                self.raise_geo_restricted(
+                    msg=message, countries=self._GEO_COUNTRIES)
             raise ExtractorError(
                 '%s said: %s' % (self.IE_NAME, message), expected=True)
 
diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py
index f2a2200bf..9e2c9fcc6 100644
--- a/youtube_dl/extractor/svt.py
+++ b/youtube_dl/extractor/svt.py
@@ -13,6 +13,7 @@ from ..utils import (
 
 
 class SVTBaseIE(InfoExtractor):
+    _GEO_COUNTRIES = ['SE']
     def _extract_video(self, video_info, video_id):
         formats = []
         for vr in video_info['videoReferences']:
@@ -39,7 +40,8 @@ class SVTBaseIE(InfoExtractor):
                 })
         if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
             self.raise_geo_restricted(
-                'This video is only available in Sweden', countries=['SE'])
+                'This video is only available in Sweden',
+                countries=self._GEO_COUNTRIES)
         self._sort_formats(formats)
 
         subtitles = {}
diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dl/extractor/vbox7.py
index f86d804c1..8152acefd 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dl/extractor/vbox7.py
@@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor):
                         )
                         (?P<id>[\da-fA-F]+)
                     '''
+    _GEO_COUNTRIES = ['BG']
     _TESTS = [{
         'url': 'http://vbox7.com/play:0946fff23c',
         'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
@@ -78,7 +79,7 @@ class Vbox7IE(InfoExtractor):
         video_url = video['src']
 
         if '/na.mp4' in video_url:
-            self.raise_geo_restricted(countries=['BG'])
+            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 
         uploader = video.get('uploader')
 
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dl/extractor/vgtv.py
index 1709fd6bb..0f8c156a7 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dl/extractor/vgtv.py
@@ -14,7 +14,7 @@ from ..utils import (
 
 class VGTVIE(XstreamIE):
     IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
-    _BYPASS_GEO = False
+    _GEO_BYPASS = False
 
     _HOST_TO_APPNAME = {
         'vgtv.no': 'vgtv',
@@ -218,7 +218,8 @@ class VGTVIE(XstreamIE):
             properties = try_get(
                 data, lambda x: x['streamConfiguration']['properties'], list)
             if properties and 'geoblocked' in properties:
-                raise self.raise_geo_restricted(countries=['NO'])
+                raise self.raise_geo_restricted(
+                    countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
 
         self._sort_formats(info['formats'])
 
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 68a74e246..e9c8bf824 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -27,7 +27,7 @@ class VikiBaseIE(InfoExtractor):
     _APP_VERSION = '2.2.5.1428709186'
     _APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
 
-    _BYPASS_GEO = False
+    _GEO_BYPASS = False
     _NETRC_MACHINE = 'viki'
 
     _token = None
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index cbf7639c5..17b83794a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3291,7 +3291,7 @@ class GeoUtils(object):
         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
         addr_max = addr_min | (0xffffffff >> int(preflen))
         return compat_str(socket.inet_ntoa(
-            compat_struct_pack('!I', random.randint(addr_min, addr_max))))
+            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
 
 
 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):