From dadf8ed045e5761317416dcc7d4805e788945fb7 Mon Sep 17 00:00:00 2001 From: Jesusaves Date: Sun, 5 Mar 2023 17:38:37 -0300 Subject: Import mwclient v0.10.1 --- game/python-extra/mwclient/__init__.py | 42 ++ game/python-extra/mwclient/client.py | 1138 ++++++++++++++++++++++++++++++++ game/python-extra/mwclient/errors.py | 114 ++++ game/python-extra/mwclient/image.py | 78 +++ game/python-extra/mwclient/listing.py | 325 +++++++++ game/python-extra/mwclient/page.py | 541 +++++++++++++++ game/python-extra/mwclient/sleep.py | 88 +++ game/python-extra/mwclient/util.py | 24 + 8 files changed, 2350 insertions(+) create mode 100644 game/python-extra/mwclient/__init__.py create mode 100644 game/python-extra/mwclient/client.py create mode 100644 game/python-extra/mwclient/errors.py create mode 100644 game/python-extra/mwclient/image.py create mode 100644 game/python-extra/mwclient/listing.py create mode 100644 game/python-extra/mwclient/page.py create mode 100644 game/python-extra/mwclient/sleep.py create mode 100644 game/python-extra/mwclient/util.py diff --git a/game/python-extra/mwclient/__init__.py b/game/python-extra/mwclient/__init__.py new file mode 100644 index 0000000..4e46dcd --- /dev/null +++ b/game/python-extra/mwclient/__init__.py @@ -0,0 +1,42 @@ +""" + Copyright (c) 2006-2011 Bryan Tong Minh + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, + copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following + conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. +""" + +from mwclient.errors import * # noqa: F401, F403 +from mwclient.client import Site, __version__ # noqa: F401 +import logging +import warnings + +# Show DeprecationWarning +warnings.simplefilter('always', DeprecationWarning) + +# Logging: Add a null handler to avoid "No handler found" warnings. +try: + from logging import NullHandler +except ImportError: + class NullHandler(logging.Handler): + def emit(self, record): + pass + +logging.getLogger(__name__).addHandler(NullHandler()) diff --git a/game/python-extra/mwclient/client.py b/game/python-extra/mwclient/client.py new file mode 100644 index 0000000..b3057a8 --- /dev/null +++ b/game/python-extra/mwclient/client.py @@ -0,0 +1,1138 @@ +# encoding=utf-8 +import warnings +import logging +from six import text_type +import six + +from collections import OrderedDict + +import json +import requests +from requests.auth import HTTPBasicAuth, AuthBase +from requests_oauthlib import OAuth1 + +import mwclient.errors as errors +import mwclient.listing as listing +from mwclient.sleep import Sleepers +from mwclient.util import parse_timestamp, read_in_chunks + +try: + import gzip +except ImportError: + gzip = None + +__version__ = '0.10.1' + +log = logging.getLogger(__name__) + +USER_AGENT = 'mwclient/{} ({})'.format(__version__, + 'https://github.com/mwclient/mwclient') + + +class Site(object): + """A MediaWiki site identified by its hostname. 
+ + >>> import mwclient + >>> site = mwclient.Site('en.wikipedia.org') + + Do not include the leading "http://". + + Mwclient assumes that the script path (where index.php and api.php are located) + is '/w/'. If the site uses a different script path, you must specify this + (path must end in a '/'). + + Examples: + + >>> site = mwclient.Site('vim.wikia.com', path='/') + >>> site = mwclient.Site('sourceforge.net', path='/apps/mediawiki/mwclient/') + + """ + api_limit = 500 + + def __init__(self, host, path='/w/', ext='.php', pool=None, retry_timeout=30, + max_retries=25, wait_callback=lambda *x: None, clients_useragent=None, + max_lag=3, compress=True, force_login=True, do_init=True, httpauth=None, + reqs=None, consumer_token=None, consumer_secret=None, access_token=None, + access_secret=None, client_certificate=None, custom_headers=None, + scheme='https'): + # Setup member variables + self.host = host + self.path = path + self.ext = ext + self.credentials = None + self.compress = compress + self.max_lag = text_type(max_lag) + self.force_login = force_login + self.requests = reqs or {} + self.scheme = scheme + if 'timeout' not in self.requests: + self.requests['timeout'] = 30 # seconds + + if consumer_token is not None: + auth = OAuth1(consumer_token, consumer_secret, access_token, access_secret) + elif isinstance(httpauth, (list, tuple)): + auth = HTTPBasicAuth(*httpauth) + elif httpauth is None or isinstance(httpauth, (AuthBase,)): + auth = httpauth + else: + raise RuntimeError('Authentication is not a tuple or an instance of AuthBase') + + self.sleepers = Sleepers(max_retries, retry_timeout, wait_callback) + + # Site properties + self.blocked = False # Whether current user is blocked + self.hasmsg = False # Whether current user has new messages + self.groups = [] # Groups current user belongs to + self.rights = [] # Rights current user has + self.tokens = {} # Edit tokens of the current user + self.version = None + + self.namespaces = self.default_namespaces + 
self.writeapi = False + + # Setup connection + if pool is None: + self.connection = requests.Session() + self.connection.auth = auth + if client_certificate: + self.connection.cert = client_certificate + + # Set User-Agent header field + if clients_useragent: + ua = clients_useragent + ' ' + USER_AGENT + else: + ua = USER_AGENT + self.connection.headers['User-Agent'] = ua + + if custom_headers: + self.connection.headers.update(custom_headers) + else: + self.connection = pool + + # Page generators + self.pages = listing.PageList(self) + self.categories = listing.PageList(self, namespace=14) + self.images = listing.PageList(self, namespace=6) + + # Compat page generators + self.Pages = self.pages + self.Categories = self.categories + self.Images = self.images + + # Initialization status + self.initialized = False + + # Upload chunk size in bytes + self.chunk_size = 1048576 + + if do_init: + try: + self.site_init() + except errors.APIError as e: + if e.args[0] == 'mwoauth-invalid-authorization': + raise errors.OAuthAuthorizationError(self, e.code, e.info) + + # Private wiki, do init after login + if e.args[0] not in {u'unknown_action', u'readapidenied'}: + raise + + def site_init(self): + + if self.initialized: + info = self.get('query', meta='userinfo', uiprop='groups|rights') + userinfo = info['query']['userinfo'] + self.username = userinfo['name'] + self.groups = userinfo.get('groups', []) + self.rights = userinfo.get('rights', []) + self.tokens = {} + return + + meta = self.get('query', meta='siteinfo|userinfo', + siprop='general|namespaces', uiprop='groups|rights', + retry_on_error=False) + + # Extract site info + self.site = meta['query']['general'] + self.namespaces = { + namespace['id']: namespace.get('*', '') + for namespace in six.itervalues(meta['query']['namespaces']) + } + self.writeapi = 'writeapi' in self.site + + self.version = self.version_tuple_from_generator(self.site['generator']) + + # Require MediaWiki version >= 1.16 + self.require(1, 16) + + # 
User info + userinfo = meta['query']['userinfo'] + self.username = userinfo['name'] + self.groups = userinfo.get('groups', []) + self.rights = userinfo.get('rights', []) + self.initialized = True + + @staticmethod + def version_tuple_from_generator(string, prefix='MediaWiki '): + """Return a version tuple from a MediaWiki Generator string. + + Example: + "MediaWiki 1.5.1" → (1, 5, 1) + + Args: + prefix (str): The expected prefix of the string + """ + if not string.startswith(prefix): + raise errors.MediaWikiVersionError('Unknown generator {}'.format(string)) + + version = string[len(prefix):].split('.') + + def split_num(s): + """Split the string on the first non-digit character. + + Returns: + A tuple of the digit part as int and, if available, + the rest of the string. + """ + i = 0 + while i < len(s): + if s[i] < '0' or s[i] > '9': + break + i += 1 + if s[i:]: + return (int(s[:i]), s[i:], ) + else: + return (int(s[:i]), ) + + version_tuple = sum((split_num(s) for s in version), ()) + + if len(version_tuple) < 2: + raise errors.MediaWikiVersionError('Unknown MediaWiki {}' + .format('.'.join(version))) + + return version_tuple + + default_namespaces = { + 0: u'', 1: u'Talk', 2: u'User', 3: u'User talk', 4: u'Project', + 5: u'Project talk', 6: u'Image', 7: u'Image talk', 8: u'MediaWiki', + 9: u'MediaWiki talk', 10: u'Template', 11: u'Template talk', 12: u'Help', + 13: u'Help talk', 14: u'Category', 15: u'Category talk', + -1: u'Special', -2: u'Media' + } + + def __repr__(self): + return "" % (self.host, self.path) + + def get(self, action, *args, **kwargs): + """Perform a generic API call using GET. + + This is just a shorthand for calling api() with http_method='GET'. + All arguments will be passed on. + + Returns: + The raw response from the API call, as a dictionary. + """ + return self.api(action, 'GET', *args, **kwargs) + + def post(self, action, *args, **kwargs): + """Perform a generic API call using POST. 
+ + This is just a shorthand for calling api() with http_method='POST'. + All arguments will be passed on. + + Returns: + The raw response from the API call, as a dictionary. + """ + return self.api(action, 'POST', *args, **kwargs) + + def api(self, action, http_method='POST', *args, **kwargs): + """Perform a generic API call and handle errors. + + All arguments will be passed on. + + Example: + To get coordinates from the GeoData MediaWiki extension at English Wikipedia: + + >>> site = Site('en.wikipedia.org') + >>> result = site.api('query', prop='coordinates', titles='Oslo|Copenhagen') + >>> for page in result['query']['pages'].values(): + ... if 'coordinates' in page: + ... print '{} {} {}'.format(page['title'], + ... page['coordinates'][0]['lat'], + ... page['coordinates'][0]['lon']) + Oslo 59.95 10.75 + Copenhagen 55.6761 12.5683 + + Returns: + The raw response from the API call, as a dictionary. + """ + kwargs.update(args) + + if action == 'query' and 'continue' not in kwargs: + kwargs['continue'] = '' + if action == 'query': + if 'meta' in kwargs: + kwargs['meta'] += '|userinfo' + else: + kwargs['meta'] = 'userinfo' + if 'uiprop' in kwargs: + kwargs['uiprop'] += '|blockinfo|hasmsg' + else: + kwargs['uiprop'] = 'blockinfo|hasmsg' + + sleeper = self.sleepers.make() + + while True: + info = self.raw_api(action, http_method, **kwargs) + if not info: + info = {} + if self.handle_api_result(info, sleeper=sleeper): + return info + + def handle_api_result(self, info, kwargs=None, sleeper=None): + if sleeper is None: + sleeper = self.sleepers.make() + + try: + userinfo = info['query']['userinfo'] + except KeyError: + userinfo = () + if 'blockedby' in userinfo: + self.blocked = (userinfo['blockedby'], userinfo.get('blockreason', u'')) + else: + self.blocked = False + self.hasmsg = 'messages' in userinfo + self.logged_in = 'anon' not in userinfo + if 'warnings' in info: + for module, warning in info['warnings'].items(): + if '*' in warning: + log.warning(warning['*']) 
+ + if 'error' in info: + if info['error'].get('code') in {u'internal_api_error_DBConnectionError', + u'internal_api_error_DBQueryError'}: + sleeper.sleep() + return False + + # cope with https://phabricator.wikimedia.org/T106066 + if ( + info['error'].get('code') == u'mwoauth-invalid-authorization' + and 'Nonce already used' in info['error'].get('info') + ): + log.warning('Retrying due to nonce error, see' + 'https://phabricator.wikimedia.org/T106066') + sleeper.sleep() + return False + + if 'query' in info['error']: + # Semantic Mediawiki does not follow the standard error format + raise errors.APIError(None, info['error']['query'], kwargs) + + if '*' in info['error']: + raise errors.APIError(info['error']['code'], + info['error']['info'], info['error']['*']) + raise errors.APIError(info['error']['code'], + info['error']['info'], kwargs) + return True + + @staticmethod + def _query_string(*args, **kwargs): + kwargs.update(args) + qs1 = [ + (k, v) for k, v in six.iteritems(kwargs) if k not in {'wpEditToken', 'token'} + ] + qs2 = [ + (k, v) for k, v in six.iteritems(kwargs) if k in {'wpEditToken', 'token'} + ] + return OrderedDict(qs1 + qs2) + + def raw_call(self, script, data, files=None, retry_on_error=True, http_method='POST'): + """ + Perform a generic request and return the raw text. + + In the event of a network problem, or a HTTP response with status code 5XX, + we'll wait and retry the configured number of times before giving up + if `retry_on_error` is True. + + `requests.exceptions.HTTPError` is still raised directly for + HTTP responses with status codes in the 4XX range, and invalid + HTTP responses. + + Args: + script (str): Script name, usually 'api'. + data (dict): Post data + files (dict): Files to upload + retry_on_error (bool): Retry on connection error + http_method (str): The HTTP method, defaults to 'POST' + + Returns: + The raw text response. 
+ """ + headers = {} + if self.compress and gzip: + headers['Accept-Encoding'] = 'gzip' + sleeper = self.sleepers.make((script, data)) + + scheme = self.scheme + host = self.host + if isinstance(host, (list, tuple)): + warnings.warn( + 'Specifying host as a tuple is deprecated as of mwclient 0.10.1. ' + + 'Please use the new scheme argument instead.', + DeprecationWarning + ) + scheme, host = host + + url = '{scheme}://{host}{path}{script}{ext}'.format(scheme=scheme, host=host, + path=self.path, script=script, + ext=self.ext) + + while True: + try: + args = {'files': files, 'headers': headers} + for k, v in self.requests.items(): + args[k] = v + if http_method == 'GET': + args['params'] = data + else: + args['data'] = data + + stream = self.connection.request(http_method, url, **args) + + if stream.headers.get('x-database-lag'): + wait_time = int(stream.headers.get('retry-after')) + log.warning('Database lag exceeds max lag. ' + 'Waiting for {} seconds'.format(wait_time)) + sleeper.sleep(wait_time) + elif stream.status_code == 200: + return stream.text + elif stream.status_code < 500 or stream.status_code > 599: + stream.raise_for_status() + else: + if not retry_on_error: + stream.raise_for_status() + log.warning('Received {status} response: {text}. ' + 'Retrying in a moment.' + .format(status=stream.status_code, + text=stream.text)) + sleeper.sleep() + + except requests.exceptions.ConnectionError: + # In the event of a network problem + # (e.g. DNS failure, refused connection, etc), + # Requests will raise a ConnectionError exception. + if not retry_on_error: + raise + log.warning('Connection error. 
Retrying in a moment.') + sleeper.sleep() + + def raw_api(self, action, http_method='POST', *args, **kwargs): + """Send a call to the API.""" + try: + retry_on_error = kwargs.pop('retry_on_error') + except KeyError: + retry_on_error = True + kwargs['action'] = action + kwargs['format'] = 'json' + data = self._query_string(*args, **kwargs) + res = self.raw_call('api', data, retry_on_error=retry_on_error, + http_method=http_method) + + try: + return json.loads(res, object_pairs_hook=OrderedDict) + except ValueError: + if res.startswith('MediaWiki API is not enabled for this site.'): + raise errors.APIDisabledError + raise errors.InvalidResponse(res) + + def raw_index(self, action, http_method='POST', *args, **kwargs): + """Sends a call to index.php rather than the API.""" + kwargs['action'] = action + kwargs['maxlag'] = self.max_lag + data = self._query_string(*args, **kwargs) + return self.raw_call('index', data, http_method=http_method) + + def require(self, major, minor, revision=None, raise_error=True): + if self.version is None: + if raise_error is None: + return + raise RuntimeError('Site %s has not yet been initialized' % repr(self)) + + if revision is None: + if self.version[:2] >= (major, minor): + return True + elif raise_error: + raise errors.MediaWikiVersionError( + 'Requires version {required[0]}.{required[1]}, ' + 'current version is {current[0]}.{current[1]}' + .format(required=(major, minor), + current=(self.version[:2])) + ) + else: + return False + else: + raise NotImplementedError + + # Actions + def email(self, user, text, subject, cc=False): + """ + Send email to a specified user on the wiki. + + >>> try: + ... site.email('SomeUser', 'Some message', 'Some subject') + ... except mwclient.errors.NoSpecifiedEmail: + ... 
print('User does not accept email, or has no email address.') + + Args: + user (str): User name of the recipient + text (str): Body of the email + subject (str): Subject of the email + cc (bool): True to send a copy of the email to yourself (default is False) + + Returns: + Dictionary of the JSON response + + Raises: + NoSpecifiedEmail (mwclient.errors.NoSpecifiedEmail): User doesn't accept email + EmailError (mwclient.errors.EmailError): Other email errors + """ + + token = self.get_token('email') + + try: + info = self.post('emailuser', target=user, subject=subject, + text=text, ccme=cc, token=token) + except errors.APIError as e: + if e.args[0] == u'noemail': + raise errors.NoSpecifiedEmail(user, e.args[1]) + raise errors.EmailError(*e) + + return info + + def login(self, username=None, password=None, cookies=None, domain=None): + """ + Login to the wiki using a username and password. The method returns + nothing if the login was successful, but raises an error if it was not. + + Args: + username (str): MediaWiki username + password (str): MediaWiki password + cookies (dict): Custom cookies to include with the log-in request. + domain (str): Sends domain name for authentication; used by some + MediaWiki plug-ins like the 'LDAP Authentication' extension. + + Raises: + LoginError (mwclient.errors.LoginError): Login failed, the reason can be + obtained from e.code and e.info (where e is the exception object) and + will be one of the API:Login errors. The most common error code is + "Failed", indicating a wrong username or password. + + MaximumRetriesExceeded: API call to log in failed and was retried until all + retries were exhausted. This will not occur if the credentials are merely + incorrect. See MaximumRetriesExceeded for possible reasons. + + APIError: An API error occurred. Rare, usually indicates an internal server + error. 
+ """ + + if username and password: + self.credentials = (username, password, domain) + if cookies: + self.connection.cookies.update(cookies) + + if self.credentials: + sleeper = self.sleepers.make() + kwargs = { + 'lgname': self.credentials[0], + 'lgpassword': self.credentials[1] + } + if self.credentials[2]: + kwargs['lgdomain'] = self.credentials[2] + + # Try to login using the scheme for MW 1.27+. If the wiki is read protected, + # it is not possible to get the wiki version upfront using the API, so we just + # have to try. If the attempt fails, we try the old method. + try: + kwargs['lgtoken'] = self.get_token('login') + except (errors.APIError, KeyError): + log.debug('Failed to get login token, MediaWiki is older than 1.27.') + + while True: + login = self.post('login', **kwargs) + + if login['login']['result'] == 'Success': + break + elif login['login']['result'] == 'NeedToken': + kwargs['lgtoken'] = login['login']['token'] + elif login['login']['result'] == 'Throttled': + sleeper.sleep(int(login['login'].get('wait', 5))) + else: + raise errors.LoginError(self, login['login']['result'], + login['login']['reason']) + + self.site_init() + + def get_token(self, type, force=False, title=None): + + if self.version is None or self.version[:2] >= (1, 24): + # The 'csrf' (cross-site request forgery) token introduced in 1.24 replaces + # the majority of older tokens, like edittoken and movetoken. + if type not in {'watch', 'patrol', 'rollback', 'userrights', 'login'}: + type = 'csrf' + + if type not in self.tokens: + self.tokens[type] = '0' + + if self.tokens.get(type, '0') == '0' or force: + + if self.version is None or self.version[:2] >= (1, 24): + # We use raw_api() rather than api() because api() is adding "userinfo" + # to the query and this raises an readapideniederror if the wiki is read + # protected and we're trying to fetch a login token. 
+ info = self.raw_api('query', 'GET', meta='tokens', type=type) + + self.handle_api_result(info) + + # Note that for read protected wikis, we don't know the version when + # fetching the login token. If it's < 1.27, the request below will + # raise a KeyError that we should catch. + self.tokens[type] = info['query']['tokens']['%stoken' % type] + + else: + if title is None: + # Some dummy title was needed to get a token prior to 1.24 + title = 'Test' + info = self.post('query', titles=title, + prop='info', intoken=type) + for i in six.itervalues(info['query']['pages']): + if i['title'] == title: + self.tokens[type] = i['%stoken' % type] + + return self.tokens[type] + + def upload(self, file=None, filename=None, description='', ignore=False, + file_size=None, url=None, filekey=None, comment=None): + """Upload a file to the site. + + Note that one of `file`, `filekey` and `url` must be specified, but not + more than one. For normal uploads, you specify `file`. + + Args: + file (str): File object or stream to upload. + filename (str): Destination filename, don't include namespace + prefix like 'File:' + description (str): Wikitext for the file description page. + ignore (bool): True to upload despite any warnings. + file_size (int): Deprecated in mwclient 0.7 + url (str): URL to fetch the file from. + filekey (str): Key that identifies a previous upload that was + stashed temporarily. + comment (str): Upload comment. Also used as the initial page text + for new files if `description` is not specified. + + Example: + + >>> client.upload(open('somefile', 'rb'), filename='somefile.jpg', + description='Some description') + + Returns: + JSON result from the API. 
+ + Raises: + errors.InsufficientPermission + requests.exceptions.HTTPError + """ + + if file_size is not None: + # Note that DeprecationWarning is hidden by default since Python 2.7 + warnings.warn( + 'file_size is deprecated since mwclient 0.7', + DeprecationWarning + ) + + if filename is None: + raise TypeError('filename must be specified') + + if len([x for x in [file, filekey, url] if x is not None]) != 1: + raise TypeError( + "exactly one of 'file', 'filekey' and 'url' must be specified" + ) + + image = self.Images[filename] + if not image.can('upload'): + raise errors.InsufficientPermission(filename) + + if comment is None: + comment = description + text = None + else: + comment = comment + text = description + + if file is not None: + if not hasattr(file, 'read'): + file = open(file, 'rb') + + content_size = file.seek(0, 2) + file.seek(0) + + if self.version[:2] >= (1, 20) and content_size > self.chunk_size: + return self.chunk_upload(file, filename, ignore, comment, text) + + predata = { + 'action': 'upload', + 'format': 'json', + 'filename': filename, + 'comment': comment, + 'text': text, + 'token': image.get_token('edit'), + } + + if ignore: + predata['ignorewarnings'] = 'true' + if url: + predata['url'] = url + + # sessionkey was renamed to filekey in MediaWiki 1.18 + # https://phabricator.wikimedia.org/rMW5f13517e36b45342f228f3de4298bb0fe186995d + if self.version[:2] < (1, 18): + predata['sessionkey'] = filekey + else: + predata['filekey'] = filekey + + postdata = predata + files = None + if file is not None: + + # Workaround for https://github.com/mwclient/mwclient/issues/65 + # ---------------------------------------------------------------- + # Since the filename in Content-Disposition is not interpreted, + # we can send some ascii-only dummy name rather than the real + # filename, which might contain non-ascii. 
files = {'file': ('fake-filename', file)} + + sleeper = self.sleepers.make() + while True: + data = self.raw_call('api', postdata, files) + info = json.loads(data) + if not info: + info = {} + if self.handle_api_result(info, kwargs=predata, sleeper=sleeper): + response = info.get('upload', {}) + break + if file is not None: + file.close() + return response + + def chunk_upload(self, file, filename, ignorewarnings, comment, text): + """Upload a file to the site in chunks. + + This method is called by `Site.upload` if you are connecting to a newer + MediaWiki installation, so it's normally not necessary to call this + method directly. + + Args: + file (file-like object): File object or stream to upload. + params (dict): Dict containing upload parameters. + """ + image = self.Images[filename] + + content_size = file.seek(0, 2) + file.seek(0) + + params = { + 'action': 'upload', + 'format': 'json', + 'stash': 1, + 'offset': 0, + 'filename': filename, + 'filesize': content_size, + 'token': image.get_token('edit'), + } + if ignorewarnings: + params['ignorewarnings'] = 'true' + + sleeper = self.sleepers.make() + offset = 0 + for chunk in read_in_chunks(file, self.chunk_size): + while True: + data = self.raw_call('api', params, files={'chunk': chunk}) + info = json.loads(data) + if self.handle_api_result(info, kwargs=params, sleeper=sleeper): + response = info.get('upload', {}) + break + + offset += chunk.tell() + chunk.close() + log.debug('%s: Uploaded %d of %d bytes', filename, offset, content_size) + params['filekey'] = response['filekey'] + if response['result'] == 'Continue': + params['offset'] = response['offset'] + elif response['result'] == 'Success': + file.close() + break + else: + # Some kind of error or warning occurred. In any case, we do not + # get the parameters we need to continue, so we should return + # the response now. 
+ file.close() + return response + + del params['action'] + del params['stash'] + del params['offset'] + params['comment'] = comment + params['text'] = text + return self.post('upload', **params) + + def parse(self, text=None, title=None, page=None, prop=None, + redirects=False, mobileformat=False): + kwargs = {} + if text is not None: + kwargs['text'] = text + if title is not None: + kwargs['title'] = title + if page is not None: + kwargs['page'] = page + if prop is not None: + kwargs['prop'] = prop + if redirects: + kwargs['redirects'] = '1' + if mobileformat: + kwargs['mobileformat'] = '1' + result = self.post('parse', **kwargs) + return result['parse'] + + # def block(self): TODO? + # def unblock: TODO? + # def patrol: TODO? + # def import: TODO? + + # Lists + def allpages(self, start=None, prefix=None, namespace='0', filterredir='all', + minsize=None, maxsize=None, prtype=None, prlevel=None, + limit=None, dir='ascending', filterlanglinks='all', generator=True, + end=None): + """Retrieve all pages on the wiki as a generator.""" + + pfx = listing.List.get_prefix('ap', generator) + kwargs = dict(listing.List.generate_kwargs( + pfx, ('from', start), ('to', end), prefix=prefix, + minsize=minsize, maxsize=maxsize, prtype=prtype, prlevel=prlevel, + namespace=namespace, filterredir=filterredir, dir=dir, + filterlanglinks=filterlanglinks, + )) + return listing.List.get_list(generator)(self, 'allpages', 'ap', + limit=limit, return_values='title', + **kwargs) + + def allimages(self, start=None, prefix=None, minsize=None, maxsize=None, limit=None, + dir='ascending', sha1=None, sha1base36=None, generator=True, end=None): + """Retrieve all images on the wiki as a generator.""" + + pfx = listing.List.get_prefix('ai', generator) + kwargs = dict(listing.List.generate_kwargs( + pfx, ('from', start), ('to', end), prefix=prefix, + minsize=minsize, maxsize=maxsize, + dir=dir, sha1=sha1, sha1base36=sha1base36, + )) + return listing.List.get_list(generator)(self, 'allimages', 'ai', 
limit=limit, + return_values='timestamp|url', + **kwargs) + + def alllinks(self, start=None, prefix=None, unique=False, prop='title', + namespace='0', limit=None, generator=True, end=None): + """Retrieve a list of all links on the wiki as a generator.""" + + pfx = listing.List.get_prefix('al', generator) + kwargs = dict(listing.List.generate_kwargs(pfx, ('from', start), ('to', end), + prefix=prefix, + prop=prop, namespace=namespace)) + if unique: + kwargs[pfx + 'unique'] = '1' + return listing.List.get_list(generator)(self, 'alllinks', 'al', limit=limit, + return_values='title', **kwargs) + + def allcategories(self, start=None, prefix=None, dir='ascending', limit=None, + generator=True, end=None): + """Retrieve all categories on the wiki as a generator.""" + + pfx = listing.List.get_prefix('ac', generator) + kwargs = dict(listing.List.generate_kwargs(pfx, ('from', start), ('to', end), + prefix=prefix, dir=dir)) + return listing.List.get_list(generator)(self, 'allcategories', 'ac', limit=limit, + **kwargs) + + def allusers(self, start=None, prefix=None, group=None, prop=None, limit=None, + witheditsonly=False, activeusers=False, rights=None, end=None): + """Retrieve all users on the wiki as a generator.""" + + kwargs = dict(listing.List.generate_kwargs('au', ('from', start), ('to', end), + prefix=prefix, + group=group, prop=prop, + rights=rights, + witheditsonly=witheditsonly, + activeusers=activeusers)) + return listing.List(self, 'allusers', 'au', limit=limit, **kwargs) + + def blocks(self, start=None, end=None, dir='older', ids=None, users=None, limit=None, + prop='id|user|by|timestamp|expiry|reason|flags'): + """Retrieve blocks as a generator. 
+ + Returns: + mwclient.listings.List: Generator yielding dicts, each dict containing: + - user: The username or IP address of the user + - id: The ID of the block + - timestamp: When the block was added + - expiry: When the block runs out (infinity for indefinite blocks) + - reason: The reason they are blocked + - allowusertalk: Key is present (empty string) if the user is allowed to + edit their user talk page + - by: the administrator who blocked the user + - nocreate: key is present (empty string) if the user's ability to create + accounts has been disabled. + + """ + + # TODO: Fix. Fix what? + kwargs = dict(listing.List.generate_kwargs('bk', start=start, end=end, dir=dir, + ids=ids, users=users, prop=prop)) + return listing.List(self, 'blocks', 'bk', limit=limit, **kwargs) + + def deletedrevisions(self, start=None, end=None, dir='older', namespace=None, + limit=None, prop='user|comment'): + # TODO: Fix + + kwargs = dict(listing.List.generate_kwargs('dr', start=start, end=end, dir=dir, + namespace=namespace, prop=prop)) + return listing.List(self, 'deletedrevs', 'dr', limit=limit, **kwargs) + + def exturlusage(self, query, prop=None, protocol='http', namespace=None, limit=None): + r"""Retrieve the list of pages that link to a particular domain or URL, + as a generator. + + This API call mirrors the Special:LinkSearch function on-wiki. + + Query can be a domain like 'bbc.co.uk'. + Wildcards can be used, e.g. '\*.bbc.co.uk'. + Alternatively, a query can contain a full domain name and some or all of a URL: + e.g. '\*.wikipedia.org/wiki/\*' + + See https://meta.wikimedia.org/wiki/Help:Linksearch for details. + + Returns: + mwclient.listings.List: Generator yielding dicts, each dict containing: + - url: The URL linked to. + - ns: Namespace of the wiki page + - pageid: The ID of the wiki page + - title: The page title. 
+ + """ + + kwargs = dict(listing.List.generate_kwargs('eu', query=query, prop=prop, + protocol=protocol, + namespace=namespace)) + return listing.List(self, 'exturlusage', 'eu', limit=limit, **kwargs) + + def logevents(self, type=None, prop=None, start=None, end=None, + dir='older', user=None, title=None, limit=None, action=None): + """Retrieve logevents as a generator.""" + kwargs = dict(listing.List.generate_kwargs('le', prop=prop, type=type, + start=start, end=end, dir=dir, + user=user, title=title, action=action)) + return listing.List(self, 'logevents', 'le', limit=limit, **kwargs) + + def checkuserlog(self, user=None, target=None, limit=10, dir='older', + start=None, end=None): + """Retrieve checkuserlog items as a generator.""" + + kwargs = dict(listing.List.generate_kwargs('cul', target=target, start=start, + end=end, dir=dir, user=user)) + return listing.NestedList('entries', self, 'checkuserlog', 'cul', + limit=limit, **kwargs) + + # def protectedtitles requires 1.15 + def random(self, namespace, limit=20): + """Retrieve a generator of random pages from a particular namespace. + + limit specifies the number of random articles retrieved. + namespace is a namespace identifier integer. + + Generator contains dictionary with namespace, page ID and title. + + """ + + kwargs = dict(listing.List.generate_kwargs('rn', namespace=namespace)) + return listing.List(self, 'random', 'rn', limit=limit, **kwargs) + + def recentchanges(self, start=None, end=None, dir='older', namespace=None, + prop=None, show=None, limit=None, type=None, toponly=None): + """List recent changes to the wiki, à la Special:Recentchanges. 
def revisions(self, revids, prop='ids|timestamp|flags|comment|user'):
    """Get data about a list of revisions.

    See also the `Page.revisions()` method.

    API doc: https://www.mediawiki.org/wiki/API:Revisions

    Example: Get revision text for two revisions:

        >>> for revision in site.revisions([689697696, 689816909], prop='content'):
        ...     print(revision['*'])

    Args:
        revids (list): A list of (max 50) revisions.
        prop (str): Which properties to get for each revision.

    Returns:
        A list of revision dicts. Each dict additionally carries the
        'pageid' and 'pagetitle' of the page it belongs to. When a
        'timestamp' was returned it is converted with `parse_timestamp`.
    """
    kwargs = {
        'prop': 'revisions',
        'rvprop': prop,
        'revids': '|'.join(map(text_type, revids))
    }

    revisions = []
    pages = self.get('query', **kwargs).get('query', {}).get('pages', {}).values()
    for page in pages:
        for revision in page.get('revisions', ()):
            revision['pageid'] = page.get('pageid')
            revision['pagetitle'] = page.get('title')
            # 'timestamp' is only present when it was requested through
            # `prop` (the docstring example asks for 'content' only), so
            # convert it only when it is actually there.
            if 'timestamp' in revision:
                revision['timestamp'] = parse_timestamp(revision['timestamp'])
            revisions.append(revision)
    return revisions
def expandtemplates(self, text, title=None, generatexml=False):
    """
    Takes wikitext (text) and expands templates.

    API doc: https://www.mediawiki.org/wiki/API:Expandtemplates

    Args:
        text (str): The wikitext to expand.
        title (str): Forwarded as the API's `title` parameter when given;
            omitted from the request when None.
        generatexml (bool): Also request the parse tree.

    Returns:
        The expanded wikitext, or a tuple (expanded wikitext, parse tree)
        when `generatexml` is true.
    """

    kwargs = {}
    # Send the title only when the caller supplied one.
    if title is not None:
        kwargs['title'] = title
    if generatexml:
        kwargs['generatexml'] = '1'

    result = self.get('expandtemplates', text=text, **kwargs)

    if generatexml:
        return result['expandtemplates']['*'], result['parsetree']['*']
    return result['expandtemplates']['*']


def ask(self, query, title=None):
    """
    Ask a query against Semantic MediaWiki.

    API doc: https://semantic-mediawiki.org/wiki/Ask_API

    Args:
        query (str): The ask query.
        title (str): Forwarded as the `title` parameter of every request
            when given; omitted when None.

    Returns:
        Generator for retrieving all search results, with each answer as a dictionary.
        If the query is invalid, an APIError is raised. A valid query with zero
        results will not raise any error.

    Examples:

        >>> query = "[[Category:my cat]]|[[Has name::a name]]|?Has property"
        >>> for answer in site.ask(query):
        ...     for title, data in answer.items():
        ...         print(title)
        ...         print(data)
    """
    kwargs = {}
    # Send the title only when the caller supplied one.
    if title is not None:
        kwargs['title'] = title

    # Keep requesting pages of results until the response no longer
    # carries a continuation offset.
    offset = 0
    while offset is not None:
        results = self.raw_api('ask', query=u'{query}|offset={offset}'.format(
            query=query, offset=offset), http_method='GET', **kwargs)
        self.handle_api_result(results)  # raises APIError on error
        offset = results.get('query-continue-offset')
        answers = results['query'].get('results', [])

        if isinstance(answers, dict):
            # In older versions of Semantic MediaWiki (at least until 2.3.0)
            # a list was returned. In newer versions an object is returned
            # with the page title as key.
            answers = list(answers.values())

        for answer in answers:
            yield answer
class InvalidResponse(MwClientError):
    """Raised when the server did not answer with valid JSON.

    The raw response body, when available, is kept in `response_text`
    and is also passed on as the second exception argument.
    """

    def __init__(self, response_text=None):
        message = (
            'Did not get a valid JSON response from the server. Check that '
            'you used the correct hostname. If you did, the server might '
            'be wrongly configured or experiencing temporary problems.'
        )
        self.response_text = response_text
        super(InvalidResponse, self).__init__(message, response_text)

    def __str__(self):
        # Show only the explanatory message, not the raw response body.
        explanation = self.args[0]
        return explanation
def download(self, destination=None):
    """
    Download the file. If `destination` is given, the file will be written
    directly to the stream. Otherwise the file content will be stored in memory
    and returned (with the risk of running out of memory for large files).

    Recommended usage:

    >>> with open(filename, 'wb') as fd:
    ...     image.download(fd)

    Args:
        destination (file object): Destination file

    Returns:
        The file content when no `destination` is given, otherwise None.
    """
    url = self.imageinfo['url']
    if destination is None:
        return self.site.connection.get(url).content

    res = self.site.connection.get(url, stream=True)
    try:
        for chunk in res.iter_content(1024):
            destination.write(chunk)
    finally:
        # Close the streamed response even when writing fails part-way,
        # so that it is not left open with an unread body.
        res.close()
def __next__(self):
    """Return the next item, loading further chunks on demand.

    Iteration stops once `max_items` items have been returned (when a
    maximum is set) or when the last chunk has been exhausted.
    """
    if self.max_items is not None and self.count >= self.max_items:
        raise StopIteration

    # For filtered lists, we might have to do several requests
    # to get the next element due to miser mode.
    # See: https://github.com/mwclient/mwclient/issues/194
    item = None
    while item is None:
        try:
            item = six.next(self._iter)
        except StopIteration:
            if self.last:
                raise
            # Current chunk is used up and more are available.
            self.load_chunk()

    self.count += 1
    if 'timestamp' in item:
        item['timestamp'] = parse_timestamp(item['timestamp'])

    # Generator lists hand the raw item on; `return_values` is not applied.
    if isinstance(self, GeneratorList):
        return item

    wanted = self.return_values
    if type(wanted) is tuple:
        return tuple((item[field] for field in wanted))
    if wanted is None:
        return item
    return item[wanted]
def __repr__(self):
    """Return a short description naming the class, the list and the site."""
    # The template needs one placeholder per argument: formatting an empty
    # string with two arguments raises TypeError.
    return "<%s object '%s' for %s>" % (
        type(self).__name__, self.list_name, self.site
    )
class Category(mwclient.page.Page, GeneratorList):
    """A category page that can also be iterated over to list its members.

    The object is initialised both as a `Page` (for the category page
    itself) and as a `GeneratorList` over the 'categorymembers' list.
    """

    def __init__(self, site, name, info=None, namespace=None):
        """Set up the page and the member listing.

        Args:
            site: The site the category lives on.
            name: The category page name.
            info: Page info passed on to `Page.__init__`.
            namespace: When given, sent as 'gcmnamespace' with the member
                listing.
        """
        mwclient.page.Page.__init__(self, site, name, info)
        kwargs = {'gcmtitle': self.name}
        # Namespace 0 is a valid id but is falsy, so test against None/False
        # explicitly (the same rule `generate_kwargs` applies) instead of
        # relying on truthiness.
        if namespace is not None and namespace is not False:
            kwargs['gcmnamespace'] = namespace
        GeneratorList.__init__(self, site, 'categorymembers', 'cm', **kwargs)

    def __repr__(self):
        """Return a short description naming the category and its site."""
        name = self.name
        if str is bytes:
            # Python 2 only: repr() has to return a byte string there.
            name = name.encode('utf-8')
        # The template needs one placeholder per argument: formatting an
        # empty string with two arguments raises TypeError.
        return "<%s object '%s' for %s>" % (type(self).__name__, name, self.site)

    def members(self, prop='ids|title', namespace=None, sort='sortkey',
                dir='asc', start=None, end=None, generator=True):
        """List the members of this category.

        Returns a `GeneratorList` when `generator` is true, otherwise a
        plain `List`, over the 'categorymembers' list.
        """
        prefix = self.get_prefix('cm', generator)
        kwargs = dict(self.generate_kwargs(prefix, prop=prop, namespace=namespace,
                                           sort=sort, dir=dir, start=start, end=end,
                                           title=self.name))
        return self.get_list(generator)(self.site, 'categorymembers', 'cm', **kwargs)
def guess_namespace(self, name):
    """Guess the namespace from name

    If name starts with any of the site's namespaces' names or
    default_namespaces, use that. Else, return zero.

    Spaces and underscores are treated alike, so both u'User talk:X'
    and u'User_talk:X' are recognised.

    Args:
        name (str): The pagename as a string (having `.startswith`)

    Returns:
        The id of the guessed namespace or zero.

    Raises:
        AttributeError: If `name` is not a string (`PageList.get` relies
            on this to fall back to namespace zero).
    """
    # The prefixes below are built in underscore form, so bring the name
    # into the same form before comparing.
    normalized = name.replace(' ', '_')
    for ns in self.site.namespaces:
        if ns == 0:
            continue
        # Try the site's own name for the namespace first, then the
        # default name if one is known.
        candidates = [self.site.namespaces[ns]]
        if ns in self.site.default_namespaces:
            candidates.append(self.site.default_namespaces[ns])
        for candidate in candidates:
            if normalized.startswith(u'%s:' % candidate.replace(' ', '_')):
                return ns
    return 0
def __repr__(self):
    """Return a short description naming the class, the page and its site."""
    name = self.name
    if str is bytes:
        # Python 2 only: repr() has to return a byte string there.
        name = name.encode('utf-8')
    # The template needs one placeholder per argument: formatting an empty
    # string with two arguments raises TypeError.
    return "<%s object '%s' for %s>" % (type(self).__name__, name, self.site)
def text(self, section=None, expandtemplates=False, cache=True, slot='main'):
    """Get the current wikitext of the page, or of a specific section.

    If the page does not exist, an empty string is returned. By
    default, results will be cached and if you call text() again
    with the same section, expandtemplates and slot the result will
    come from the cache. The cache is stored on the instance, so it
    lives as long as the instance does.

    Args:
        section (int): Section number, to only get text from a single section.
        expandtemplates (bool): Expand templates (default: `False`)
        cache (bool): Use in-memory caching (default: `True`)
        slot (str): Content slot to read the text from (default: 'main')

    Raises:
        mwclient.errors.InsufficientPermission: If the current user may
            not read the page.
    """

    if not self.can('read'):
        raise mwclient.errors.InsufficientPermission(self)
    if not self.exists:
        return u''
    if section is not None:
        section = text_type(section)

    # Key on everything that selects the returned text, including the
    # slot. Use the tuple itself instead of its hash so that two different
    # requests can never share a cache entry.
    key = (section, expandtemplates, slot)
    if cache and key in self._textcache:
        return self._textcache[key]

    revs = self.revisions(prop='content|timestamp', limit=1, section=section,
                          slots=slot)
    try:
        rev = next(revs)
        if 'slots' in rev:
            text = rev['slots'][slot]['*']
        else:
            text = rev['*']
        self.last_rev_time = rev['timestamp']
    except StopIteration:
        text = u''
        self.last_rev_time = None
    if not expandtemplates:
        self.edit_time = time.gmtime()
    else:
        # The 'rvexpandtemplates' option was removed in MediaWiki 1.32, so we have to
        # make an extra API call, see https://github.com/mwclient/mwclient/issues/214
        text = self.site.expandtemplates(text)

    if cache:
        self._textcache[key] = text
    return text
+ """ + return self._edit(summary, minor, bot, section, prependtext=text, **kwargs) + + def _edit(self, summary, minor, bot, section, **kwargs): + if not self.site.logged_in and self.site.force_login: + raise mwclient.errors.AssertUserFailedError() + if self.site.blocked: + raise mwclient.errors.UserBlocked(self.site.blocked) + if not self.can('edit'): + raise mwclient.errors.ProtectedPageError(self) + + if not self.site.writeapi: + raise mwclient.errors.NoWriteApi(self) + + data = {} + if minor: + data['minor'] = '1' + if not minor: + data['notminor'] = '1' + if self.last_rev_time: + data['basetimestamp'] = time.strftime('%Y%m%d%H%M%S', self.last_rev_time) + if self.edit_time: + data['starttimestamp'] = time.strftime('%Y%m%d%H%M%S', self.edit_time) + if bot: + data['bot'] = '1' + if section is not None: + data['section'] = section + + data.update(kwargs) + + if self.site.force_login: + data['assert'] = 'user' + + def do_edit(): + result = self.site.post('edit', title=self.name, summary=summary, + token=self.get_token('edit'), + **data) + if result['edit'].get('result').lower() == 'failure': + raise mwclient.errors.EditError(self, result['edit']) + return result + + try: + result = do_edit() + except mwclient.errors.APIError as e: + if e.code == 'badtoken': + # Retry, but only once to avoid an infinite loop + self.get_token('edit', force=True) + try: + result = do_edit() + except mwclient.errors.APIError as e: + self.handle_edit_error(e, summary) + else: + self.handle_edit_error(e, summary) + + # 'newtimestamp' is not included if no change was made + if 'newtimestamp' in result['edit'].keys(): + self.last_rev_time = parse_timestamp(result['edit'].get('newtimestamp')) + + # Workaround for https://phabricator.wikimedia.org/T211233 + for cookie in self.site.connection.cookies: + if 'PostEditRevision' in cookie.name: + self.site.connection.cookies.clear(cookie.domain, cookie.path, + cookie.name) + + # clear the page text cache + self._textcache = {} + return 
def handle_edit_error(self, e, summary):
    """Translate an API error raised by an edit into a more specific error.

    Edit conflicts become `EditError`, the protection-related codes become
    `ProtectedPageError`, a failed user assertion becomes
    `AssertUserFailedError`; any other error is re-raised unchanged.
    This method never returns normally.
    """
    code = e.code
    protection_codes = {
        'protectedtitle', 'cantcreate', 'cantcreate-anon',
        'noimageredirect-anon', 'noimageredirect', 'noedit-anon',
        'noedit', 'protectedpage', 'cascadeprotected',
        'customcssjsprotected',
        'protectednamespace-interface', 'protectednamespace',
    }

    if code == 'editconflict':
        raise mwclient.errors.EditError(self, summary, e.info)
    if code in protection_codes:
        raise mwclient.errors.ProtectedPageError(self, code, e.info)
    if code == 'assertuserfailed':
        raise mwclient.errors.AssertUserFailedError()
    raise e
+ + """ + if not self.can('delete'): + raise mwclient.errors.InsufficientPermission(self) + + if not self.site.writeapi: + raise mwclient.errors.NoWriteApi(self) + + data = {} + if watch: + data['watch'] = '1' + if unwatch: + data['unwatch'] = '1' + if oldimage: + data['oldimage'] = oldimage + result = self.site.post('delete', title=self.name, + token=self.get_token('delete'), + reason=reason, **data) + return result['delete'] + + def purge(self): + """Purge server-side cache of page. This will re-render templates and other + dynamic content. + + """ + self.site.post('purge', titles=self.name) + + # def watch: requires 1.14 + + # Properties + def backlinks(self, namespace=None, filterredir='all', redirect=False, + limit=None, generator=True): + """List pages that link to the current page, similar to Special:Whatlinkshere. + + API doc: https://www.mediawiki.org/wiki/API:Backlinks + + """ + prefix = mwclient.listing.List.get_prefix('bl', generator) + kwargs = dict(mwclient.listing.List.generate_kwargs( + prefix, namespace=namespace, filterredir=filterredir, + )) + if redirect: + kwargs['%sredirect' % prefix] = '1' + kwargs[prefix + 'title'] = self.name + + return mwclient.listing.List.get_list(generator)( + self.site, 'backlinks', 'bl', limit=limit, return_values='title', + **kwargs + ) + + def categories(self, generator=True, show=None): + """List categories used on the current page. + + API doc: https://www.mediawiki.org/wiki/API:Categories + + Args: + generator (bool): Return generator (Default: True) + show (str): Set to 'hidden' to only return hidden categories + or '!hidden' to only return non-hidden ones. 
+ + Returns: + mwclient.listings.PagePropertyGenerator + """ + prefix = mwclient.listing.List.get_prefix('cl', generator) + kwargs = dict(mwclient.listing.List.generate_kwargs( + prefix, show=show + )) + + if generator: + return mwclient.listing.PagePropertyGenerator( + self, 'categories', 'cl', **kwargs + ) + else: + # TODO: return sortkey if wanted + return mwclient.listing.PageProperty( + self, 'categories', 'cl', return_values='title', **kwargs + ) + + def embeddedin(self, namespace=None, filterredir='all', limit=None, generator=True): + """List pages that transclude the current page. + + API doc: https://www.mediawiki.org/wiki/API:Embeddedin + + Args: + namespace (int): Restricts search to a given namespace (Default: None) + filterredir (str): How to filter redirects, either 'all' (default), + 'redirects' or 'nonredirects'. + limit (int): Maximum amount of pages to return per request + generator (bool): Return generator (Default: True) + + Returns: + mwclient.listings.List: Page iterator + """ + prefix = mwclient.listing.List.get_prefix('ei', generator) + kwargs = dict(mwclient.listing.List.generate_kwargs(prefix, namespace=namespace, + filterredir=filterredir)) + kwargs[prefix + 'title'] = self.name + + return mwclient.listing.List.get_list(generator)( + self.site, 'embeddedin', 'ei', limit=limit, return_values='title', + **kwargs + ) + + def extlinks(self): + """List external links from the current page. + + API doc: https://www.mediawiki.org/wiki/API:Extlinks + + """ + return mwclient.listing.PageProperty(self, 'extlinks', 'el', return_values='*') + + def images(self, generator=True): + """List files/images embedded in the current page. + + API doc: https://www.mediawiki.org/wiki/API:Images + + """ + if generator: + return mwclient.listing.PagePropertyGenerator(self, 'images', '') + else: + return mwclient.listing.PageProperty(self, 'images', '', + return_values='title') + + def iwlinks(self): + """List interwiki links from the current page. 
+ + API doc: https://www.mediawiki.org/wiki/API:Iwlinks + + """ + return mwclient.listing.PageProperty(self, 'iwlinks', 'iw', + return_values=('prefix', '*')) + + def langlinks(self, **kwargs): + """List interlanguage links from the current page. + + API doc: https://www.mediawiki.org/wiki/API:Langlinks + + """ + return mwclient.listing.PageProperty(self, 'langlinks', 'll', + return_values=('lang', '*'), + **kwargs) + + def links(self, namespace=None, generator=True, redirects=False): + """List links to other pages from the current page. + + API doc: https://www.mediawiki.org/wiki/API:Links + + """ + prefix = mwclient.listing.List.get_prefix('pl', generator) + kwargs = dict(mwclient.listing.List.generate_kwargs(prefix, namespace=namespace)) + + if redirects: + kwargs['redirects'] = '1' + if generator: + return mwclient.listing.PagePropertyGenerator(self, 'links', 'pl', **kwargs) + else: + return mwclient.listing.PageProperty(self, 'links', 'pl', + return_values='title', **kwargs) + + def revisions(self, startid=None, endid=None, start=None, end=None, + dir='older', user=None, excludeuser=None, limit=50, + prop='ids|timestamp|flags|comment|user', + expandtemplates=False, section=None, + diffto=None, slots=None, uselang=None): + """List revisions of the current page. + + API doc: https://www.mediawiki.org/wiki/API:Revisions + + Args: + startid (int): Revision ID to start listing from. + endid (int): Revision ID to stop listing at. + start (str): Timestamp to start listing from. + end (str): Timestamp to end listing at. + dir (str): Direction to list in: 'older' (default) or 'newer'. + user (str): Only list revisions made by this user. + excludeuser (str): Exclude revisions made by this user. + limit (int): The maximum number of revisions to return per request. + prop (str): Which properties to get for each revision, + default: 'ids|timestamp|flags|comment|user' + expandtemplates (bool): Expand templates in rvprop=content output + section (int): Section number. 
If rvprop=content is set, only the contents + of this section will be retrieved. + diffto (str): Revision ID to diff each revision to. Use "prev", "next" and + "cur" for the previous, next and current revision respectively. + slots (str): The content slot (Mediawiki >= 1.32) to retrieve content from. + uselang (str): Language to use for parsed edit comments and other localized + messages. + + Returns: + mwclient.listings.List: Revision iterator + """ + kwargs = dict(mwclient.listing.List.generate_kwargs( + 'rv', startid=startid, endid=endid, start=start, end=end, user=user, + excludeuser=excludeuser, diffto=diffto, slots=slots + )) + + if self.site.version[:2] < (1, 32) and 'rvslots' in kwargs: + # https://github.com/mwclient/mwclient/issues/199 + del kwargs['rvslots'] + + kwargs['rvdir'] = dir + kwargs['rvprop'] = prop + kwargs['uselang'] = uselang + if expandtemplates: + kwargs['rvexpandtemplates'] = '1' + if section is not None: + kwargs['rvsection'] = section + + return mwclient.listing.RevisionsIterator(self, 'revisions', 'rv', limit=limit, + **kwargs) + + def templates(self, namespace=None, generator=True): + """List templates used on the current page. 
+ + API doc: https://www.mediawiki.org/wiki/API:Templates + + """ + prefix = mwclient.listing.List.get_prefix('tl', generator) + kwargs = dict(mwclient.listing.List.generate_kwargs(prefix, namespace=namespace)) + if generator: + return mwclient.listing.PagePropertyGenerator(self, 'templates', prefix, + **kwargs) + else: + return mwclient.listing.PageProperty(self, 'templates', prefix, + return_values='title', **kwargs) diff --git a/game/python-extra/mwclient/sleep.py b/game/python-extra/mwclient/sleep.py new file mode 100644 index 0000000..2b808f6 --- /dev/null +++ b/game/python-extra/mwclient/sleep.py @@ -0,0 +1,88 @@ +import time +import logging +from mwclient.errors import MaximumRetriesExceeded + +log = logging.getLogger(__name__) + + +class Sleepers(object): + """ + A class that allows for the creation of multiple `Sleeper` objects with shared + arguments. + Examples: + Firstly a `Sleepers` object containing the shared attributes has to be created. + >>> max_retries, retry_timeout = 5, 5 + >>> sleepers = Sleepers(max_retries, retry_timeout) + From this `Sleepers` object multiple individual `Sleeper` objects can be created + using the `make` method. + >>> sleeper = sleepers.make() + Args: + max_retries (int): The maximum number of retries to perform. + retry_timeout (int): The time to sleep for each past retry. + callback (Callable[[int, Any], None]): A callable to be called on each retry. + Attributes: + max_retries (int): The maximum number of retries to perform. + retry_timeout (int): The time to sleep for each past retry. + callback (callable): A callable to be called on each retry. + """ + def __init__(self, max_retries, retry_timeout, callback=lambda *x: None): + self.max_retries = max_retries + self.retry_timeout = retry_timeout + self.callback = callback + + def make(self, args=None): + """ + Creates a new `Sleeper` object. + Args: + args (Any): Arguments to be passed to the `callback` callable. + Returns: + Sleeper: A `Sleeper` object. 
+ """ + return Sleeper(args, self.max_retries, self.retry_timeout, self.callback) + + +class Sleeper(object): + """ + For any given operation, a `Sleeper` object keeps count of the number of retries. + For each retry, the sleep time increases until the max number of retries is reached + and a `MaximumRetriesExceeded` is raised. The sleeper object should be discarded + once the operation is successful. + Args: + args (Any): Arguments to be passed to the `callback` callable. + max_retries (int): The maximum number of retries to perform. + retry_timeout (int): The time to sleep for each past retry. + callback (callable, None]): A callable to be called on each retry. + Attributes: + args (Any): Arguments to be passed to the `callback` callable. + retries (int): The number of retries that have been performed. + max_retries (int): The maximum number of retries to perform. + retry_timeout (int): The time to sleep for each past retry. + callback (callable): A callable to be called on each retry. + """ + def __init__(self, args, max_retries, retry_timeout, callback): + self.args = args + self.retries = 0 + self.max_retries = max_retries + self.retry_timeout = retry_timeout + self.callback = callback + + def sleep(self, min_time=0): + """ + Sleeps for a minimum of `min_time` seconds. The actual sleeping time will increase + with the number of retries. + Args: + min_time (int): The minimum sleeping time. + Raises: + MaximumRetriesExceeded: If the number of retries exceeds the maximum. 
+ """ + self.retries += 1 + if self.retries > self.max_retries: + raise MaximumRetriesExceeded(self, self.args) + + self.callback(self, self.retries, self.args) + + timeout = self.retry_timeout * (self.retries - 1) + if timeout < min_time: + timeout = min_time + log.debug('Sleeping for %d seconds', timeout) + time.sleep(timeout) diff --git a/game/python-extra/mwclient/util.py b/game/python-extra/mwclient/util.py new file mode 100644 index 0000000..afa9954 --- /dev/null +++ b/game/python-extra/mwclient/util.py @@ -0,0 +1,24 @@ +import time +import io + + +def parse_timestamp(t): + """Parses a string containing a timestamp. + + Args: + t (str): A string containing a timestamp. + + Returns: + time.struct_time: A timestamp. + """ + if t is None or t == '0000-00-00T00:00:00Z': + return time.struct_time((0, 0, 0, 0, 0, 0, 0, 0, 0)) + return time.strptime(t, '%Y-%m-%dT%H:%M:%SZ') + + +def read_in_chunks(stream, chunk_size): + while True: + data = stream.read(chunk_size) + if not data: + break + yield io.BytesIO(data) -- cgit v1.2.3-60-g2f50