path: root/game/python-extra/mwclient/listing.py
import six
import six.moves
from six import text_type
from mwclient.util import parse_timestamp
import mwclient.page
import mwclient.image


class List(object):
    """Base class for lazy iteration over api response content

    This is a class providing lazy iteration.  This means that the
    content is loaded in chunks as long as the response hints at
    continuing content.
    """

    def __init__(self, site, list_name, prefix,
                 limit=None, return_values=None, max_items=None,
                 *args, **kwargs):
        # NOTE: Fix limit
        self.site = site
        self.list_name = list_name
        self.generator = 'list'
        self.prefix = prefix

        kwargs.update(args)
        self.args = kwargs

        if limit is None:
            limit = site.api_limit
        self.args[self.prefix + 'limit'] = text_type(limit)

        self.count = 0
        self.max_items = max_items

        self._iter = iter(six.moves.range(0))

        self.last = False
        self.result_member = list_name
        self.return_values = return_values

    def __iter__(self):
        return self

    def __next__(self):
        if self.max_items is not None:
            if self.count >= self.max_items:
                raise StopIteration

        # For filtered lists, we might have to do several requests
        # to get the next element due to miser mode.
        # See: https://github.com/mwclient/mwclient/issues/194
        while True:
            try:
                item = six.next(self._iter)
                if item is not None:
                    break
            except StopIteration:
                if self.last:
                    raise
                self.load_chunk()

        self.count += 1
        if 'timestamp' in item:
            item['timestamp'] = parse_timestamp(item['timestamp'])

        if isinstance(self, GeneratorList):
            return item
        if type(self.return_values) is tuple:
            return tuple((item[i] for i in self.return_values))
        if self.return_values is not None:
            return item[self.return_values]
        return item

    def next(self, *args, **kwargs):
        """ For Python 2.x support """
        return self.__next__(*args, **kwargs)

    def load_chunk(self):
        """Query a new chunk of data

        If the response is empty, `raise StopIteration`.

        Else, update the iterator accordingly.

        If 'continue' is in the response, it is added to `self.args`
        (new style continuation, added in MediaWiki 1.21).

        If not, but 'query-continue' is in the response, query its
        item called `self.list_name` and add this to `self.args` (old
        style continuation).

        Else, set `self.last` to True.
        """
        data = self.site.get(
            'query', (self.generator, self.list_name),
            *[(text_type(k), v) for k, v in six.iteritems(self.args)]
        )
        if not data:
            # Non-existent page
            raise StopIteration

        # Process response if not empty.
        # See: https://github.com/mwclient/mwclient/issues/194
        if 'query' in data:
            self.set_iter(data)

        if data.get('continue'):
            # New style continuation, added in MediaWiki 1.21
            self.args.update(data['continue'])

        elif self.list_name in data.get('query-continue', ()):
            # Old style continuation
            self.args.update(data['query-continue'][self.list_name])

        else:
            self.last = True

    def set_iter(self, data):
        """Set `self._iter` to the API response `data`."""
        if self.result_member not in data['query']:
            self._iter = iter(six.moves.range(0))
        elif type(data['query'][self.result_member]) is list:
            self._iter = iter(data['query'][self.result_member])
        else:
            self._iter = six.itervalues(data['query'][self.result_member])

    def __repr__(self):
        return "<List object '%s' for %s>" % (self.list_name, self.site)

    @staticmethod
    def generate_kwargs(_prefix, *args, **kwargs):
        kwargs.update(args)
        for key, value in six.iteritems(kwargs):
            if value is not None and value is not False:
                yield _prefix + key, value

    @staticmethod
    def get_prefix(prefix, generator=False):
        return ('g' if generator else '') + prefix

    @staticmethod
    def get_list(generator=False):
        return GeneratorList if generator else List


class NestedList(List):
    """List whose results are nested one level deeper in the response,
    under the key given by `nested_param`."""

    def __init__(self, nested_param, *args, **kwargs):
        super(NestedList, self).__init__(*args, **kwargs)
        self.nested_param = nested_param

    def set_iter(self, data):
        self._iter = iter(data['query'][self.result_member][self.nested_param])


class GeneratorList(List):
    """Lazy-loaded list of Page, Image or Category objects

    While the standard List class yields raw response data
    (optionally filtered based on the value of List.return_values),
    this subclass turns the data into Page, Image or Category objects.
    """

    def __init__(self, site, list_name, prefix, *args, **kwargs):
        super(GeneratorList, self).__init__(site, list_name, prefix,
                                            *args, **kwargs)

        self.args['g' + self.prefix + 'limit'] = self.args[self.prefix + 'limit']
        del self.args[self.prefix + 'limit']
        self.generator = 'generator'

        self.args['prop'] = 'info|imageinfo'
        self.args['inprop'] = 'protection'

        self.result_member = 'pages'

        self.page_class = mwclient.page.Page

    def __next__(self):
        info = super(GeneratorList, self).__next__()
        if info['ns'] == 14:
            return Category(self.site, u'', info)
        if info['ns'] == 6:
            return mwclient.image.Image(self.site, u'', info)
        return mwclient.page.Page(self.site, u'', info)

    def load_chunk(self):
        # Put this here so that the constructor does not fail
        # on uninitialized sites
        self.args['iiprop'] = 'timestamp|user|comment|url|size|sha1|metadata|archivename'
        return super(GeneratorList, self).load_chunk()


class Category(mwclient.page.Page, GeneratorList):

    def __init__(self, site, name, info=None, namespace=None):
        mwclient.page.Page.__init__(self, site, name, info)
        kwargs = {}
        kwargs['gcmtitle'] = self.name
        if namespace:
            kwargs['gcmnamespace'] = namespace
        GeneratorList.__init__(self, site, 'categorymembers', 'cm', **kwargs)

    def __repr__(self):
        return "<Category object '%s' for %s>" % (self.name.encode('utf-8'), self.site)

    def members(self, prop='ids|title', namespace=None, sort='sortkey',
                dir='asc', start=None, end=None, generator=True):
        prefix = self.get_prefix('cm', generator)
        kwargs = dict(self.generate_kwargs(prefix, prop=prop, namespace=namespace,
                                           sort=sort, dir=dir, start=start, end=end,
                                           title=self.name))
        return self.get_list(generator)(self.site, 'categorymembers', 'cm', **kwargs)


class PageList(GeneratorList):

    def __init__(self, site, prefix=None, start=None, namespace=0, redirects='all',
                 end=None):
        self.namespace = namespace

        kwargs = {}
        if prefix:
            kwargs['gapprefix'] = prefix
        if start:
            kwargs['gapfrom'] = start
        if end:
            kwargs['gapto'] = end

        super(PageList, self).__init__(site, 'allpages', 'ap',
                                       gapnamespace=text_type(namespace),
                                       gapfilterredir=redirects,
                                       **kwargs)

    def __getitem__(self, name):
        return self.get(name, None)

    def get(self, name, info=()):
        """Return the page of name `name` as an object.

        If self.namespace is not zero, use {namespace}:{name} as the
        page name, otherwise guess the namespace from the name using
        `self.guess_namespace`.

        Returns:
            One of Category, Image or Page (default), according to namespace.
        """
        if self.namespace != 0:
            full_page_name = u"{namespace}:{name}".format(
                namespace=self.site.namespaces[self.namespace],
                name=name,
            )
            namespace = self.namespace
        else:
            full_page_name = name
            try:
                namespace = self.guess_namespace(name)
            except AttributeError:
                # raised when `name` doesn't have a `startswith` attribute
                namespace = 0

        cls = {
            14: Category,
            6: mwclient.image.Image,
        }.get(namespace, mwclient.page.Page)

        return cls(self.site, full_page_name, info)

    def guess_namespace(self, name):
        """Guess the namespace from name

        If name starts with any of the site's namespaces' names or
        default_namespaces, use that.  Else, return zero.

        Args:
            name (str): The pagename as a string (having `.startswith`)

        Returns:
            The id of the guessed namespace or zero.
        """
        for ns in self.site.namespaces:
            if ns == 0:
                continue
            namespace = u'%s:' % self.site.namespaces[ns].replace(' ', '_')
            if name.startswith(namespace):
                return ns
            elif ns in self.site.default_namespaces:
                namespace = u'%s:' % self.site.default_namespaces[ns].replace(' ', '_')
                if name.startswith(namespace):
                    return ns
        return 0


class PageProperty(List):

    def __init__(self, page, prop, prefix, *args, **kwargs):
        super(PageProperty, self).__init__(page.site, prop, prefix,
                                           titles=page.name,
                                           *args, **kwargs)
        self.page = page
        self.generator = 'prop'

    def set_iter(self, data):
        for page in six.itervalues(data['query']['pages']):
            if page['title'] == self.page.name:
                self._iter = iter(page.get(self.list_name, ()))
                return
        raise StopIteration


class PagePropertyGenerator(GeneratorList):

    def __init__(self, page, prop, prefix, *args, **kwargs):
        super(PagePropertyGenerator, self).__init__(page.site, prop, prefix,
                                                    titles=page.name,
                                                    *args, **kwargs)
        self.page = page


class RevisionsIterator(PageProperty):

    def load_chunk(self):
        # Old-style continuation supplies rvstartid; the API rejects requests
        # that set both rvstart and rvstartid, so drop rvstart.
        if 'rvstartid' in self.args and 'rvstart' in self.args:
            del self.args['rvstart']
        return super(RevisionsIterator, self).load_chunk()