#!/usr/bin/env python3

"""

=head1 NAME

moinmoin_pages - lists the number of pages in all wikis of a MoinMoin wikifarm


ACL-protected pages are included.


=head1 CONFIGURATION

 [moinmoin_*]
 user www


=head1 IMPLEMENTATION NOTES

The plugin is quite koumbit-specific:

=over 4

=item 1. the wikifarm config is hardcoded

=item 2. "wikilist.py" is assumed to contain the list of wiki -> url patterns

=item 3. url patterns are assumed to be simple enough that they can be decoded into a URL

=back

Also note that this plugin reuses code from MoinMoin/wikimacro.py's SystemInfo macro.

Finally, I tried using XMLRPC instead of native functions to fetch the data, but it ended up being
slower.  For the record, here is what the getPageList() call would have looked like:

 xmlrpclib.ServerProxy("http://wiki.koumbit.net/?action=xmlrpc2").getAllPages()

The quick benchmark I did yielded these results for the getAllPages() vs getPageList() calls:

 xmlrpc:         2.35 real         0.12 user         0.04 sys
 native:         1.44 real         1.07 user         0.35 sys

So the plugin is spending more time on the CPU (nearly all of its time, actually), but it's doing it faster.
It is quite likely that the CPU time saved with XMLRPC is in fact spent by the server.


=head1 AUTHORS

Copyleft 2007, The Anarcat <anarcat@koumbit.org>


=head1 LICENSE

Licensed under the GPLv2 or any later version.

SPDX-License-Identifier: GPL-2.0-or-later

=cut
"""

import os
from re import sub
import sys

from MoinMoin import wikiutil
from MoinMoin.request import RequestCLI

# The wikifarm configuration directory is hardcoded: make it the working
# directory and put it first on the import path before importing farmconfig.
os.chdir('/export/wiki/config')
sys.path.insert(0, '/export/wiki/config')

# Deliberately imported after the sys.path change above (hence the E402
# suppression). "wikis" is iterated by main() and config(), which read
# element 0 as the wiki name and element 1 as its url pattern.
from farmconfig import wikis  # noqa: E402


def _formatInReadableUnits(size):
    """Render a byte count as a short string in Byte, KiB, MiB or GiB.

    The value is converted to float and divided by 1024 for as long as it is
    above 9999 and a larger unit is still available (GiB is the largest).
    The result has one decimal place; since every unit label starts with a
    space, two spaces separate the number from the unit (e.g. "9.8  KiB").
    """
    value = float(size)
    label = u' Byte'
    for larger_label in (u' KiB', u' MiB', u' GiB'):
        # Stop at the first unit in which the value is small enough to read.
        if not value > 9999:
            break
        label = larger_label
        value /= 1024
    return u"%.1f %s" % (value, label)


def _getDirectorySize(path):
    """Return the total size in bytes of all files found below ``path``.

    The tree is traversed with os.walk() and the os.path.getsize() of every
    listed file is added up.  If an EnvironmentError is raised while doing
    so, -1 is returned instead of a partial total.
    """
    total = 0
    try:
        for parent, _subdirs, filenames in os.walk(path):
            for filename in filenames:
                total += os.path.getsize(os.path.join(parent, filename))
    except EnvironmentError:
        return -1
    return total


def main():
    """Print one "<name>.value <count>" line per wiki of the farm.

    For every entry of ``wikis`` the url pattern is reduced to a plain
    string, a RequestCLI is built from it, and the full page list is
    fetched with user=''.  The printed count is the number of listed pages
    minus those that wikiutil.isSystemPage() reports as system pages.
    """
    for entry in wikis:
        wiki_name, pattern = entry[0], entry[1]

        # HACK: turn the url regexp into something usable as a url; this
        # would be cleaner with canonical urls in the farm configuration.
        # Step 1: keep only the first alternative of a "(foo|bar)" group.
        first_choice = sub(r'\(([^\|]*)(\|[^\)]*\))+', r'\1', pattern)
        # Step 2: drop the "^" and "$" anchors and any ".*" wildcard.
        target = sub(r'[\^\$]|(\.\*)', '', first_choice)

        request = RequestCLI(target)
        pages = request.rootpage.getPageList(user='')

        system_count = sum(1 for page in pages if wikiutil.isSystemPage(request, page))
        print(wiki_name + '.value ' + str(len(pages) - system_count))


def config():
    """Print the graph definition followed by one label line per wiki.

    The fixed graph header is printed first.  Then, for every entry of
    ``wikis``, the module named after the wiki is imported and the
    ``sitename`` attribute of its ``Config`` object is printed as
    "<name>.label <sitename>".
    """
    header = """graph_title Wiki size
graph_vlabel Number of pages
graph_args --base 1000 -l 0
graph_scale no
graph_category wiki
graph_info The number of pages excludes system pages but includes ACL-protected pages."""
    print(header)

    for entry in wikis:
        wiki_name = entry[0]
        # The wiki's settings come from a module carrying the wiki's name.
        wiki_config = __import__(wiki_name).Config
        print(wiki_name + '.label ' + wiki_config.sitename)


if __name__ == "__main__":
    # A first command-line argument of exactly "config" prints the graph
    # definition; no argument, or any other one, prints the values.
    if sys.argv[1:2] != ['config']:
        main()
    else:
        config()