# -*- coding: utf-8 -*-
"""
lifebot FAMILIA
Create the subcategory named 'FAMILIA (Indexed)' if it doesn't exist. This cat itself has the tags 'Category:FAMILIA' and 'Category:Indexed plant families'.
Collect all names of images that are included in articles under the category FAMILIA.
For every of these images:
1. Add tag 'Category:FAMILIA (Indexed)' if it isn't there.
2. Remove all tags 'Category:FAMILIA' if there. (Alternatively, just rename)
Options:
"""
#
# (C) R Stephan 2006
#
# Distributed under the terms of the GPL2.
#
__version__ = '0.10'
#
import wikipedia,re,sys,config
import catlib
# Throttle configuration: minimum delay between page reads and between page
# writes (presumably in seconds -- confirm against the throttle class).
wikipedia.get_throttle.setDelay(5)
wikipedia.put_throttle.setDelay(10)

# Edit summary keyed by language code.
# NOTE(review): not referenced by the code visible in this file.
msg = {
    'en': 'LifeBot:Tree of Life maintenance'
}
def _retag_image_text(text, plain_tag_re, indexed_tag):
    """Return `text` with plain family tags removed and the indexed tag present.

    text         -- wikitext of an image description page
    plain_tag_re -- compiled pattern matching '[[Category:FAMILIA]]' links
                    (optionally with a sort key), but not the indexed tag
    indexed_tag  -- the literal '[[Category:FAMILIA (Indexed)]]' link

    The result equals `text` when nothing needs to change, so callers can
    detect a no-op with a plain comparison.
    """
    new_text = plain_tag_re.sub('', text)
    if indexed_tag not in new_text:
        new_text = new_text + '\n' + indexed_tag
    return new_text


def main(FAMILIA):
    """Move images used in FAMILIA articles into 'FAMILIA (Indexed)'.

    Steps:
    1. Create 'Category:FAMILIA (Indexed)' if it does not exist yet.
    2. Collect the images linked from every non-image page that is directly
       in 'Category:FAMILIA' (subcategories are not visited).
    3. For each such image, remove any '[[Category:FAMILIA]]' tag and make
       sure '[[Category:FAMILIA (Indexed)]]' is present; save only if the
       text actually changed.

    FAMILIA -- the bare category name (without the 'Category:' prefix).

    Image pages that do not exist (wikipedia.NoPage) are skipped.
    """
    # TODO: catch more read/write errors gracefully
    site = wikipedia.getSite()
    family_cat = catlib.Category(site, 'Category:' + FAMILIA)

    # Create index cat if it doesn't exist
    indexedcat = (
        'This category is for photos of ' + FAMILIA
        + ' which have been indexed in a ' + FAMILIA + ' Commons article.\n'
        + '[[Category: ' + FAMILIA + ']]\n'
        + '[[Category:Plantae by familia (Indexed)]]\n')
    index_cat = catlib.Category(site, 'Category:' + FAMILIA + ' (Indexed)')
    if not index_cat.exists():
        print('---> Index cat does not exist. Creating... ')
        index_cat.put(indexedcat, 'Maintenance category')

    # Get list of pages in FAMILIA category (but not subcategories)
    pages = family_cat.articles()
    print('---> number of pages in  %s :  %s' % (FAMILIA, len(pages)))

    # Read all article pages, make list of images in all article pages
    num_arts = 0
    indexed_images = []
    for page in pages:
        if page.isImage():
            continue
        print('---> Reading article '
              + page.aslink().encode(config.console_encoding, 'replace'))
        num_arts += 1
        indexed_images.extend(page.imagelinks())
    indexed_images = catlib.unique(indexed_images)
    print('---> number of articles in  %s :  %s' % (FAMILIA, num_arts))
    print('---> number of images linked from articles in  %s :  %s'
          % (FAMILIA, len(indexed_images)))

    # Prepare patterns for search/replace.
    # The pattern matches exactly one category link to FAMILIA, with an
    # optional '|sortkey'.  The name is escaped so regex metacharacters in
    # it are taken literally, and the sort key may not contain ']' so a
    # match can never run past the closing ']]' into other links.  It
    # deliberately does NOT match '[[Category:FAMILIA (Indexed)]]'.
    plain_tag_re = re.compile(
        r'\[\[ *category *: *%s *(?:\|[^\]\n]*)?\]\]' % re.escape(FAMILIA),
        re.IGNORECASE)
    indexed_tag = '[[Category:' + FAMILIA + ' (Indexed)]]'

    # Make changes to image
    for image in indexed_images:
        try:
            text = image.get()
        except wikipedia.NoPage:
            continue
        new_text = _retag_image_text(text, plain_tag_re, indexed_tag)
        link = image.aslink().encode(config.console_encoding, 'replace')
        if new_text != text:
            image.put(new_text, 'Plant image indexed in species article')
            print('---> Changed ' + link)
        else:
            # Nothing to do: avoid a pointless save of identical text.
            print('---> Unchanged ' + link)
if __name__ == '__main__':
    # Each non-empty command-line argument is processed as one FAMILIA name.
    # The try/finally wraps the whole loop so that wikipedia.stopme() runs
    # exactly once, after all families are done (or on the first error),
    # instead of after every single argument while the bot is still working.
    try:
        for arg in sys.argv[1:]:
            if arg:
                main(arg)
    finally:
        wikipedia.stopme()