#!/usr/bin/python
#
# wikiadded.py
# Wesley McGrew
# wesley@mcgrewsecurity.com
# http://mcgrewsecurity.com

import sys
from xml.etree.ElementTree import ElementTree

# Default MediaWiki export namespace prefix, used when the document's root
# element does not carry a namespace of its own.
pre = '{http://www.mediawiki.org/xml/export-0.5/}'


def namespace_prefix(root):
    """Return the '{uri}' tag prefix used by *root*.

    MediaWiki dumps declare their schema version in the root element's
    namespace (export-0.5, export-0.10, ...).  Reading it from the document
    lets the script handle any export version; `pre` is the fallback when
    the root tag is not namespaced.
    """
    tag = root.tag
    if tag.startswith('{'):
        return tag[:tag.index('}') + 1]
    return pre


def contributor_label(contributor, ns=pre):
    """Return a printable description of a <contributor> element.

    Registered users are rendered as 'name (id)', anonymous edits as
    'IP: address'.  A missing contributor, or one with neither form
    (for example an element that has no children), is rendered as 'unknown'.
    """
    if contributor is None:
        return 'unknown'
    # Compare against None explicitly: an Element without children is falsy.
    username = contributor.find(ns + 'username')
    user_id = contributor.find(ns + 'id')
    if username is not None and user_id is not None:
        return '%s (%s)' % (username.text, user_id.text)
    ip = contributor.find(ns + 'ip')
    if ip is not None:
        return 'IP: %s' % (ip.text)
    return 'unknown'


def iter_additions(tree, phrase):
    """Yield one report line per revision in which *phrase* newly appears.

    tree   -- a parsed export: an ElementTree, or its root Element.
    phrase -- text to look for; matching is case-insensitive.

    A revision is reported when its text contains the phrase and the
    revision before it on the same page did not.  Each line has the form
    'timestamp,contributor,http://wikipedia.org/w/index.php?title=T&oldid=ID'.
    """
    root = tree.getroot() if hasattr(tree, 'getroot') else tree
    ns = namespace_prefix(root)
    needle = phrase.upper()  # loop-invariant, so upper-case it once

    for page in root.iter(ns + 'page'):
        title = page.findtext(ns + 'title', '')
        # Reset per page: the "previous revision" of a page's first revision
        # must never be the last revision of the preceding page.
        in_previous = False
        for revision in page.iter(ns + 'revision'):
            # findtext yields '' for an empty <text/>; 'or' also covers a
            # revision that has no <text> element at all.
            text = revision.findtext(ns + 'text') or ''
            present = needle in text.upper()
            if present and not in_previous:
                yield "%s,%s,http://wikipedia.org/w/index.php?title=%s&oldid=%s" % (
                    revision.findtext(ns + 'timestamp', ''),
                    contributor_label(revision.find(ns + 'contributor'), ns),
                    title,
                    # Direct child only, so the contributor's own <id> is
                    # never mistaken for the revision id.
                    revision.findtext(ns + 'id', ''),
                )
            in_previous = present


def main(argv):
    """Run the script: argv is [program, xml_file, phrase].

    Prints one line per matching revision and returns the process exit
    status (0 on success, 2 when arguments are missing).
    """
    if len(argv) < 3:
        program = argv[0] if argv else 'wikiadded.py'
        sys.stderr.write('usage: %s <export.xml> <phrase>\n' % program)
        return 2

    tree = ElementTree()
    tree.parse(argv[1])
    for line in iter_additions(tree, argv[2]):
        # Single-argument call form behaves the same on Python 2 and 3.
        print(line)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
