#!/usr/bin/python
import re
import urllib
import os

# Scan a WordPress XML export line by line for <a><img></a> links that point
# at a gallery item, fetch the file behind each link into LOCAL_DIR, and write
# one "<matched html> <local file>" line per link to the log file.
#
# NOTE: urllib.urlretrieve is the Python 2 spelling (Python 3 moved it to
# urllib.request), so this script is meant to be run with Python 2.

EXPORT_PATH = 'oltcuisine.wordpress.2015-06-04-with-gallery-links-fixed.xml'
LOG_PATH = 'ops.log'
GALLERY_DOWNLOAD_ROOT = "http://mon-url-de-gallery/d/"
LOCAL_DIR = "picz/"

# A fetched file smaller than this many bytes is treated as a failed attempt
# and the next candidate name is tried.
# NOTE(review): presumably the server answers with a small error page when the
# file name is wrong -- confirm against the gallery server.
MIN_ACCEPTED_SIZE = 1500

# Capture groups, in the order re.findall returns them:
#   0: the whole <a ...>...</a> snippet
#   1: the numeric g2_itemId from the link
#   2: the alt text of the <img>, used below as the remote file name base
# Raw string so that "\?" reaches the regex engine without relying on Python
# passing an unknown string escape through unchanged.
pattern = r"""(<a href=['"]http://mon-url-de-gallery/main.php\?g2_itemId=([0-9]+)['"]><img .+?alt=["'](.*?)['"].*?</a>)"""
link_re = re.compile(pattern)

# urlretrieve cannot create the target directory itself.
if not os.path.isdir(LOCAL_DIR):
    os.makedirs(LOCAL_DIR)

# The with-block guarantees both files are closed (and the log flushed) even
# when a download raises part-way through the export.
with open(EXPORT_PATH, 'r') as blogexp, open(LOG_PATH, 'w') as log:
    for line in blogexp:
        for anchor_html, item_id, filename_base in link_re.findall(line):
            # First guess: the alt text itself, with ".jpg" appended unless
            # it already ends that way.
            if filename_base.endswith(".jpg"):
                first_guess = filename_base
            else:
                first_guess = filename_base + ".jpg"

            # The local name is always derived from the first guess, whichever
            # remote name ends up being the one that works.
            local_filename = LOCAL_DIR + item_id + "-" + first_guess

            # Remote names to try, in order; each attempt overwrites the
            # previous one in local_filename.
            candidates = (
                first_guess,
                filename_base + ".JPG",
                filename_base + "_001.jpg",
            )
            for distant_filename in candidates:
                url = GALLERY_DOWNLOAD_ROOT + item_id + "-1/" + distant_filename
                urllib.urlretrieve(url, local_filename)
                if os.path.getsize(local_filename) >= MIN_ACCEPTED_SIZE:
                    break

            log.write(anchor_html + " " + local_filename + "\n")
