#!/usr/bin/python
# Crawler that checks municipality web sites for a public mail journal
# ("postliste" / "postjournal") link.
#
# NOTE(review): this file had been collapsed onto a single physical line that
# started with the shebang, so every statement was part of one comment.  The
# line structure below is restored from the statement syntax.
import os
import time
import sys
import psycopg2
import urllib2
import random
import threading
import Queue
import socket
# Applies to every socket created afterwards, including urllib2 requests.
socket.setdefaulttimeout(120)
import urllib
import sc
# The module name is deliberately rebound to the configuration instance.
sc = sc.SystemConfiguration()
import re

# NOTE(review): both patterns look as if their HTML markup was stripped.
# `links` has a single group, yet checkforpostliste() indexes every match as
# i[0] and i[1], which needs a two-group pattern.  The literals are kept
# unchanged here; restore the original patterns before relying on them.
links = re.compile(r'(.*?)', re.DOTALL)
#postliste = re.compile(r'.*?postliste.*?',re.DOTALL)
#postjournal = re.compile(r'.*?postjournal.*?',re.DOTALL)
generator = re.compile(r'', re.DOTALL)

con = psycopg2.connect(user=sc.urlrepusername,
                       database=sc.urlrepdatabase,
                       password=sc.urlreppassword)
cur = con.cursor()
# random() is selected as well so that it can be used in ORDER BY together
# with DISTINCT; only the domain column is kept.
cur.execute('select distinct domain,random() from site where isactive=TRUE order by random();')
municipalitylists = [i[0] for i in cur.fetchall()]
#Hack
# No-op placeholder; presumably a spot for slicing the list while debugging.
municipalitylists = municipalitylists
#End of hack
#municipalitylists = ['www.lillesand.kommune.no']
#random.shuffle(municipalitylists)
# (disabled) CSV header: print 'Municipality, Has Postlist,Service'


def unquote(url, site):
    """Return an absolute, URL-decoded form of *url* for *site*.

    url  -- a link string, or a list of link strings of which only the first
            is used; any falsy value (None, '', []) yields ''.
    site -- host name used to build 'http://<site>/<path>' when *url* does
            not already start with 'http'.
    """
    if not url:
        return ''
    if isinstance(url, list):
        url = url[0]
    if not url.startswith('http'):
        url = url.lstrip('/')
        url = 'http://' + site + '/' + url
    # The original replaced '&' with '&', which does nothing; decoding the
    # HTML entity is the only meaningful reading of that line.
    url = url.replace('&amp;', '&')
    url = urllib.unquote(url)
    return url


results = []


class ReqestServer(threading.Thread):
    """Thread that owns a queue of URLs and probes sites for a mail journal."""

    def __init__(self):
        threading.Thread.__init__(self)
        self.dataqueue = Queue.Queue()
        self.results = []

    def push(self, url):
        """Put *url* on the work queue."""
        self.dataqueue.put(url)

    def stop(self):
        """Put None on the work queue (presumably a stop sentinel - confirm)."""
        self.dataqueue.put(None)

    def checkforpostliste(self, site):
        """Download the front page of *site* and look for mail-journal links.

        Returns the plain string 'Error' when the download fails.
        """
        # Original note: returns postjournal, video streaming, innsyn (access)
        #print 'Downloading index page'
        self.data = ''
        try:
            pre = time.time()
            self.data = urllib2.urlopen('http://%(site)s/' % locals()).read().lower()
            # Elapsed download time in seconds.
            print(time.time() - pre)
        except:  # urllib2.URLerror:
            # Note: ('Error') is a plain string, not a one-element tuple.
            return ('Error')  #,'Error','Error')
        self.result = []
        self.postlisturl = ''
        self.theselinks = []
        if 'postliste' in self.data or 'postjournal' in self.data:
            self.result.append(True)
            self.theselinks = links.findall(self.data)
            res = [i[0] for i in self.theselinks
                   if ('innsyn' in i[1] or 'postliste' in i[1]
                       or 'postjournal' in i[1] or 'postlister' in i[1])]
            # NOTE(review): the rest of this method follows in the source but
            # still has its line breaks collapsed; it needs the same repair.
res = unquote(res,site) self.postlisturl = res self.result.append(res) else: self.result.append(False) self.result.append('') if self.theselinks: somelink = random.sample([i for i in self.theselinks if not ('innsyn' in i[1] or 'postliste' in i[1] or 'postjournal' in i[1])],1)[0][0] somelink = unquote(somelink,site) try: #self.data = urllib2.urlopen(somelink).read().lower() self.data = '' except: self.data = '' if '