#!/usr/bin/python
import os
import time
import sys
import psycopg2
import urllib2
import random
import threading
import Queue
import socket
socket.setdefaulttimeout(120)
import urllib
import sc
sc = sc.SystemConfiguration()
import re
# Anchor extractor used on lower-cased page HTML. Each match is a
# (href, anchor text) tuple: callers index matches as i[0] (the URL) and
# i[1] (the text searched for keywords), so the pattern must have exactly two
# groups. DOTALL lets the anchor text span several lines.
links = re.compile(r'<a\s[^>]*?href\s*=\s*["\']([^"\']*)["\'][^>]*>(.*?)</a>', re.DOTALL)
# NOTE(review): this pattern is empty, so it matches the empty string at every
# position. Its use is not visible in this part of the file -- confirm the
# intended expression before relying on it.
generator = re.compile(r'',re.DOTALL)
# Connect to the URL-repository database with the credentials held by the
# SystemConfiguration object bound to `sc`.
con = psycopg2.connect(
    user=sc.urlrepusername,
    database=sc.urlrepdatabase,
    password=sc.urlreppassword,
)
cur = con.cursor()
# Ask the database for the domains of active sites in random order; every
# returned row is (domain, random value).
cur.execute('select distinct domain,random() from site where isactive=TRUE order by random();')
# Keep only the domain column.
municipalitylists = [row[0] for row in cur.fetchall()]
# Debugging hook: narrow the list here (for example to a single domain such as
# ['www.lillesand.kommune.no']) when testing. Currently a no-op.
municipalitylists = municipalitylists
def unquote(url,site):
    """Turn a scraped link into an absolute, percent-decoded URL.

    url  -- a link string, or a list of link strings of which only the first
            is used. Any falsy value (None, '', []) yields ''.
    site -- host name used to build 'http://<site>/<path>' when the link does
            not already start with 'http'.

    Returns the resulting URL string, or '' when there is no link.
    """
    # The percent-decoder is urllib.unquote on Python 2 and
    # urllib.parse.unquote on Python 3; take whichever exists.
    try:
        from urllib import unquote as percent_decode
    except ImportError:
        from urllib.parse import unquote as percent_decode

    if not url:
        return ''
    if isinstance(url, list):
        url = url[0]
    if not url.startswith('http'):
        # Site-relative link: anchor it at the site root.
        url = 'http://' + site + '/' + url.lstrip('/')
    # Links lifted from HTML carry '&' as the entity '&amp;'; decode it before
    # percent-decoding. (The previous replace('&', '&') changed nothing.)
    url = url.replace('&amp;', '&')
    return percent_decode(url)
# Module-level result list. Nothing in the visible part of the file appends to
# it (ReqestServer instances keep their own self.results) -- TODO confirm use.
results = []
class ReqestServer(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
self.dataqueue=Queue.Queue()
self.results = []
def push(self,url):
self.dataqueue.put((url))
def stop(self):
self.dataqueue.put(None)
def checkforpostliste(self,site):
#returns postjournal,viedooverføring,innsyn
#print 'Downloading index page'
self.data = ''
try:
pre = time.time()
self.data = urllib2.urlopen('http://%(site)s/'%locals()).read().lower()
print time.time()-pre
except:# urllib2.URLerror:
return ('Error')#,'Error','Error')
self.result = []
self.postlisturl = ''
self.theselinks = []
if 'postliste' in self.data or 'postjournal' in self.data:
self.result.append(True)
self.theselinks = links.findall(self.data)
res = [i[0] for i in self.theselinks if ('innsyn' in i[1] or 'postliste' in i[1] or 'postjournal' in i[1] or 'postlister' in i[1])]
res = unquote(res,site)
self.postlisturl = res
self.result.append(res)
else:
self.result.append(False)
self.result.append('')
if self.theselinks:
somelink = random.sample([i for i in self.theselinks if not ('innsyn' in i[1] or 'postliste' in i[1] or 'postjournal' in i[1])],1)[0][0]
somelink = unquote(somelink,site)
try:
#self.data = urllib2.urlopen(somelink).read().lower()
self.data = ''
except:
self.data = ''
if '