#! /usr/bin/env python
# -*- coding: UTF-8 -*-
"""
    Test that the crawler can crawl

    $Id: test.py 1350 2009-04-25 10:02:44Z goodwin $
"""

# Copyright 2008-2010 eGovMon
# This program is distributed under the terms of the GNU General
# Public License.
#
# This file is part of the eGovernment Monitoring
# (eGovMon)
#
# eGovMon is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# eGovMon is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with eGovMon; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301 USA

__author__ = "$Author$"
__version__ = "$Revision$"
__updated__ = "$LastChangedDate: 2009-04-25 10:02:44 +0000 (Sat, 25 Apr 2009) $"

import unittest
import os
import time
import random
import psycopg2
import sc
import RDFreader

# NOTE: this deliberately rebinds the module name ``sc`` to the configuration
# instance; the rest of the file reads settings from it as attributes.
sc = sc.SystemConfiguration()


class TestSequenceFunctions(unittest.TestCase):
    """End-to-end check that the crawler stores pages for a known site."""

    def testnormalsequence(self):
        """Crawl one site and verify that pages end up in the URL repository.

        The test prepares the test run and web site rows, clears any pages
        previously stored for that (site, test run) pair, installs and runs
        the crawler, and finally asserts that at least one page was stored.
        """
        # Explicit statement parameters (instead of locals()) so that only
        # the values the statements actually reference reach the driver.
        params = {
            'site': 'www.egovmon.no',
            'title': 'eGovernment Monitoring',
            'source': 'maintainer@egovmon.no',
            'testrunid': '1',
        }
        count_pages = ('select count(*) from WebPage where siteid in '
                       '(select siteid from website where site=%(site)s) '
                       'and testrunid=%(testrunid)s;')

        # Setup towards URL repository
        con = psycopg2.connect(host=sc.egovmondbhost,
                               user=sc.egovmondbusername,
                               database=sc.egovmondbdatabase,
                               password=sc.egovmondbpassword)
        try:
            cur = con.cursor()
            try:
                cur.execute(
                    "insert into egovmondb.testrun "
                    "(testrunid,started,responsible,urllistid) "
                    "select %(testrunid)s,timestamp 'now',%(source)s,1 "
                    "where not exists (select True from egovmondb.testrun "
                    "where testrunid = %(testrunid)s);", params)
                con.commit()

                cur.execute(
                    'insert into egovmondb.WebSite (site,title) '
                    'select %(site)s,%(title)s where not exists '
                    '(select True from egovmondb.WebSite '
                    'where site=%(site)s);', params)
                cur.execute(
                    'update egovmondb.WebSite set smallsite=True '
                    'where site=%(site)s;', params)
                cur.execute(
                    'delete from egovmondb.SiteWAMResults where siteid in '
                    '(select siteid from website where site=%(site)s) '
                    'and testrunid=%(testrunid)s;', params)
                con.commit()

                cur.execute(
                    'insert into egovmondb.SiteWAMResults (siteid,testrunid) '
                    'select (select siteid from website '
                    'where site=%(site)s),%(testrunid)s;', params)
                cur.execute(
                    'delete from WebPage where siteid in '
                    '(select siteid from website where site=%(site)s) '
                    'and testrunid=%(testrunid)s;', params)
                con.commit()

                # Checking that the eGovMonDB is empty.
                cur.execute(count_pages, params)
                self.assertEqual(cur.fetchall()[0][0], 0)

                # Remember where we started so the working directory can be
                # restored even if a later step fails.
                original_dir = os.path.abspath(os.path.curdir)
                try:
                    # Install the crawler
                    # Drop a trailing 'trunk' component. The slice is taken
                    # from the slash-stripped value so that 'path/trunk/' is
                    # handled the same way as 'path/trunk'.
                    stripped = sc.homedir.rstrip('/')
                    if stripped.endswith('trunk'):
                        sc.homedir = stripped[:-len('trunk')]
                    os.chdir(os.path.join(sc.homedir, 'trunk', 'Crawler'))
                    os.system('python setup.py install')

                    # Restarting Memcached
                    os.system('/etc/init.d/memcached restart')

                    # Running the crawler
                    os.system(
                        'crawlerwrapper onlycrawl %(site)s %(testrunid)s'
                        % params)

                    # Checking that there are pages in the URL repository
                    cur.execute(count_pages, params)
                    self.assertTrue(cur.fetchall()[0][0] > 0)
                finally:
                    os.chdir(original_dir)
            finally:
                cur.close()
        finally:
            con.close()


if __name__ == '__main__':
    unittest.main()