# -*- coding: UTF-8 -*-
"""Tests for the learning algorithms for postlists.

$Id$
"""

# Copyright 2008,2009 eGovMon
# This program is distributed under the terms of the GNU General
# Public License.
#
# This file is part of the eGovernment Monitoring
# (eGovMon)
#
# eGovMon is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# eGovMon is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with eGovMon; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301 USA

__author__ = "$Author$"
__version__ = "$Revision$"
__updated__ = "$LastChangedDate$"

import unittest
import doctest

from getstatistics import *
from stopwords import *
from urls import *


class TestSequenceFunctions(unittest.TestCase):
    """Unit tests for the helpers star-imported from getstatistics,
    stopwords and urls.

    NOTE(review): several HTML fixtures below look as if their markup was
    stripped at some point (e.g. an empty string that is expected to yield
    keyword tokens). The literals are kept exactly as found; restore the
    original markup from version control before relying on these tests.
    """

    def testDistance(self):
        """Check getDistance on identical and on differing word lists."""
        # Identical lists are expected to be at distance zero.
        self.assertEqual(getDistance(['one'], ['one']), 0)
        self.assertEqual(getDistance(['one', 'two'], ['one', 'two']), 0)
        # Differing lists are expected to be at a positive distance.
        self.assertTrue(getDistance(['one', 'three'], ['one', 'two']) > 0)
        self.assertTrue(getDistance(['two'], ['one', 'two']) > 0)
        # NOTE(review): the comparison operator of this assertion was lost
        # in the source; "> 0" is assumed to match the assertions above.
        self.assertTrue(getDistance(['two'], ['one', 'two', 'three']) > 0)

    def teststopswords(self):
        """Check that removestopwords keeps only the non-stopword key."""
        counts = {'morten': 1, 'arbeid': 1, 'begge': 1, 'alle': 1}
        self.assertEqual(removestopwords(counts), {'morten': 1})

    def testasciiwords(self):
        """Check which keys removenonascii keeps."""
        # '\\å' has the same value as the former '\å' (not a recognised
        # escape) but avoids the invalid-escape warning.
        counts = {
            'morten øæ.å': 1,
            'arbeid224{': 1,
            '-begge233': 1,
            '1.+\\åå': 1,
        }
        self.assertEqual(removenonascii(counts), {'morten øæ.å': 1})

    def testheavystringstrip(self):
        """Check that heavystrip lower-cases keys and drops punctuation."""
        counts = {'Morten1': 1, 'test.': 1, 'verdi4?': 1, '\\mgo': 1}
        expected = {'morten1': 1, 'test': 1, 'verdi4': 1, 'mgo': 1}
        self.assertEqual(heavystrip(counts), expected)

    def testnearest(self):
        """Check the nearest-neighbour classification of getNearest."""
        # Training
        # NOTE(review): the second positive sample is nested one level
        # deeper than the first; kept as found - confirm it is intended.
        this_p_neighbor = [['eggs', 'spam', 'ham'],
                           [['spam', 'ham', 'sausage']]]
        this_n_neighbor = [['parrot', 'polly', 'cage', 'fjords', 'slug']]

        # Classification
        to_classify = ['eggs', 'ham', 'sausage', 'spam', 'cage']

        def nearest(sample):
            # All checks use the same training data and number=1.
            return getNearest(sample, this_p_neighbor, this_n_neighbor,
                              number=1)

        # Index 1 of the result is compared against the labels 'p' / 'n'.
        self.assertEqual(nearest(to_classify)[1], 'p')
        self.assertEqual(nearest(this_n_neighbor[0])[1], 'n')
        self.assertEqual(nearest(this_p_neighbor[0])[1], 'p')
        # A training sample classified against itself is expected to give
        # 0.0 at [0][0][0] (presumably the smallest distance - confirm).
        self.assertEqual(nearest(this_n_neighbor[0])[0][0][0], 0.0)

    def testtitle(self):
        """Check the words getTitleText returns for a page title."""
        # NOTE(review): fixture probably lost its surrounding markup.
        data = """Grimstad kommune -- Postliste"""
        values = getTitleText(data)
        self.assertEqual(values, ['grimstad', 'kommune', 'postliste'])

    def testlinktext(self):
        """Check the words getLinkText returns for link text."""
        # NOTE(review): the fixtures below probably lost their markup.
        data = """søk i postlisten"""
        values = getLinkText(data)
        self.assertEqual(values, ['søk', 'i', 'postlisten'])

        data = """ smoela.jpg """
        values = getLinkText(data)
        self.assertEqual(values, [])

        data = """søk i postlisten bla i postlisten"""
        values = getLinkText(data)
        self.assertEqual(
            values, ['søk', 'i', 'postlisten', 'bla', 'i', 'postlisten'])

        # Smoke test on a stored page: only checks that the call does not
        # raise. The file is closed deterministically via the with block.
        with open('./postlists/www.midsund.kommune.no') as page:
            data = page.read()
        values = getLinkText(data)

    def testattributevalues(self):
        """Check the words getAttributeValues returns."""
        # NOTE(review): fixture is empty although keyword tokens are
        # expected; the original markup appears to have been stripped.
        data = """"""
        values = getAttributeValues(data)
        self.assertEqual(
            values,
            ['keywords', 'her', 'skal', 'n\xc3\xb8kkelord', 'skrives',
             'inn', 'i', 'en', 'kommaseparert', 'liste'])

    def testurldata(self):
        """Check that every configured URL contains an http(s) scheme."""
        # Only the values are needed; list() keeps the concatenation
        # working when values() returns a view instead of a list.
        all_urls = (list(mailrecordurls.values())
                    + list(notmailrecordurls.values()))
        for url in all_urls:
            self.assertTrue('http://' in url or 'https://' in url)

    def testoneurl(self):
        """Check protocol and domain returned by getURLInfo for one site."""
        # The same key is looked up with both 'p' and 'n' as first argument.
        for label in ('p', 'n'):
            # Unpacking all six fields also verifies the result's length.
            protocol, domain, path, filename, parameters, attributes = \
                getURLInfo(label, 'www.grimstad.kommune.no')
            self.assertEqual(protocol, 'http')
            self.assertEqual(domain, 'www.grimstad.kommune.no')

    def testonlydistinct(self):
        """Check that no URL is in both mailrecordurls and notmailrecordurls."""
        # One set intersection replaces two nested linear scans and makes
        # the failure message list the offending URLs.
        shared = (set(mailrecordurls.values())
                  & set(notmailrecordurls.values()))
        self.assertEqual(shared, set())


if __name__ == "__main__":
    unittest.main()