[1] | 1 | from Products.CMFCore.ActionProviderBase import ActionProviderBase |
---|
| 2 | from Products.CMFPlone.PloneFolder import PloneFolder |
---|
| 3 | from config import TOOL_ID, PROJECTNAME |
---|
| 4 | from Products.Archetypes.public import * |
---|
| 5 | from Acquisition import aq_base |
---|
| 6 | from OFS.SimpleItem import SimpleItem |
---|
| 7 | from OFS.PropertyManager import PropertyManager |
---|
| 8 | from Products.CMFCore.utils import UniqueObject |
---|
| 9 | from Globals import InitializeClass |
---|
| 10 | from errors import BlacklistedURL |
---|
| 11 | from AccessControl import ClassSecurityInfo |
---|
| 12 | |
---|
| 13 | from Products.CMFCore.utils import getToolByName |
---|
| 14 | from zLOG import LOG |
---|
| 15 | import re, os |
---|
| 16 | from config import * |
---|
| 17 | |
---|
| 18 | def _parseLine(line): |
---|
| 19 | parts = line.split('#') |
---|
| 20 | pattern = parts[0].strip() |
---|
| 21 | return re.compile(pattern) |
---|
| 22 | |
---|
def stripUrl(url):
    """Reduce a URL to its bare host/path form.

    Removes, in this order: a leading ``http://`` or ``https://`` scheme,
    a leading ``www.`` and a single trailing ``/``.  Anything else is
    returned untouched.

    url -- the URL string to normalise
    Returns the stripped string.
    """
    # Only one scheme can be present, so stop after the first match.
    for scheme in ('http://', 'https://'):
        if url.startswith(scheme):
            url = url[len(scheme):]
            break
    if url.startswith('www.'):
        url = url[4:]
    if url.endswith('/'):
        url = url[:-1]
    return url
---|
| 32 | |
---|
class TrackSpamTool(UniqueObject, SimpleItem, PropertyManager, ActionProviderBase):
    """Tool that validates trackback URLs against a spam blacklist.

    ``blacklist`` holds the blacklist lines as text (one regular expression
    per line, optionally followed by a ``#`` comment) and
    ``blacklist_compiled`` holds the regular expressions compiled from them.
    """

    security = ClassSecurityInfo()

    id = TOOL_ID
    meta_type = TOOL_METATYPE
    title = 'TrackSpam Tool'
    plone_tool = True

    __implements__ = (SimpleItem.__implements__,
                      ActionProviderBase.__implements__)

    # Class-level defaults shared by every instance.  The methods below never
    # mutate these lists in place; they always rebind the attribute on the
    # instance so the shared defaults stay empty.
    blacklist = []
    blacklist_compiled = []

    # NOTE(review): methods that had no docstring are documented with
    # comments only.  Zope's publisher presumably uses the presence of a
    # docstring to decide whether a method may be called through the web, so
    # turning these comments into docstrings could expose the methods --
    # confirm before doing so.

    # Seed the blacklist from ``mt_blacklist.txt`` (located next to this
    # module) when the tool has no entries yet.  Blank lines and lines
    # starting with ``#`` are skipped.  Does nothing if entries already exist.
    def loadInitialFile(self):
        if self.blacklist:
            return
        txtpath = os.path.join(os.path.dirname(__file__), 'mt_blacklist.txt')
        patterns = []
        compiled = []
        fp = open(txtpath, 'r')
        try:
            for line in fp:
                line = line.strip()
                if line and not line.startswith('#'):
                    patterns.append(line)
                    compiled.append(_parseLine(line))
        finally:
            # Close the file even if a line fails to compile.
            fp.close()
        # Keep the text list and the compiled list in step with each other.
        self.blacklist = patterns
        self.blacklist_compiled = compiled

    # Return true when any compiled blacklist pattern is found in ``url``.
    def _isBlacklisted(self, url):
        for regexp in self.blacklist_compiled:
            if regexp.search(url):
                return True
        return False

    # Return 0 when the trackback's URL matches the blacklist, 1 otherwise.
    # NOTE(review): this calls ``getURL()`` while blackListAndRemove calls
    # ``getUrl()`` on the objects it handles -- confirm which accessor the
    # trackback type really provides.
    def checkTrackback(self, trackback):
        url = trackback.getURL()
        if self._isBlacklisted(url):
            return 0  # raise BlacklistedURL(url, match)
        return 1

    # Return 0 when ``url`` is empty or matches the blacklist, 1 otherwise.
    def checkURL(self, url):
        if not url:
            return 0
        if self._isBlacklisted(url):
            return 0  # raise BlacklistedURL(url, match)
        return 1

    def getBlackList(self):
        """ return list for editing: the blacklist lines joined by newlines """
        return '\n'.join(self.blacklist)

    def setBlackList(self, data):
        """ save the blacklist

        data -- newline separated text, one regular expression per line;
                text after a '#' on a line is a comment.

        Blank lines are dropped and repeated lines are stored once (the first
        occurrence wins, order is preserved).  Comment-only lines are kept in
        the editable text but are not compiled, because an empty pattern
        would match every URL.
        """
        seen = {}
        patterns = []
        compiled = []
        for line in (data or '').split('\n'):
            line = line.strip()
            if not line or line in seen:
                continue
            seen[line] = 1
            patterns.append(line)
            # Same comment rule as _parseLine: only compile real patterns.
            if line.split('#')[0].strip():
                compiled.append(_parseLine(line))
        self.blacklist = patterns
        self.blacklist_compiled = compiled

    def blackListAndRemove(self, trbacks_checked):
        """ add the entries to black list remove them and the similar ones

        trbacks_checked -- sequence of UIDs of the selected trackbacks.

        Each selected trackback has its stripped URL added to the blacklist
        (if not already listed) and is deleted from its parent.  Afterwards
        every pending 'TrackBack' found in portal_catalog whose URL equals
        one of the selected URLs is deleted as well.

        Returns (number of new blacklist entries, number of deleted objects).
        """
        if not trbacks_checked:
            return 0, 0
        uid_catalog = getToolByName(self, 'uid_catalog')
        catalog = getToolByName(self, 'portal_catalog')
        urls = []
        counter = 0
        bl_counter = 0
        # Work on copies and rebind at the end: appending in place would
        # modify the class-level default lists when the instance has none of
        # its own (and presumably would not be noticed by the persistence
        # machinery either).
        patterns = list(self.blacklist)
        compiled = list(self.blacklist_compiled)
        for trb in trbacks_checked:
            brains = uid_catalog(UID=trb)
            if not brains:
                # Unknown UID, or the same UID listed twice and already
                # deleted by an earlier iteration.
                continue
            obj = brains[0].getObject()
            geturl = obj.getUrl()
            urls.append(geturl)
            url = stripUrl(geturl)
            if url not in patterns:
                # NOTE(review): the URL is compiled as a regular expression
                # verbatim, so characters such as '.', '?' or '(' keep their
                # regex meaning and an unbalanced '(' makes re.compile fail.
                compiled.append(_parseLine(url))
                patterns.append(url)
                bl_counter += 1
            obj.aq_parent.manage_delObjects([obj.getId()])
            counter += 1
        if bl_counter:
            self.blacklist = patterns
            self.blacklist_compiled = compiled
        res = catalog(portal_type='TrackBack', review_state='pending', sort_on='Date')
        for r in res:
            obj = r.getObject()
            if obj.getUrl() in urls:
                obj.aq_parent.manage_delObjects([obj.getId()])
                counter += 1
        return bl_counter, counter
---|
| 122 | |
---|
| 123 | |
---|
# Hand the finished class to Zope's InitializeClass (presumably this is what
# applies the ClassSecurityInfo declarations -- confirm against Globals).
InitializeClass(TrackSpamTool)
---|