| 1 |
from Products.CMFCore.ActionProviderBase import ActionProviderBase |
|---|
| 2 |
from Products.CMFPlone.PloneFolder import PloneFolder |
|---|
| 3 |
from config import TOOL_ID, PROJECTNAME |
|---|
| 4 |
from Products.Archetypes.public import * |
|---|
| 5 |
from Acquisition import aq_base |
|---|
| 6 |
from OFS.SimpleItem import SimpleItem |
|---|
| 7 |
from OFS.PropertyManager import PropertyManager |
|---|
| 8 |
from Products.CMFCore.utils import UniqueObject |
|---|
| 9 |
from Globals import InitializeClass |
|---|
| 10 |
from errors import BlacklistedURL |
|---|
| 11 |
from AccessControl import ClassSecurityInfo |
|---|
| 12 |
|
|---|
| 13 |
from Products.CMFCore.utils import getToolByName |
|---|
| 14 |
from zLOG import LOG |
|---|
| 15 |
import re, os |
|---|
| 16 |
from config import * |
|---|
| 17 |
|
|---|
| 18 |
def _parseLine(line): |
|---|
| 19 |
parts = line.split('#') |
|---|
| 20 |
pattern = parts[0].strip() |
|---|
| 21 |
return re.compile(pattern) |
|---|
| 22 |
|
|---|
| 23 |
def stripUrl(url):
    """Reduce a URL to its bare host/path form.

    Removes, in this order:

    * a leading ``http://`` or ``https://`` scheme (at most one),
    * a leading ``www.``,
    * a single trailing ``/``.

    Matching is case-sensitive and anything else (other schemes, query
    strings, fragments) is left untouched.  ``url`` must be a string.

    Returns the shortened string.
    """
    # Previously only 'http://' was recognised, so an https URL kept both
    # its scheme and its 'www.' prefix.
    for scheme in ('http://', 'https://'):
        if url.startswith(scheme):
            url = url[len(scheme):]
            break
    if url.startswith('www.'):
        url = url[4:]
    if url.endswith('/'):
        url = url[:-1]
    return url
|---|
| 32 |
|
|---|
| 33 |
class TrackSpamTool(UniqueObject, SimpleItem, PropertyManager, ActionProviderBase):
    """Tool that screens URLs and trackbacks against a regex blacklist.

    ``blacklist`` holds the editable source lines and
    ``blacklist_compiled`` the regular expressions used for matching.

    Methods that carried no docstring originally are documented with
    ``#`` comments on purpose: Zope 2 only publishes objects that have a
    docstring, so adding one would expose them to URL traversal.

    NOTE(review): no permissions are declared on ``security``; confirm
    that ``setBlackList`` and ``blackListAndRemove`` are protected
    elsewhere.
    """

    security = ClassSecurityInfo()

    id = TOOL_ID
    meta_type = TOOL_METATYPE
    title = 'TrackSpam Tool'
    plone_tool = True

    __implements__ = (SimpleItem.__implements__,
                      ActionProviderBase.__implements__)

    # Class-level defaults shared by every instance.  They must never be
    # mutated in place: the methods below always build a new list and
    # assign it to the instance, which keeps instances independent and
    # makes the change a real attribute assignment.
    blacklist = []
    blacklist_compiled = []

    def loadInitialFile(self):
        # Seed ``blacklist_compiled`` from the ``mt_blacklist.txt`` file
        # that sits next to this module.  Blank lines and lines starting
        # with '#' are skipped.  Nothing happens when the tool already
        # holds entries, so calling this twice cannot duplicate patterns.
        # (The original tested ``self.blackList`` - a misspelled,
        # undefined attribute.)
        if self.blacklist or self.blacklist_compiled:
            return
        txtpath = os.path.join(os.path.dirname(__file__), 'mt_blacklist.txt')
        compiled = []
        fp = open(txtpath, 'r')
        try:
            for line in fp:
                line = line.strip()
                if line and not line.startswith('#'):
                    compiled.append(_parseLine(line))
        finally:
            # Close explicitly instead of relying on garbage collection.
            fp.close()
        self.blacklist_compiled = compiled

    def _matchesBlacklist(self, url):
        # True when any compiled blacklist pattern is found in ``url``.
        for regexp in self.blacklist_compiled:
            if regexp.search(url):
                return True
        return False

    def checkTrackback(self, trackback):
        # Returns 0 when the trackback's URL hits the blacklist, else 1.
        # NOTE(review): this calls ``getURL()`` while blackListAndRemove
        # uses ``getUrl()`` on the objects it handles - confirm which
        # accessor the trackback passed in here really provides.
        url = trackback.getURL()
        if self._matchesBlacklist(url):
            return 0
        return 1

    def checkURL(self, url):
        # Returns 1 when ``url`` is acceptable, 0 when it is empty or
        # matches a blacklist pattern.
        if not url:
            return 0
        if self._matchesBlacklist(url):
            return 0
        return 1

    def getBlackList(self):
        """ return the blacklist source lines joined by newlines, for editing """
        return '\n'.join(self.blacklist)

    def setBlackList(self, data):
        """ save the blacklist

        ``data`` is newline-separated text.  Each line is stripped; blank
        lines are dropped and repeated lines are kept only once, in
        first-seen order.  Comment-only lines stay in the editable list
        but are not compiled, because an empty pattern would match every
        URL.  A line with an invalid regular expression raises
        ``re.error`` and leaves the stored blacklist unchanged.
        """
        entries = []
        compiled = []
        seen = {}
        for raw in data.split('\n'):
            entry = raw.strip()
            if not entry or entry in seen:
                continue
            seen[entry] = 1
            entries.append(entry)
            # Same comment rule as _parseLine: text before the first '#'.
            if entry.split('#')[0].strip():
                compiled.append(_parseLine(entry))
        # Assign only after every line compiled, so both lists stay in step.
        self.blacklist = entries
        self.blacklist_compiled = compiled

    def blackListAndRemove(self, trbacks_checked):
        """ add the entries to black list remove them and the similar ones

        ``trbacks_checked`` is a sequence of UIDs.  Each object found
        through ``uid_catalog`` has its stripped URL added to the
        blacklist (if not already listed) and is deleted from its
        parent.  Afterwards every pending ``TrackBack`` in
        ``portal_catalog`` whose URL equals one of the handled URLs is
        deleted as well.

        Returns ``(bl_counter, counter)``: the number of URLs newly
        blacklisted and the number of objects deleted.  Returns
        ``(0, 0)`` when ``trbacks_checked`` is empty.
        """
        if not trbacks_checked:
            return 0, 0
        uid_catalog = getToolByName(self, 'uid_catalog')
        catalog = getToolByName(self, 'portal_catalog')
        # Work on copies; see the note on the class-level defaults.
        blacklist = list(self.blacklist)
        compiled = list(self.blacklist_compiled)
        urls = []
        counter = 0
        bl_counter = 0
        for trb in trbacks_checked:
            brains = uid_catalog(UID=trb)
            if not brains:
                # Unknown UID, or one already deleted earlier in this
                # loop; the original raised IndexError here.
                continue
            obj = brains[0].getObject()
            geturl = obj.getUrl()
            urls.append(geturl)
            url = stripUrl(geturl)
            if url not in blacklist:
                # NOTE(review): the URL is compiled as a regular
                # expression unescaped, so '.', '?' etc. act as
                # metacharacters and a malformed one raises re.error.
                pattern = _parseLine(url)
                blacklist.append(url)
                compiled.append(pattern)
                bl_counter += 1
            obj.aq_parent.manage_delObjects([obj.getId()])
            counter += 1
        self.blacklist = blacklist
        self.blacklist_compiled = compiled
        res = catalog(portal_type='TrackBack', review_state='pending', sort_on='Date')
        for r in res:
            obj = r.getObject()
            if obj is None:
                # Stale catalog entry without a backing object.
                continue
            if obj.getUrl() in urls:
                obj.aq_parent.manage_delObjects([obj.getId()])
                counter += 1
        return bl_counter, counter


# Register the class with Zope so its security declarations are applied.
InitializeClass(TrackSpamTool)
|---|