| 1 |
import urllib, re, os, commands |
|---|
| 2 |
from xml.dom import minidom, Node |
|---|
| 3 |
|
|---|
| 4 |
from zope.interface import implements |
|---|
| 5 |
from Products.Five.browser import BrowserView |
|---|
| 6 |
|
|---|
| 7 |
from Products.CMFCore.utils import getToolByName |
|---|
| 8 |
from Products.CMFPlone import PloneMessageFactory as _ |
|---|
| 9 |
|
|---|
| 10 |
from interfaces import IValidateSEOKeywordsView |
|---|
| 11 |
|
|---|
| 12 |
class ValidateSEOKeywordsView(BrowserView):
    """Browser view that checks whether SEO keywords occur on the page.

    The page text is obtained by dumping the context's URL with the external
    ``lynx`` text browser, so ``lynx`` must be installed on the server.
    """

    implements(IValidateSEOKeywordsView)

    def validateKeywords(self, text):
        """Report which keywords from ``text`` are missing from the page.

        See the ``IValidateSEOKeywordsView`` interface.

        ``text`` is a newline-separated list of keywords.  Keywords are
        compared case-insensitively; blank lines and duplicates are ignored.

        Returns a message (built with the Plone message factory) that is one
        of: the keyword list is empty, ``lynx`` could not be run, the list of
        keywords not found on the page, or confirmation that all keywords
        were found.
        """
        # Local import so the module-level import block stays untouched.
        import subprocess

        # Normalise the input: lower-case, strip, drop blank lines and
        # duplicates while keeping the order in which keywords were given.
        keywords = []
        seen = set()
        for line in text.lower().split('\n'):
            keyword = line.strip()
            if keyword and keyword not in seen:
                seen.add(keyword)
                keywords.append(keyword)
        if not keywords:
            return _(u'Keywords list is empty!')

        # NOTE(review): the query flag presumably makes the page render
        # without the additional keywords -- confirm against the template.
        url = '%s?qseo_without_additional_keywords=1' % self.context.absolute_url()

        # Run lynx with an argument list (no shell), so characters such as
        # '?' and '&' in the URL are never interpreted by a shell.  stderr is
        # merged into stdout to mirror what commands.getoutput() returned.
        try:
            process = subprocess.Popen(
                ['lynx', '--dump', '--nolist', url],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT)
            page_text = process.communicate()[0].lower()
        except OSError:
            # The lynx executable is missing or cannot be started.
            page_text = ''
        if not page_text:
            return _(u'Could not find lynx browser!')

        # Keywords are escaped so regex metacharacters are matched literally.
        # The lookarounds behave like \b for keywords that start and end with
        # a word character, and additionally let keywords with leading or
        # trailing punctuation (e.g. "c++") match.
        missing = [
            keyword for keyword in keywords
            if not re.search(r'(?<!\w)%s(?!\w)' % re.escape(keyword),
                             page_text, re.I)
        ]

        if missing:
            msg = u"""Next keywords did not appear on the page:\n%s""" % '\n'.join(missing)
        else:
            msg = u"""All keywords found on the page!"""
        return _(msg)

    def walkTextNodes(self, parent, page_words=None):
        """Collect the lower-cased words of all text nodes below ``parent``.

        ``parent`` is a DOM node; its element children are walked
        recursively.  Words are appended to ``page_words`` when a list is
        passed in; otherwise a fresh list is created for this call.

        Returns the list holding the collected words.
        """
        # A fresh list per call: a mutable default would be shared between
        # unrelated calls and keep growing.
        if page_words is None:
            page_words = []
        for node in parent.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                self.walkTextNodes(node, page_words)
            elif node.nodeType == Node.TEXT_NODE:
                value = node.nodeValue
                if value is not None:
                    page_words.extend([word.lower() for word in value.split()])
        return page_words

    def strip_tags(self, in_text):
        """Return ``in_text`` with everything between ``<`` and ``>`` removed.

        Each ``<`` up to and including the next ``>`` is dropped.  An
        unterminated ``<`` removes the rest of the text instead of raising
        ``IndexError``.
        """
        # [^>] also matches newlines, so tags spanning lines are removed;
        # \Z covers a tag that is still open at the end of the input.
        return re.sub(r'<[^>]*(?:>|\Z)', '', in_text)
|---|