source: products/quintagroup.seoptimizer/trunk/quintagroup/seoptimizer/browser/keywords.py @ 1647

Last change on this file since 1647 was 1624, checked in by liebster, 14 years ago

Changed keyword counting to take into account the presence of each keyword on the page

  • Property svn:eol-style set to native
File size: 3.2 KB
RevLine 
[387]1import urllib, re, os, commands
2from xml.dom import minidom, Node
3
4from zope.interface import implements
5from Products.Five.browser import BrowserView
6
7from Products.CMFCore.utils import getToolByName
8
9from interfaces import IValidateSEOKeywordsView
[1463]10from quintagroup.seoptimizer import SeoptimizerMessageFactory as _
[387]11
class ValidateSEOKeywordsView(BrowserView):
    """Browser view that checks how often the SEO keywords entered for the
    context object actually occur in the rendered page body.
    """

    implements(IValidateSEOKeywordsView)

    def validateKeywords(self, text):
        """ see interface

        Take a newline-separated keyword list in ``text`` and return a
        translated message reporting the number of occurrences of each
        unique keyword on the context page, or an error message when the
        keyword list is empty or the lynx browser is not installed.
        """
        ts = getToolByName(self.context, 'translation_service')
        # extract keywords from text (one keyword per line)
        stripped = text.lower().strip()
        if not stripped:
            return ts.utranslate(None, _(u'Keywords list is empty!'), context=self.context)
        # splitlines also copes with '\r\n' line endings
        keywords = [line.strip() for line in stripped.splitlines()]
        # request html page of context object, rendered without the keywords
        # metatag so occurrences are only counted in the visible body
        url = '%s?without_metatag_keywords=1' % self.context.absolute_url()

        # extract words from url page using the lynx text browser
        # NOTE(review): url is interpolated into a shell command; it comes from
        # absolute_url() and is assumed to be shell-safe -- verify upstream.
        page_text = commands.getoutput('lynx --dump --nolist %s' % url).lower()
        if page_text and page_text != 'sh: lynx: command not found':
            page_text = page_text.decode('utf8')
        else:
            return _(u'Could not find lynx browser!')

        # count whole-word occurrences of every unique keyword on the page
        missing = []
        finding = []
        seen = set()   # unique keywords already classified
        for keyword in keywords:
            keyword = keyword.decode('utf8')
            if not keyword or keyword in seen:
                continue
            seen.add(keyword)
            # re.escape: keywords may contain regex metacharacters (e.g. 'c++')
            keyword_on_page = len(re.findall(u'\\b%s\\b' % re.escape(keyword),
                                             page_text, re.I | re.U))
            if keyword_on_page:
                finding.append(keyword + u' - ' + str(keyword_on_page))
            else:
                missing.append(keyword + u' - 0')
        # return list of missing and found keywords
        if missing or finding:
            msg = ts.utranslate(None, _('number_keywords', default=u'Number of keywords at page:\n${found}\n${missing}',
                                mapping={'missing': '\n'.join(missing), 'found': '\n'.join(finding)}), context=self.context)
        else:
            msg = ''
        return msg

    def walkTextNodes(self, parent, page_words=None):
        """Recursively collect the lower-cased, whitespace-separated words of
        every text node below ``parent`` into ``page_words``.

        Returns the word list (freshly created when the caller passed none).
        """
        # A fresh list is created per top-level call instead of the original
        # mutable default argument, which was shared across calls.
        if page_words is None:
            page_words = []
        for node in parent.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                self.walkTextNodes(node, page_words)
            elif node.nodeType == Node.TEXT_NODE:
                value = node.nodeValue
                if value is not None:
                    page_words.extend([word.lower() for word in value.split()])
        return page_words

    def strip_tags(self, in_text):
        """Return ``in_text`` with all <...> markup spans removed."""
        # Single regex pass replaces the original O(n^2) char-by-char list
        # popping; a trailing unterminated '<' is dropped instead of raising
        # IndexError as the old loop did.
        return re.sub(r'<[^>]*>?', '', in_text)
Note: See TracBrowser for help on using the repository browser.