source: products/quintagroup.seoptimizer/branches/refactoring2.3.0/quintagroup/seoptimizer/browser/keywords.py @ 1958

Last change on this file since 1958 was 1958, checked in by liebster, 14 years ago

Clean-up code http://codereview.corp.quintagroup.com/40241/show

  • Property svn:eol-style set to native
File size: 2.3 KB
Line 
1import re, commands
2from xml.dom import Node
3
4from zope.interface import implements
5from Products.Five.browser import BrowserView
6
7from Products.CMFPlone.utils import safe_unicode, getSiteEncoding
8from Products.CMFCore.utils import getToolByName
9
10from interfaces import IValidateSEOKeywordsView
11from quintagroup.seoptimizer import SeoptimizerMessageFactory as _
12
class ValidateSEOKeywordsView(BrowserView):
    """Browser view that counts occurrences of SEO keywords on a page.

    The keywords are read from the ``text`` request parameter (one keyword
    per line) and counted in a plain-text dump of the context's page
    produced by the external ``lynx`` browser.
    """

    implements(IValidateSEOKeywordsView)

    def _translate(self, msgid):
        """Translate *msgid* in the package's i18n domain for the context."""
        ts = getToolByName(self.context, 'translation_service')
        return ts.utranslate(domain='quintagroup.seoptimizer',
                             msgid=msgid,
                             context=self.context)

    def _dumpPage(self, url):
        """Return the lynx text dump of *url*, or None if lynx cannot run.

        The command is started from an argument list, without a shell, so
        characters in *url* can never be interpreted as shell syntax.
        stderr is merged into stdout, as the former shell call did.
        """
        # Local import: the module-level imports do not provide subprocess.
        import subprocess
        try:
            process = subprocess.Popen(
                ['lynx', '--dump', '--nolist', url],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT)
        except OSError:
            # The lynx executable is missing or cannot be executed.
            return None
        return process.communicate()[0]

    def validateKeywords(self):
        """ see interface

        Returns a translated message: either an error text (empty keyword
        list, lynx unavailable or empty page dump) or a report with one
        ``<keyword> - <count>`` line per keyword.
        """
        # A missing 'text' parameter is treated like an empty keyword list.
        text = self.request.get('text') or ''

        # extract keywords from text; decode first so that lower-casing
        # operates on characters rather than on encoded bytes
        enc = getSiteEncoding(self.context)
        keywords = []
        for line in text.split('\n'):
            keyword = safe_unicode(line, enc).strip().lower()
            if keyword:
                keywords.append(keyword)
        if not keywords:
            return self._translate(_(u'Keywords list is empty!'))

        # request html page of context object
        url = '%s?without_metatag_keywords=1' % self.context.absolute_url()

        # extract words from url page using lynx browser (test page by 'url'
        # rendered without metatag keywords)
        raw_page = self._dumpPage(url)
        if raw_page is None or not raw_page.strip():
            return self._translate(_(u'Could not find lynx browser!'))
        page_text = safe_unicode(raw_page, 'utf-8').lower()

        # check every keyword on appearing in body of html page
        result = []
        for keyword in keywords:
            # The keyword is escaped so that it is matched literally even
            # when it contains regex metacharacters.  The lookarounds are
            # equivalent to \b for keywords that start and end with a word
            # character, and also work for keywords such as "c++".
            pattern = u'(?<!\\w)%s(?!\\w)' % re.escape(keyword)
            count = len(re.findall(pattern, page_text, re.I | re.U))
            result.append(u'%s - %d' % (keyword, count))

        return self._translate(
            _(u'number_keywords',
              default=u'Number of keywords at page:\n${result}',
              mapping={'result': u'\n'.join(result)}))
Note: See TracBrowser for help on using the repository browser.