source: products/quintagroup.seoptimizer/trunk/quintagroup/seoptimizer/browser/keywords.py @ 2901

Last change on this file since 2901 was 2901, checked in by mylan, 14 years ago

#233: Check SEO keywords: Added error handling, return reasonable info in case of error

  • Property svn:eol-style set to native
File size: 3.1 KB
Line 
1import re, commands, urllib2
2from xml.dom import Node
3
4from zope.interface import implements
5from zope.component import queryAdapter
6from zope.component import queryMultiAdapter
7from Products.Five.browser import BrowserView
8
9from Products.CMFPlone.utils import safe_unicode, getSiteEncoding
10from Products.CMFCore.utils import getToolByName
11
12from interfaces import IValidateSEOKeywordsView
13from quintagroup.seoptimizer import SeoptimizerMessageFactory as _
14from quintagroup.seoptimizer.browser.seo_configlet import ISEOConfigletSchema
15
class ValidateSEOKeywordsView(BrowserView):
    """Browser view that counts occurrences of SEO keywords on the context page.

    The keyword list is read from the ``text`` request parameter (one keyword
    per line); the page is rendered either internally or fetched over HTTP,
    depending on the ``external_keywords_test`` configlet setting.
    """

    implements(IValidateSEOKeywordsView)

    def validateKeywords(self):
        """ see interface

        Returns a translated message: either a notice that the keyword list
        is empty, or a report with one ``<keyword> - <count>`` line per
        keyword.  When the page could not be retrieved, the report contains
        a single "Problem with page retrieval" line instead of the counts.
        """
        ts = getToolByName(self.context, 'translation_service')
        enc = getSiteEncoding(self.context)

        # A missing 'text' parameter is handled like an empty keyword list
        # instead of failing with AttributeError on None.
        text = self.request.get('text') or ''
        stripped = [line.strip() for line in text.lower().strip().split('\n')]
        keywords = [safe_unicode(line, enc) for line in stripped if line]
        if not keywords:
            return ts.utranslate(domain='quintagroup.seoptimizer',
                                 msgid=_(u'Keywords list is empty!'),
                                 context=self.context)

        transforms = getToolByName(self.context, 'portal_transforms')
        portal = getToolByName(self.context, 'portal_url').getPortalObject()
        isExternal = queryAdapter(portal, ISEOConfigletSchema).external_keywords_test

        # Get html page internally or with external request
        if isExternal:
            html = self._fetchExternalHtml()
        else:
            html = unicode(self.context()).encode(enc)

        # If no html - information about problem with page retrieval should be returned
        result = []
        if html is not None:
            page_text = transforms.convert("html_to_text", html).getData()
            # The keywords are unicode, so compare against unicode text as
            # well; otherwise non-ASCII keywords cannot match encoded bytes.
            # NOTE(review): assumes the transform output uses the site
            # encoding - confirm for externally fetched pages.
            page_text = safe_unicode(page_text, enc)
            # check every keyword on appearing in body of html page
            for keyword in keywords:
                # re.escape keeps regex metacharacters in a keyword (e.g.
                # "c++") from breaking or altering the pattern.  The
                # lookarounds behave like \b for keywords that start and end
                # with word characters, and still allow a match when the
                # keyword starts or ends with punctuation.
                pattern = u'(?<!\\w)%s(?!\\w)' % re.escape(keyword)
                keyword_on_page = unicode(
                    len(re.findall(pattern, page_text, re.I | re.U)))
                result.append(' - '.join((keyword, keyword_on_page)))
        else:
            result.append("Problem with page retrieval")

        return ts.utranslate(domain='quintagroup.seoptimizer',
                             msgid=_(u'number_keywords',
                               default=u'Number of keywords at page:\n${result}',
                               mapping={'result':'\n'.join(result)}),
                             context=self.context)

    def _fetchExternalHtml(self):
        """Return the context page body fetched over HTTP, or None on failure.

        Any exception raised while opening, reading or closing the connection
        (timeout, URL errors, ...) results in None.
        """
        # Not pass timeout option because:
        # 1. its value get from the global default timeout settings by default.
        # 2. timeout option added in python 2.6 (so acceptable only in plone4+)
        resp = None
        try:
            # Nested try blocks keep the code valid on Python versions that
            # lack the unified try/except/finally statement.
            try:
                resp = urllib2.urlopen(self.context.absolute_url())
                return resp.read()
            finally:
                # Close only a response that was actually opened.
                if resp is not None:
                    resp.close()
        except Exception:
            # In case of exceed timeout period or other URL connection errors.
            return None
Note: See TracBrowser for help on using the repository browser.