source: products/quintagroup.seoptimizer/trunk/quintagroup/seoptimizer/browser/keywords.py @ 2932

Last change on this file since 2932 was 2932, checked in by mylan, 14 years ago

#233: Force logging of keyword-calculation errors to the error_log object nearest to the context. Other minor fixes.

  • Property svn:eol-style set to native
File size: 3.6 KB
Line 
1import re, sys, urllib2
2from xml.dom import Node
3
4from zope.interface import implements
5from zope.component import getUtility
6from zope.component import queryAdapter
7
8from Acquisition import aq_acquire
9from Products.Five.browser import BrowserView
10
11from Products.CMFCore.utils import getToolByName
12from Products.CMFPlone.utils import safe_unicode, getSiteEncoding
13from Products.PortalTransforms.interfaces import IPortalTransformsTool
14
15from interfaces import IValidateSEOKeywordsView
16from quintagroup.seoptimizer import SeoptimizerMessageFactory as _
17from quintagroup.seoptimizer.browser.seo_configlet import ISEOConfigletSchema
18
class ValidateSEOKeywordsView(BrowserView):
    """Browser view reporting how often each submitted keyword occurs
    in the text of the context's rendered page.
    """

    implements(IValidateSEOKeywordsView)

    def _getPageHtml(self, external, enc):
        """Return a ``(html, error_url)`` pair for the context page.

        When ``external`` is true the page is fetched over HTTP from the
        context's absolute URL; otherwise the context is called in-process
        and the result is encoded with ``enc``.

        ``html`` is ``None`` when the external request failed.  In that case
        ``error_url`` holds whatever the acquired error log's ``raising()``
        returned, or ``""`` when no ``__error_log__`` could be acquired.
        """
        if not external:
            return unicode(self.context()).encode(enc), ""

        # Not pass timeout option because:
        # 1. its value get from the global default timeout settings.
        # 2. timeout option added in python 2.6 (so acceptable only in plone4+)
        try:
            resp = urllib2.urlopen(self.context.absolute_url())
            try:
                return resp.read(), ""
            finally:
                resp.close()
        except (urllib2.URLError, urllib2.HTTPError):
            # Timeout exceeded or another URL connection error.
            # Log it to the error_log object nearest to the context
            # (approach taken from Zope2/App/startup.py).
            info = sys.exc_info()
            try:
                elog = aq_acquire(self.context, '__error_log__', containment=1)
            except AttributeError:
                # No error log reachable from the context: nothing to link to.
                return None, ""
            return None, elog.raising(info)

    def validateKeywords(self):
        """ see interface

        Reads a newline-separated keyword list from the ``text`` request
        variable and returns a translated report with the number of
        case-insensitive occurrences of every keyword in the page text.
        An empty or missing keyword list yields a translated
        "Keywords list is empty!" message instead.
        """
        ts = getToolByName(self.context, 'translation_service')
        transforms = getUtility(IPortalTransformsTool)
        portal = getToolByName(self.context, 'portal_url').getPortalObject()
        isExternal = queryAdapter(
            portal, ISEOConfigletSchema).external_keywords_test
        enc = getSiteEncoding(self.context)

        # A missing 'text' request variable is handled like an empty list
        # instead of failing with AttributeError on None.
        text = (self.request.get('text') or '').lower().strip()
        if not text:
            return ts.utranslate(domain='quintagroup.seoptimizer',
                                 msgid=_(u'Keywords list is empty!'),
                                 context=self.context)

        # One keyword per line; blank lines are dropped.
        candidates = [safe_unicode(line.strip(), enc)
                      for line in text.split('\n')]
        keywords = [kw for kw in candidates if kw]

        # Get html page internally or with external request
        html, error_url = self._getPageHtml(isExternal, enc)

        result = []
        if html is None:
            # No html: report the retrieval problem instead of the counts.
            result.append("Problem with page retrieval.")
            if error_url:
                result.append("Details at %s." % error_url)
        else:
            page_text = transforms.convert("html_to_text", html).getData()
            # Count whole-keyword occurrences in the page text.
            for keyword in keywords:
                # re.escape: the keyword is user input and must be matched
                # literally (e.g. "c++" or "plone.").  The lookarounds are
                # equivalent to \b for keywords that start and end with a
                # word character, and still work when they do not.
                pattern = u'(?<!\\w)%s(?!\\w)' % re.escape(keyword)
                occurrences = len(re.findall(pattern, page_text, re.I | re.U))
                result.append(' - '.join((keyword, unicode(occurrences))))

        return ts.utranslate(domain='quintagroup.seoptimizer',
                             msgid=_(u'number_keywords',
                               default=u'Number of keywords at page:\n${result}',
                               mapping={'result': '\n'.join(result)}),
                             context=self.context)
Note: See TracBrowser for help on using the repository browser.