source: products/quintagroup.seoptimizer/trunk/quintagroup/seoptimizer/browser/keywords.py @ 2930

Last change on this file since 2930 was 2930, checked in by mylan, 14 years ago

#233: Force to get portal_transforms as utility

  • Property svn:eol-style set to native
File size: 3.5 KB
Line 
1import re, sys, urllib2
2from xml.dom import Node
3
4from zope.interface import implements
5from zope.component import getUtility
6from zope.component import queryAdapter
7#from zope.component import queryMultiAdapter
8from Products.Five.browser import BrowserView
9
10from Products.CMFCore.utils import getToolByName
11from Products.CMFPlone.utils import safe_unicode, getSiteEncoding
12from Products.PortalTransforms.interfaces import IPortalTransformsTool
13
14from interfaces import IValidateSEOKeywordsView
15from quintagroup.seoptimizer import SeoptimizerMessageFactory as _
16from quintagroup.seoptimizer.browser.seo_configlet import ISEOConfigletSchema
17
class ValidateSEOKeywordsView(BrowserView):
    """Browser view reporting how often each SEO keyword occurs on a page.

    The keywords come from the ``text`` request parameter (one keyword per
    line).  The page is either rendered internally by calling the context
    or, when the ``external_keywords_test`` configlet option is set,
    fetched over HTTP from the context's absolute URL.
    """

    implements(IValidateSEOKeywordsView)

    def _getPageHtml(self, isExternal, enc):
        """Return ``(html, error_url)`` for the context page.

        ``html`` is ``None`` when an external request failed; in that case
        ``error_url`` holds whatever the ``error_log`` tool returned for the
        logged exception (or an empty string if the tool is unavailable).
        """
        if not isExternal:
            return unicode(self.context()).encode(enc), ""

        # Not pass timeout option because:
        # 1. its value get from the global default timeout settings.
        # 2. timeout option added in python 2.6 (so acceptable only in plone4+)
        error_url = ""
        resp = None
        try:
            # Nested try blocks (instead of try/except/finally) keep the
            # code valid on old Python 2 interpreters.
            try:
                resp = urllib2.urlopen(self.context.absolute_url())
                html = resp.read()
            finally:
                # resp stays None when urlopen itself failed.
                if resp is not None:
                    resp.close()
        except (urllib2.URLError, urllib2.HTTPError):
            # In case of exceed timeout period or other URL connection errors.
            info = sys.exc_info()
            # Default of None so a missing tool does not mask the URL error.
            elog = getToolByName(self.context, "error_log", None)
            if elog is not None:
                error_url = elog.raising(info)
            html = None
        return html, error_url

    def validateKeywords(self):
        """ see interface

        Returns a translated message: either a notice that the keyword list
        is empty, or a per-keyword occurrence report (``keyword - count``
        lines), or a note that the page could not be retrieved.
        """
        # A missing 'text' parameter is treated like an empty keyword list.
        text = self.request.get('text') or ''
        ts = getToolByName(self.context, 'translation_service')
        transforms = getUtility(IPortalTransformsTool)
        portal = getToolByName(self.context, 'portal_url').getPortalObject()
        isExternal = queryAdapter(portal, ISEOConfigletSchema).external_keywords_test

        # extract keywords from text: one per line, lower-cased, blanks dropped
        enc = getSiteEncoding(self.context)
        stripped = [line.strip() for line in text.lower().strip().split('\n')]
        keywords = [safe_unicode(line, enc) for line in stripped if line]
        if not keywords:
            return ts.utranslate(domain='quintagroup.seoptimizer',
                                 msgid=_(u'Keywords list is empty!'),
                                 context=self.context)

        # Get html page internally or with external request
        html, error_url = self._getPageHtml(isExternal, enc)

        # If no html - information about problem with page retrieval should be returned
        result = []
        if html is None:
            if error_url:
                sfx = ", details at %s." % error_url
            else:
                sfx = "."
            result.append("Problem with page retrieval" + sfx)
        else:
            page_text = transforms.convert("html_to_text", html).getData()
            # Keywords are unicode; make sure the haystack is too so that
            # non-ASCII keywords can match (no-op if already unicode).
            page_text = safe_unicode(page_text, enc)
            # check every keyword on appearing in body of html page
            for keyword in keywords:
                # Escape the keyword so regex metacharacters in it (e.g.
                # "c++") are matched literally.  The lookarounds behave like
                # \b for keywords that start/end with a word character and
                # still work when the keyword starts/ends with punctuation.
                pattern = u'(?<!\\w)%s(?!\\w)' % re.escape(keyword)
                occurrences = re.findall(pattern, page_text, re.I | re.U)
                keyword_on_page = unicode(len(occurrences))
                result.append(' - '.join((keyword, keyword_on_page)))

        return ts.utranslate(domain='quintagroup.seoptimizer',
                             msgid=_(u'number_keywords',
                               default=u'Number of keywords at page:\n${result}',
                               mapping={'result':'\n'.join(result)}),
                             context=self.context)
Note: See TracBrowser for help on using the repository browser.