1 | import re, sys, urllib2 |
---|
2 | from xml.dom import Node |
---|
3 | |
---|
4 | from zope.interface import implements |
---|
5 | from zope.component import getUtility |
---|
6 | from zope.component import queryAdapter |
---|
7 | |
---|
8 | from Acquisition import aq_acquire |
---|
9 | from Products.Five.browser import BrowserView |
---|
10 | |
---|
11 | from Products.CMFCore.utils import getToolByName |
---|
12 | from Products.CMFPlone.utils import safe_unicode, getSiteEncoding |
---|
13 | from Products.PortalTransforms.interfaces import IPortalTransformsTool |
---|
14 | |
---|
15 | from interfaces import IValidateSEOKeywordsView |
---|
16 | from quintagroup.seoptimizer import SeoptimizerMessageFactory as _ |
---|
17 | from quintagroup.seoptimizer.browser.seo_configlet import ISEOConfigletSchema |
---|
18 | |
---|
class ValidateSEOKeywordsView(BrowserView):
    """Browser view reporting how often submitted keywords occur on the page.

    The keywords are read from the ``text`` request parameter (one keyword
    per line) and counted in the plain-text rendering of the context's HTML
    page.
    """

    implements(IValidateSEOKeywordsView)

    def validateKeywords(self):
        """ see interface

        Returns a translated message: either a notice that the keyword list
        is empty, or a report containing one ``keyword - count`` line per
        keyword.  When the page could not be retrieved the report contains a
        retrieval-problem note instead of the counts.
        """
        ts = getToolByName(self.context, 'translation_service')
        transforms = getUtility(IPortalTransformsTool)
        portal = getToolByName(self.context, 'portal_url').getPortalObject()
        isExternal = queryAdapter(portal, ISEOConfigletSchema).external_keywords_test

        # Extract keywords from text: one keyword per line, lower-cased,
        # stripped, blank lines dropped.
        enc = getSiteEncoding(self.context)
        # A missing 'text' parameter is handled like an empty keyword list
        # instead of failing with AttributeError on None.
        text = (self.request.get('text') or '').lower().strip()
        if not text:
            return ts.utranslate(domain='quintagroup.seoptimizer',
                                 msgid=_(u'Keywords list is empty!'),
                                 context=self.context)
        keywords = [keyword for keyword in
                    (safe_unicode(line.strip(), enc)
                     for line in text.split('\n'))
                    if keyword]

        # Get html page internally or with external request
        error_url = ""
        if isExternal:
            # Not pass timeout option because:
            # 1. its value get from the global default timeout settings.
            # 2. timeout option added in python 2.6 (so acceptable only in plone4+)
            try:
                resp = urllib2.urlopen(self.context.absolute_url())
                try:
                    html = resp.read()
                finally:
                    resp.close()
            except (urllib2.URLError, urllib2.HTTPError):
                # In case of exceed timeout period or other URL connection errors.
                # Get nearest to context error_log object (stolen from Zope2/App/startup.py)
                html = None
                info = sys.exc_info()
                try:
                    elog = aq_acquire(self.context, '__error_log__', containment=1)
                except AttributeError:
                    # No error log reachable from the context: report the
                    # problem without a details URL.
                    pass
                else:
                    error_url = elog.raising(info)
        else:
            html = unicode(self.context()).encode(enc)

        # If no html - information about problem with page retrieval should be returned
        result = []
        if html is None:
            result.append("Problem with page retrieval.")
            if error_url:
                result.append("Details at %s." % error_url)
        else:
            page_text = transforms.convert("html_to_text", html).getData()
            # Keywords are unicode; make the page text unicode as well so that
            # non-ASCII keywords can match.
            # NOTE(review): assumes an externally fetched page uses the site
            # encoding -- confirm.
            page_text = safe_unicode(page_text, enc)
            # check every keyword on appearing in body of html page
            for keyword in keywords:
                # re.escape keeps regex metacharacters in the keyword (e.g.
                # "c++") literal.  The lookarounds act like \b for keywords
                # that start and end with word characters, and still delimit
                # keywords whose edges are non-word characters.
                pattern = u'(?<!\\w)%s(?!\\w)' % re.escape(keyword)
                keyword_on_page = unicode(
                    len(re.findall(pattern, page_text, re.I | re.U)))
                result.append(' - '.join((keyword, keyword_on_page)))

        return ts.utranslate(domain='quintagroup.seoptimizer',
                             msgid=_(u'number_keywords',
                                     default=u'Number of keywords at page:\n${result}',
                                     mapping={'result': '\n'.join(result)}),
                             context=self.context)