Changeset 2895 in products for quintagroup.seoptimizer/trunk/quintagroup/seoptimizer/browser/keywords.py
- Timestamp:
- Oct 19, 2010 5:52:01 PM (14 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
quintagroup.seoptimizer/trunk/quintagroup/seoptimizer/browser/keywords.py
r2890 r2895 1 import re, commands 1 import re, commands, urllib2 2 2 from xml.dom import Node 3 3 4 4 from zope.interface import implements 5 from zope.component import queryAdapter 6 from zope.component import queryMultiAdapter 5 7 from Products.Five.browser import BrowserView 6 8 … … 10 12 from interfaces import IValidateSEOKeywordsView 11 13 from quintagroup.seoptimizer import SeoptimizerMessageFactory as _ 12 13 #from pyquery import PyQuery as pq 14 from quintagroup.seoptimizer.browser.seo_configlet import ISEOConfigletSchema 14 15 15 16 class ValidateSEOKeywordsView(BrowserView): … … 21 22 text = self.request.get('text') 22 23 ts = getToolByName(self.context, 'translation_service') 24 transforms = getToolByName(self.context, 'portal_transforms') 25 portal = getToolByName(self.context, 'portal_url').getPortalObject() 26 isExternal = queryAdapter(portal, ISEOConfigletSchema).external_keywords_test 23 27 # extract keywords from text 24 28 enc = getSiteEncoding(self.context) … … 31 35 context=self.context) 32 36 # request html page of context object 33 url = '%s?without_metatag_keywords=1' % self.context.absolute_url() 34 35 # extract words from url page using lynx browser (test page by 'url' 36 # randered without metatag keywords) 37 #import pdb;pdb.set_trace() 38 page_text = commands.getoutput('lynx --dump --nolist %s' % url).lower() 39 if page_text and page_text != 'sh: lynx: command not found': 40 page_text = safe_unicode(page_text, 'utf-8') 37 if isExternal: 38 # Not pass timeout option because: 39 # 1. its value get from the global default timeout settings by default. 40 # 2. timeout option added in python 2.6 (so acceptable only in plone4+) 41 try: 42 html = urllib2.urlopen(self.context.absolute_url()) 43 except urllib2.URLError: 44 # In case of exceed timeout period 45 # or other URL connection errors. 46 html = unicode(self.context()).encode(enc) 41 47 else: 42 return ts.utranslate(domain='quintagroup.seoptimizer', 43 msgid=_(u'Could not find lynx browser!'), 44 context=self.context) 45 46 # html = self.context() 47 # page_text = pq("body", html).text() 48 48 html = unicode(self.context()).encode(enc) 49 page_text = transforms.convert("html_to_text", html).getData() 50 49 51 # check every keyword on appearing in body of html page 50 52 result = []
Note: See TracChangeset
for help on using the changeset viewer.