Changeset 1885 in products for quintagroup.seoptimizer/branches/refactoring2.3.0/quintagroup/seoptimizer/browser
- Timestamp:
- Mar 11, 2010 1:13:01 PM (14 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
quintagroup.seoptimizer/branches/refactoring2.3.0/quintagroup/seoptimizer/browser/keywords.py
r1852 r1885 5 5 from Products.Five.browser import BrowserView 6 6 7 from Products.CMFPlone.utils import safe_unicode, getSiteEncoding 7 8 from Products.CMFCore.utils import getToolByName 8 9 … … 18 19 ts = getToolByName(self.context, 'translation_service') 19 20 # extract keywords from text 21 enc = getSiteEncoding(self.context) 20 22 if text.lower().strip(): 21 keywords = map(lambda x: x.strip(), text.lower().strip().split('\n')) 23 keywords = filter(None, map(lambda x: safe_unicode(x.strip(), enc), 24 text.lower().strip().split('\n'))) 22 25 else: 23 return ts.utranslate(domain='quintagroup.seoptimizer', msgid=_(u'Keywords list is empty!'), context=self.context) 26 return ts.utranslate(domain='quintagroup.seoptimizer', 27 msgid=_(u'Keywords list is empty!'), 28 context=self.context) 24 29 # request html page of context object 25 30 url = '%s?without_metatag_keywords=1' % self.context.absolute_url() … … 28 33 page_text = commands.getoutput('lynx --dump --nolist %s' % url).lower() 29 34 if page_text and page_text != 'sh: lynx: command not found': 30 page_text = page_text.decode('utf8')35 page_text = safe_unicode(page_text, 'utf-8') 31 36 else: 32 return ts.utranslate(domain='quintagroup.seoptimizer', msgid=_(u'Could not find lynx browser!'), context=self.context) 37 return ts.utranslate(domain='quintagroup.seoptimizer', 38 msgid=_(u'Could not find lynx browser!'), 39 context=self.context) 33 40 34 41 # check every keyword on appearing in body of html page 35 missing = [] 36 finding = [] 37 added = {} 38 finded = {} 42 result = [] 39 43 for keyword in keywords: 40 keyword = keyword.decode('utf8') 41 if keyword: 42 keyword_on_page = len(re.findall(u'\\b%s\\b' % keyword, page_text, re.I|re.U)) 43 if keyword not in added.keys() and not keyword_on_page: 44 missing.append(keyword+u' - 0') 45 added[keyword] = 1 46 if keyword not in finded.keys() and keyword_on_page: 47 finding.append(keyword+u' - '+repr(keyword_on_page)) 48 finded[keyword] = 1 49 # return list of missing and fount keywords 50 if missing or finding: 51 msg = ts.utranslate(domain='quintagroup.seoptimizer', msgid=_(u'number_keywords', 52 default=u'Number of keywords at page:\n${found}\n${missing}', 53 mapping={'missing':'\n'.join(missing), 'found': '\n'.join(finding)}), 54 context=self.context) 55 else: 56 msg = '' 57 return msg 44 keyword_on_page = unicode(len(re.findall(u'\\b%s\\b' % keyword, page_text, re.I|re.U))) 45 result.append(' - '.join((keyword, keyword_on_page))) 46 return ts.utranslate(domain='quintagroup.seoptimizer', 47 msgid=_(u'number_keywords', 48 default=u'Number of keywords at page:\n${result}', 49 mapping={'result':'\n'.join(result)}), 50 context=self.context) 58 51 59 52 def walkTextNodes(self, parent, page_words=[]):
Note: See TracChangeset
for help on using the changeset viewer.