Context Navigation

source: products/quintagroup.seoptimizer/branches/refactoring2.3.0/quintagroup/seoptimizer/browser/keywords.py @ 1901

Last change on this file since 1901 was 1885, checked in by liebster, 15 years ago
#165: Refactoring using Check Keywords in seo-context-properties view
Property svn:eol-style set to `native`
File size: 3.2 KB

Line
1	import urllib, re, os, commands
2	from xml.dom import minidom, Node
3
4	from zope.interface import implements
5	from Products.Five.browser import BrowserView
6
7	from Products.CMFPlone.utils import safe_unicode, getSiteEncoding
8	from Products.CMFCore.utils import getToolByName
9
10	from interfaces import IValidateSEOKeywordsView
11	from quintagroup.seoptimizer import SeoptimizerMessageFactory as _
12
class ValidateSEOKeywordsView(BrowserView):
    """Browser view that counts SEO keywords in the rendered context page.

    The page of the context object is dumped to plain text with the external
    ``lynx`` browser and every submitted keyword is counted in that text.
    """

    implements(IValidateSEOKeywordsView)

    def _translate(self, msgid):
        """Translate *msgid* in the ``quintagroup.seoptimizer`` domain."""
        ts = getToolByName(self.context, 'translation_service')
        return ts.utranslate(domain='quintagroup.seoptimizer',
                             msgid=msgid,
                             context=self.context)

    def _dump_page(self, url):
        """Return the output of ``lynx --dump --nolist url``.

        An empty string is returned when the ``lynx`` executable cannot be
        started.
        """
        # Imported locally so this class does not depend on a module-level
        # import being added elsewhere in the file.
        import subprocess
        try:
            # An argument list (no shell) keeps characters such as '?', '&'
            # or ';' in the URL from being interpreted by a shell.
            # stderr is folded into stdout, as commands.getoutput() did.
            proc = subprocess.Popen(['lynx', '--dump', '--nolist', url],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
        except OSError:
            # Raised when the executable is missing or not runnable; this
            # replaces comparing the output with one shell's error message.
            return ''
        return proc.communicate()[0]

    def _keyword_pattern(self, keyword):
        """Compile a case-insensitive whole-phrase pattern for *keyword*.

        *keyword* must contain at least one non-whitespace character.
        """
        # Escape every word so regex metacharacters in a keyword ("c++",
        # "a(b") are matched literally instead of breaking the pattern.
        words = [re.escape(word) for word in keyword.split()]
        # Words may be separated by any whitespace run, because the text dump
        # can wrap a line between two words of a multi-word keyword.
        phrase = u'\\s+'.join(words)
        # Look-arounds behave like \b for keywords that start and end with a
        # word character, and still work when they do not.
        return re.compile(u'(?<!\\w)%s(?!\\w)' % phrase, re.I | re.U)

    def validateKeywords(self, text):
        """Count how often each keyword from *text* occurs on the page.

        See the interface for the public contract.

        *text* holds one keyword per line. Returns a translated message:
        a notice that the keyword list is empty, a notice that lynx could
        not be found, or a ``<keyword> - <count>`` line for every keyword.
        """
        # extract keywords from text; decode first so that lower-casing
        # operates on characters rather than on encoded bytes
        enc = getSiteEncoding(self.context)
        lines = safe_unicode(text or '', enc).lower().split('\n')
        keywords = [kw for kw in (line.strip() for line in lines) if kw]
        if not keywords:
            return self._translate(_(u'Keywords list is empty!'))

        # request html page of context object
        url = '%s?without_metatag_keywords=1' % self.context.absolute_url()

        # extract words from url page using lynx browser (the page at 'url'
        # is rendered without metatag keywords)
        page_text = self._dump_page(url)
        if not page_text:
            return self._translate(_(u'Could not find lynx browser!'))
        page_text = safe_unicode(page_text, 'utf-8').lower()

        # check every keyword on appearing in body of html page
        result = []
        for keyword in keywords:
            matches = self._keyword_pattern(keyword).findall(page_text)
            keyword_on_page = unicode(len(matches))
            result.append(' - '.join((keyword, keyword_on_page)))
        return self._translate(
            _(u'number_keywords',
              default=u'Number of keywords at page:\n${result}',
              mapping={'result': '\n'.join(result)}))

    def walkTextNodes(self, parent, page_words=None):
        """Collect the lower-cased words of all text nodes below *parent*.

        Element children are walked recursively. Words are appended to
        *page_words* in place when a list is passed; otherwise a new list is
        created for this call. The list is returned in both cases.
        """
        # A fresh list per call: a mutable default would be shared by, and
        # keep growing across, all calls that omit the argument.
        if page_words is None:
            page_words = []
        for node in parent.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                self.walkTextNodes(node, page_words)
            elif node.nodeType == Node.TEXT_NODE:
                value = node.nodeValue
                if value is not None:
                    page_words.extend(word.lower() for word in value.split())
        return page_words

    def strip_tags(self, in_text):
        """Return *in_text* without anything enclosed in angle brackets.

        Everything from a ``<`` up to and including the next ``>`` is
        removed. If a ``<`` is never closed, the text from that ``<`` to the
        end is dropped instead of raising ``IndexError``.
        """
        pieces = []
        pos = 0
        while True:
            start = in_text.find('<', pos)
            if start == -1:
                # no further tag: keep the remaining text
                pieces.append(in_text[pos:])
                break
            pieces.append(in_text[pos:start])
            end = in_text.find('>', start)
            if end == -1:
                # unterminated tag: discard the rest of the input
                break
            # continue right after the right-angle bracket
            pos = end + 1

        # convert the pieces back into text
        return ''.join(pieces)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: