Changeset 1467 in products


Ignore:
Timestamp:
Dec 23, 2009 1:00:43 PM (14 years ago)
Author:
liebster
Message:

Added calculation of the number of found words on the page

File:
1 edited

Legend:

Unmodified
Added
Removed
  • quintagroup.seoptimizer/trunk/quintagroup/seoptimizer/browser/keywords.py

    r1466 r1467  
    6161        if page_text and page_text != 'sh: lynx: command not found': 
    6262            #page_words = page_text.lower().split() 
    63             page_text = page_text 
     63            page_text = page_text.decode('utf8') 
    6464        else: 
    6565            return _(u'Could not find lynx browser!') 
     
    7171        finded = {} 
    7272        for keyword in keywords: 
     73            keyword = keyword.decode('utf8') 
    7374            if keyword: 
    74                 keyword_on_page =  len(re.compile(r'\s%s\s' % keyword, re.I).findall(page_text)) 
     75                keyword_on_page =  len(re.findall(u'\\b%s\\b' % keyword, page_text, re.I|re.U)) 
    7576                if keyword not in added.keys() and not keyword_on_page: 
    7677                    missing.append(keyword.decode('utf8')) 
    7778                    added[keyword] = 1 
    7879                if keyword not in finded.keys() and keyword_on_page: 
    79                     finding.append(keyword.decode('utf8')+' - '+repr(keyword_on_page)) 
     80                    finding.append(keyword+u' - '+repr(keyword_on_page)) 
    8081                    finded[keyword] = 1 
    8182        # return list of missing keywords 
Note: See TracChangeset for help on using the changeset viewer.