from zope.interface import implements, Interface, Attribute
from zope.component import queryMultiAdapter

from Acquisition import aq_inner, aq_parent
from Products.Five import BrowserView
from Products.CMFCore.utils import getToolByName

#from quintagroup.plonegooglesitemaps \
#    import qPloneGoogleSitemapsMessageFactory as _
from quintagroup.plonegooglesitemaps.interfaces import IBlackoutFilter
from quintagroup.plonegooglesitemaps.browser.utils import additionalURLs, \
    getUrlsObjects, urlFilter, OPERATIONS_PARSE

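# The Sitemap protocol caps a single sitemap file at 50,000 URLs
# (see https://www.sitemaps.org/protocol.html); SITEMAP_SIZE below
# mirrors that limit.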
SITEMAP_SIZE = 50000


class ISitemapView(Interface):
    """
    Sitemap view interface
    """

    def results():
        """ Return a list of dictionaries
        which conform to the Sitemap conditions
        """

    def getAdditionalURLs():
        """ Return the additional URL list
        """

    def updateRequest():
        """ Add a compression header to RESPONSE
        if allowed
        """

    numEntries = Attribute("Return the number of entries")


class CommonSitemapView(BrowserView):
    """
    Sitemap browser view
    """
    implements(ISitemapView)

    # mapping of key -> function, used to extend the returned
    # results with extra data extracted from each brain; results()
    # relies on a 'modification_date' entry being present
    additional_maps = {}

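    # A minimal sketch of a concrete subclass (illustrative only;
    # the class name and the DateTime-based lambda are assumptions,
    # not part of this module):
    #
    #   from DateTime import DateTime
    #
    #   class NewsSitemapView(CommonSitemapView):
    #       additional_maps = {
    #           'modification_date':
    #               lambda brain: DateTime(brain.ModificationDate),
    #       }
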
    def __init__(self, context, request):
        self.context = context
        self.request = request

    @property
    def portal_catalog(self):
        return getToolByName(self.context, 'portal_catalog')

    @property
    def portal(self):
        return getToolByName(self.context, 'portal_url').getPortalObject()

    @property
    def search_path(self):
        return '/'.join(aq_parent(aq_inner(self.context)).getPhysicalPath())

    def getFilteredObjects(self):
        """ Return the catalog brains to be listed in the sitemap
        (meant to be overridden by concrete sitemap views)
        """
        return []

    def defaultPagesInfo(self, func, url_filter=lambda x: x[0:x.rfind('/')]):
        """ Collect info about default pages.

        Returns a mapping {parent_url: func(brain), ...} built from
        all default-page brains, where:
        - func extracts data from a brain,
          e.g. func = lambda brain: DateTime(brain.ModificationDate)
        - url_filter maps a default page URL to the URL of its
          parent (container) object.
        """
        objects = self.portal_catalog.searchResults(is_default_page=True)
        return dict(
            (url_filter(url), func(brain))
            for url, brain in getUrlsObjects(objects))

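    # Illustrative example (the URL is an assumption): a default page
    # at http://site/folder/index_html is keyed under
    # http://site/folder, so the folder's sitemap entry can pick up
    # the default page's modification date in results() below.
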
    def getObjectsInfo(self, modification_date):
        """ Generate (url, modification date, additional info)
        tuples for the sitemap.
        """
        # get all brains, with the black-out filters applied
        objects = self.getBOFiltered(self.getFilteredObjects())
        for url, brain in getUrlsObjects(objects):
            yield url, modification_date(brain), \
                [(key, func(brain))
                 for key, func in self.additional_maps.iteritems()
                 # the modification date is already yielded separately
                 if func is not modification_date]

    def results(self):
        """ Prepare mappings for the template
        """
        operations = [OPERATIONS_PARSE.match(op).groups()
                      for op in self.context.getReg_exp()]

        # e.g. additional_maps == {'modification_date': <function>}
        modification_date = self.additional_maps['modification_date']
        # parent URL -> modification date of its default page
        urls_info = self.defaultPagesInfo(modification_date)
        num_entries = 0
        for url, date, additional_info in \
                self.getObjectsInfo(modification_date):

            # A Sitemap file can contain no more than 50,000 URLs.
            if num_entries >= SITEMAP_SIZE:
                break

            if url in urls_info:
                # an object whose default page was modified later
                # takes the default page's modification date
                default_page_modification_date = urls_info.get(url)
                date = date if date > default_page_modification_date \
                    else default_page_modification_date

            result = dict(additional_info)
            result.update({'modification_date': date.HTML4(),
                           'url': urlFilter(url, operations)})

            num_entries += 1
            yield result

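    # Each mapping yielded by results() looks roughly like
    # (values are illustrative):
    #   {'url': 'http://example.com/folder',
    #    'modification_date': '2011-01-01T12:00:00Z'}
    # plus any extra keys contributed by additional_maps.
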
    def getBOFiltered(self, objects):
        """Return black-out filtered objects.

        Every record in the blackout_list filter should follow the spec:
            [<filter name>:]<filter arguments>
        For example:
            1| index.html
            2| id:index.html
            3| path:/folder_1_level/obj_in_folder
            4| path:./folder_near_sitemap/obj_in_folder
            5| foo_filter:arg-1, arg-2

        1-> the default "id" filter is used - removes "index.html"
            objects;
        2-> explicit "id" filter - removes "index.html" objects;
        3-> "path" filter - removes the /folder_1_level/obj_in_folder
            object; the path is taken from the root of the Plone site;
        4-> same as 3), but the path is taken from the folder where
            the sitemap is located;
        5-> the filter name is "foo_filter" (an IBlackoutFilter named
            "foo_filter" must be registered), and it gets the filter
            arguments: arg-1, arg-2.

        For a detailed explanation, see the filters.txt doctest.
        """
        blackout_list = self.context.getBlackout_list()
        for frec in blackout_list:
            # split a record into an optional filter name and its
            # arguments; the filter name defaults to "id"
            fspec = frec.split(":", 1)
            fargs = fspec.pop()
            fname = fspec and fspec.pop() or "id"
            fengine = queryMultiAdapter((self.context, self.request),
                                        interface=IBlackoutFilter,
                                        name=fname)
            if fengine:
                objects = list(fengine.filterOut(objects, fargs))
        return objects

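    # A minimal sketch of a custom black-out filter (an assumption
    # for illustration, not part of this module; the adapter must
    # also be registered for IBlackoutFilter under its name, e.g.
    # via ZCML):
    #
    #   class FooFilter(object):
    #       implements(IBlackoutFilter)
    #
    #       def __init__(self, context, request):
    #           self.context = context
    #           self.request = request
    #
    #       def filterOut(self, fdata, fargs):
    #           # drop brains whose id appears in the arguments
    #           args = [a.strip() for a in fargs.split(",")]
    #           return [b for b in fdata if b.getId not in args]
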
    def updateRequest(self):
        self.request.RESPONSE.setHeader('Content-Type', 'text/xml')
        try:
            compression = self.context.enableHTTPCompression()
            if compression:
                compression(request=self.request)
        except Exception:
            # HTTP compression support is optional; ignore failures
            pass

    def getAdditionalURLs(self):
        return additionalURLs(self.context)

    @property
    def numEntries(self):
        # results() is a generator, so materialize it before len()
        return len(list(self.results())) + len(self.getAdditionalURLs())