[1593] | 1 | from zope.interface import implements, Interface, Attribute |
---|
[3002] | 2 | from zope.component import queryMultiAdapter |
---|
[1593] | 3 | |
---|
[2742] | 4 | from Acquisition import aq_inner, aq_parent |
---|
[1593] | 5 | from Products.Five import BrowserView |
---|
| 6 | from Products.CMFCore.utils import getToolByName |
---|
| 7 | |
---|
[3163] | 8 | #from quintagroup.plonegooglesitemaps \ |
---|
| 9 | # import qPloneGoogleSitemapsMessageFactory as _ |
---|
[3002] | 10 | from quintagroup.plonegooglesitemaps.interfaces import IBlackoutFilter |
---|
[3152] | 11 | from quintagroup.plonegooglesitemaps.browser.utils import additionalURLs, \ |
---|
[3480] | 12 | getUrlsObjects, urlFilter, OPERATIONS_PARSE |
---|
[1593] | 13 | |
---|
[3480] | 14 | from itertools import chain, starmap |
---|
[1593] | 15 | |
---|
[3480] | 16 | |
---|
| 17 | SITEMAP_SIZE = 50000 |
---|
| 18 | |
---|
class ISitemapView(Interface):
    """Contract implemented by sitemap browser views."""

    numEntries = Attribute("Return number of entries")

    def results():
        """Return list of dictionary objects
        which confirm Sitemap conditions
        """

    def getAdditionalURLs():
        """Return additional URL list
        """

    def updateRequest():
        """ Add compression header to RESPONSE
        if allowed
        """
---|
| 39 | |
---|
[3152] | 40 | |
---|
class CommonSitemapView(BrowserView):
    """Base sitemap browser view.

    Subclasses provide ``getFilteredObjects`` and an ``additional_maps``
    mapping used to enrich every sitemap entry.
    """
    implements(ISitemapView)

    # Mapping of extra result keys to functions that extract the value
    # from a catalog brain, e.g. {'modification_date': lambda brain: ...}.
    # NOTE(review): the methods below index and iterate this as a dict
    # (it must contain 'modification_date'), so subclasses have to
    # override this tuple default with a real mapping — confirm against
    # the concrete sitemap views.
    additional_maps = ()

    def __init__(self, context, request):
        self.context = context
        self.request = request

    @property
    def portal_catalog(self):
        """The portal_catalog tool, acquired from the context."""
        return getToolByName(self.context, 'portal_catalog')

    @property
    def portal(self):
        """The portal (site root) object."""
        return getToolByName(self.context, 'portal_url').getPortalObject()

    @property
    def search_path(self):
        """Physical path of the folder the sitemap object lives in."""
        return '/'.join(aq_parent(aq_inner(self.context)).getPhysicalPath())

    def getFilteredObjects(self):
        """Return catalog brains to include in the sitemap.

        Subclasses override this; the base implementation is empty.
        """
        return []

    def defaultPagesInfo(self, func, url_filter=lambda x: x[0:x.rfind('/')]):
        """Map container URLs to ``func(brain)`` for every default page.

        func -- extracts info from a brain,
                e.g. ``lambda x: DateTime(x.ModificationDate)``.
        url_filter -- derives the container URL from a default page URL;
                by default strips the last path segment.

        Returns {'http://...container': func(brain), ...}.
        """
        objects = self.portal_catalog.searchResults(is_default_page=True)
        return dict(
            (url_filter(url), func(brain))
            for url, brain in getUrlsObjects(objects))

    def getObjectsInfo(self, modification_date):
        """Yield (url, modification_date(brain), extra_info) per object.

        extra_info is a list of (key, func(brain)) pairs built from
        ``additional_maps``, excluding the modification-date entry
        itself.  Objects are black-out filtered first.
        """
        objects = self.getBOFiltered(self.getFilteredObjects())
        for url, brain in getUrlsObjects(objects):
            # NOTE(review): this comparison only skips the entry when the
            # mapped function's __name__ matches the key — verify the
            # functions in additional_maps are named accordingly.
            extra = [(key, func(brain))
                     for key, func in self.additional_maps.items()
                     if key != modification_date.__name__]
            yield url, modification_date(brain), extra

    def results(self):
        """Prepare mapping for template.

        Yields at most SITEMAP_SIZE dicts of the form
        {'url': ..., 'modification_date': ..., <additional keys>...}.
        """
        operations = [OPERATIONS_PARSE.match(op).groups()
                      for op in self.context.getReg_exp()]

        # eg: additional_maps == {'modification_date': lambda x: xxx)}
        modification_date = self.additional_maps['modification_date']
        urls_info = self.defaultPagesInfo(modification_date)

        num_entries = 0
        objects_info = self.getObjectsInfo(modification_date)
        for url, date, additional_info in objects_info:
            # A sitemap file can contain no more than 50,000 URLs, so
            # stop once the limit is reached (was '>' before, which let
            # one extra entry through).
            if num_entries >= SITEMAP_SIZE:
                break

            if url in urls_info:
                # A container inherits its default page's modification
                # date when that date is newer.
                default_page_modification_date = urls_info.get(url)
                date = date if date > default_page_modification_date \
                    else default_page_modification_date

            result = dict(additional_info)
            result.update({'modification_date': date.HTML4(),
                           'url': urlFilter(url, operations)})

            num_entries += 1
            yield result

    def getBOFiltered(self, objects):
        """Return black-out filtered objects
        Every record in blackout_list filter should follow the spec:
          [<filter name>:]<filter arguments>
        For example:
        1| index.html
        2| id:index.html
        3| path:/folder_1_level/obj_in_folder
        4| path:./folder_near_sitemap/obj_in_folder
        5| foo_filter:arg-1, arg-2

        1->used default "id" filter - remove "index.html" objects;
        2->explicit "id" filter - remove "index.html" objects;
        3->"path" filter - remove /folder_1_level/obj_in_folder object,
           path from the root of the plone site;
        4->same to 3), but path get from the folder, where sitemap is
           located;
        5->filter name is "foo_filter" (must be registered IBlackoutFilter,
           named "foo_filter"), which get filter arguments: arg-1, arg-2

        Detailed explanation look in filters.txt doctest.
        """
        blackout_list = self.context.getBlackout_list()
        for frec in blackout_list:
            fspec = frec.split(":", 1)
            fargs = fspec.pop()
            # If no explicit name was given, fall back to the "id" filter.
            fname = fspec and fspec.pop() or "id"
            fengine = queryMultiAdapter((self.context, self.request),
                                        interface=IBlackoutFilter, name=fname)
            if fengine:
                objects = list(fengine.filterOut(objects, fargs))
        return objects

    def updateRequest(self):
        """Set the XML content type and, when the sitemap object
        supports it, enable HTTP compression on the response.
        """
        self.request.RESPONSE.setHeader('Content-Type', 'text/xml')
        try:
            compression = self.context.enableHTTPCompression()
            if compression:
                compression(request=self.request)
        except Exception:
            # Best effort: compression support is optional and any
            # failure here must not break sitemap rendering.
            pass

    def getAdditionalURLs(self):
        """Return additional URL list configured on the sitemap object."""
        return additionalURLs(self.context)

    @property
    def numEntries(self):
        """Return number of entries (catalog results + additional URLs)."""
        # results() is a generator, so it has no len(); count by
        # iterating it instead (len(self.results()) raised TypeError).
        return sum(1 for _ in self.results()) + len(self.getAdditionalURLs())
---|