Context Navigation

adapters.py @ 1241

Last change on this file since 1241 was 1241, checked in by koval, 15 years ago
manifest data corrector for Blog content type now checks for existence of folder with images
File size: 14.0 KB

Rev	Line
[532]	1	import re
	2	from xml.dom import minidom
	3	from types import ListType
	4	from types import TupleType
	5
[533]	6	from zope.interface import implements, classProvides
[554]	7	from zope.app.annotation.interfaces import IAnnotations
	8
[532]	9	from Products.CMFPlone.Portal import PloneSite
[533]	10	from Products.CMFCore import utils
[532]	11
[533]	12	from collective.transmogrifier.interfaces import ISection, ISectionBlueprint
	13	from collective.transmogrifier.utils import defaultMatcher
	14
[612]	15	from quintagroup.transmogrifier.interfaces import IExportDataCorrector, IImportDataCorrector
[532]	16	from quintagroup.transmogrifier.adapters.exporting import ReferenceExporter
[533]	17	from quintagroup.transmogrifier.manifest import ManifestExporterSection
[554]	18	from quintagroup.transmogrifier.logger import VALIDATIONKEY
[532]	19
	20	from quintagroup.transmogrifier.simpleblog2quills.interfaces import IExportItemManipulator, IBlog
	21
[554]	22	# URLs of the sites where the blog is located (needed to fix links in entries)
	23	SITE_URLS = []
[532]	24	IMAGE_FOLDER = 'images'
[1241]	25	IMAGE_FOLDER_TYPE = 'Large Plone Folder'
[554]	26	# these registries are needed to avoid losing images with equal ids
	27	IMAGE_IDS = []
	28	IMAGE_PATHS = {}
[532]	29
class BlogManifest(object):
    """ Export data corrector for the manifest of a blog.

    Keeps only 'BlogEntry' and 'BlogFolder' records in the manifest and
    makes sure a record with the IMAGE_FOLDER id is listed in it.
    """
    implements(IExportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """ Rewrite the manifest XML stored in data['data'].

        Returns the same ``data`` mapping; its 'data' value is replaced
        by the corrected document serialized as UTF-8.
        """
        # flag that indicates whether the 'images' folder must be added
        # to the manifest
        need_to_add = True

        doc = minidom.parseString(data['data'])
        root = doc.documentElement
        for child in root.getElementsByTagName('record'):
            if child.getAttribute('type') not in ('BlogEntry', 'BlogFolder'):
                root.removeChild(child)
                continue

            # A record without a text child has no id that could clash
            # with IMAGE_FOLDER, so there is nothing to compare.
            id_node = child.firstChild
            if id_node is None or id_node.nodeValue is None:
                continue

            # Compare the text directly: wrapping it in str() would raise
            # UnicodeEncodeError on a non-ASCII id under Python 2.
            if id_node.nodeValue.strip() == IMAGE_FOLDER:
                # blog already contains object with IMAGE_FOLDER id
                need_to_add = False

        if need_to_add:
            folder = doc.createElement('record')
            folder.setAttribute('type', IMAGE_FOLDER_TYPE)
            folder.appendChild(doc.createTextNode(IMAGE_FOLDER))
            root.appendChild(folder)

        data['data'] = doc.toxml('utf-8')
        return data
	57
class BlogFolderManifest(object):
    """ Export data corrector for the manifest of a blog folder.

    Drops every manifest record whose type is neither 'BlogEntry' nor
    'BlogFolder'.
    """
    implements(IExportDataCorrector)

    def __init__(self, context):
        self.context = context

    def __call__(self, data):
        """ Filter the manifest XML in data['data'] and return ``data``.
        """
        kept_types = ('BlogEntry', 'BlogFolder')
        manifest = minidom.parseString(data['data'])
        top = manifest.documentElement

        # getElementsByTagName hands back a separate list, so records can
        # be detached from the tree while walking it.
        for record in top.getElementsByTagName('record'):
            record_type = record.getAttribute('type')
            if record_type in kept_types:
                continue
            top.removeChild(record)

        data['data'] = manifest.toxml('utf-8')
        return data
	72
class BlogEntryManifest(object):
    """ Item manipulator that strips the manifest from a blog entry item.
    """
    implements(IExportItemManipulator)

    def __init__(self, context):
        self.context = context

    def __call__(self, item, **kw):
        """ Remove item['_files']['manifest'] when present; return ``item``.
        """
        # content contained in a BlogEntry isn't exported, so its manifest
        # must not be exported either
        if '_files' not in item:
            return item
        files = item['_files']
        if 'manifest' in files:
            del files['manifest']
        return item
	84
def recurseToInterface(item, ifaces):
    """ Walk up the aq_parent chain until an object providing one of
    `ifaces` is found, and return that object.

    `ifaces` is a single interface or a list/tuple of interfaces. `item`
    itself is checked first. Returns None when the portal root is reached,
    or when the chain ends (an object has no aq_parent), without a match.
    """
    if not isinstance(ifaces, (list, tuple)):
        ifaces = [ifaces]

    for iface in ifaces:
        if iface.providedBy(item):
            return item

    current = item
    while True:
        # An object with no aq_parent ends the chain; report "not found"
        # instead of failing with AttributeError.
        parent = getattr(current, 'aq_parent', None)
        if parent is None:
            return None
        for iface in ifaces:
            if iface.providedBy(parent):
                return parent
        if isinstance(parent, PloneSite):
            # Stop when we get to the portal root.
            return None
        current = parent
	102
def getUniqueId(image_id):
    """ Return an id that is not present in the IMAGE_IDS registry.

    An id that is not registered yet is returned unchanged. Otherwise a
    counter (1, 2, ...) is inserted before the last '.' of the id (or
    appended when the id has no '.') until an unused id is found.
    """
    if image_id not in IMAGE_IDS:
        return image_id

    dot = image_id.rfind('.')
    if dot == -1:
        stem, suffix = image_id, ''
    else:
        stem, suffix = image_id[:dot], image_id[dot:]

    counter = 1
    while True:
        candidate = stem + str(counter) + suffix
        if candidate not in IMAGE_IDS:
            return candidate
        counter += 1
	120
class BlogEntryExporter(ReferenceExporter):
    """ Export data corrector that rewrites the src="..." links found in
    the 'body' field of a blog entry.

    Links to images stored inside the blog are pointed at the blog's
    IMAGE_FOLDER; other links that resolve to site content are made
    relative.
    """
    implements(IExportDataCorrector)

    SRC = re.compile(r'src="([^"]+)"')

    def __init__(self, context):
        self.context = context
        self.portal_url = utils.getToolByName(self.context, 'portal_url')
        self.portal = self.portal_url.getPortalObject()

    def __call__(self, data):
        """ Rewrite links in the marshalled XML held in data['data'].

        Returns ``data``. It is returned as produced by the parent
        exporter when there is no 'body' field, the body is empty or the
        entry is not located inside a blog.
        """
        data = super(BlogEntryExporter, self).__call__(data)
        doc = minidom.parseString(data['data'])
        body_fields = [field for field in doc.getElementsByTagName('field')
                       if field.getAttribute('name') == 'body']
        if not body_fields:
            return data
        elem = body_fields[0]

        # an empty body field has no text node and hence no links to fix
        if elem.firstChild is None or elem.firstChild.nodeValue is None:
            return data

        text = elem.firstChild.nodeValue
        urls = self.SRC.findall(text)
        blog = recurseToInterface(self.context, IBlog)
        if blog is None:
            # links are rewritten relative to the blog; without one there
            # is nothing to compute them against
            return data
        blog_path = blog.getPhysicalPath()
        context_path = self.context.getPhysicalPath()

        for url in urls:
            url = str(url)
            image_id = url.rsplit('/', 1)[-1]
            # skip links with illegal url schema
            if '://' in url and not url.startswith('http://'):
                continue

            if url.startswith('http://'):
                # convert absolute links into one of the known sites to
                # relative links
                for site in SITE_URLS:
                    if not url.startswith(site):
                        continue
                    relative_url = url[len(site):].strip('/')
                    # if link is broken we'll get an AttributeError
                    try:
                        image = self.portal.unrestrictedTraverse(relative_url)
                    except AttributeError:
                        break
                    # check whether image is stored in blog
                    if recurseToInterface(image, IBlog) is not None:
                        new_url = self._imageFolderUrl(image, image_id,
                                                       blog_path, context_path)
                    else:
                        new_url = (self._upToPortal(context_path) + '/' +
                                   relative_url)
                    text = text.replace(url, new_url, 1)
                    break
            else:
                # if link is broken we'll get an AttributeError
                try:
                    if url.startswith('/'):
                        image = self.portal.unrestrictedTraverse(url.strip('/'))
                    else:
                        image = self.context.unrestrictedTraverse(url)
                except AttributeError:
                    continue

                if recurseToInterface(image, IBlog) is not None:
                    new_url = self._imageFolderUrl(image, image_id,
                                                   blog_path, context_path)
                    text = text.replace(url, new_url, 1)
                elif url.startswith('../'):
                    # remove '../' from the start of string
                    new_url = url[3:]
                    text = text.replace(url, new_url, 1)
                elif url.startswith('/'):
                    # these links didn't work so rewrite them with '..'
                    new_url = self._upToPortal(context_path) + url
                    text = text.replace(url, new_url, 1)

        elem.firstChild.nodeValue = text
        data['data'] = doc.toxml('utf-8')
        return data

    def _parentPrefix(self, level):
        """ Return `level` '..' segments joined with '/'.
        """
        return '/'.join(['..'] * level)

    def _upToPortal(self, context_path):
        """ Return the '..' prefix leading from the context to the portal.
        """
        # find how many levels self.context is under portal root
        # NOTE(review): the constant 3 presumably accounts for the leading
        # '' element, the portal id and the context's own id -- confirm.
        return self._parentPrefix(len(context_path) - 3)

    def _imageFolderUrl(self, image, image_id, blog_path, context_path):
        """ Register `image` and return its link inside IMAGE_FOLDER.
        """
        image_id = self.fixImageId(image, image_id, blog_path)
        level = len(context_path) - len(blog_path) - 1
        # NOTE(review): with level == 0 the prefix is empty and the joined
        # link starts with '/' -- confirm this is the intended form.
        return '/'.join((self._parentPrefix(level), IMAGE_FOLDER, image_id))

    def fixImageId(self, image, image_id, blog_path):
        """ Return the id `image` has in the blog's image folder.

        An image seen for the first time is recorded in the IMAGE_IDS and
        IMAGE_PATHS registries, under a generated unique id when
        `image_id` is already used by another image. An image that is
        already registered gets the id it was registered with.
        """
        image_path = '/'.join(image.getPhysicalPath())
        if image_path in IMAGE_PATHS:
            # Reuse the registered id. Returning `image_id` here would
            # link to a different image whenever this one was renamed.
            return IMAGE_PATHS[image_path].rsplit('/', 1)[-1]

        # getUniqueId returns the id unchanged when it is not taken yet
        image_id = getUniqueId(image_id)
        IMAGE_IDS.append(image_id)
        IMAGE_PATHS[image_path] = '/'.join(blog_path[2:] + (IMAGE_FOLDER, image_id))

        return image_id
	223
class PathRewriter(object):
    """ Item manipulator that moves an item located inside a blog into
    the blog's IMAGE_FOLDER by rewriting the item's path.
    """
    implements(IExportItemManipulator)

    def __init__(self, context):
        self.context = context

    def __call__(self, item, **kw):
        """ Rewrite the path of `item` and return `item`.

        The name of the item's path key is passed as the 'path' keyword.
        The item is returned untouched when no path key is given or the
        context is not located inside a blog. Otherwise the previous path
        is kept under '_oldpath', and the id stored in the item's
        marshall file is updated when the id had to be changed.
        """
        pathkey = kw.get('path')
        if pathkey is None:
            return item

        path = item[pathkey]
        blog = recurseToInterface(self.context, IBlog)
        if blog is None:
            return item

        blog_path = blog.getPhysicalPath()
        full_path = '/'.join(self.context.getPhysicalPath())
        image_id = path.rsplit('/', 1)[-1]

        if full_path in IMAGE_PATHS:
            # Already registered, possibly under a renamed id: take the
            # id from the registered path so the rename is not missed.
            new_path = IMAGE_PATHS[full_path]
            unique_id = new_path.rsplit('/', 1)[-1]
        else:
            unique_id = getUniqueId(image_id)
            new_path = '/'.join(blog_path[2:] + (IMAGE_FOLDER, unique_id))

            # Register the id that is actually used. Registering the
            # original id would let the next clash produce the same
            # "unique" id again.
            IMAGE_IDS.append(unique_id)
            IMAGE_PATHS[full_path] = new_path
        modified = image_id != unique_id

        # change item's path
        item[pathkey] = new_path
        item['_oldpath'] = path

        # now we need to fix object id in .marshall.xml
        if modified:
            if '_files' in item and 'marshall' in item['_files']:
                doc = minidom.parseString(item['_files']['marshall']['data'])
                elem = [i for i in doc.getElementsByTagName('field')
                        if i.getAttribute('name') == 'id'][0]
                elem.firstChild.nodeValue = '\n\t\t%s\n\t' % unique_id
                item['_files']['marshall']['data'] = doc.toxml('utf-8')

        return item
[533]	268
	269	class ImageFolderSection(object):
	270	""" This section will generate manifest files for image folders in blog.
	271	"""
	272	classProvides(ISectionBlueprint)
	273	implements(ISection)
	274
	275	def __init__(self, transmogrifier, name, options, previous):
	276	self.previous = previous
	277	self.transmogrifier = transmogrifier
	278
[554]	279	self.flagkey = defaultMatcher(options, 'old-path-key', name, 'oldpath')
[533]	280	self.typekey = defaultMatcher(options, 'type-key', name, 'type')
	281	self.pathkey = defaultMatcher(options, 'path-key', name, 'path')
	282
[554]	283
	284	site_urls = options.get('site-urls', '')
	285	site_urls = filter(None, [i.strip() for i in site_urls.splitlines()])
	286	for i in site_urls:
	287	SITE_URLS.append(i)
	288
	289	self.anno = IAnnotations(transmogrifier)
	290
[533]	291	def __iter__(self):
	292	folders = {}
	293
[554]	294	# safely get logging storage
	295	if VALIDATIONKEY in self.anno:
	296	log_storage = self.anno[VALIDATIONKEY]
	297	else:
	298	log_storage = None
	299
[533]	300	for item in self.previous:
[554]	301	item_keys = item.keys()
	302	pathkey = self.pathkey(*item_keys)[0]
	303	typekey = self.typekey(*item_keys)[0]
	304	oldpathkey = self.flagkey(*item_keys)[0]
[533]	305
	306	# collect data about images moved to folders
[554]	307	if pathkey and typekey and oldpathkey:
[533]	308	path = item[pathkey]
[554]	309	old_path = item[oldpathkey]
[533]	310	type_ = item[typekey]
	311	folder_path, image_id = path.rsplit('/', 1)
	312	folders.setdefault(folder_path, []).append((image_id, type_))
	313
[554]	314	# update logging data (path) for this item
	315	if log_storage and log_storage[-1] == old_path:
	316	log_storage.pop()
	317	log_storage.append(path)
	318
[533]	319	yield item
	320
	321	# generate manifests for those image folders
	322	items = []
	323	for folder, entries in folders.items():
	324	items.append({'_entries': entries, pathkey: folder})
	325	exporter = ManifestExporterSection(self.transmogrifier, 'manifest', {'blueprint': 'manifest'}, iter(items))
	326	for item in exporter:
	327	yield item
[554]	328
	329	# clean registries
	330	while IMAGE_IDS: IMAGE_IDS.pop()
	331	while SITE_URLS: SITE_URLS.pop()
	332	IMAGE_PATHS.clear()
[612]	333
	334	class WorkflowImporter(object):
	335	""" This adapter tries to convert all possible workflow histories to
	336	simple_publication_workflow history.
	337	"""
	338	implements(IImportDataCorrector)
	339
	340	def __init__(self, context):
	341	self.context = context
	342
	343	def __call__(self, data):
	344	doc = minidom.parseString(data['data'])
	345	wh = [i for i in doc.getElementsByTagName('cmf:workflow')]
	346	if not wh:
	347	# we don't have such workflow history
	348	return data
	349
	350	wh = wh[0]
	351	workflow_id = wh.getAttribute('id')
	352	if workflow_id == 'simple_publication_workflow':
	353	return data
	354	wh.setAttribute('id', 'simple_publication_workflow')
	355	if workflow_id == 'simpleblog_workflow':
	356	self.fixSimpleBlogWorkflow(wh)
	357	else:
	358	self.fixWorkflow(wh)
	359
	360	data['data'] = doc.toxml('utf-8')
	361	return data
	362
	363	def fixSimpleBlogWorkflow(self, wh):
	364	for history in wh.getElementsByTagName('cmf:history'):
	365	for var in history.getElementsByTagName('cmf:var'):
	366	id_ = var.getAttribute('id')
	367	value = var.getAttribute('value')
	368	if id_ == 'review_state' and value == 'draft':
	369	var.setAttribute('value', 'private')
	370
	371	def fixWorkflow(self, wh):
	372	for history in wh.getElementsByTagName('cmf:history'):
	373	for var in history.getElementsByTagName('cmf:var'):
	374	id_ = var.getAttribute('id')
	375	value = var.getAttribute('value')
	376	if id_ == 'review_state' and value == 'visible':
	377	var.setAttribute('value', 'published')

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: products/quintagroup.transmogrifier.simpleblog2quills/trunk/quintagroup/transmogrifier/simpleblog2quills/adapters.py @ 1241

Download in other formats: