source: Products.ecmigration/trunk/Products/ecmigration/migrate.py @ 757

Revision 757, 19.3 KB checked in by brent, 3 years ago (diff)

Saving tar file attempt

Line 
1# -*- coding: us-ascii -*-
2# _______________________________________________________________________
3#              __________                      .__       
4#   ____   ____\______   \____________  ___  __|__| ______
5# _/ __ \ /    \|     ___/\_  __ \__  \ \  \/  /  |/  ___/
6# \  ___/|   |  \    |     |  | \// __ \_>    <|  |\___ \
7#  \___  >___|  /____|     |__|  (____  /__/\_ \__/____  >
8#      \/     \/                      \/      \/       \/
9# _______________________________________________________________________
10#
11#    This file is part of the eduCommons software package.
12#
13#    Copyright (c) 2011 enPraxis, LLC
14#    http://enpraxis.net
15#
16#    This program is free software; you can redistribute it and/or modify
17#    it under the terms of the GNU General Public License as published by
18#    the Free Software Foundation, version 2.8 
19#
20#    This program is distributed in the hope that it will be useful,
21#    but WITHOUT ANY WARRANTY; without even the implied warranty of
22#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23#    GNU General Public License for more details.
24#
25#    You should have received a copy of the GNU General Public License
26#    along with this program; if not, write to the Free Software
27#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28# _______________________________________________________________________
29
30__author__ = 'Brent Lambert'
31__version__ = '$ Revision 0.0 $'[11:-2]
32
33
34from Products.CMFPlone.utils import _createObjectByType
35from Products.CMFCore.WorkflowCore import WorkflowException
36from cStringIO import StringIO
37from DateTime import DateTime
38import transaction
39import exceptions
40import cPickle as Pickle
41import Globals
42import os
43import tarfile
44import tempfile
45
46
47# Try to import annotation interfaces
48
49try:
50    from collective.contentlicensing.DublinCoreExtensions.interfaces import ILicensable, ILicense
51except ImportError:
52    from Products.ContentLicensing.DublinCoreExtensions.interfaces import ILicensable, ILicense
53
# Optional annotation interfaces.
#
# Each group is looked up in enpraxis.educommons.annotations.interfaces
# first and in enpraxis.educommons.interfaces second.  The matching
# has_* flag records whether either import worked.  The flag is set on
# BOTH success paths: previously a successful fallback import left the
# flag undefined, which surfaced later as a NameError in the code that
# tests it.

try:
    from enpraxis.educommons.annotations.interfaces import IClearCopyrightable, IClearCopyright
    has_clearcopyrightable = True
except ImportError:
    try:
        from enpraxis.educommons.interfaces import IClearCopyrightable, IClearCopyright
        has_clearcopyrightable = True
    except ImportError:
        has_clearcopyrightable = False

try:
    from enpraxis.educommons.annotations.interfaces import IAccessibilityCompliantable, IAccessibilityCompliant
    has_accessibilitycompliance = True
except ImportError:
    try:
        from enpraxis.educommons.interfaces import IAccessibilityCompliantable, IAccessibilityCompliant
        has_accessibilitycompliance = True
    except ImportError:
        has_accessibilitycompliance = False

try:
    from enpraxis.educommons.annotations.interfaces import ICourseOrderable, ICourseOrder
    has_courseorderable = True
except ImportError:
    try:
        from enpraxis.educommons.interfaces import ICourseOrderable, ICourseOrder
        has_courseorderable = True
    except ImportError:
        has_courseorderable = False
81
82
class MigrationException(Exception):
    """ Migration Error Exception

    Carries a single payload in ``value`` (in this module a short
    message such as 'Out of sync error').  ``str()`` of the exception
    is the ``repr()`` of that payload.
    """

    def __init__(self, value):
        # Initialise the base class as well so that ``args`` is filled
        # in; without it tracebacks, logging and pickling lose the
        # message.
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        return repr(self.value)
91
92
class TarArchiveManager(object):
    """ Archive data using tarfile

    Thin wrapper around a tarfile.TarFile used either for writing
    (default mode 'w:bz2') or for sequential reading (e.g. 'r:bz2').

    After every operation that touches a member the ``members`` list
    of the underlying TarFile is reset, so that tarfile does not keep
    one TarInfo per archived file for the lifetime of the archive.

    Instances also work as context managers; leaving the block closes
    the archive.
    """

    def __init__(self, archname, mode='w:bz2'):
        """ Open the tar file

        archname -- path of the archive on disk
        mode     -- a tarfile.open() mode string
        """
        self.tar_archive = tarfile.open(archname, mode)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never swallow an exception raised inside the block
        return False

    def _dropMemberCache(self):
        """ Circumvent caching for large file handling """
        self.tar_archive.members = []

    def addFileFromString(self, fn, data, lmod):
        """ Write a file from a string to the archive

        fn   -- name of the member inside the archive
        data -- the raw (byte) string to store
        lmod -- modification time, seconds since the epoch
        """
        tinfo = tarfile.TarInfo(fn)
        tinfo.size = len(data)
        tinfo.mtime = lmod
        self.tar_archive.addfile(tinfo, StringIO(data))
        self._dropMemberCache()
        print(fn)

    def addFileFromDisk(self, fn, arcname):
        """ Write a file from the filesystem to the archive

        fn      -- path of the file on disk
        arcname -- name of the member inside the archive
        """
        self.tar_archive.add(fn, arcname)
        # Same cache reset as the other methods, for consistency
        self._dropMemberCache()
        print(arcname)

    def getNextFileInfo(self):
        """ Return file information or None if no files left """
        return self.tar_archive.next()

    def readFile(self, info):
        """ Return the complete content of the member described by info

        Returns None when the member has no data stream (tarfile's
        extractfile() returns None for e.g. directories).
        """
        f = self.tar_archive.extractfile(info)
        if f is None:
            self._dropMemberCache()
            return None
        try:
            data = f.read()
        finally:
            f.close()
            self._dropMemberCache()
        return data

    def getFile(self, info):
        """ Return a file-like object for the member described by info

        The caller is responsible for closing it.  The result is None
        when the member has no data stream.
        """
        f = self.tar_archive.extractfile(info)
        self._dropMemberCache()
        return f

    def close(self):
        """ Close the tar file """
        self.tar_archive.close()
138       
139
class ECMigration:
    """ Migrate an eduCommons site

    exportContent() writes the site settings, the users and the
    content of the portal into a bzip2 compressed tar archive;
    importContent() reads such an archive and re-creates everything
    in the current portal.

    Archive members, as written by the export methods:

      <site>/settings.ecmigration_settings   pickled site settings
      <site>/userinfo.ecmigration_userinfo   pickled user records
      <path>.ecmigration_directory           pickled metadata of a
                                             folderish object
      <path>.ecmigration_metadata            pickled metadata of any
                                             other object, directly
                                             followed by the member
                                             <path> with the raw data
                                             of its primary field
    """

    # Old portal type -> portal type used in the exported metadata
    transforms = {
        'ECDepartment':'Division',
        'ECCourse':'Course',
        'ECDocument':'Document',
        'ECFile':'File',
        'ECFolder':'Folder',
        'ECImage':'Image',
        'ECLink':'Link',
        'GFolder':'Folder',
        'GDocument':'Document',
        'GFile':'File',
        'GImage':'Image',
        'GLink':'Link',
        }

    # Old object id -> id used in the exported path
    id_transforms = {
        'About':'about',
        'Help':'help',
        'terms_of_use':'terms-of-use',
        'privacy_policy':'privacy-policy'
        }

    # Old workflow state -> state used in the exported metadata
    state_transforms = {
        'Visible':'Published',
        'published':'Published',
        'Hidden':'InProgress',
        }

    # Archive file used when no file name is passed in
    archive_name = 'eduCommons-20110216.tar.bz2'

    # Portal types that are neither exported nor imported
    skipped_types = ['ECLogFolder', 'ECLog']

    # Catalog ids that are not exported (including everything below)
    skipped_ids = ['Feedback', 'feedback', 'Courses_listing', 'courselist']

    def __init__(self, context):
        self.context = context
        self.pw = context.portal_workflow
        self.imp = 0
        self.exp = 0

    def _archivePath(self, filename=None):
        """ Return the path of the migration archive.

        The archive lives in <current working directory>/var/migration.
        filename defaults to archive_name; an absolute filename is
        used as is (os.path.join semantics).
        """
        if filename is None:
            filename = self.archive_name
        return os.path.join(os.getcwd(), 'var', 'migration', filename)

    def importContent(self, filename=None):
        """ Import a migration

        Reads the archive member by member and dispatches on the
        extension of the member name.  The archive is closed even
        when the import fails.

        filename -- optional archive name, see _archivePath()

        Raises MigrationException when an unknown member is found or
        when a member cannot be read.
        """
        tf = TarArchiveManager(self._archivePath(filename), 'r:bz2')
        self.imp = 0
        try:
            info = tf.getNextFileInfo()
            while info is not None:
                self._importEntry(info, tf)
                info = tf.getNextFileInfo()
        finally:
            tf.close()

        print('Imported %d objects' % self.imp)

    def _importEntry(self, info, archive):
        """ Import the archive member described by info. """
        if not info.isfile():
            # The export only writes regular files; anything else
            # carries no data for us.
            print('### Skipping non-file archive member %s ###' % info.name)
            return
        ext = info.name.split('.')[-1]
        if 'ecmigration_settings' == ext:
            self.importSettings(self._getMetadata(info, archive))
        elif 'ecmigration_userinfo' == ext:
            self.importUsers(self._getMetadata(info, archive))
        elif 'ecmigration_directory' == ext:
            data = self._getMetadata(info, archive)
            if data['type'] not in self.skipped_types:
                self.importObject(data)
        elif 'ecmigration_metadata' == ext:
            data = self._getMetadata(info, archive)
            obj = self.importObject(data)
            # The member right after the metadata holds the file data.
            # It is consumed even if the object could not be imported,
            # to keep the reader in step with the archive.
            payload = archive.getNextFileInfo()
            if payload is not None and obj is not None:
                self.importObjectData(obj, self._getFileData(payload, archive))
        else:
            raise MigrationException('Out of sync error')

    def _readMember(self, info, archive):
        """ Return the raw content of an archive member. """
        f = archive.getFile(info)
        if f is None:
            raise MigrationException(
                'Could not get file %s from archive' % info.name)
        try:
            return f.read()
        finally:
            f.close()

    def _getMetadata(self, info, archive):
        """ Unpickle metadata and return it.

        NOTE: unpickling can execute arbitrary code.  Only import
        archives that were produced by a trusted export.
        """
        return Pickle.loads(self._readMember(info, archive))

    def _getFileData(self, info, archive):
        """ Get file data and return it """
        return self._readMember(info, archive)

    def importSettings(self, data):
        """ Import Settings """
        portal = self.context.portal_url.getPortalObject()
        props = portal.portal_properties.site_properties
        props.manage_changeProperties(default_language=data['default_language'])
        transaction.savepoint(optimistic=True)

    def importUsers(self, data):
        """ Import Users

        Every user is registered with a generated password first;
        afterwards the exported password entry is written straight
        into the user source, so the stored value is carried over
        unchanged.
        """
        users = data['users']
        portal = self.context.portal_url.getPortalObject()
        pr = portal.portal_registration
        for uid in users:
            record = users[uid]
            if not record['email']:
                # Placeholder address for users exported without one
                record['email'] = 'a@b.com'
            pr.addMember(id=uid,
                         password=pr.generatePassword(),
                         roles=record['roles'],
                         properties = {'fullname':record['fullname'],
                                       'username':uid,
                                       'email':record['email']})
        passwords = portal.acl_users.source_users._user_passwords
        for uid in users:
            passwords[uid] = users[uid]['password']
        transaction.savepoint(optimistic=True)

    def importObject(self, data):
        """ Import an object from data out of the tar archive

        The object is created below its parent when it does not
        exist yet and is then updated from data.

        Returns the object, or None when its parent does not exist
        or when an existing object has a different portal type.
        """
        oid = '/'.join(data['filename'].split(os.sep))
        print(oid)
        obj = self.getObjectByPath(oid)
        if obj is None:
            # Object does not exist, create it
            parts = oid.split('/')
            parent = self.getObjectByPath('/'.join(parts[:-1]))
            if parent is None:
                print('### No parent for %s, object skipped ###' % oid)
                return None
            nid = parts[-1]
            _createObjectByType(data['type'], parent, id=nid)
            obj = getattr(parent, nid)
        if getattr(obj, 'getId', None):
            if obj.portal_type == data['type']:
                self.updateObject(obj, data)
            else:
                obj = None
        return obj

    def importObjectData(self, obj, data):
        """ Import file data into the object. """
        pf = obj.getPrimaryField()
        pf.set(obj, data)

    def _getAnnotationAdapter(self, obj, marker, adapter):
        """ Return the annotation adapter for obj, or None.

        marker and adapter are the NAMES of the marker interface and
        of the adapter interface.  They are looked up in the module
        namespace because some of them are optional imports; None is
        returned when an interface is not available or when obj does
        not provide the marker interface.
        """
        namespace = globals()
        imarker = namespace.get(marker)
        iadapter = namespace.get(adapter)
        if imarker is None or iadapter is None:
            return None
        if not imarker.providedBy(obj):
            return None
        return iadapter(obj)

    def updateObject(self, obj, data):
        """ Update the object with the new settings """
        for x in data['fields']:
            field = obj.getField(x)
            if field:
                field.set(obj, data['fields'][x])

        obj.workflow_history = data['workflowhistory']
        try:
            self._setWorkflow(obj, data['review_state'])
        except WorkflowException:
            # Best effort: the object keeps the state it reached
            pass
        if 'rightsholder' in data:
            lic = self._getAnnotationAdapter(obj, 'ILicensable', 'ILicense')
            if lic is not None:
                lic.setRightsHolder(data['rightsholder'])
                lic.setRightsLicense(data['rightslicense'])
        if 'clearedcopyright' in data:
            cc = self._getAnnotationAdapter(
                obj, 'IClearCopyrightable', 'IClearCopyright')
            if cc is not None:
                cc.setClearedCopyright(data['clearedcopyright'])
        if 'accessibilitycompliant' in data:
            acc = self._getAnnotationAdapter(
                obj, 'IAccessibilityCompliantable', 'IAccessibilityCompliant')
            if acc is not None:
                acc.setAccessible(data['accessibilitycompliant'])
        if 'courseorder' in data:
            order = self._getAnnotationAdapter(
                obj, 'ICourseOrderable', 'ICourseOrder')
            if order is not None:
                order.setPositionInCourse(data['courseorder'])
        try:
            obj.reindexObject()
        except AttributeError:
            print('### Could not reindex %r ###' % (obj,))
        transaction.savepoint(optimistic=True)
        self.imp += 1

    def _setWorkflow(self, obj, state):
        """ Move obj towards the given review state.

        Fires 'submit', 'release' and 'publish' in that order and
        stops as soon as the review state of obj equals state.
        """
        pw = obj.portal_workflow
        for action in ('submit', 'release', 'publish'):
            if state == pw.getInfoFor(obj, 'review_state'):
                break
            pw.doActionFor(obj, action)

    def getObjectByPath(self, path):
        """ Return an object via its path

        The first path element (the site itself) is ignored, the
        remaining elements are resolved one by one starting at the
        context.  Returns None if an element does not exist.
        """
        obj = self.context
        for x in path.split('/')[1:]:
            # aq_base: only look at real children, not at acquired
            # ones.  Compare against None rather than testing for
            # truth, an existing but empty folder may be false.
            if getattr(obj.aq_base, x, None) is None:
                return None
            obj = obj[x]
        return obj

    def exportContent(self, filename=None):
        """ Export eduCommons content out of site ready for migration.

        filename -- optional archive name, see _archivePath()

        The target directory is created when needed and the archive
        is closed even when the export fails.
        """
        fn = self._archivePath(filename)
        folder = os.path.dirname(fn)
        if not os.path.exists(folder):
            os.makedirs(folder)
        brains = self.context.portal_catalog(path={'query':'/', 'depth':2,},)
        tf = TarArchiveManager(fn, 'w:bz2')
        try:
            self.exportSettings(tf)
            self.exportUsers(tf)
            self.exp = 0
            self.exportObjects(brains, tf, 1000)
        finally:
            tf.close()
        print('Exported %d objects' % self.exp)

    def exportSettings(self, archive):
        """ Export the site settings (the default language) """
        props = self.context.portal_properties.site_properties
        fn = '%s/settings.ecmigration_settings' %self.context.getId()
        objstore = {'filename':fn, 'type':'Settings_ecmigration',}
        objstore['default_language'] = props.getProperty('default_language')
        data = Pickle.dumps(objstore)
        archive.addFileFromString(fn, data, float(DateTime()))

    def exportUsers(self, archive):
        """ Export users with password entry, full name, email, roles """
        fn = '%s/userinfo.ecmigration_userinfo' %self.context.getId()
        objstore = {'filename':fn, 'type':'UserInfo_ecmigration', 'users':{},}
        acl_users = self.context.acl_users
        for user in acl_users.getUsers():
            username = user.getName()
            password = acl_users.source_users._user_passwords[username]
            objstore['users'][username] = {
                'password':password,
                'fullname':user.getProperty('fullname'),
                'email':user.getProperty('email'),
                'roles':user.getRoles(),
                }
        data = Pickle.dumps(objstore)
        archive.addFileFromString(fn, data, float(DateTime()))

    def exportObjects(self, brains, tf, depth=0):
        """ Export the given catalog brains.

        Folderish objects are followed recursively, at most depth
        levels deep.  Brains with an id in skipped_ids are left out
        together with everything below them.
        """
        catalog = self.context.portal_catalog
        for x in brains:
            if x.getId in self.skipped_ids:
                continue
            self.exportObject(x, tf)
            if depth and x.is_folderish:
                children = catalog(path={'query':x.getPath(), 'depth':1,},)
                self.exportObjects(children, tf, depth-1)

    def exportObject(self, brain, archive):
        """ Export an object """
        if 'Download this Course' == brain.Title:
            return
        fn = self._getPath(brain)
        obj = brain.getObject()
        if obj.portal_type not in self.skipped_types:
            # Get the metadata and store it
            if obj.isPrincipiaFolderish:
                self.exportMetadata(obj, fn, '.ecmigration_directory', archive)
            else:
                self.exportMetadata(obj, fn, '.ecmigration_metadata', archive)
                self.exportFile(obj, fn, archive)
        self.exp += 1

    def _addDataToArchive(self, archive, data, arcname):
        """ Store data as member arcname, going through a temp file.

        The archive entry gets the attributes of the temporary file.
        """
        tf = tempfile.NamedTemporaryFile()
        try:
            tf.write(data)
            # Make sure everything is on disk before tar reads it
            tf.flush()
            archive.addFileFromDisk(tf.name, arcname)
        finally:
            tf.close()

    def exportMetadata(self, obj, fn, ftype, archive):
        """ Export an object's metadata

        fn    -- archive path of the object
        ftype -- extension appended to fn for the metadata member
        """
        objstore = {'filename':fn}
        self._storeMetaData(obj, objstore)
        data = Pickle.dumps(objstore)
        self._addDataToArchive(archive, data, fn + ftype)

    def exportFile(self, obj, fn, archive):
        """ Export the primary field of an object as a file in the archive """
        pfield = obj.getPrimaryField()
        fd = pfield.get(obj)
        if isinstance(fd, str):
            data = fd
        else:
            # File like values keep their content in .data, which may
            # itself be a wrapper with a .data attribute
            data = fd.data
            if not isinstance(data, str):
                data = fd.data.data
        self._addDataToArchive(archive, data, fn)

    def _getPath(self, brain):
        """ Return the archive path for a brain.

        The catalog path is split on '/', ids are transformed, the
        leading empty element is dropped and the rest is joined with
        os.sep.
        """
        fn = brain.getPath().split('/')
        self._transformId(fn)
        return os.sep.join(fn[1:])

    def _storeMetaData(self, obj, objstore):
        """ Export Object Metadata

        Fills objstore with the type, the schema field values, the
        owner, the workflow information and the annotation values of
        obj.
        """
        objstore['type'] = self._transformType(obj.portal_type)
        # Get field data
        pfield = obj.getPrimaryField()
        if objstore['type'] in ['School', 'Division', 'Course']:
            # For these types the primary field is stored like any
            # other field
            pfield = None
        # The ID comes from the filename; file and image data is
        # exported separately
        skipped = ['id', 'locallyAllowedTypes', 'immediatelyAddableTypes',
                   'constrainTypesMode', 'file', 'image']
        fields = {}
        objstore['fields'] = fields
        for x in obj.Schema().fields():
            if x != pfield:
                fid = x.getName()
                if fid in skipped:
                    pass
                elif 'clearedCopyright' == fid:
                    # if clearedcopyright flag is stored as a field
                    # move it to annotations instead
                    objstore['clearedcopyright'] = x.get(obj)
                elif 'text' == fid:
                    # If html then remove all resolveUIDs
                    ds = self.context.portal_transforms.convert(
                        'fck_ruid_to_url', x.get(obj), context=obj)
                    fields[fid] = ds.getData()
                else:
                    fields[fid] = x.get(obj)
        # Get non field related data
        objstore['owner'] = obj.getOwner().getId()
        objstore['workflowhistory'] = obj.workflow_history
        objstore['review_state'] = self._transformState(
            obj.portal_workflow.getInfoFor(obj, 'review_state'))
        # Get annotation related data
        lic = self._getAnnotationAdapter(obj, 'ILicensable', 'ILicense')
        if lic is not None:
            objstore['rightsholder'] = lic.getRightsHolder()
            objstore['rightslicense'] = lic.getRightsLicense()
        cc = self._getAnnotationAdapter(
            obj, 'IClearCopyrightable', 'IClearCopyright')
        if cc is not None:
            objstore['clearedcopyright'] = cc.getClearedCopyright()
        acc = self._getAnnotationAdapter(
            obj, 'IAccessibilityCompliantable', 'IAccessibilityCompliant')
        if acc is not None:
            objstore['accessibilitycompliant'] = acc.getAccessible()
        order = self._getAnnotationAdapter(
            obj, 'ICourseOrderable', 'ICourseOrder')
        if order is not None:
            objstore['courseorder'] = order.getPositionInCourse()

    def _transformType(self, ptype):
        """ Change type """
        return self.transforms.get(ptype, ptype)

    def _transformId(self, fn):
        """ Change IDs

        fn is the list of path elements ('', site id, ...); it is
        changed in place.
        """
        if len(fn) > 2 and fn[2] in self.id_transforms:
            fn[2] = self.id_transforms[fn[2]]
            if len(fn) > 3 and 'about' == fn[2] and 'index_html' == fn[3]:
                fn[3] = 'abouttext_text'
            if len(fn) > 3 and 'help' == fn[2] and 'index_html' == fn[3]:
                fn[3] = 'help_text'
        if len(fn) > 3 and fn[3] in self.id_transforms:
            fn[3] = self.id_transforms[fn[3]]

    def _transformState(self, state):
        """ Change workflow state """
        return self.state_transforms.get(state, state)
Note: See TracBrowser for help on using the repository browser.