source: Products.ecmigration/trunk/Products/ecmigration/migrate.py @ 757

Revision 757, 19.3 KB checked in by brent, 3 years ago (diff)

Saving tar file attempt

RevLine 
[754]1# -*- coding: us-ascii -*-
2# _______________________________________________________________________
3#              __________                      .__       
4#   ____   ____\______   \____________  ___  __|__| ______
5# _/ __ \ /    \|     ___/\_  __ \__  \ \  \/  /  |/  ___/
6# \  ___/|   |  \    |     |  | \// __ \_>    <|  |\___ \
7#  \___  >___|  /____|     |__|  (____  /__/\_ \__/____  >
8#      \/     \/                      \/      \/       \/
9# _______________________________________________________________________
10#
11#    This file is part of the eduCommons software package.
12#
13#    Copyright (c) 2011 enPraxis, LLC
14#    http://enpraxis.net
15#
16#    This program is free software; you can redistribute it and/or modify
17#    it under the terms of the GNU General Public License as published by
18#    the Free Software Foundation, version 2.8 
19#
20#    This program is distributed in the hope that it will be useful,
21#    but WITHOUT ANY WARRANTY; without even the implied warranty of
22#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23#    GNU General Public License for more details.
24#
25#    You should have received a copy of the GNU General Public License
26#    along with this program; if not, write to the Free Software
27#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
28# _______________________________________________________________________
29
# Module metadata
__author__ = 'Brent Lambert'
# The slice cuts the bare version number ('0.0') out of the keyword string
__version__ = '$ Revision 0.0 $'[11:-2]
32
33
34from Products.CMFPlone.utils import _createObjectByType
[757]35from Products.CMFCore.WorkflowCore import WorkflowException
[754]36from cStringIO import StringIO
37from DateTime import DateTime
[757]38import transaction
39import exceptions
[754]40import cPickle as Pickle
41import Globals
42import os
43import tarfile
[756]44import tempfile
[754]45
46
47# Try to import annotation interfaces
48
49try:
50    from collective.contentlicensing.DublinCoreExtensions.interfaces import ILicensable, ILicense
51except ImportError:
52    from Products.ContentLicensing.DublinCoreExtensions.interfaces import ILicensable, ILicense
53
# Optional annotation interfaces.  Two candidate locations are tried in
# turn for each pair; the matching ``has_*`` flag is set on EVERY path so
# that code testing the flag never hits an undefined name when only the
# second location is importable.

try:
    from enpraxis.educommons.annotations.interfaces import IClearCopyrightable, IClearCopyright
    has_clearcopyrightable = True
except ImportError:
    try:
        from enpraxis.educommons.interfaces import IClearCopyrightable, IClearCopyright
        has_clearcopyrightable = True
    except ImportError:
        has_clearcopyrightable = False


try:
    from enpraxis.educommons.annotations.interfaces import IAccessibilityCompliantable, IAccessibilityCompliant
    has_accessibilitycompliance = True
except ImportError:
    try:
        from enpraxis.educommons.interfaces import IAccessibilityCompliantable, IAccessibilityCompliant
        has_accessibilitycompliance = True
    except ImportError:
        has_accessibilitycompliance = False

try:
    from enpraxis.educommons.annotations.interfaces import ICourseOrderable, ICourseOrder
    has_courseorderable = True
except ImportError:
    try:
        from enpraxis.educommons.interfaces import ICourseOrderable, ICourseOrder
        has_courseorderable = True
    except ImportError:
        has_courseorderable = False
[754]81
82
class MigrationException(Exception):
    """ Migration Error Exception

    value -- description of the failure; kept on ``self.value`` and
             rendered through ``repr()`` by ``str()``.
    """

    def __init__(self, value):
        # Initialise the base class as well so ``args`` carries the value
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        return repr(self.value)
91
92
class TarArchiveManager(object):
    """ Read and write migration data through a tarfile archive """

    def __init__(self, archname, mode='w:bz2'):
        """ Open archname using the given tarfile mode """
        self.tar_archive = tarfile.open(archname, mode)

    def _forgetMembers(self):
        """ Empty the archive's member list.

        Circumvents tarfile's member caching for large file handling.
        """
        self.tar_archive.members = []

    def addFileFromString(self, fn, data, lmod):
        """ Add data to the archive as entry fn, with lmod as its mtime """
        stream = StringIO(data)
        # The entry size is the buffer position after seeking to its end
        stream.seek(0, 2)
        entry = tarfile.TarInfo(fn)
        entry.size = stream.tell()
        entry.mtime = lmod
        stream.seek(0)
        self.tar_archive.addfile(entry, stream)
        self._forgetMembers()
        print(fn)

    def addFileFromDisk(self, fn, arcname):
        """ Add the file fn from the filesystem to the archive as arcname """
        self.tar_archive.add(fn, arcname)
        print(arcname)

    def getNextFileInfo(self):
        """ Return the next entry's information, or None when none are left """
        return self.tar_archive.next()

    def readFile(self, info):
        """ Return the complete contents of the entry described by info """
        handle = self.tar_archive.extractfile(info)
        contents = handle.read()
        handle.close()
        self._forgetMembers()
        return contents

    def getFile(self, info):
        """ Return the file object tarfile extracts for the entry info """
        handle = self.tar_archive.extractfile(info)
        self._forgetMembers()
        return handle

    def close(self):
        """ Close the underlying tar file """
        self.tar_archive.close()
138       
139
class ECMigration:
    """ Migrate an eduCommons site """

    # Portal type renames applied by _transformType when metadata is
    # exported; types that are not listed keep their name.
    transforms = {
        'ECDepartment':'Division',
        'ECCourse':'Course',
        'ECDocument':'Document',
        'ECFile':'File',
        'ECFolder':'Folder',
        'ECImage':'Image',
        'ECLink':'Link',
        'GFolder':'Folder',
        'GDocument':'Document',
        'GFile':'File',
        'GImage':'Image',
        'GLink':'Link',
        }

    # Object id renames applied by _transformId to the third and fourth
    # segments of an exported path.
    id_transforms = {
        'About':'about',
        'Help':'help',
        'terms_of_use':'terms-of-use',
        'privacy_policy':'privacy-policy'
        }

    # Workflow state renames applied by _transformState to the exported
    # review_state; states that are not listed are kept.
    state_transforms = {
        'Visible':'Published',
        'published':'Published',
        'Hidden':'InProgress',
        }
[755]170       
171
[754]172    def __init__(self, context):
173        self.context = context
[757]174        self.pw = context.portal_workflow
175        self.imp = 0
176        self.exp = 0
[754]177
178    def importContent(self):
179        """ Import a migration """
[757]180        fn = Globals.os.getcwd()
181        fn = os.path.join(fn, 'var', 'migration')
[754]182        fn = os.path.join(fn, 'eduCommons-20110216.tar.bz2')
183        tf = TarArchiveManager(fn, 'r:bz2')
[757]184        self.imp = 0
185        while 1:
[754]186            info = tf.getNextFileInfo()
187            if info:
[757]188                if info.isfile():
189                    ext = info.name.split('.')[-1]
190                    if 'ecmigration_settings' == ext:
191                        data = self._getMetadata(info, tf)
192                        self.importSettings(data)
193                    elif 'ecmigration_userinfo' == ext:
194                        data = self._getMetadata(info, tf)
195                        self.importUsers(data)
196                    elif 'ecmigration_directory' == ext:
197                        data = self._getMetadata(info, tf)
198                        if data['type'] not in ['ECLogFolder', 'ECLog']:
199                            obj = self.importObject(data)
200                    elif 'ecmigration_metadata' == ext:
201                        data = self._getMetadata(info, tf)
202                        obj = self.importObject(data)
203                        info = tf.getNextFileInfo()
204                        if info and obj:
205                            data = self._getFileData(info, tf)
206                            self.importObjectData(obj, data)
207                    else:
208                        import pdb; pdb.set_trace()
209                        raise MigrationException('Out of sync error')
[754]210                else:
[757]211                    import pdb; pdb.set_trace()
[754]212            else:
[757]213                import pdb; pdb.set_trace()
[754]214                break
215
[757]216        print 'Imported %d objects' %self.imp
217
218    def _getMetadata(self, info, archive):
219        """ Unpickle metadata and return it. """
220        f = archive.getFile(info)
221        if f:
222            fd = f.read()
223            f.close()
224            return Pickle.loads(fd)
225        else:
226            import pdb; pdb.set_trace()
227            print '### Could not get metadata file from archive!!! ###'
228            return ''
229
230    def _getFileData(self, info, archive):
231        """ Get file data and return it """
232        f = archive.getFile(info)
233        fd = f.read()
234        f.close()
235        return fd
236
237    def importSettings(self, data):
238        """ Import Settings """
239        portal = self.context.portal_url.getPortalObject()
240        props = portal.portal_properties.site_properties
241        props.manage_changeProperties(default_language=data['default_language'])
242        transaction.savepoint(optimistic=True)
243
[754]244    def importUsers(self, data):
245        """ Import Users """
246        users = data['users']
[757]247        portal = self.context.portal_url.getPortalObject()
[754]248        pr = portal.portal_registration
249        for x in users:
[757]250            if not users[x]['email']:
251                users[x]['email'] = 'a@b.com'
[754]252            pr.addMember(id=x,
253                         password=pr.generatePassword(),
254                         roles=users[x]['roles'],
255                         properties = {'fullname':users[x]['fullname'],
256                                       'username':x,
257                                       'email':users[x]['email']})
258        for x in users:
259            portal.acl_users.source_users._user_passwords[x] = users[x]['password']
[757]260        transaction.savepoint(optimistic=True)
[754]261
262    def importObject(self, data):
263        """ Import an object from data out of the tar archive """
264        oid = '/'.join(data['filename'].split(os.sep))
265        print oid
[757]266        obj = self.getObjectByPath(oid)
267        if not obj:
268            # Object does not exist, create it
269            parent = self.getObjectByPath('/'.join(oid.split('/')[:-1]))
270            if parent:
271                nid = oid.split('/')[-1]
272                _createObjectByType(data['type'], parent, id=nid)
273                obj = getattr(parent, nid)
274            else:
275                import pdb; pdb.set_trace()
276        if obj:
277            if getattr(obj, 'getId', None):
278                if obj.portal_type == data['type']:
[755]279                    self.updateObject(obj, data)
[757]280                else:
281                    obj = None #    import pdb;pdb.set_trace()
282        return obj
[754]283
[757]284    def importObjectData(self, obj, data):
285        """ Import file data into the object. """
286        pf = obj.getPrimaryField()
287        pf.set(obj, data)
288
[754]289    def updateObject(self, obj, data):
290        """ Update the object with the new settings """
291        for x in data['fields']:
[757]292            field = obj.getField(x)
[755]293            if field:
294                field.set(obj, data['fields'][x])
[757]295
[754]296        obj.workflow_history = data['workflowhistory']
[757]297        try:
298            self._setWorkflow(obj, data['review_state'])
299        except WorkflowException:
300            pass
[754]301        if data.has_key('rightsholder'):
302            if ILicensable.providedBy(obj):
303                lic = ILicense(obj)
304                lic.setRightsHolder(data['rightsholder'])
305                lic.setRightsLicense(data['rightslicense'])
306        if data.has_key('clearedcopyright'):
307            if IClearCopyrightable.providedBy(obj):
308                cc = IClearCopyright(obj)
309                cc.setClearedCopyright(data['clearedcopyright'])
310        if data.has_key('accessibilitycompliant'):
311            if IAccessibilityCompliantable.providedBy(obj):
312                acc = IAccessibilityCompliant(obj)
313                acc.setAccessible(data['accessibilitycompliant'])
314        if data.has_key('courseorder'):
315            if ICourseOrderable.providedBy(obj):
316                order = ICourseOrder(obj)
317                order.setPositionInCourse(data['courseorder'])
[755]318        try:
319            obj.reindexObject()
320        except AttributeError:
321            import pdb; pdb.set_trace()
[757]322        transaction.savepoint(optimistic=True)
323        self.imp += 1
[754]324
[757]325    def _setWorkflow(self, obj, state):
326        pw = obj.portal_workflow
327        if state != pw.getInfoFor(obj, 'review_state'):
328            pw.doActionFor(obj, 'submit')
329        if state != pw.getInfoFor(obj, 'review_state'):
330            pw.doActionFor(obj, 'release')
331        if state != pw.getInfoFor(obj, 'review_state'):
332            pw.doActionFor(obj, 'publish')
333
[755]334    def getObjectByPath(self, path):
[754]335        """ Return an object via its path """
[757]336        opath = path.split('/')[1:]
337        obj = self.context
338        for x in opath:
339            if getattr(obj.aq_base, x, None):
340                obj = obj[x]
341            else:
342                obj = None
343                break
[754]344        return obj
345
346    def exportContent(self, filename=None):
347        """ Export eduCommons content out of site ready for migration. """
[757]348        fn = Globals.os.getcwd()
349        fn = os.path.join(fn, 'var', 'migration')
[754]350        if not os.path.exists(fn):
351            os.mkdir(fn)
352        fn = os.path.join(fn, 'eduCommons-20110216.tar.bz2')
353        brains = self.context.portal_catalog(path={'query':'/', 'depth':2,},)
354        tf = TarArchiveManager(fn, 'w:bz2')
[757]355        self.exportSettings(tf)
[756]356        self.exportUsers(tf)
[757]357        self.exp = 0
358        self.exportObjects(brains, tf, 1000)
[754]359        tf.close()
[757]360        print 'Exported %d objects' %self.exp
[754]361
[757]362    def exportSettings(self, archive):
363        props = self.context.portal_properties.site_properties
364        fn = '%s/settings.ecmigration_settings' %self.context.getId()
365        objstore = {'filename':fn, 'type':'Settings_ecmigration',}
366        objstore['default_language'] = props.getProperty('default_language')
367        data = Pickle.dumps(objstore)
368        archive.addFileFromString(fn, data, float(DateTime()))
369
[756]370    def exportUsers(self, archive):
[757]371        fn = '%s/userinfo.ecmigration_userinfo' %self.context.getId()
372        objstore = {'filename':fn, 'type':'UserInfo_ecmigration', 'users':{},}
[756]373        for user in self.context.acl_users.getUsers():
[754]374            username = user.getName()
[756]375            password = self.context.acl_users.source_users._user_passwords[username]
[754]376            objstore['users'][username] = {
377                'password':password,
378                'fullname':user.getProperty('fullname'),
379                'email':user.getProperty('email'),
380                'roles':user.getRoles(),
381                }
[757]382        data = Pickle.dumps(objstore)
383        archive.addFileFromString(fn, data, float(DateTime()))
[754]384
[757]385    def exportObjects(self, brains, tf, depth=0):
[755]386        for x in brains:
[757]387            if x.getId not in ['Feedback', 'feedback', 'Courses_listing', 'courselist']:
388                self.exportObject(x, tf)
389                if depth and x.is_folderish:
390                    brains = self.context.portal_catalog(path={'query':x.getPath(), 'depth':1,},)
391                    self.exportObjects(brains, tf, depth-1)
[755]392
[754]393    def exportObject(self, brain, archive):
394        """ Export an object """
[756]395        if 'Download this Course' == brain.Title:
396            return
[754]397        fn = self._getPath(brain)
398        obj = brain.getObject()
[757]399        if obj.portal_type not in ['ECLogFolder', 'ECLog']:
400            # Get the metadata and store it
401            if obj.isPrincipiaFolderish:
402                self.exportMetadata(obj, fn, '.ecmigration_directory', archive)
403            else:
404                self.exportMetadata(obj, fn, '.ecmigration_metadata', archive)
405                self.exportFile(obj, fn, archive)
406        self.exp += 1
[756]407       
[757]408    def exportMetadata(self, obj, fn, ftype, archive):
[756]409        """ Export an object's metadata """
410        objstore = {'filename':fn}
411        self._storeMetaData(obj, objstore)
[754]412        lmod = obj.getRawModification_date()
[756]413        data = Pickle.dumps(objstore)
[757]414#        if 'ocwuniversia/derecho-social-e-internacional-privado/teoria-de-las-relaciones-laborales/programa' in fn:
415#            import pdb; pdb.set_trace()
416#        if 'ocwuniversia/derecho-social-e-internacional-privado/teoria-de-las-relaciones-laborales/practicas-1' in fn:
417#            import pdb; pdb.set_trace()
418#        if 'ocwuniversia/derecho-privado/la-tutela-de-los-consumidores-y-usuarios-ante-la-comunicacion-ilicita' in fn:
419#            import pdb; pdb.set_trace()
420        tf = tempfile.NamedTemporaryFile()
421        tf.write(data)
422        tf.seek(0)
423        archive.addFileFromDisk(tf.name, fn+ftype)
424        tf.close()
425        #archive.addFileFromString(fn + ftype, data, float(lmod))
[754]426
[756]427    def exportFile(self, obj, fn, archive):
428        """ Export the primary field of an object as a file in the archive """
429        pfield = obj.getPrimaryField()
430        fd = pfield.get(obj)
431        if type(fd) == type(''):
[757]432            data = fd
433        else:
[756]434            data = fd.data
435            if type(data) != type(''):
436                data = fd.data.data
437        tf = tempfile.NamedTemporaryFile()
438        tf.write(data)
439        tf.seek(0)
440        archive.addFileFromDisk(tf.name, fn)
441        tf.close()
442
[754]443    def _getPath(self, brain):
444        opath = brain.getPath()
445        fn = opath.split('/')
[755]446        self._transformId(fn)
[754]447        return os.sep.join(fn[1:])
448
[756]449    def _storeMetaData(self, obj, objstore):
[754]450        """ Export Object Metadata """
[755]451        objstore['type'] = self._transformType(obj.portal_type)
452        # Get field data
[756]453        pfield = obj.getPrimaryField()
[757]454        if objstore['type'] in ['School', 'Division', 'Course']:
455            pfield = None
[754]456        objstore['fields'] = {}
457        for x in obj.Schema().fields():
[756]458            if x != pfield:
459                fid = x.getName()
[757]460                if fid in ['id', 'locallyAllowedTypes', 'immediatelyAddableTypes', 'constrainTypesMode', ]:
[756]461                    pass # Do not store the ID, get it from the filename instead
[757]462                elif 'clearedCopyright' == fid:
[756]463                    # if clearedcopyright flag is stored as a field
464                    # move it to annotations instead
465                    objstore['clearedcopyright'] = x.get(obj)
[757]466                elif fid in ['file', 'image']:
467                    pass
468                elif 'text' == fid:
469                    # If html then remove all resolveUIDs
470                    data = x.get(obj)
471                    ds = self.context.portal_transforms.convert('fck_ruid_to_url', data, context=obj)
472                    objstore['fields'][fid] = ds.getData()
[754]473                else:
[756]474                    objstore['fields'][fid] = x.get(obj)
[755]475        # Get non field related data
[754]476        objstore['owner'] = obj.getOwner().getId()
477        objstore['workflowhistory'] = obj.workflow_history
[757]478        objstore['review_state'] = self._transformState(obj.portal_workflow.getInfoFor(obj, 'review_state'))
479        #print objstore['review_state']
[755]480        # Get annotation related data
[754]481        if ILicensable.providedBy(obj):
482            lic = ILicense(obj)
483            objstore['rightsholder'] = lic.getRightsHolder()
484            objstore['rightslicense'] = lic.getRightsLicense()
485        if has_clearcopyrightable:
486            if IClearCopyrightable.providedBy(obj):
487                cc = IClearCopyright(obj)
488                objstore['clearedcopyright'] = cc.getClearedCopyright()
489        if has_accessibilitycompliance:
490            if IAccessibilityCompliantable.providedBy(obj):
491                acc = IAccessibilityCompliant(obj)
492                objstore['accessibilitycompliant'] = acc.getAccessible()
493        if has_courseorderable:
494            if ICourseOrderable.providedBy(obj):
495                order = ICourseOrder(obj)
496                objstore['courseorder'] = order.getPositionInCourse()
497
[755]498    def _transformType(self, ptype):
[754]499        """ Change type """
500        if self.transforms.has_key(ptype):
501            return self.transforms[ptype]
502        else:
503            return ptype
[755]504
505    def _transformId(self, fn):
506        """ Change IDs """
[756]507        if len(fn) > 2 and self.id_transforms.has_key(fn[2]):
508            fn[2] = self.id_transforms[fn[2]]
509            if len(fn) > 3 and 'about' == fn[2] and 'index_html' == fn[3]:
510                fn[3] = 'abouttext_text'
511            if len(fn) > 3 and 'help' == fn[2] and 'index_html' == fn[3]:
512                fn[3] = 'help_text'
513        if len(fn) > 3 and self.id_transforms.has_key(fn[3]):
514            fn[3] = self.id_transforms[fn[3]]
[757]515
516    def _transformState(self, state):
517        if state in self.state_transforms:
518            state = self.state_transforms[state]
519        return state
Note: See TracBrowser for help on using the repository browser.