source: collective.imstransport/trunk/collective/imstransport/utilities/bb/imsbbreader.py @ 383

Revision 383, 9.9 KB checked in by jon, 4 years ago (diff)

Adding license information

Line 
1##################################################################################
2#    Copyright (c) 2004-2009 Utah State University, All rights reserved.
3#    Portions copyright 2009 Massachusetts Institute of Technology, All rights reserved.
4#                                                                                 
5#    This program is free software; you can redistribute it and/or modify         
6#    it under the terms of the GNU General Public License as published by         
7#    the Free Software Foundation; either version 2 of the License, or           
8#    (at your option) any later version.                                         
9#                                                                                 
10#    This program is distributed in the hope that it will be useful,             
11#    but WITHOUT ANY WARRANTY; without even the implied warranty of               
12#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the               
13#    GNU General Public License for more details.                                 
14#                                                                                 
15#    You should have received a copy of the GNU General Public License           
16#    along with this program; if not, write to the Free Software                 
17#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA   
18#                                                                                 
19##################################################################################
20
# Module metadata.
__author__  = '''Brent Lambert, David Ray, Jon Thomas'''
# The slice [11:-2] keeps only the number out of the keyword-style string
# below (here it evaluates to '0.0').
__version__   = '$ Revision 0.0 $'[11:-2]
23
24from collective.imstransport.utilities.imsinterchange import IMSReader
25from collective.imstransport.utilities.packagingio import ZipfileReader
26from collective.imstransport.utilities.bb.bbreader import BBReader
27from zope.component import getUtility
28from collective.imstransport.utilities.interfaces import IIMSObjectCreator
29from xml.parsers.expat import ExpatError
30from collective.imstransport.IMS_exceptions import ManifestError
31from BeautifulSoup import BeautifulSoup
32from Products.CMFCore.interfaces import ISiteRoot
33import htmlentitydefs
34from configbb import EMBEDDED_STRING
35import re
36
class IMSBBReader(IMSReader):
    """ Create objects from the IMS manifest of a Blackboard package. """

    def readPackage(self, file, context):
        """ Read the manifest in the zip archive and create the objects.

        file is handed to ZipfileReader; context is passed through to the
        IIMSObjectCreator utility together with the collected metadata.

        Every resource of the manifest is turned into one entry of a
        dictionary (keyed by resource id, or resource id + file name for
        attached files) holding the metadata of the object to create.
        Folders and course table-of-contents resources additionally get a
        generated table of contents prepended to their text.

        Returns the tuple (False, message) when the zip reader evaluates
        as false; otherwise returns None after the objects were created.
        Raises ManifestError when the manifest is missing or is not
        well-formed XML.
        """
        # Local import: only needed to fetch the active exception below.
        import sys

        source = ZipfileReader(file)
        objDict = {}
        if not source:
            # NOTE(review): unlike the other failures this one is reported
            # through the return value instead of an exception -- callers
            # may rely on it, so it is left as is.
            return False, 'Internal error. No source object specified'
        bbreader = BBReader()
        manifest = source.readManifest()
        if not manifest:
            raise ManifestError('Could not locate manifest file "imsmanifest.xml" in the zip archive.')
        try:
            doc = bbreader.parseManifest(manifest)
        except ExpatError:
            # sys.exc_info() is valid on every Python version; the
            # "except X, e" spelling is Python 2 only and "except X as e"
            # needs Python 2.6 or later.
            raise ManifestError(str(sys.exc_info()[1]))

        # Ids of the resources that receive a generated table of contents.
        tocpages = []
        orgs = bbreader.readOrganizations(doc)
        resources = bbreader.readResources(doc)
        for x in resources:
            resid, restype, bbfile, bbtitle, bbase = bbreader.readResourceAttributes(x)
            doctext = filetoc = ''
            metadata = {}
            if restype == 'resource/x-bb-document':
                # Read the data file
                if bbfile:
                    dataxml = source.readFile(bbfile)
                    resnode = bbreader.parseDataFile(dataxml)
                    metadata = bbreader.readMetadata(resnode)
                if metadata.get('text'):
                    mtext = metadata['text']
                    if isinstance(mtext, unicode):
                        mtext = mtext.encode('utf-8')
                    ptext = unquotehtml(mtext)
                    utils = getUtility(ISiteRoot).plone_utils
                    soup = BeautifulSoup(ptext)
                    doctext = bbreader.runDocumentFilters(
                        utils, soup, [(EMBEDDED_STRING, 'embedded')], bbase)
                # Handle files
                files = bbreader.readFiles(x, bbase)
                entries = []
                for y in files:
                    dhash = resid + y
                    objDict[dhash] = {}
                    dexcludeFromNav = True
                    dfile = y
                    # File is embedded
                    if 'embedded' in y:
                        # Link is encrypted: the last path component starts
                        # with '!'.  startswith() also copes with a path
                        # ending in '/', where that component is empty and
                        # indexing its first character would raise
                        # IndexError.
                        if y.split('/')[-1].startswith('!') and doctext:
                            soup = BeautifulSoup(doctext)
                            embeddedpath = bbreader.readEmbeddedTags(soup)
                            if embeddedpath:
                                dfile = embeddedpath
                    dfilepath = self.createPathFromFile(dfile)
                    did = self.createIdFromFile(dfile)
                    dtype = self.determineType(objDict[dhash], dfile)
                    utils = getUtility(ISiteRoot).plone_utils
                    did = utils.normalizeString(did)
                    dtitle = did
                    if dfilepath:
                        linkpath = '%s/%s' % (dfilepath, did)
                    else:
                        linkpath = did
                    # When the target folder of the archive holds exactly
                    # one file, that file is used as the binary content
                    # instead of the name listed in the manifest.
                    binfile = y
                    folder_files = source.getFolderFiles(dfilepath)
                    if len(folder_files) == 1:
                        binfile = folder_files[0]
                    entries.append((linkpath, dtitle))
                    self.applyCoreMetadata(objDict[dhash], did, dfilepath,
                                           dexcludeFromNav, dtype, dtitle,
                                           file=binfile)
                if entries:
                    filetoc = bbreader.createTocPage(entries)

            if metadata.get('bbtype') == 'Link':
                # Handle links
                objDict[resid] = metadata
                objid = resid + '.link'
                if resid in orgs:
                    title = orgs[resid]
                else:
                    title = objid
                self.applyCoreMetadata(objDict[resid], objid, '', True,
                                       metadata['bbtype'], title)
            elif restype in ['resource/x-bb-document', 'course/x-bb-coursetoc']:
                # Handle normal bb-documents
                objDict[resid] = metadata
                excludeFromNav = True
                objid = resid + '.html'
                if resid in orgs:
                    title = orgs[resid]
                else:
                    title = objid
                # Folder objects and course table-of-contents objects get a
                # generated table of contents later on.
                isFolder = metadata.get('bbtype') == 'Folder'
                if isFolder or restype == 'course/x-bb-coursetoc':
                    tocpages.append(resid)
                # It's a table of contents object
                if restype == 'course/x-bb-coursetoc' and resid in orgs:
                    excludeFromNav = False
                    orgstitle = orgs[resid].split('.')
                    # Rewrite label tag: for a dotted name ending in
                    # '.label' the second component becomes the title, with
                    # a space inserted in front of its capitalised words.
                    if len(orgstitle) > 1 and orgstitle[-1] == 'label':
                        title = re.sub('(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))', ' \\1', orgstitle[1])
                text = doctext
                # NOTE(review): a byte-string file listing would be skipped
                # here -- presumably createTocPage always returns unicode;
                # confirm against BBReader.
                if isinstance(filetoc, unicode):
                    text += filetoc.encode('utf-8')
                self.applyCoreMetadata(objDict[resid], objid, '',
                                       excludeFromNav, 'Document', title,
                                       text=text)

        # Build table of contents pages
        for z in tocpages:
            tocitems = bbreader.readTocItem(doc, z)
            entries = []
            for titem in tocitems:
                if titem in objDict:
                    met = objDict[titem]
                    path = met['path']
                    if path:
                        linkpath = '%s/%s' % (path, met['id'])
                    else:
                        linkpath = met['id']
                    entries.append((linkpath, met['title']))
            page = objDict[z]
            if entries:
                gtext = page.get('text')
                if gtext:
                    # The table of contents goes in front of the page text.
                    if isinstance(gtext, unicode):
                        gtext = gtext.encode('utf-8')
                    page['text'] = bbreader.createTocPage(entries).encode('utf-8') + gtext
                else:
                    page['text'] = bbreader.createTocPage(entries).encode('utf-8')
            else:
                # Nothing to list: keep the page out of the navigation.
                page['excludeFromNav'] = True

        objcreator = getUtility(IIMSObjectCreator)
        objcreator.createObjects(objDict, context, source)

    def applyCoreMetadata(self, metadata, id, path, excludeFromNav, type, title, file=None, text=None):
        """ Helper function for applying metadata.

        Stores the normalized id, the path, the navigation flag and the
        type in the metadata dictionary (modified in place).  The title is
        only set when the dictionary has no non-empty title yet; file and
        text are only stored when they are non-empty.
        """
        utils = getUtility(ISiteRoot).plone_utils
        metadata['id'] = utils.normalizeString(id)
        metadata['path'] = path
        metadata['excludeFromNav'] = excludeFromNav
        metadata['type'] = type
        if not metadata.get('title'):
            metadata['title'] = title
        if file:
            metadata['file'] = file
        if text:
            metadata['text'] = text
193
def convertentity(m):
    """Convert a HTML entity into normal string (ISO-8859-1).

    m is a regular expression match: group(1) is '#' for a numeric
    character reference (empty otherwise) and group(2) is the entity body,
    i.e. the decimal number or the entity name.

    Returns the replacement text.  An entity that cannot be resolved
    (unknown name, non-decimal or out-of-range number) is returned in its
    original '&...;' spelling.
    """
    body = m.group(2)
    if m.group(1) == '#':
        try:
            return chr(int(body))
        except (ValueError, OverflowError):
            # ValueError: not a decimal number or outside the range chr()
            # accepts.  OverflowError: a number too large for chr() to even
            # look at -- leave such references alone instead of crashing.
            return '&#%s;' % body
    try:
        return htmlentitydefs.entitydefs[body]
    except KeyError:
        return '&%s;' % body


def unquotehtml(s):
    """Convert a HTML quoted string into normal string (ISO-8859-1).

    Works with &#XX; and with &nbsp; &gt; etc.  Anything that is not a
    resolvable entity is left untouched.
    """
    # The entity body must not contain '&', ';' or a newline.  Without the
    # '&' restriction a stray ampersand would swallow everything up to the
    # next ';', so in 'A & B &lt; C;' the '&lt;' would never be decoded.
    return re.sub(r'&(#?)([^&;\n]+);', convertentity, s)
Note: See TracBrowser for help on using the repository browser.