Changeset 199
- Timestamp:
- 05/14/09 23:41:20 (4 years ago)
- Location:
- collective.imstransport/trunk/collective/imstransport
- Files:
-
- 3 deleted
- 4 edited
-
tests/testCC.py (modified) (2 diffs)
-
utilities/bboard.py (deleted)
-
utilities/imscc/ccreader.py (modified) (2 diffs)
-
utilities/imscc/ccresourcereader.py (deleted)
-
utilities/imscc/imsccreader.py (modified) (3 diffs)
-
utilities/imsinterchange.py (modified) (2 diffs)
-
utilities/mitdspace.py (deleted)
Legend:
- Unmodified
- Added
- Removed
-
collective.imstransport/trunk/collective/imstransport/tests/testCC.py
r175 r199 9 9 <item identifier="1001"> 10 10 <item identifier="1002" identifierref="2000" /> 11 <item identifier="1003" identifierref="2001" /> 11 <item identifier="1003" identifierref="2001"> 12 <title>Test Title</title> 13 </item> 12 14 </item> 13 15 </organization> … … 20 22 """ 21 23 24 general = """ 25 <general> 26 <identifier> 27 <catalog>http://somewhere.com</catalog> 28 <entry>asdf</entry> 29 </identifier> 30 <title> 31 <string xml:lang="en">asdf</string> 32 </title> 33 <language>en</language> 34 <description> 35 <string xml:lang="en">this is a test</string> 36 </description> 37 <keyword> 38 <string xml:lang="en">Hello</string> 39 <string xml:lang="en">There</string> 40 </keyword> 41 </general> 42 """ 43 44 lifecycle = """<lifecycle> 45 </lifecycle> 46 """ 47 48 metadata = general + lifecycle 49 50 22 51 class TestCCReader(IMSTransportTestCase): 23 52 """ 24 53 """ 25 54 26 def testOrganizations(self): 55 def testParseManifest(self): 56 ccr = CCReader() 57 doc = ccr.parseManifest('<test>Hello</test>') 58 assert(doc.getElementsByTagName('test')) 59 60 def testReadOrganizations(self): 27 61 """ Test organization handling """ 28 62 ccr = CCReader() 29 ccr.document = minidom.parseString(organizations) 30 ccr.readOrganizations() 31 assert(ccr.orgdata) 32 self.assertEqual(ccr.orgdata['2001'], 3) 63 doc = minidom.parseString(organizations) 64 orgs = ccr.readOrganizations(doc) 65 assert(orgs) 66 self.assertEqual(len(orgs), 3) 67 self.assertEqual(orgs['2000'], (2, None)) 68 self.assertEqual(orgs['2001'], (3, 'Test Title')) 33 69 34 def testRe sources(self):70 def testReadResources(self): 35 71 """ Test resource handling """ 36 72 ccr = CCReader() 37 ccr.document = minidom.parseString(resources) 38 resids = ccr.readResources() 39 assert(resids) 40 self.assertEqual(resids, ['2000']) 73 doc = minidom.parseString(resources) 74 res = ccr.readResources(doc) 75 assert(res) 76 self.assertEqual('2000', res[0].getAttribute('identifier')) 77 self.assertEqual('test.html', res[0].getAttribute('href')) 78 self.assertEqual('webcontent', res[0].getAttribute('type')) 41 79 42 80 def testGetTextValue(self): 43 81 """ Test removal of text from node """ 44 document = minidom.parseString( "<test>Hello</test>")82 document = minidom.parseString('<test>Hello</test>') 45 83 textnode = document.getElementsByTagName('test')[0] 46 84 ccr = CCReader() 47 85 self.assertEqual(ccr.getTextValue(textnode), 'Hello') 86 87 def testReadResourceAttributes(self): 88 ccr = CCReader() 89 doc = ccr.parseManifest(resources) 90 res = doc.getElementsByTagName('resource') 91 results = ccr.readResourceAttributes(res[0]) 92 self.assertEqual(results[0], '2000') 93 self.assertEqual(results[1], 'webcontent') 94 self.assertEqual(results[2], 'test.html') 95 96 def testReadMetadata(self): 97 pass 98 99 def testReadGeneral(self): 100 ccr = CCReader() 101 manifest = '<lom xmlns="http://ltsc.ieee.org/xsd/LOM">' + general + '</lom>' 102 doc = ccr.parseManifest(manifest) 103 md = {} 104 ccr.readGeneral(doc, md) 105 assert(md.has_key('title')) 106 self.assertEqual(md['title'], 'asdf') 107 assert(md.has_key('language')) 108 self.assertEqual(md['language'], 'en') 109 assert(md.has_key('description')) 110 self.assertEqual(md['description'], 'this is a test') 111 assert(md.has_key('subject')) 112 self.assertEqual(md['subject'], ['Hello', 'There']) 113 114 115 def testLifecycle(self): 116 pass 117 48 118 49 119 -
collective.imstransport/trunk/collective/imstransport/utilities/imscc/ccreader.py
r197 r199 2 2 from collective.imstransport.IMS_exceptions import ManifestError 3 3 4 LOM_namespace = 'http://ltsc.ieee.org/xsd/LOM' 5 WL_namespace = 'http://www.imsglobal.org/xsd/imswl_v1p0' 6 4 7 class CCReader(object): 5 6 def __init__(self):7 """8 """9 self.document = None10 self.orgdata = {}11 self.resids = []12 self.objdict = {}13 8 14 9 def parseManifest(self, manifest): 15 10 """ parse the manifest """ 16 17 self.document = minidom.parseString(manifest) 18 19 def readOrganizations(self): 11 return minidom.parseString(manifest) 12 13 def readOrganizations(self, manifest): 20 14 """ Read the organizations section of the manifest. """ 21 self.org= {}22 organizations = self.document.getElementsByTagName('organizations')15 orgs = {} 16 organizations = manifest.getElementsByTagName('organizations') 23 17 if organizations: 24 return self._readItems(organizations[0]) 25 else: 26 raise ManifestError, 'Manifest file has no "organizations" section.' 27 28 def _readItems(self, orgs): 29 """ Read items from the manifest. """ 30 31 organization_nodes = orgs.getElementsByTagName('organization') 32 if organization_nodes: 33 organization_node = organization_nodes[0] 34 35 item_nodes = organization_nodes[0].getElementsByTagName('item') 36 itemnum = 1 37 for item in item_nodes: 38 idref = item.getAttribute('identifierref') 39 self.orgdata[idref] = itemnum 40 itemnum += 1 18 organization_nodes = organizations[0].getElementsByTagName('organization') 19 if organization_nodes: 20 organization_node = organization_nodes[0] 21 item_nodes = organization_nodes[0].getElementsByTagName('item') 22 itemnum = 1 23 for item in item_nodes: 24 idref = item.getAttribute('identifierref') 25 titlenodes = item.getElementsByTagName('title') 26 if titlenodes: 27 orgs[idref] = (itemnum, self.getTextValue(titlenodes[0])) 28 else: 29 orgs[idref] = (itemnum, None) 30 itemnum += 1 31 32 return orgs 41 33 42 return self.orgdata 43 44 def getTitle(self, resourceid): 45 """ Read title from organization element """ 46 47 organization_nodes = self.document.getElementsByTagName('organization') 48 if organization_nodes: 49 organization_node = organization_nodes[0] 50 51 item_nodes = organization_nodes[0].getElementsByTagName('item') 52 53 for item in item_nodes: 54 idref = item.getAttribute('identifierref') 55 if idref == resourceid: 56 title_nodes = item.getElementsByTagName('title') 57 if title_nodes: 58 return getTextValue(title_nodes[0]) 59 60 return '' 61 62 def readResources(self): 34 def readResources(self, manifest): 63 35 """ Read all resources. """ 64 resources = self.document.getElementsByTagName('resources') 36 reslist = [] 37 resources = manifest.getElementsByTagName('resources') 65 38 if resources: 66 for res in resources[0].getElementsByTagName('resource'): 67 id = res.getAttribute('identifier') 68 self.resids.append(id) 69 70 return self.resids 71 72 def getResourceIds(self): 73 """ Get a list of resource identifiers """ 74 return self.resids 39 reslist = resources[0].getElementsByTagName('resource') 40 return reslist 75 41 76 42 def getTextValue(self, node): 77 43 """ Removes the text from the text_node of a node """ 44 78 45 for x in node.childNodes: 79 46 if x.nodeType == x.TEXT_NODE: … … 81 48 return None 82 49 83 50 def readResourceAttributes(self, resource): 51 """ Return attributes on resource node. """ 52 return (resource.getAttribute('identifier'), 53 resource.getAttribute('type'), 54 resource.getAttribute('href')) 55 56 def readMetadata(self, metadata): 57 md = {} 58 self.readGeneral(metadata, md) 59 self.readLifecycle(metadata, md) 60 self.readMetaMetadata(metadata, md) 61 self.readTechnical(metadata, md) 62 self.readRights(metadata, md) 63 return md 64 65 def readGeneral(self, metadata, md): 66 """ Read general node """ 67 68 gen_node = None 69 gen_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'general') 70 71 if gen_nodes: 72 gen_node = gen_nodes[0] 73 74 title_nodes = gen_node.getElementsByTagNameNS(LOM_namespace, 'title') 75 if title_nodes: 76 langstring_nodes = title_nodes[0].getElementsByTagNameNS(LOM_namespace, 77 'string') 78 if langstring_nodes: 79 title = self.getTextValue(langstring_nodes[0]) 80 if title: 81 md['title'] = title 82 83 if not md.has_key('title'): 84 raise ManifestError, 'Required tag "title" missing in lom/general metadata section for resource %s.' %resid 85 86 language_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'language') 87 if language_nodes: 88 lang = self.getTextValue(language_nodes[0]) 89 if lang: 90 md['language'] = lang 91 92 desc_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'description') 93 if desc_nodes: 94 langstring_nodes = desc_nodes[0].getElementsByTagNameNS(LOM_namespace, 95 'string') 96 if langstring_nodes: 97 description = self.getTextValue(langstring_nodes[0]) 98 if description: 99 md['description'] = description 100 101 kw_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'keyword') 102 if kw_nodes: 103 kw_lang_nodes = kw_nodes[0].getElementsByTagNameNS(LOM_namespace,'string') 104 kw_list = [] 105 if kw_lang_nodes: 106 for lang_node in kw_lang_nodes: 107 kw = self.getTextValue(lang_node) 108 if kw: 109 kw_list.append(kw) 110 if kw_list: 111 md['subject'] = kw_list 112 113 114 def readLifecycle(self, metadata, md): 115 """ Read Lifecycle node """ 116 117 lc_node = None 118 lc_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'lifeCycle') 119 120 if lc_nodes: 121 lc_node = lc_nodes[0] 122 123 if lc_node: 124 # Lifecycle Node 125 contribute_nodes = lc_node.getElementsByTagNameNS(LOM_namespace, 'contribute') 126 127 # For each contribute node there is a role node, a centity node, and possibly a date node 128 for contribute_node in contribute_nodes: 129 source = '' 130 value = '' 131 vlist = [] 132 date = '' 133 134 role_nodes = contribute_node.getElementsByTagNameNS(LOM_namespace, 'role') 135 if role_nodes: 136 source_nodes = role_nodes[0].getElementsByTagNameNS(LOM_namespace, 'source') 137 if source_nodes: 138 langstring_nodes = source_nodes[0].getElementsByTagNameNS(LOM_namespace, 'string') 139 if langstring_nodes: 140 source = self.getTextValue(langstring_nodes[0]) 141 value_nodes = role_nodes[0].getElementsByTagNameNS(LOM_namespace, 'value') 142 if value_nodes: 143 langstring_nodes = value_nodes[0].getElementsByTagNameNS(LOM_namespace, 'string') 144 if langstring_nodes: 145 value = self.getTextValue(langstring_nodes[0]) 146 147 centity_nodes = contribute_node.getElementsByTagNameNS(LOM_namespace, 'entity') 148 for centity_node in centity_nodes: 149 for cnode in centity_node.childNodes: 150 if cnode.nodeType == cnode.ELEMENT_NODE: 151 name, email = self.reader.getVcardValues(cnode, resid) 152 if value: 153 vlist.append((name, email)) 154 155 date_nodes = contribute_node.getElementsByTagNameNS(LOM_namespace, 'date') 156 if date_nodes: 157 datetime_nodes = date_nodes[0].getElementsByTagNameNS(LOM_namespace, 'datetime') 158 if datetime_nodes: 159 datetime = self.getTextValue(datetime_nodes[0]) 160 161 # Creator 162 if 'author' == value.lower() and vlist: 163 metadata['creators'] = [x[0] for x in vlist] 164 if date: 165 md['creation_date'] = date 166 167 # Contributors 168 if 'unknown' == value.lower() and vlist: 169 md['contributors'] = [x[0] for x in vlist] 170 171 172 def readMetaMetadata(self, metadata, md): 173 """ Read Meta-metadata node """ 174 pass 175 176 177 def readTechnical(self, metadata, md): 178 """ Read Technical node """ 179 tec_node = None 180 tec_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'technical') 181 182 if tec_nodes: 183 tec_node = tec_nodes[0] 184 185 format_nodes = tec_node.getElementsByTagNameNS(LOM_namespace, 'format') 186 if format_nodes: 187 format = self.getTextValue(format_nodes[0]) 188 if format: 189 md['Format'] = format 190 191 def readRights(self, metadata, md): 192 """ Read Rights node """ 193 194 rights_node = None 195 rights_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'rights') 196 197 if rights_nodes: 198 rights_node = rights_nodes[0] 199 description_nodes = rights_node.getElementsByTagNameNS(LOM_namespace,'description') 200 if description_nodes: 201 langstring_nodes = description_nodes[0].getElementsByTagNameNS(LOM_namespace,'langstring') 202 if langstring_nodes: 203 description = self.getTextValue(langstring_nodes[0]) 204 if description: 205 md['rights'] = description 206 207 def getLinkInfo(self, linkfile): 208 """ Read an anchor tag and return the link """ 209 linkdict = {} 210 doc = minidom.parseString(linkfile) 211 title = '' 212 link = '' 213 link_nodes = doc.getElementsByTagNameNS(WL_namespace, 'webLink') 214 if link_nodes: 215 title_nodes = link_nodes[0].getElementsByTagName('title') 216 url_nodes = link_nodes[0].getElementsByTagName('url') 217 if title_nodes: 218 title = self.getTextValue(title_nodes[0]) 219 if url_nodes: 220 link = url_nodes[0].getAttribute('href') 221 222 return title, link 223 224 -
collective.imstransport/trunk/collective/imstransport/utilities/imscc/imsccreader.py
r197 r199 5 5 from collective.imstransport.utilities.packagingio import ZipfileReader 6 6 from collective.imstransport.utilities.imscc.ccreader import CCReader 7 from collective.imstransport.utilities.imscc.ccresourcereader import CCResourceReader8 import re9 7 from collective.imstransport import IMSTransportMessageFactory as _ 10 8 … … 19 17 XML = 'http://www.w3.org/XML/1998/namespace' 20 18 IMSCP = 'http://www.imsglobal.org/xsd/imscc/imscp_v1p1' 21 LOM = 'http://ltsc.ieee.org/xsd/LOM'22 WL = 'http://www.imsglobal.org/xsd/imswl_v1p0'23 19 24 20 def readPackage(self, file, context): 25 21 """ Read the manifest """ 26 27 ## ccr = CCReader()28 ## doc = ccr.parseManifest(source)29 ## resids = ccr.readResources()30 ## for x in resids:31 ## ccrr = CCResourceReader()32 ## resdata = ccrr.parseResourceData(doc, x)33 22 34 23 source = ZipfileReader(file) … … 46 35 'Could not locate manifest file "imsmanifest.xml" in the zip archive.' 47 36 48 ccreader.parseManifest(manifest) 49 orgdata = ccreader.readOrganizations() 50 resourceids = ccreader.readResources() 37 doc = ccreader.parseManifest(manifest) 51 38 52 for resourceid in resourceids: 39 orgs = ccreader.readOrganizations(doc) 40 resources = ccreader.readResources(doc) 53 41 54 ccresourcereader = CCResourceReader(ccreader, resourceid) 55 resdata = self._parseResourceMetadata(ccresourcereader, resourceid) 56 reshref = ccresourcereader.getHref() 57 cctype = ccresourcereader.getCCType() 58 files = ccresourcereader.readFiles() 59 hashref = resourceid 42 for x in resources: 43 resid, restype, reshref = ccreader.readResourceAttributes(x) 44 metadata = ccreader.readMetadata(x) 45 files = ccreader.readFiles(x) 46 # If the type is a link 47 if restype == 'imswl_xmlv1p0': 48 for y in files: 49 hash = resid + y 50 objDict[hash] = metadata 51 id = self._createIdFromFile(y) 52 objDict[hash]['id'] = id.replace('.xml','') 53 objDict[hash]['path'] = self.createPathFromFile(y) 54 linkfile = source.readFile(y) 55 title, location = ccreader.getLinkInfo(linkfile) 56 objDict[hash]['type'] = 'Link' 57 objDict[hash]['title'] = title 58 objDict[hash]['remoteUrl'] = location 59 # If the type is a file 60 elif restype == 'webcontent': 61 for y in files: 62 hash = resid + y 63 # If there is only one file, or it matches the reshref 64 # add the metadata to it if it exists 65 if y == reshref or len(files) == 1: 66 objDict[hash] = metadata 67 # If it is listed in the org section 68 if orgs.has_key(resid): 69 objDict[hash]['position'] = orgs[resid][0] 70 if orgs[resid][1]: 71 objDict[hash]['title'] = orgs[resid][1] 72 objDict[hash]['excludeFromNav'] = False 73 else: 74 objDict[hash]['excludeFromNav'] = True 75 # If it is just a lowly file 76 else: 77 objDict[hash] = {} 78 objDict[hash]['excludeFromNav'] = True 79 objDict[hash]['file'] = file 80 objDict[hash]['type'] = self.determineType(objDict[hash], file) 81 # Add to all files 82 objDict[hash]['id'] = self._createIdFromFile(y) 83 objDict[hash]['path'] = self.createPathFromFile(y) 60 84 61 if files and cctype == 'imswl_xmlv1p0':62 file = files[0]63 64 id = self._createIdFromFile(file)65 id = id.replace('.xml','')66 path = self._createPathFromFile(file)67 68 objDict[hashref] = resdata69 objDict[hashref]['type'] = 'Link'70 if not objDict[hashref].has_key('path'):71 objDict[hashref]['path'] = path72 if not objDict[hashref].has_key('id'):73 objDict[hashref]['id'] = id74 75 linkxml = source.readFile(file)76 linktuple = ccresourcereader.getLink(self.WL, 'webLink', linkxml)77 78 objDict[hashref]['title'] = linktuple[0]79 objDict[hashref]['remoteUrl'] = linktuple[1]80 81 elif cctype == 'webcontent':82 if files:83 for file in files:84 85 id = self._createIdFromFile(file)86 path = self._createPathFromFile(file)87 88 if reshref == file or len(files) == 1:89 hashref = resourceid90 objDict[hashref] = resdata91 92 93 # Check if item is in organizations section94 orgs = [org for org in orgdata if org]95 if hashref in orgs:96 objDict[hashref]['excludeFromNav'] = False97 objDict[hashref]['title'] = orgdata[hashref]98 else:99 objDict[hashref]['excludeFromNav'] = True100 101 else:102 hashref = '%s%s' %(resourceid, file)103 objDict[hashref] = {}104 objDict[hashref]['excludeFromNav'] = True105 106 107 self._parseFile(file, objDict, hashref, id, path)108 109 110 85 objcreator = getUtility(IIMSObjectCreator) 111 86 objcreator.createObjects(objDict, context, source) 112 return87 113 88 114 89 -
collective.imstransport/trunk/collective/imstransport/utilities/imsinterchange.py
r197 r199 21 21 # Helper functions for readPackage 22 22 23 def _createIdFromFile(self, file):23 def createIdFromFile(self, file): 24 24 """ Get Id from file path """ 25 25 return file.split('/')[-1] 26 26 27 def _createPathFromFile(self, file):27 def createPathFromFile(self, file): 28 28 """ Get folder path from file path """ 29 29 return '/'.join(file.split('/')[:-1]) 30 30 31 def _parseResourceMetadata(self, resourcereader, resourceid): 32 """ Read the resource metadata """ 33 34 resourcereader.readGeneral() 35 resourcereader.readLifecycle() 36 resourcereader.readMetaMetadata() 37 resourcereader.readTechnical() 38 resourcereader.readRights() 39 customnode = resourcereader.getCustomData('', '') 40 if customnode: 41 metadict = resourcereader.readCustomMetadata(customnode) 42 resourcereader.appendCustomData(metadict) 43 return resourcereader.processResourceMetadata() 44 45 def _parseFile(self, file, objDict, hashref, id, path): 46 """ parse a file object and add data to it """ 47 objDict[hashref]['file'] = file 48 objDict[hashref]['id'] = id 49 objDict[hashref]['path'] = path 50 objDict[hashref]['type'] = self._determineType(hashref, objDict, file) 51 if objDict[hashref].has_key('title') == False: 52 objDict[hashref]['title'] = id 53 54 def _readCustomData(self, prefix, ns, location): 55 """ 56 Hook for reading custom metadata for additional metadata requirements. 57 Should return a dictionary of values representing attribute name and value. 58 """ 59 return {} 60 61 def _determineType(self, hashref, objDict, filename): 62 """ Determine the type of the incoming object """ 63 64 result = None 65 if objDict[hashref].has_key('type') and objDict[hashref]['type']: 66 result = objDict[hashref]['type'] 67 elif objDict[hashref].has_key('Format') and objDict[hashref]['Format'] in ['text/html', 'text/htm' 'text/plain' 'text/x-rst', 'text/structured']: 31 def determineType(self, item, fn): 32 result = 'File' 33 docmimetypes = ['text/html', 'text/htm' 'text/plain' 'text/x-rst', 'text/structured'] 34 35 if item.has_key('type'): 36 result = type 37 elif item.has_key('Format') and item['Format'] in docmimetypes: 68 38 result = 'Document' 69 elif objDict[hashref].has_key('Format') and re.match('^image', objDict[hashref]['Format']):39 elif item.has_key('Format') and 'image' in item['Format']: 70 40 result = 'Image' 71 41 else: … … 75 45 mimetype = mtr.lookupExtension(filename) 76 46 77 if mimetype and mimetype.major() == 'text' and mimetype.minor() == 'html':47 if mimetype in docmimetypes: 78 48 result = 'Document' 79 elif mimetype and mimetype.major() == 'image':49 elif 'image' in mimetype: 80 50 result = 'Image' 81 else: 82 result = 'File' 83 84 return result 85 51 return result 86 52 87 53 class IMSInterchangeReader(object):
Note: See TracChangeset
for help on using the changeset viewer.
