| 1 | from xml.dom import minidom |
|---|
| 2 | from collective.imstransport.IMS_exceptions import ManifestError |
|---|
| 3 | from configbb import LOM_BB_namespace, XML_namespace |
|---|
| 4 | |
|---|
class BBReader(object):
    """ Read IMS manifest and content data XML using xml.dom.minidom.

    Every ``manifest``/``content``/``resource`` argument taken by the
    ``read*`` methods is a minidom document or element, such as the value
    returned by ``parseManifest`` / ``parseDataFile``.
    NOTE(review): presumably targets Blackboard exports (the class name and
    the LOM_BB_namespace constant suggest so) -- confirm.
    """

    def parseManifest(self, manifest):
        """ Parse the manifest XML string and return the minidom document. """
        return self.parseDataFile(manifest)

    def parseDataFile(self, dataxml):
        """ Parse an XML string and return the minidom document.

        Any parse error raised by minidom propagates to the caller unchanged.
        """
        return minidom.parseString(dataxml)

    def _firstByTag(self, node, tagname):
        """ Return the first descendant of node named tagname, or None. """
        matches = node.getElementsByTagName(tagname)
        if matches:
            return matches[0]
        return None

    def _organizationItems(self, manifest):
        """ Return all 'item' elements below the first 'organization' of the
            first 'organizations' element, in document order.

            An empty list is returned when either element is missing.
        """
        organizations = self._firstByTag(manifest, 'organizations')
        if organizations is None:
            return []
        organization = self._firstByTag(organizations, 'organization')
        if organization is None:
            return []
        # getElementsByTagName searches all descendants, so nested items
        # are included, not only the top-level ones.
        return organization.getElementsByTagName('item')

    def readOrganizations(self, manifest):
        """ Read the organizations for the manifest.

        Returns a dict mapping each item's 'identifierref' attribute to the
        text of the first 'title' element found beneath that item. Items
        without any 'title' descendant are left out.
        """
        orgs = {}
        for item in self._organizationItems(manifest):
            titlenodes = item.getElementsByTagName('title')
            if titlenodes:
                idref = item.getAttribute('identifierref')
                orgs[idref] = self.getTextValue(titlenodes[0])
        return orgs

    def readResources(self, manifest):
        """ Read all resources.

        Returns the 'resource' elements found beneath the first 'resources'
        element, or an empty list when there is no 'resources' element.
        """
        resources = manifest.getElementsByTagName('resources')
        if not resources:
            return []
        return resources[0].getElementsByTagName('resource')

    def getTextValue(self, node):
        """ Return the stripped value of the first text child of node.

        Only direct children of type TEXT_NODE are considered; None is
        returned when node has no such child.
        """
        for child in node.childNodes:
            if child.nodeType == child.TEXT_NODE:
                return child.nodeValue.strip()
        return None

    def readResourceAttributes(self, resource):
        """ Return attributes on resource node.

        The result is a 5-tuple: the plain 'identifier' and 'type'
        attributes, the 'file' and 'title' attributes in LOM_BB_namespace,
        and the 'base' attribute in XML_namespace.
        """
        return (resource.getAttribute('identifier'),
                resource.getAttribute('type'),
                resource.getAttributeNS(LOM_BB_namespace, 'file'),
                resource.getAttributeNS(LOM_BB_namespace, 'title'),
                resource.getAttributeNS(XML_namespace, 'base'))

    def readFiles(self, resource):
        """ Return the 'href' attribute of every 'file' element below
            resource, in document order (empty list when there are none).
        """
        return [node.getAttribute('href')
                for node in resource.getElementsByTagName('file')]

    def readMetadata(self, content):
        """ Read metadata from data files.

        Returns a new dict filled in by readContentMetadata.
        """
        md = {}
        self.readContentMetadata(content, md)
        return md

    def readContentMetadata(self, metadata, md):
        """ Read the metadata from a content file into the dict md.

        Looks at the first 'CONTENT' element only and sets, when the
        corresponding elements are present:
          md['title']         -- 'value' attribute of TITLE
          md['text']          -- text of BODY/TEXT (via getTextValue)
          md['creation_date'] -- 'value' attribute of DATES/CREATED
          md['type']          -- 'Folder' when FLAGS/ISFOLDER value is 'true'
        md is modified in place; nothing is returned.
        """
        content_node = self._firstByTag(metadata, 'CONTENT')
        if content_node is None:
            return

        title_node = self._firstByTag(content_node, 'TITLE')
        if title_node is not None:
            md['title'] = title_node.getAttribute('value')

        body_node = self._firstByTag(content_node, 'BODY')
        if body_node is not None:
            text_node = self._firstByTag(body_node, 'TEXT')
            if text_node is not None:
                md['text'] = self.getTextValue(text_node)

        dates_node = self._firstByTag(content_node, 'DATES')
        if dates_node is not None:
            created_node = self._firstByTag(dates_node, 'CREATED')
            if created_node is not None:
                md['creation_date'] = created_node.getAttribute('value')

        flags_node = self._firstByTag(content_node, 'FLAGS')
        if flags_node is not None:
            isfolder_node = self._firstByTag(flags_node, 'ISFOLDER')
            if isfolder_node is not None:
                if isfolder_node.getAttribute('value') == 'true':
                    md['type'] = 'Folder'

    def readTocItem(self, manifest, resid):
        """ Read the toc page and find child nodes.

        For every organization item whose 'identifierref' equals resid,
        collect the 'identifierref' of each 'item' element beneath it.
        Returns the collected list (empty when nothing matches).
        """
        tocitems = []
        for item in self._organizationItems(manifest):
            if item.getAttribute('identifierref') != resid:
                continue
            # Descendant search: grandchildren are collected as well.
            for child in item.getElementsByTagName('item'):
                tocitems.append(child.getAttribute('identifierref'))
        return tocitems
|---|
| 114 | |
|---|
| 115 | |
|---|
| 116 | |
|---|
| 117 | |
|---|
| 118 | |
|---|
| 119 | |
|---|
| 120 | |
|---|
| 121 | |
|---|
| 122 | |
|---|
| 123 | |
|---|
| 124 | |
|---|
| 125 | |
|---|
| 126 | |
|---|