| 1 | from xml.dom import minidom |
|---|
| 2 | from collective.imstransport.IMS_exceptions import ManifestError |
|---|
| 3 | from configcc import LOM_namespace, LOM_IMSCC_namespace, WL_namespace |
|---|
| 4 | |
|---|
class CCReader(object):
    """ Reads IMS Common Cartridge manifests and their LOM metadata.

    All methods operate on xml.dom.minidom nodes.  The read* methods that
    take a dictionary (pmd / md) fill it in place and only set a key when
    a non-empty value was found in the XML.
    """

    def parseManifest(self, manifest):
        """ Parse the manifest XML string and return the minidom document. """
        return minidom.parseString(manifest)

    def readPackageMetadata(self, manifest, pmd):
        """ Read the package metadata.

        Looks at the first 'metadata' element of the manifest, then at the
        'general' section of its IMS CC 'lom' element, and stores 'title',
        'language', 'description' and 'subject' (list of keywords) in pmd.
        Nothing is stored when any of those enclosing elements is missing.
        """
        metadata = manifest.getElementsByTagName('metadata')
        if not metadata:
            return
        lomcc = metadata[0].getElementsByTagNameNS(LOM_IMSCC_namespace, 'lom')
        if not lomcc:
            return
        gen_nodes = lomcc[0].getElementsByTagName('general')
        if not gen_nodes:
            return
        general = gen_nodes[0]

        title = self._langString(general, 'title')
        if title:
            pmd['title'] = title

        language_nodes = general.getElementsByTagName('language')
        if language_nodes:
            lang = self.getTextValue(language_nodes[0])
            if lang:
                pmd['language'] = lang

        description = self._langString(general, 'description')
        if description:
            pmd['description'] = description

        keywords = self._keywords(general)
        if keywords:
            pmd['subject'] = keywords

    def readOrganizations(self, manifest):
        """ Read the organizations section of the manifest.

        Returns a dictionary mapping each item's 'identifierref' attribute
        to a tuple (position, title).  Positions start at 1 and follow the
        order of the items in the first organization; title is None when
        no 'title' element is found under the item.
        """
        orgs = {}
        organizations = manifest.getElementsByTagName('organizations')
        if not organizations:
            return orgs
        organization_nodes = organizations[0].getElementsByTagName('organization')
        if not organization_nodes:
            return orgs
        item_nodes = organization_nodes[0].getElementsByTagName('item')
        for itemnum, item in enumerate(item_nodes, 1):
            idref = item.getAttribute('identifierref')
            titlenodes = item.getElementsByTagName('title')
            title = None
            if titlenodes:
                title = self.getTextValue(titlenodes[0])
            orgs[idref] = (itemnum, title)
        return orgs

    def readResources(self, manifest):
        """ Read all resources.

        Returns the 'resource' elements found under the first 'resources'
        element, or an empty list when the manifest has none.
        """
        resources = manifest.getElementsByTagName('resources')
        if not resources:
            return []
        return resources[0].getElementsByTagName('resource')

    def getTextValue(self, node):
        """ Return the stripped value of the first text child of node.

        Returns None when node has no direct text child.
        """
        for child in node.childNodes:
            if child.nodeType == child.TEXT_NODE:
                return child.nodeValue.strip()
        return None

    def readResourceAttributes(self, resource):
        """ Return the (identifier, type, href) attributes of a resource node.

        minidom returns an empty string for an attribute that is absent.
        """
        return (resource.getAttribute('identifier'),
                resource.getAttribute('type'),
                resource.getAttribute('href'))

    def readMetadata(self, metadata):
        """ Read the LOM sections of a metadata node into a new dictionary.

        Raises ManifestError when a section reader rejects the metadata
        (missing title, malformed VCARD).
        """
        md = {}
        self.readGeneral(metadata, md)
        self.readLifecycle(metadata, md)
        self.readMetaMetadata(metadata, md)
        self.readTechnical(metadata, md)
        self.readRights(metadata, md)
        return md

    def readFiles(self, resource):
        """ Return the 'href' attribute of every 'file' element of resource. """
        return [fln.getAttribute('href')
                for fln in resource.getElementsByTagName('file')]

    def readGeneral(self, metadata, md):
        """ Read general node.

        Stores 'title', 'language', 'description' and 'subject' in md.
        Raises ManifestError when the general section has no title.
        """
        gen_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'general')
        if not gen_nodes:
            # NOTE(review): metadata without a general section is accepted
            # silently and leaves md untouched -- confirm this is intended.
            return
        gen_node = gen_nodes[0]

        title = self._langString(gen_node, 'title', LOM_namespace)
        if not title:
            raise ManifestError('Required tag "title" missing in lom/general metadata section for resource.')
        md['title'] = title

        # The remaining lookups are scoped to the general node so that
        # same-named elements of other sections (rights/description,
        # metaMetadata/language, ...) are never picked up by mistake.
        language_nodes = gen_node.getElementsByTagNameNS(LOM_namespace, 'language')
        if language_nodes:
            lang = self.getTextValue(language_nodes[0])
            if lang:
                md['language'] = lang

        description = self._langString(gen_node, 'description', LOM_namespace)
        if description:
            md['description'] = description

        keywords = self._keywords(gen_node, LOM_namespace)
        if keywords:
            md['subject'] = keywords

    def readLifecycle(self, metadata, md):
        """ Read Lifecycle node.

        For every 'contribute' element the role value, the entities
        (VCARDs) and the optional date are read.  Role 'author' sets
        md['creators'] (and md['creation_date'] when a date is present);
        role 'unknown' sets md['contributors'].  Raises ManifestError when
        an entity does not hold a well-formed VCARD.
        """
        lc_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'lifeCycle')
        if not lc_nodes:
            return

        contribute_nodes = lc_nodes[0].getElementsByTagNameNS(LOM_namespace, 'contribute')
        for contribute_node in contribute_nodes:
            role = ''
            role_nodes = contribute_node.getElementsByTagNameNS(LOM_namespace, 'role')
            if role_nodes:
                # An empty value element yields None; normalise it to ''
                # so the comparisons below cannot fail on None.
                role = self._langString(role_nodes[0], 'value', LOM_namespace) or ''
            role = role.lower()

            names = []
            entity_nodes = contribute_node.getElementsByTagNameNS(LOM_namespace, 'entity')
            for entity_node in entity_nodes:
                # Every VCARD is validated, even when the role is unknown.
                name, email = self.getVcardValues(entity_node)
                if role:
                    names.append(name)

            timestamp = ''
            date_nodes = contribute_node.getElementsByTagNameNS(LOM_namespace, 'date')
            if date_nodes:
                datetime_nodes = date_nodes[0].getElementsByTagNameNS(LOM_namespace, 'dateTime')
                if datetime_nodes:
                    timestamp = self.getTextValue(datetime_nodes[0])

            # Creator
            if role == 'author' and names:
                md['creators'] = names
                if timestamp:
                    md['creation_date'] = timestamp

            # Contributors
            if role == 'unknown' and names:
                md['contributors'] = names

    def readMetaMetadata(self, metadata, md):
        """ Read Meta-metadata node (currently nothing is read). """
        pass

    def readTechnical(self, metadata, md):
        """ Read Technical node and store its format in md['Format']. """
        tec_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'technical')
        if not tec_nodes:
            return
        format_nodes = tec_nodes[0].getElementsByTagNameNS(LOM_namespace, 'format')
        if format_nodes:
            mimetype = self.getTextValue(format_nodes[0])
            if mimetype:
                md['Format'] = mimetype

    def readRights(self, metadata, md):
        """ Read Rights node and store its description in md['rights']. """
        rights_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'rights')
        if not rights_nodes:
            return
        description = self._langString(rights_nodes[0], 'description', LOM_namespace)
        if description:
            md['rights'] = description

    def getLinkInfo(self, linkfile):
        """ Parse a web link XML string and return the tuple (title, link).

        Either value is an empty string when the 'webLink' element, its
        'title' or its 'url' element is missing.
        """
        doc = minidom.parseString(linkfile)
        title = ''
        link = ''
        link_nodes = doc.getElementsByTagNameNS(WL_namespace, 'webLink')
        if link_nodes:
            title_nodes = link_nodes[0].getElementsByTagName('title')
            url_nodes = link_nodes[0].getElementsByTagName('url')
            if title_nodes:
                title = self.getTextValue(title_nodes[0])
            if url_nodes:
                link = url_nodes[0].getAttribute('href')
        return title, link

    def getVcardValues(self, node):
        """
        Looks for the full name and email values in a VCARD element.

        Returns the tuple (name, email); a value is '' when its field is
        absent.  Raises ManifestError when the text of node does not start
        with BEGIN:VCARD or does not end with END:VCARD.
        """
        # A node without text is handled like an empty VCARD so that the
        # caller gets a ManifestError rather than an AttributeError.
        text = self.getTextValue(node) or ''
        textlines = text.strip().split('\n')

        if 'VCARD' != self.getVcardValue('BEGIN', textlines[:1]).upper():
            raise ManifestError('Missing VCARD BEGIN tag')

        if 'VCARD' != self.getVcardValue('END', textlines[-1:]).upper():
            raise ManifestError('Missing VCARD END tag')

        name = self.getVcardValue('FN', textlines)
        email = self.getVcardValue('EMAIL;INTERNET', textlines)
        return name, email

    def getVcardValue(self, field, text):
        """ Try to get a value for a VCARD field.

        text is a list of 'NAME:value' lines.  Returns the stripped value
        of the first line whose name equals field (compared in upper
        case), or '' when no line matches.
        """
        for textline in text:
            # Split on the first colon only: values such as URLs may
            # contain colons themselves.
            name, colon, value = textline.strip().partition(':')
            if colon and field == name.upper().strip():
                return value.strip()
        return ''

    def _elements(self, node, tagname, namespace=None):
        """ Return the tagname elements below node, namespace-aware if a namespace is given. """
        if namespace is None:
            return node.getElementsByTagName(tagname)
        return node.getElementsByTagNameNS(namespace, tagname)

    def _langString(self, node, tagname, namespace=None):
        """ Return the text of the first 'string' of the first tagname element below node, or None. """
        elements = self._elements(node, tagname, namespace)
        if not elements:
            return None
        string_nodes = self._elements(elements[0], 'string', namespace)
        if not string_nodes:
            return None
        return self.getTextValue(string_nodes[0])

    def _keywords(self, gen_node, namespace=None):
        """ Return the non-empty 'string' texts of every 'keyword' element below gen_node. """
        keywords = []
        for kw_node in self._elements(gen_node, 'keyword', namespace):
            for string_node in self._elements(kw_node, 'string', namespace):
                kw = self.getTextValue(string_node)
                if kw:
                    keywords.append(kw)
        return keywords
| 286 | |
|---|
| 287 | |
|---|