| 1 | from xml.dom import minidom |
|---|
| 2 | from collective.imstransport.IMS_exceptions import ManifestError |
|---|
| 3 | |
|---|
# XML namespace URIs used by CCReader when looking up namespaced elements.

# Namespace of the LOM elements read by readGeneral, readLifecycle,
# readTechnical and readRights.
LOM_namespace = 'http://ltsc.ieee.org/xsd/LOM'
# Namespace of the <lom> element that readPackageMetadata looks for inside
# the manifest's <metadata> section.
LOM_IMSCC_namespace = 'http://ltsc.ieee.org/xsd/imscc/LOM'
# Namespace of the <webLink> element that getLinkInfo looks for.
WL_namespace = 'http://www.imsglobal.org/xsd/imswl_v1p0'
|---|
| 7 | |
|---|
class CCReader(object):
    """ Reads IMS Common Cartridge manifests, LOM metadata and web links.

    The methods work on ``xml.dom.minidom`` nodes (``parseManifest`` and
    ``getLinkInfo`` take XML strings).  A missing required title and a
    malformed vCard are reported by raising ``ManifestError``.
    """

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _children(self, node, tag, namespace=None):
        """ Return the descendants of node called tag.

        With a namespace the lookup is namespace-qualified, otherwise it
        matches on the plain tag name.
        """
        if namespace is None:
            return node.getElementsByTagName(tag)
        return node.getElementsByTagNameNS(namespace, tag)

    def _readLangString(self, parent, tag, namespace=None):
        """ Return the text of the first <string> below the first <tag>.

        Returns None when either element is missing or the <string> has no
        text child.
        """
        nodes = self._children(parent, tag, namespace)
        if not nodes:
            return None
        string_nodes = self._children(nodes[0], 'string', namespace)
        if not string_nodes:
            return None
        return self.getTextValue(string_nodes[0])

    def _readKeywords(self, parent, namespace=None):
        """ Return the non-empty <string> texts of every <keyword> in parent.

        Every <keyword> element is read, not only the first one, so that
        metadata carrying one keyword per element is not truncated.
        """
        keywords = []
        for kw_node in self._children(parent, 'keyword', namespace):
            for string_node in self._children(kw_node, 'string', namespace):
                kw = self.getTextValue(string_node)
                if kw:
                    keywords.append(kw)
        return keywords

    # ------------------------------------------------------------------
    # Manifest structure
    # ------------------------------------------------------------------

    def parseManifest(self, manifest):
        """ Parse the manifest XML string and return the minidom document.

        Parse errors raised by minidom propagate to the caller.
        """
        # NOTE(review): minidom does not protect against maliciously
        # constructed XML; confirm that packages come from a trusted source.
        return minidom.parseString(manifest)

    def readPackageMetadata(self, manifest, pmd):
        """ Read the package metadata into the pmd dictionary.

        Looks at the general section of the first IMS CC <lom> element in
        the first <metadata> element and sets 'title', 'language',
        'description' and 'subject' (list of keywords) for the values that
        are present and non-empty.  pmd is left untouched otherwise.
        """
        metadata = manifest.getElementsByTagName('metadata')
        if not metadata:
            return
        lomcc = metadata[0].getElementsByTagNameNS(LOM_IMSCC_namespace, 'lom')
        if not lomcc:
            return
        gen_nodes = lomcc[0].getElementsByTagName('general')
        if not gen_nodes:
            return
        general = gen_nodes[0]

        title = self._readLangString(general, 'title')
        if title:
            pmd['title'] = title

        # language holds its text directly, without a <string> wrapper.
        language_nodes = general.getElementsByTagName('language')
        if language_nodes:
            lang = self.getTextValue(language_nodes[0])
            if lang:
                pmd['language'] = lang

        description = self._readLangString(general, 'description')
        if description:
            pmd['description'] = description

        keywords = self._readKeywords(general)
        if keywords:
            pmd['subject'] = keywords

    def readOrganizations(self, manifest):
        """ Read the organizations section of the manifest.

        Returns a dictionary mapping each item's 'identifierref' attribute
        to a (position, title) tuple.  Positions start at 1 and follow the
        order of the <item> elements in the first <organization>; title is
        None when the item has no <title>.
        """
        orgs = {}
        organizations = manifest.getElementsByTagName('organizations')
        if not organizations:
            return orgs
        organization_nodes = organizations[0].getElementsByTagName('organization')
        if not organization_nodes:
            return orgs

        item_nodes = organization_nodes[0].getElementsByTagName('item')
        for itemnum, item in enumerate(item_nodes, 1):
            idref = item.getAttribute('identifierref')
            titlenodes = item.getElementsByTagName('title')
            title = self.getTextValue(titlenodes[0]) if titlenodes else None
            orgs[idref] = (itemnum, title)
        return orgs

    def readResources(self, manifest):
        """ Return the <resource> nodes of the first <resources> element.

        Returns an empty list when the manifest has no <resources> element.
        """
        resources = manifest.getElementsByTagName('resources')
        if not resources:
            return []
        return resources[0].getElementsByTagName('resource')

    def getTextValue(self, node):
        """ Return the stripped text of the first text child of node.

        Returns None when node has no text child.
        """
        for child in node.childNodes:
            if child.nodeType == child.TEXT_NODE:
                return child.nodeValue.strip()
        return None

    def readResourceAttributes(self, resource):
        """ Return the (identifier, type, href) attributes of a resource. """
        return (resource.getAttribute('identifier'),
                resource.getAttribute('type'),
                resource.getAttribute('href'))

    def readMetadata(self, metadata):
        """ Read the LOM sections of a metadata node into a new dictionary.

        Runs the general, lifecycle, meta-metadata, technical and rights
        readers in that order and returns the dictionary they filled.
        """
        md = {}
        self.readGeneral(metadata, md)
        self.readLifecycle(metadata, md)
        self.readMetaMetadata(metadata, md)
        self.readTechnical(metadata, md)
        self.readRights(metadata, md)
        return md

    def readFiles(self, resource):
        """ Return the 'href' attribute of every <file> below resource. """
        return [fln.getAttribute('href')
                for fln in resource.getElementsByTagName('file')]

    # ------------------------------------------------------------------
    # LOM sections
    # ------------------------------------------------------------------

    def readGeneral(self, metadata, md):
        """ Read the LOM general node into md.

        Sets 'title', 'language', 'description' and 'subject' (list of
        keywords) when present.  Nothing is read when there is no general
        node.  Raises ManifestError when a general node exists but md ends
        up without a title.
        """
        gen_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'general')
        if not gen_nodes:
            return
        gen_node = gen_nodes[0]

        title = self._readLangString(gen_node, 'title', LOM_namespace)
        if title:
            md['title'] = title

        if 'title' not in md:
            raise ManifestError('Required tag "title" missing in lom/general metadata section for resource.')

        # The remaining lookups are scoped to the general node so that
        # same-named elements of other sections (for example the rights
        # description) are never mistaken for general metadata.
        language_nodes = gen_node.getElementsByTagNameNS(LOM_namespace, 'language')
        if language_nodes:
            lang = self.getTextValue(language_nodes[0])
            if lang:
                md['language'] = lang

        description = self._readLangString(gen_node, 'description', LOM_namespace)
        if description:
            md['description'] = description

        keywords = self._readKeywords(gen_node, LOM_namespace)
        if keywords:
            md['subject'] = keywords

    def readLifecycle(self, metadata, md):
        """ Read the LOM lifeCycle node into md.

        For every contribute node the role value, the vCard names of its
        entities and its date are read.  A role of 'author' sets 'creators'
        (and 'creation_date' when a date is given); a role of 'unknown'
        sets 'contributors'.  Role comparison is case-insensitive.
        ManifestError from a malformed vCard propagates.
        """
        lc_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'lifeCycle')
        if not lc_nodes:
            return

        contribute_nodes = lc_nodes[0].getElementsByTagNameNS(LOM_namespace,
                                                              'contribute')
        for contribute_node in contribute_nodes:
            role = ''
            role_nodes = contribute_node.getElementsByTagNameNS(LOM_namespace,
                                                                'role')
            if role_nodes:
                # An empty <string/> yields None; treat it as "no role".
                role = self._readLangString(role_nodes[0], 'value',
                                            LOM_namespace) or ''
            role = role.lower()

            # Every entity is parsed (and validated) even when the role is
            # not one we store; entities without a full name are skipped.
            names = []
            entity_nodes = contribute_node.getElementsByTagNameNS(LOM_namespace,
                                                                  'entity')
            for entity_node in entity_nodes:
                name, _email = self.getVcardValues(entity_node)
                if name:
                    names.append(name)

            date_value = ''
            date_nodes = contribute_node.getElementsByTagNameNS(LOM_namespace,
                                                                'date')
            if date_nodes:
                datetime_nodes = date_nodes[0].getElementsByTagNameNS(
                    LOM_namespace, 'dateTime')
                if datetime_nodes:
                    date_value = self.getTextValue(datetime_nodes[0])

            # Creator
            if role == 'author' and names:
                md['creators'] = names
                if date_value:
                    md['creation_date'] = date_value

            # Contributors
            if role == 'unknown' and names:
                md['contributors'] = names

    def readMetaMetadata(self, metadata, md):
        """ Read the LOM meta-metadata node (nothing is read at present). """
        pass

    def readTechnical(self, metadata, md):
        """ Read the LOM technical node and set md['Format'] when given. """
        tec_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'technical')
        if not tec_nodes:
            return

        format_nodes = tec_nodes[0].getElementsByTagNameNS(LOM_namespace, 'format')
        if format_nodes:
            format_value = self.getTextValue(format_nodes[0])
            if format_value:
                md['Format'] = format_value

    def readRights(self, metadata, md):
        """ Read the LOM rights node and set md['rights'] when described. """
        rights_nodes = metadata.getElementsByTagNameNS(LOM_namespace, 'rights')
        if not rights_nodes:
            return

        description = self._readLangString(rights_nodes[0], 'description',
                                           LOM_namespace)
        if description:
            md['rights'] = description

    # ------------------------------------------------------------------
    # Web links and vCards
    # ------------------------------------------------------------------

    def getLinkInfo(self, linkfile):
        """ Parse a web link XML string and return its (title, link).

        Both values default to '' when the <webLink> element, its <title>
        or its <url> is missing.  The link is the url's 'href' attribute.
        """
        doc = minidom.parseString(linkfile)
        title = ''
        link = ''
        link_nodes = doc.getElementsByTagNameNS(WL_namespace, 'webLink')
        if link_nodes:
            title_nodes = link_nodes[0].getElementsByTagName('title')
            url_nodes = link_nodes[0].getElementsByTagName('url')
            if title_nodes:
                title = self.getTextValue(title_nodes[0])
            if url_nodes:
                link = url_nodes[0].getAttribute('href')

        return title, link

    def getVcardValues(self, node):
        """ Return the (full name, email) found in the vCard text of node.

        The first line must be BEGIN:VCARD and the last END:VCARD,
        otherwise ManifestError is raised.  A node without text is treated
        as an empty vCard and therefore also raises ManifestError.  Name
        and email are '' when the FN / EMAIL;INTERNET fields are absent.
        """
        text = self.getTextValue(node) or ''
        textlines = text.strip().split('\n')

        if self.getVcardValue('BEGIN', [textlines[0]]).upper() != 'VCARD':
            raise ManifestError('Missing VCARD BEGIN tag')

        if self.getVcardValue('END', [textlines[-1]]).upper() != 'VCARD':
            raise ManifestError('Missing VCARD END tag')

        name = self.getVcardValue('FN', textlines)
        email = self.getVcardValue('EMAIL;INTERNET', textlines)

        return name, email

    def getVcardValue(self, field, text):
        """ Return the value of the first vCard line whose tag is field.

        text is a sequence of lines.  The tag is everything before the
        first colon (compared upper-cased); the value is everything after
        it, so values may themselves contain colons.  Lines without a
        colon are ignored.  Returns '' when no line matches.
        """
        for textline in text:
            tag, colon, value = textline.strip().partition(':')
            if colon and field == tag.upper().strip():
                return value.strip()
        return ''
|---|
| 303 | |
|---|
| 304 | |
|---|