| 1 | import os |
|---|
| 2 | import re |
|---|
| 3 | import string |
|---|
| 4 | from BeautifulSoup import BeautifulSoup |
|---|
| 5 | from Globals import BobobaseName |
|---|
| 6 | from os import makedirs as os_makedirs |
|---|
| 7 | from os.path import join as os_join |
|---|
| 8 | from os.path import lexists as os_lexists |
|---|
| 9 | from os.path import split as os_split |
|---|
| 10 | |
|---|
| 11 | from urllib2 import urlopen |
|---|
| 12 | from urllib2 import HTTPError |
|---|
| 13 | |
|---|
| 14 | from OFS.SimpleItem import SimpleItem |
|---|
| 15 | from zope.interface import implements |
|---|
| 16 | from interfaces import IStaticSiteUtility |
|---|
| 17 | from zope.component import getUtility, getMultiAdapter |
|---|
| 18 | |
|---|
| 19 | class StaticSiteUtility(SimpleItem): |
|---|
| 20 | """ Deploy a static site """ |
|---|
| 21 | |
|---|
| 22 | implements(IStaticSiteUtility) |
|---|
| 23 | |
|---|
def deploySite(self, context):
    """ Deploy the site rooted at ``context`` as static files.

    Reads ``domain``, ``deployment_path`` and ``published_states`` from
    the portal's ``staticsite_properties`` sheet, deploys the site
    structure and the site actions, then deploys every catalogued object
    directly below ``context`` whose review state is one of the published
    states.  Folderish objects are descended into with ``traverse``.

    context -- object whose physical path is the root of the catalog query
    """
    ssprops = context.portal_url.portal_properties.staticsite_properties
    domain = ssprops.getProperty('domain')
    dpath = self._getDeploymentPath(ssprops.getProperty('deployment_path'))
    pstates = ssprops.getProperty('published_states')

    self._deploySiteStructure(context, ssprops, domain, dpath)
    self._deploySiteActions(context, dpath)

    root_path = '/'.join(context.getPhysicalPath())
    brains = context.portal_catalog.searchResults(
        path={'query': root_path, 'depth': 1},
        review_state=pstates)
    for brain in brains:
        self.deployObject(brain.getURL(), brain, dpath, domain)
        # Same guard traverse() applies to deeper levels: only containers
        # are searched for children, which saves a catalog query per leaf.
        if brain.is_folderish:
            self.traverse(brain, dpath, domain, pstates)
|---|
| 43 | |
|---|
def traverse(self, brain, dpath, domain, pstates):
    """ Deploy everything published below ``brain``, depth first.

    Queries the catalog for objects exactly one level beneath the brain's
    path whose review state is in ``pstates``, deploys each of them and
    recurses into the ones flagged ``is_folderish``.
    """
    path_query = {'query': brain.getPath(), 'depth': 1}
    children = brain.portal_catalog.searchResults(path=path_query,
                                                  review_state=pstates)
    for child in children:
        self.deployObject(child.getURL(), child, dpath, domain)
        if not child.is_folderish:
            continue
        self.traverse(child, dpath, domain, pstates)
|---|
| 54 | |
|---|
def deployObject(self, url, brain, dpath, domain):
    """ Deploy a single catalogued object beneath ``dpath``.

    * Folderish objects get a directory, and their rendered view is
      stored as ``index.html`` inside it.
    * Objects whose ``Type`` is ``Page`` are stored as an HTML file;
      ``.html`` is appended unless the path already contains ``.htm``.
    * Anything else is fetched and written unmodified, and recorded in
      ``self.linkTable`` under its URL.

    The object's RDF is deployed afterwards in every case.

    url -- absolute URL the object is fetched from
    brain -- catalog brain describing the object
    dpath -- root directory of the deployment
    domain -- accepted for interface compatibility; not used here
    """
    portal_url = brain.portal_url()
    path = self._getObjPath(url, portal_url, dpath)
    if brain.is_folderish:
        # create dir before processing html
        self._createDirectory(path)
        path = '%s/index.html' % path
        self._deployHtmlView(url, brain, portal_url, dpath, path)
    elif brain.Type in ['Page']:
        if '.htm' not in path:
            path += '.html'
        self._deployHtmlView(url, brain, portal_url, dpath, path)
    else:
        # get file object
        # linkTable is not initialised anywhere else in this class, so
        # create it on first use instead of failing with AttributeError.
        try:
            link_table = self.linkTable
        except AttributeError:
            link_table = self.linkTable = {}
        link_table[url] = {'dpath': path, 'type': 'file'}
        # The download is written exactly as received.  It is no longer
        # parsed as HTML and filtered first: that result was discarded,
        # and filtering a non-HTML payload could only fail.
        raw = self._httpget(url)
        self._writeFile(path, raw)
    # process rdf for the object
    self._deployRDF(brain, url, path)

def _deployHtmlView(self, url, brain, portal_url, dpath, path):
    """ Fetch ``url``, deploy its document actions, filter it, write it to ``path``. """
    raw = self._httpget(url)
    soup = BeautifulSoup(raw)
    # process document actions
    self._deployDocumentActions(brain, portal_url, path, soup)
    self.runFilters(brain, soup, portal_url, dpath)
    self._writeFile(path, soup.prettify())
|---|
| 90 | |
|---|
def _deployRDF(self, brain, url, path):
    """ Fetch ``<url>/rdf`` and store it as ``<path>-rdf``.

    Nothing is fetched or written unless ``brain.Type`` is one of Page,
    Course, Division, Folder, File or Image.
    """
    rdf_types = ('Page', 'Course', 'Division', 'Folder', 'File', 'Image')
    if brain.Type not in rdf_types:
        return
    source = '%s/rdf' % url
    target = '%s-rdf' % path
    self._writeFile(target, self._httpget(source))
|---|
| 98 | |
|---|
def runFilters(self, brain, soup, portal_url, dpath):
    """ Filter the parsed document in place.

    Removes the document's ``<base>`` tag when it has one, then hands the
    soup to ``_rewriteHREF``.

    brain -- catalog brain of the object the document was rendered from
    soup -- parsed document; modified in place
    portal_url -- accepted for interface compatibility; not used here
    dpath -- root directory of the deployment
    """
    # Not every fetched document carries a <base> tag; soup.base is None
    # in that case and must not be dereferenced.
    base_tag = soup.base
    if base_tag is not None:
        base_tag.extract()
    self._rewriteHREF(brain, soup, dpath)
|---|
| 105 | |
|---|
def _deploySiteStructure(self, context, ssprops, domain, dpath):
    """ Get the base framework and resources needed to build the chrome locally.

    In order:

    1. downloads every entry of ``ssprops.base_files``;
    2. downloads every entry of ``ssprops.extra_views``, stored with
       ``.html`` appended;
    3. parses the portal front page and, for each ``<style>`` tag in its
       head that references a URL in parentheses, downloads that
       stylesheet and then each entry of ``ssprops.css_images`` into the
       stylesheet's directory (images answering with an HTTP error are
       skipped);
    4. downloads every ``<script src=...>`` found in the head.

    context -- object providing ``portal_url``
    ssprops -- the staticsite property sheet
    domain -- accepted for interface compatibility; not used here
    dpath -- root directory of the deployment
    """
    base_files = ssprops.base_files
    css_images = ssprops.css_images
    extra_views = ssprops.extra_views

    portal_url = context.portal_url()

    for name in base_files:
        # Download files/images that are used sitewide in the chrome
        url = '%s/%s' % (portal_url, name)
        path = self._getObjPath(url, portal_url, dpath)
        self._writeFile(path, self._httpget(url))

    for name in extra_views:
        # Download zope3 views and/or additional views that are not
        # included in the site catalog search
        url = '%s/%s' % (portal_url, name)
        path = self._getObjPath(url, portal_url, dpath) + '.html'
        self._writeFile(path, self._httpget(url))

    # get soup of front page
    soup = BeautifulSoup(self._httpget(portal_url))

    # Stylesheets first, scripts second.
    for tag in soup.head.findAll('style'):
        # analyze content and extract url to CSS
        if not tag.contents:
            # empty <style> element: nothing to analyse
            continue
        match = re.match(r"""[^(]*\((.*)\)[^)]*$""", tag.contents[0], re.X)
        if match is None:
            # inline stylesheet without a parenthesised URL: nothing to fetch
            continue
        path = self._deployCompressedResources(match.group(1),
                                               portal_url, dpath)
        css_path = os_split(path)[0]
        for img in css_images:
            # download css_images next to the stylesheet
            url = '%s/%s' % (portal_url, img)
            path = '%s/%s' % (css_path, img)
            try:
                raw = self._httpget(url)
            except HTTPError:
                # best effort: an image the portal cannot serve is skipped
                continue
            self._writeFile(path, raw)

    for tag in soup.head.findAll('script'):
        # inline scripts have no src and are left alone
        if tag.has_key('src'):
            self._deployCompressedResources(tag['src'], portal_url, dpath)
|---|
| 158 | |
|---|
| 159 | |
|---|
def _deploySiteActions(self, context, dpath):
    """ Deploy the pages behind the portal's ``site_actions``.

    Every site action whose ``visible`` equals True, other than
    ``plone_setup`` and ``deploy``, is fetched from
    ``<portal_url>/<last path segment of its url_expr>`` and written
    beneath ``dpath`` with ``.html`` appended.
    """
    skipped = ['plone_setup', 'deploy']
    for action in context.portal_actions.site_actions.listActions():
        # equality with True (not plain truthiness) is what decides visibility
        if action.id in skipped or not action.visible == True:
            continue
        portal_url = context.portal_url()
        last_segment = action.url_expr.split('/')[-1]
        url = '%s/%s' % (portal_url, last_segment)
        target = self._getObjPath(url, portal_url, dpath) + '.html'
        self._writeFile(target, self._httpget(url))
|---|
| 173 | |
|---|
def _deployDocumentActions(self, brain, portal_url, path, soup):
    """ Deploys the document actions for the site.

    For each of the document actions ``skinless``, ``s5``, ``rss`` and
    ``rss_front`` that has a link in ``soup``, the link target is fetched
    and written next to the document:

    * ``<path>-<action id>.html`` for ``skinless`` and ``s5``
    * ``<path>-<action id>`` for ``rss`` and ``rss_front``

    brain -- catalog brain providing ``portal_actions``
    portal_url -- portal root URL; prefixed to links that do not contain it
    path -- file the document itself is deployed to
    soup -- parsed document the action links are looked up in
    """
    # suffix appended after '<path>-<action id>'
    suffixes = {'skinless': '.html', 's5': '.html',
                'rss': '', 'rss_front': ''}
    for doc_action in brain.portal_actions.document_actions.listActions():
        action_id = doc_action.id
        if action_id not in suffixes:
            continue
        action_url = self._findDocActionUrl(soup, action_id)
        if not action_url:
            continue
        if portal_url not in action_url:
            action_url = '%s/%s' % (portal_url, action_url)
        # Always derive the target from the document's own path.  Rebinding
        # `path` here would pile one action's suffix onto the next.
        action_path = '%s-%s%s' % (path, action_id, suffixes[action_id])
        self._writeFile(action_path, self._httpget(action_url))
|---|
| 196 | |
|---|
| 197 | |
|---|
| 198 | def _findDocActionUrl(self, soup, id): |
|---|
| 199 | """ Parse document soup to find a document action """ |
|---|
| 200 | docdivs = soup.body.findAll('div') |
|---|
| 201 | action_url = None |
|---|
| 202 | for x in docdivs: |
|---|
| 203 | if x.has_key('class') and x['class'] == 'documentActions': |
|---|
| 204 | action = x.find('li', id='document-action-%s' %id) |
|---|
| 205 | if action: |
|---|
| 206 | anchor = action.find('a') |
|---|
| 207 | if anchor and anchor.has_key('href'): |
|---|
| 208 | action_url = anchor['href'] |
|---|
| 209 | break |
|---|
| 210 | return action_url |
|---|
| 211 | |
|---|
def _getURLPath(self, brain, url):
    """ Retrieve the URL path on the filesystem.

    Unfinished: the link is made relative to the parent of the brain's
    URL, its fragment is dropped and the catalog is queried for it, but
    every branch of the result classification is still a no-op and
    nothing is returned.

    brain -- catalog brain of the document containing the link
    url -- the link target (an ``href`` value)
    """
    parent_url = '/'.join(brain.getURL().split('/')[:-1])
    rel_path = self._getRelativePath(parent_url, url)
    rel_path = rel_path.split('#')[0]
    urlbrains = brain.portal_catalog.searchResults(
        query={'path': rel_path},
        id=rel_path.split('/')[0])
    for target in urlbrains:
        # Classify the link target itself.  The previous code referenced
        # an undefined name here, which raised NameError on the first hit.
        if target.is_folderish:
            pass
        elif target.Type == 'Page':
            pass
        else:
            pass
|---|
| 225 | |
|---|
def _deployCompressedResources(self, url, portal_url, dpath):
    """ Download the resource at ``url`` into the deployment tree.

    The local path is derived from the URL, with every ``%20`` replaced
    by an underscore.  Returns the path the resource was written to.
    """
    target = self._getObjPath(url, portal_url, dpath).replace('%20', '_')
    self._writeFile(target, self._httpget(url))
    return target
|---|
| 233 | |
|---|
def _getDeploymentPath(self, sp):
    """ Return ``sp`` joined onto the directory part of ``BobobaseName``. """
    base_dir, _basename = os_split(BobobaseName)
    return os_join(base_dir, sp)
|---|
| 238 | |
|---|
| 239 | def _getObjPath(self, url, portal_url, dpath): |
|---|
| 240 | """ Get the object path based on the deployment path. """ |
|---|
| 241 | objpath = url.replace(portal_url + '/', '') |
|---|
| 242 | path = dpath |
|---|
| 243 | for x in objpath.split('/'): |
|---|
| 244 | path = os_join(path, x) |
|---|
| 245 | return path |
|---|
| 246 | |
|---|
| 247 | def _createDirectory(self, path): |
|---|
| 248 | """ Create a directory on the filesystem """ |
|---|
| 249 | if not os_lexists(path): |
|---|
| 250 | os_makedirs(path) |
|---|
| 251 | |
|---|
def _httpget(self, url):
    """ Get html for the url.

    Opens ``url`` with ``urlopen`` and returns everything read from the
    response.  Errors raised by ``urlopen`` or by reading propagate to the
    caller.
    """
    f = urlopen(url)
    # close the response even when read() fails part-way through
    try:
        return f.read()
    finally:
        f.close()
|---|
| 258 | |
|---|
def _writeFile(self, fn, data):
    """ Write ``data`` to the file ``fn``, replacing any existing file.

    The directory part of ``fn`` is created first when it is missing.

    fn -- path of the file to write
    data -- content to write
    """
    directory = os_split(fn)[0]
    # a bare file name has no directory part to create
    if directory:
        self._createDirectory(directory)
    # Binary mode: downloaded images and files must reach the disk without
    # newline translation.
    f = open(fn, 'wb')
    try:
        f.write(data)
    finally:
        f.close()
|---|
| 264 | |
|---|
| 265 | def _getRelativePath(self, path1, path2): |
|---|
| 266 | common = os.path.commonprefix ([path1, path2]) |
|---|
| 267 | p1 = path1[len(common):] |
|---|
| 268 | p2 = path2[len(common):] |
|---|
| 269 | if os.path.isfile (path1): |
|---|
| 270 | p1 = os.path.dirname (p1) |
|---|
| 271 | dirs = string.split (p1, os.sep) |
|---|
| 272 | dotdot = map (lambda x: '..', dirs) |
|---|
| 273 | dotpath = string.join (dotdot, os.sep) |
|---|
| 274 | rel_path = os.path.join (dotpath, p2) |
|---|
| 275 | if rel_path.startswith(os.sep): |
|---|
| 276 | rel_path = '.%s' % rel_path |
|---|
| 277 | return rel_path |
|---|
| 278 | |
|---|
def _rewriteHREF(self, brain, soup, dpath):
    """ Pass the ``href`` of every anchor in ``soup`` to ``_getURLPath``.

    Anchors without an ``href`` are skipped.  Returns ``soup``.
    """
    for anchor in soup.findAll('a'):
        if not anchor.has_key('href'):
            continue
        self._getURLPath(brain, anchor['href'])
    return soup
|---|
| 285 | |
|---|
| 286 | |
|---|