I also had this problem. The HTML produced by the Google Docs HTML export is really ugly, so this was my solution:
/**
 * Takes in a Google Doc ID, gets that doc in HTML format, cleans up the
 * markup, and returns the resulting HTML string.
 *
 * The cleaned HTML is stored in the script cache for 15 minutes, keyed by the
 * doc id. When useCaching is false the cache is not read, but the freshly
 * fetched result is still written so later cached calls see current content.
 *
 * Errors are thrown as plain strings (not Error objects) to keep the contract
 * existing callers rely on.
 *
 * @param {string} id the id of the google doc
 * @param {boolean} [useCaching] enable or disable caching. default true.
 * @return {string} the doc body in html format
 * @throws {string} if id is missing, useCaching is not a boolean, the file
 *     cannot be opened through DriveApp, or the export request does not
 *     return HTTP 200.
 */
function getContent(id, useCaching) {
  if (!id) {
    throw "Please call this API with a valid Google Doc ID";
  }
  var shouldUseCache = useCaching == null ? true : useCaching;
  if (typeof shouldUseCache !== "boolean") {
    throw "If you're going to specify useCaching, it must be boolean.";
  }

  var cache = CacheService.getScriptCache();
  // Only consult the cache when the caller allows it.
  var html = shouldUseCache ? cache.get(id) : null;

  if (html) {
    Logger.log("Pulling doc html from cache...");
  } else {
    Logger.log("Grabbing and parsing fresh html from the doc...");

    // Fail early with a readable message when the id does not resolve to a file.
    try {
      DriveApp.getFileById(id);
    } catch (err) {
      throw "Please call this API with a valid Google Doc ID. " + err.message;
    }

    // needed to get Drive Scope requested in ScriptApp.getOAuthToken();
    DriveApp.getStorageUsed();

    var url = "https://docs.google.com/feeds/download/documents/export/Export?id=" +
        encodeURIComponent(id) + "&exportFormat=html";
    var param = {
      method: "get",
      headers: {"Authorization": "Bearer " + ScriptApp.getOAuthToken()},
      muteHttpExceptions: true
    };
    var response = UrlFetchApp.fetch(url, param);

    // muteHttpExceptions suppresses the throw on 4xx/5xx, so check the status
    // ourselves; otherwise an error page would be cleaned, cached and returned
    // as if it were the document.
    var status = response.getResponseCode();
    if (status !== 200) {
      throw "Could not export the Google Doc as HTML (HTTP " + status + ").";
    }

    html = cleanDocHtml_(response.getContentText());

    // cache doc contents for 15 minutes, in case we get a lot of requests.
    // Caching is best-effort: if the put is rejected (e.g. the value exceeds
    // the cache's size limit) the caller should still get the html.
    try {
      cache.put(id, html, 900);
    } catch (cacheErr) {
      Logger.log("Could not cache doc html: " + cacheErr.message);
    }
  }

  Logger.log(html);
  return html;
}

/**
 * Strips the head section, presentational attributes, spans and the outer
 * html/body tags from exported doc HTML, and normalizes <br> to <br />.
 *
 * @param {string} html raw HTML returned by the export endpoint
 * @return {string} the cleaned HTML
 */
function cleanDocHtml_(html) {
  return html
      // nuke the whole head section, including the stylesheet and meta tag.
      // [\s\S] is used instead of "." so a head spanning several lines matches.
      .replace(/<head(\s[^>]*)?>[\s\S]*?<\/head>/, '')
      // remove almost all html attributes
      .replace(/ (id|class|style|start|colspan|rowspan)="[^"]*"/g, '')
      // remove all of the spans, as well as the outer html and body
      .replace(/<(span|\/span|body|\/body|html|\/html)>/g, '')
      // clearly the superior way of denoting line breaks
      .replace(/<br>/g, '<br />');
}
https://gist.github.com/xd1936/cc229d14a89e6327336177bb07ac2980
xd1936
source share