Get Google Doc as HTML

I had a wild idea that I could create a blog website for an inexperienced friend, using Google Drive Docs to back it. I was able to create a ContentService script that compiles a list of documents. However, I can't see a way to convert a document to HTML. I know that Google can render documents in a web page, so I wondered whether it is possible to get a rendered version for use in my content service.

Is it possible?

+7
source share
7 answers

There is no direct method in GAS to get an HTML version of a document, and there is a rather old enhancement request for it, but the workaround originally described by Henrique Abreu works very well; I use it all the time...

The only annoying part is the authorization process, which has to be triggered from the script editor. That makes it awkward to use in a shared application (with "script-unable" users), but it only has to happen once ;).

There is also a Library created by Romain Vialard that makes things (a bit) simpler ... and adds some other interesting features.

+3
source

You can try this code:

/**
 * Exports the active Google Doc as HTML through the Docs export endpoint.
 *
 * The HTML is written to the Apps Script log and also returned, so callers
 * can use it directly instead of having to read it back from the log.
 *
 * The request is sent with muteHttpExceptions enabled, so the response body
 * is used as-is; no HTTP status check is performed here.
 *
 * @return {string} The body of the export response.
 */
function getGoogleDocumentAsHTML() {
  var id = DocumentApp.getActiveDocument().getId();

  // The return value is not needed. Per the original author's note, this
  // DriveApp call is only here so that the Drive scope gets requested for
  // the OAuth token used below.
  DriveApp.getStorageUsed();

  var url = "https://docs.google.com/feeds/download/documents/export/Export?id=" + id + "&exportFormat=html";
  var param = {
    method: "get",
    headers: {"Authorization": "Bearer " + ScriptApp.getOAuthToken()},
    muteHttpExceptions: true
  };

  var html = UrlFetchApp.fetch(url, param).getContentText();
  Logger.log(html);
  return html;
}
+11
source

Here is a little snippet for the new Google OAuth version, following the idea posted by Enrique:

 /**
  * Fetches the active Google Doc from the Docs export endpoint in HTML format.
  *
  * The request is sent with muteHttpExceptions enabled, so the response body
  * is returned as-is; no HTTP status check is performed here.
  *
  * @return {string} The body of the export response.
  */
 function exportAsHTML() {
   // The return value is not needed. Per the original author's note, this
   // DriveApp call is only here so that the Drive scope gets requested for
   // the OAuth token used below.
   DriveApp.getStorageUsed();

   var docID = DocumentApp.getActiveDocument().getId();
   var url = "https://docs.google.com/feeds/download/documents/export/Export?id=" + docID + "&exportFormat=html";
   var param = {
     method: "get",
     headers: {"Authorization": "Bearer " + ScriptApp.getOAuthToken()},
     muteHttpExceptions: true
   };

   return UrlFetchApp.fetch(url, param).getContentText();
 }

and then use the regular MailApp:

 /**
  * Sends an email whose HTML body is whatever exportAsHTML() returns.
  *
  * The recipient and subject are hard-coded in the message below.
  */
 function mailer() {
   var message = {
     to: " email@mail.com ",
     subject: "document emailer",
     htmlBody: exportAsHTML()
   };
   MailApp.sendEmail(message);
 }

Hope the new workaround helps

Jd

+1
source

Node.js Solution

Here is how you can get a Google Doc as HTML using the Google Drive Node.js client library:

 // Downloads a Google Doc as HTML through the Drive API and prints it.
 //
 // NOTE(review): this follows the shape of the googleapis client as used in
 // the original snippet (module object exposing `auth` and `drive`, and
 // `files.export` returning an event-emitting stream). Newer releases of the
 // package may expose a different API; confirm against the installed version.

 // Import the googleapis npm package.
 var google = require('googleapis');

 // Placeholders: replace with a real Drive file ID and OAuth access token.
 var fileId = '<google drive doc file id>';
 var accessToken = '<oauth access token>';

 // OAuth setup: build a client and attach the access token to it.
 var OAuth2 = google.auth.OAuth2;
 var OAuth2Client = new OAuth2();
 OAuth2Client.setCredentials({access_token: accessToken});

 // Google Drive setup.
 var drive = google.drive({version: 'v3', auth: OAuth2Client});

 // Download the file as text/html, collecting the streamed chunks.
 var buffers = [];
 drive.files.export({fileId: fileId, mimeType: 'text/html'})
   .on('error', function(err) {
     // Report the failure instead of swallowing it silently.
     console.error('Failed to export Google Doc as HTML:', err);
   })
   .on('data', function(data) {
     buffers.push(data); // data is a buffer
   })
   .on('end', function() {
     var googleDocAsHtml = Buffer.concat(buffers).toString();
     console.log(googleDocAsHtml);
   });

Check out Google's documentation for more languages and options.

Please note that the Google API node.js Client is in alpha (January 2017).

+1
source

You can use the solution here.

 /**
  * Converts a file to HTML. The Advanced Drive service must be enabled to use
  * this function.
  *
  * @param {string} fileId ID of the Drive file to convert.
  * @return {string} The body returned by the file's text/html export link.
  * @throws {string} 'File cannot be converted to HTML.' when the file has no
  *     text/html export link, or 'Error converting to HTML: ...' followed by
  *     the response body when the export request does not answer with 200.
  */
 function convertToHtml(fileId) {
   var file = Drive.Files.get(fileId);

   // Guard the lookup so a file whose metadata has no exportLinks map at all
   // reports the intended message rather than a TypeError.
   var htmlExportLink = file.exportLinks && file.exportLinks['text/html'];
   if (!htmlExportLink) {
     throw 'File cannot be converted to HTML.';
   }

   var oAuthToken = ScriptApp.getOAuthToken();
   var response = UrlFetchApp.fetch(htmlExportLink, {
     headers: {
       'Authorization': 'Bearer ' + oAuthToken
     },
     muteHttpExceptions: true
   });

   // Compare the status code directly. The previous form,
   // `!response.getResponseCode() == 200`, negated the code before comparing
   // it, so the condition was never true and failures were returned as HTML.
   if (response.getResponseCode() !== 200) {
     throw 'Error converting to HTML: ' + response.getContentText();
   }
   return response.getContentText();
 }

Pass the Google Doc ID as fileId. To enable the Advanced Drive service, follow the instructions here.

0
source

I also had this problem. The HTML that the Document HTML Export pulls out is really ugly, so this was my solution:

 /**
  * Takes in a Google Doc ID, gets that doc in HTML format, cleans up the
  * markup, and returns the resulting HTML string.
  *
  * The cleaned HTML is stored in the script cache under the doc ID for 15
  * minutes. A fresh fetch always refreshes that entry, even when useCaching
  * is false; useCaching only controls whether an existing entry is served.
  *
  * @param {string} id the id of the google doc
  * @param {boolean} [useCaching] enable or disable caching. default true.
  * @return {string} the doc body in html format
  * @throws {string} when id is missing, when useCaching is given but is not a
  *     boolean, or when DriveApp cannot open a file with that id.
  */
 function getContent(id, useCaching) {
   if (!id) {
     throw "Please call this API with a valid Google Doc ID";
   }
   if (useCaching == null) {
     useCaching = true;
   }
   if (typeof useCaching !== "boolean") {
     throw "If you're going to specify useCaching, it must be boolean.";
   }

   var cache = CacheService.getScriptCache();
   var cached = cache.get(id); // see if we have a cached version of our parsed html
   var html;

   if (cached && useCaching) {
     html = cached;
     Logger.log("Pulling doc html from cache...");
   } else {
     Logger.log("Grabbing and parsing fresh html from the doc...");

     // Only used to validate the ID: an unknown or inaccessible file throws here.
     try {
       DriveApp.getFileById(id);
     } catch (err) {
       throw "Please call this API with a valid Google Doc ID. " + err.message;
     }

     // The return value is not needed. Per the original author's note, this
     // call is here so that the Drive scope gets requested for
     // ScriptApp.getOAuthToken().
     DriveApp.getStorageUsed();

     var url = "https://docs.google.com/feeds/download/documents/export/Export?id=" + id + "&exportFormat=html";
     var param = {
       method: "get",
       headers: {"Authorization": "Bearer " + ScriptApp.getOAuthToken()},
       muteHttpExceptions: true
     };
     html = cleanExportedHtml_(UrlFetchApp.fetch(url, param).getContentText());

     // cache doc contents for 15 minutes, in case we get a lot of requests.
     // Caching is best-effort: if the cache rejects the value (for example
     // because it is too large), still return the HTML we already have.
     try {
       cache.put(id, html, 900);
     } catch (err) {
       Logger.log("Could not cache doc html: " + err.message);
     }
   }

   Logger.log(html);
   return html;
 }

 /**
  * Strips the markup of an exported Google Doc down to bare tags.
  *
  * @param {string} html raw HTML as returned by the export endpoint
  * @return {string} the simplified HTML
  */
 function cleanExportedHtml_(html) {
   return html
     // nuke the whole head section, including the stylesheet and meta tag;
     // [\s\S] is used so the match also spans line breaks inside the head
     .replace(/<head>[\s\S]*?<\/head>/, '')
     // remove almost all html attributes
     .replace(/ (id|class|style|start|colspan|rowspan)="[^"]*"/g, '')
     // remove all of the spans, as well as the outer html and body
     .replace(/<(span|\/span|body|\/body|html|\/html)>/g, '')
     // normalize line breaks to the self-closing form
     .replace(/<br>/g, '<br />');
 }

https://gist.github.com/xd1936/cc229d14a89e6327336177bb07ac2980

0
source

Maybe this will work for you ...

 /**
  * Web app entry point: returns the HTML export of a fixed Google Doc as
  * HtmlService output.
  *
  * The DriveApp File class has no getAsHTML() method, so the HTML is fetched
  * from the Docs export endpoint with the script's OAuth token instead.
  * 'myFileId' is a placeholder and must be replaced with a real document ID.
  *
  * @return {HtmlOutput} output built from the export response body.
  */
 function doGet() {
   // Opening the file through DriveApp both validates the ID and makes the
   // script reference Drive before the OAuth token is used below.
   var fileId = DriveApp.getFileById('myFileId').getId();

   var url = "https://docs.google.com/feeds/download/documents/export/Export?id=" + fileId + "&exportFormat=html";
   var response = UrlFetchApp.fetch(url, {
     method: "get",
     headers: {"Authorization": "Bearer " + ScriptApp.getOAuthToken()},
     muteHttpExceptions: true
   });

   return HtmlService.createHtmlOutput(response.getContentText());
 }
-2
source

All Articles