首页 > 代码库 > phantomjs html to PDF
phantomjs html to PDF
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Configuration;
using System.IO;

/// <summary>
/// Helper that renders a web page to a PDF file by launching the bundled
/// PhantomJS executable with the generate_pdf.js script.
/// </summary>
public class PDFCommon
{
    public PDFCommon()
    {
    }

    /// <summary>
    /// Renders the page at <paramref name="url"/> to a new PDF file and blocks
    /// until PhantomJS has exited.
    /// </summary>
    /// <param name="url">Address of the page to render.</param>
    /// <returns>
    /// Full path of the PDF file PhantomJS was asked to write. The exit code is
    /// not inspected, so the file may be missing if rendering failed.
    /// </returns>
    public static string CreatePDF(string url)
    {
        // phantomjs.exe and generate_pdf.js are deployed next to each other under ~/bin/phantomjs.
        string path = System.Web.HttpContext.Current.Server.MapPath(@"~\bin\phantomjs\");
        string filePath = Path.Combine(path, "phantomjs.exe");
        string jsPath = Path.Combine(path, "generate_pdf.js");

        // NOTE(review): the output directory is hard-coded to D:/ - confirm this is intended.
        string savePath = Path.Combine(@"D:/", string.Format("{0}.pdf", Guid.NewGuid()));

        // A raw double quote inside the URL would terminate the quoted argument and let the
        // caller inject extra PhantomJS arguments; percent-encode it instead.
        string safeUrl = (url ?? string.Empty).Replace("\"", "%22");

        // Every path is quoted so that directories containing spaces survive argument splitting.
        string argument = string.Format(
            " --ignore-ssl-errors=yes \"{0}\" \"{1}\" \"{2}\"", jsPath, safeUrl, savePath);

        using (System.Diagnostics.Process exep = new System.Diagnostics.Process())
        {
            System.Diagnostics.ProcessStartInfo startInfo = new System.Diagnostics.ProcessStartInfo();
            startInfo.FileName = filePath;
            startInfo.Arguments = argument;
            startInfo.CreateNoWindow = true;
            startInfo.UseShellExecute = false;
            startInfo.RedirectStandardInput = true;
            startInfo.RedirectStandardOutput = true;
            startInfo.RedirectStandardError = true;
            exep.StartInfo = startInfo;
            exep.Start();

            // Nothing is ever written to the child, so close stdin to signal end-of-input.
            exep.StandardInput.Close();

            // The redirected pipes must be drained: if the child fills the stdout or stderr
            // buffer while nobody reads it, it blocks and WaitForExit() never returns.
            exep.BeginOutputReadLine();
            exep.BeginErrorReadLine();

            exep.WaitForExit();
        }

        return savePath;
    }
}
// This file is NOT a browser-run JavaScript file but a PhantomJS script.
// Usage: phantomjs generate_pdf.js <url> <output-file>
// `var` and plain function expressions are kept deliberately, as in the original script.
var system = require('system');
var address = system.args[1];
var output = system.args[2];
var page = require('webpage').create();

// A4 landscape with a 1cm border on every side.
page.paperSize = {
    format: 'A4',
    orientation: 'landscape',
    border: '1cm'
};

page.open(address, function (status) {
    if (status !== 'success') {
        console.log('Unable to load the address!');
        // Exit with a non-zero code so the calling process can detect the failure.
        phantom.exit(1);
    } else {
        // Wait 5 seconds before rendering so that scripts on the page can finish drawing.
        window.setTimeout(function () {
            // Remove all low-opacity paths. See PhantomJS issue #364.
            page.evaluate(function () {
                var paths = document.getElementsByTagName("path");
                // Iterate backwards: the collection is live and shrinks as nodes are removed.
                for (var i = paths.length - 1; i >= 0; i--) {
                    var path = paths[i];
                    var strokeOpacity = path.getAttribute('stroke-opacity');
                    if (strokeOpacity != null && strokeOpacity < 0.2) {
                        path.parentNode.removeChild(path);
                    }
                }
            });
            page.render(output);
            phantom.exit();
        }, 5000);
    }
});
开篇
最近使用 Phantomjs 生成PDF,其中遇到一些问题,导致PDF生成失败,如出现空白文件或一页数据量太大,都是由于没有设置好格式导致。特别是分页问题,感觉资料很少,除了在 StackOverflow 上看到些许资料外,中文社区基本看不到,附上修改后的 rasterize.js 来做讲解:
// PhantomJS script (not browser JavaScript): renders a URL to an output file.
// Usage: phantomjs rasterize.js URL filename
var page = require('webpage').create(),
    system = require('system'),
    address, output, size;

if (system.args.length < 3 || system.args.length > 5) {
    console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom]');
    console.log(' paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"');
    phantom.exit(1);
} else {
    address = system.args[1];
    output = system.args[2];

    /* Size of the virtual browser window. */
    page.viewportSize = { width: 600, height: 600 };

    /* Paper-size handling from the official example, disabled because only URL and filename are passed in:
    if (system.args.length > 3 && system.args[2].substr(-4) === ".pdf") {
        size = system.args[3].split('*');
        page.paperSize = size.length === 2 ? { width: size[0], height: size[1], margin: '0px' }
                                           : { format: 'A4', orientation: 'portrait', margin: '1cm' };
    }
    */

    /* IE and Chrome display the generated PDF differently; present a Chrome user agent. */
    page.settings.userAgent = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 Safari/537.36';
    page.paperSize = { format: 'A4', orientation: 'portrait', margin: '0.8cm' };
    page.zoomFactor = 1;
    page.settings.loadImages = true;

    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('Unable to load the address!');
            // Without an explicit exit PhantomJS keeps running forever and any
            // process waiting on it hangs; report the failure via the exit code.
            phantom.exit(1);
        } else {
            // Give the page's own scripts 200 ms to run before rendering.
            window.setTimeout(function () {
                page.render(output);
                phantom.exit();
            }, 200);
        }
    });
}
// PhantomJS script (not browser JavaScript): renders a URL to an output file.
// Usage: phantomjs rasterize.js URL filename
var page = require('webpage').create(),
    system = require('system'),
    address, output, size;

if (system.args.length < 3 || system.args.length > 5) {
    console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom]');
    console.log(' paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"');
    phantom.exit(1);
} else {
    address = system.args[1];
    output = system.args[2];

    /* Size of the virtual browser window. */
    page.viewportSize = { width: 600, height: 600 };

    /* Paper-size handling from the official example, disabled because only URL and filename are passed in:
    if (system.args.length > 3 && system.args[2].substr(-4) === ".pdf") {
        size = system.args[3].split('*');
        page.paperSize = size.length === 2 ? { width: size[0], height: size[1], margin: '0px' }
                                           : { format: 'A4', orientation: 'portrait', margin: '1cm' };
    }
    */

    /* IE and Chrome display the generated PDF differently; present a Chrome user agent. */
    page.settings.userAgent = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 Safari/537.36';
    page.paperSize = { format: 'A4', orientation: 'portrait', margin: '0.8cm' };
    page.zoomFactor = 1;
    page.settings.loadImages = true;

    page.open(address, function (status) {
        if (status !== 'success') {
            console.log('Unable to load the address!');
            // Without an explicit exit PhantomJS keeps running forever and any
            // process waiting on it hangs; report the failure via the exit code.
            phantom.exit(1);
        } else {
            // Give the page's own scripts 200 ms to run before rendering.
            window.setTimeout(function () {
                page.render(output);
                phantom.exit();
            }, 200);
        }
    });
}
PDF 格式设置
关于其中 page 的设置属性,这里可以了解,更深入可以了解 WebPage Module。
我们需要的设置,基本上就是页面格式、缩放、加载图片等,但有些例外,下面一一讲解。
// Paper settings for PDF output: A4, portrait, 0.8cm margin on every side.
page.paperSize = { format: 'A4', orientation: 'portrait', margin: '0.8cm' };
注释掉了官方例子的设置代码,因为传入的参数只有3个,到 .pdf 为止,如果写成通用模式,当然可以作为外部参数传入。
format :纸张规格,这里是 A4,还可以设置为 'A3'、'A5'、'Legal'、'Letter'、'Tabloid';如果需要自定义尺寸(如 "5in*7.5in"、"10cm*20cm"),应改用 width 和 height 属性,而不是 format
orientation :纸方向是竖着的,或者 landscape
margin :与纸四边间距,可自定义,也可详细设置 margin : { left: '0.8cm', top: '0.8cm', right: '0.8cm', bottom: '0.8cm' }
// Render at 100% zoom and load the page's images.
page.zoomFactor = 1;
page.settings.loadImages = true;
zoomFactor :页面缩放比例
loadImages :页面加载图片
// Present a Chrome user agent to the page being rendered.
page.settings.userAgent = 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.117 Safari/537.36';
这个设置比较不常见,一般的示例中都没有提及,因为发现用 chrome 和 IE 打开生成的 pdf 时格式有点不一样(表现在分页方面),由于偏向 Chrome 浏览格式,故设置此值,解决这个不一致问题。
page.open 里面的 setTimeout 方法作用:等待页面执行完 js ,再生成 pdf。当然对于 js 要执行多久(要等多久),这个就不知道怎么预算了。其实我有试过 ajax 方式加载内容,但因此问题而作罢了。
更多的信息,关于页眉和页脚及页码标注问题,可以参考这里。
PDF 分页
分页来说,更好控制,不需要代码(js)设置,页面使用样式即可:
style="page-break-after: always;"
控制每页内容的大小,使用 <div style="page-break-after: always;">content</div> 就行。
更多选择 style="page-break-before: always;" , style="page-break-inside: avoid;" 这个可以避免内容散到两页中
phantomjs html to PDF