因个人需求,需要将 HTML 格式的内容转换成 PDF 并加上水印图片。第一次接触这种需求的小菜鸟博主,在某度搜索引擎上不断地查阅关键字资料、踩坑,终于整理出了一个相应的解决方案。以下是解决步骤,记录下来方便以后回顾,也欢迎各位大神品鉴。
1、在 NuGet 中搜索关键字 itextsharp,下载以下截图圈中的两个包(即 iTextSharp 与 itextsharp.xmlworker),一般下载完成后项目会自动添加引用。
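2、在项目文件中引入以下命名空间(建议将下面提及的代码封装成类库,方便项目间调用,个人取舍):
using System;
using System.IO;
using System.Net;
using System.Text;
using System.Web;
using System.Web.Mvc;
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.tool.xml;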
3、Html字符串转pdf文件流,加水印图片以及未加水印重载 精简帮助类(由博主踩坑整理,仅完成个人业务需求)
/// <summary>
/// Helper that renders an HTML string into a PDF byte array using iTextSharp and
/// XMLWorker, optionally adding a watermark image to the document.
/// </summary>
public class HtmlToPdfHelper
{
    #region HtmlToPDF

    /// <summary>
    /// Reports whether <paramref name="txt"/> contains garbled text, i.e. whether its
    /// UTF-8 encoding contains the byte sequence EF BF BD (U+FFFD, the Unicode
    /// replacement character).
    /// </summary>
    /// <param name="txt">Text to inspect; null or empty text is never considered garbled.</param>
    /// <returns><c>true</c> when the replacement-character bytes are found; otherwise <c>false</c>.</returns>
    private static bool IsMessyCode(string txt)
    {
        if (string.IsNullOrEmpty(txt))
        {
            return false;
        }

        var bytes = Encoding.UTF8.GetBytes(txt);

        // "i + 2 < Length" keeps every index in range and also inspects the final
        // three-byte window of the buffer.
        for (var i = 0; i + 2 < bytes.Length; i++)
        {
            if (bytes[i] == 0xEF && bytes[i + 1] == 0xBF && bytes[i + 2] == 0xBD)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Renders an HTML string to a PDF without a watermark.
    /// </summary>
    /// <param name="htmlText">HTML markup (or plain text) to render.</param>
    /// <returns>
    /// The PDF bytes, or <c>null</c> when <paramref name="htmlText"/> is null/empty or
    /// the HTML could not be parsed.
    /// </returns>
    public static byte[] ConvertHtmlTextToPdf(string htmlText)
    {
        // An empty picture path disables the watermark, so the geometry is irrelevant.
        return ConvertHtmlTextToPdf(htmlText, "", 0, 0, 0, 0);
    }

    /// <summary>
    /// Renders an HTML string to a PDF and, when <paramref name="picPath"/> is not
    /// empty, adds that image to the document as a watermark before the HTML content.
    /// </summary>
    /// <param name="htmlText">HTML markup (or plain text) to render.</param>
    /// <param name="picPath">Path of the watermark image; null or empty skips the watermark.</param>
    /// <param name="left">Horizontal offset of the watermark.</param>
    /// <param name="top">Distance of the watermark from the top edge of the page.</param>
    /// <param name="width">Rendered width of the watermark.</param>
    /// <param name="height">Rendered height of the watermark.</param>
    /// <returns>
    /// The PDF bytes, or <c>null</c> when <paramref name="htmlText"/> is null/empty or
    /// the HTML could not be parsed (the failure is written to the trace listeners).
    /// </returns>
    public static byte[] ConvertHtmlTextToPdf(string htmlText, string picPath, int left, int top, int width, int height)
    {
        if (string.IsNullOrEmpty(htmlText))
        {
            return null;
        }

        // Plain text without any HTML tag makes the conversion fail, so the input is
        // always wrapped in a <p> element.
        htmlText = "<p>" + htmlText + "</p>";

        // "using" guarantees both streams are released on every exit path, including
        // the early return taken when parsing fails.
        using (MemoryStream outputStream = new MemoryStream())
        using (MemoryStream msInput = new MemoryStream(Encoding.UTF8.GetBytes(htmlText)))
        {
            // The parameterless constructor produces a portrait A4 document.
            Document doc = new Document();
            PdfWriter writer = PdfWriter.GetInstance(doc, outputStream);

            // Destination used to open the file at the top of the page with 100% zoom.
            PdfDestination pdfDest = new PdfDestination(PdfDestination.XYZ, 0, doc.PageSize.Height, 1f);

            doc.Open();

            if (!string.IsNullOrEmpty(picPath))
            {
                Image img = Image.GetInstance(picPath);

                // PDF coordinates start at the bottom-left corner, hence the
                // subtraction from the page height for the vertical position.
                // NOTE(review): the x coordinate is "width + left", not "left" as the
                // parameter documentation suggests - kept as published; confirm intent.
                img.SetAbsolutePosition(width + left, (doc.PageSize.Height - height) - top);
                img.ScaleAbsolute(width, height);
                doc.Add(img);
            }

            try
            {
                // Parse the HTML into the PDF document.
                XMLWorkerHelper.GetInstance().ParseXHtml(writer, doc, msInput, null, Encoding.UTF8);

                // Apply the open-action destination to page 1.
                PdfAction action = PdfAction.GotoLocalPage(1, pdfDest, writer);
                writer.SetOpenAction(action);
            }
            catch (Exception ex)
            {
                // Callers rely on a null result to signal failure; record the cause
                // instead of discarding it silently.
                System.Diagnostics.Trace.TraceError("HTML to PDF conversion failed: {0}", ex);
                return null;
            }

            doc.Close();

            // MemoryStream.ToArray remains valid after the stream has been closed.
            return outputStream.ToArray();
        }
    }

    #endregion
}
4、获取网页字符串方法
/// <summary>
/// Downloads the resource at <paramref name="inpath"/> and returns it as text
/// (the raw page source: HTML + CSS + JS).
/// </summary>
/// <param name="inpath">Address handed to <c>WebClient.DownloadData</c>.</param>
/// <returns>
/// The page text, decoded as UTF-8 unless that decoding looks garbled, in which case
/// the bytes are decoded with <c>Encoding.Default</c>. When the download raises a
/// <c>WebException</c>, the exception message is returned instead of page content.
/// </returns>
public static string GetWebContent(string inpath)
{
    try
    {
        // WebClient is IDisposable; release it as soon as the download completes.
        using (WebClient myWebClient = new WebClient())
        {
            // Authenticate the request with the default credentials.
            myWebClient.Credentials = CredentialCache.DefaultCredentials;

            byte[] pageData = myWebClient.DownloadData(inpath);

            // Try UTF-8 first; fall back to the default encoding only when the UTF-8
            // result contains replacement characters.
            // NOTE(review): Encoding.Default is machine dependent - for GB2312 pages an
            // explicit Encoding.GetEncoding("GB2312") may be what is wanted; confirm.
            string pageHtml = Encoding.UTF8.GetString(pageData);
            if (IsMessyCode(pageHtml))
            {
                pageHtml = Encoding.Default.GetString(pageData);
            }

            return pageHtml;
        }
    }
    catch (WebException webEx)
    {
        // NOTE(review): the error text is indistinguishable from page content for the
        // caller; kept because existing callers receive a string in every case.
        return webEx.Message;
    }
}
5、MVC 设计模式下获取控制器视图 Html 的方法。此方法有一个明显的局限:只能获取调用此方法的控制器下的视图,不能跨控制器获取视图,有待优化。
/// <summary>
/// Renders an MVC view to an HTML string.
/// </summary>
/// <param name="context">Controller context used to locate and render the view.</param>
/// <param name="viewName">
/// View name; when null or empty, the "action" value of the current route is used.
/// </param>
/// <returns>The HTML produced by the view.</returns>
/// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
/// <exception cref="InvalidOperationException">No view engine could find the view.</exception>
public static string GetViewHtml(ControllerContext context, string viewName)
{
    if (context == null)
    {
        throw new ArgumentNullException("context");
    }

    if (string.IsNullOrEmpty(viewName))
    {
        viewName = context.RouteData.GetRequiredString("action");
    }

    ViewEngineResult viewResult = ViewEngines.Engines.FindPartialView(context, viewName);
    if (viewResult.View == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException further down.
        throw new InvalidOperationException(
            "The view '" + viewName + "' was not found. Searched locations: "
            + string.Join(", ", viewResult.SearchedLocations));
    }

    using (var sw = new StringWriter())
    {
        try
        {
            var viewContext = new ViewContext(context, viewResult.View, context.Controller.ViewData, context.Controller.TempData, sw);
            viewResult.View.Render(viewContext, sw);
        }
        finally
        {
            // Hand the view back to the engine that created it, even if rendering threw.
            viewResult.ViewEngine.ReleaseView(context, viewResult.View);
        }

        return sw.GetStringBuilder().ToString();
    }
}
6、将pdf流输出至客户浏览器下载方法
/// <summary>
/// Sends a PDF to the client browser as a file download and ends the current response.
/// </summary>
/// <param name="pdfFile">Content of the PDF file.</param>
/// <exception cref="ArgumentNullException"><paramref name="pdfFile"/> is null.</exception>
public static void PdfDownload(byte[] pdfFile)
{
    if (pdfFile == null)
    {
        throw new ArgumentNullException("pdfFile");
    }

    var response = HttpContext.Current.Response;
    try
    {
        // Download file name derived from the current timestamp.
        string filename = DateTime.Now.ToString("yyyyMMddHHmmss") + ".pdf";

        response.Clear();
        response.ClearHeaders();
        response.ClearContent();
        response.ContentType = "application/pdf";
        // Content-Length lets the browser display download progress.
        response.AddHeader("Content-Length", pdfFile.Length.ToString());
        response.AddHeader("Content-Disposition", "attachment; filename=" + HttpUtility.UrlEncode(filename, Encoding.UTF8));

        // The whole document is already in memory, so it is written in a single call;
        // nothing is written when the client has already gone away.
        if (response.IsClientConnected)
        {
            response.OutputStream.Write(pdfFile, 0, pdfFile.Length);
        }
    }
    catch (Exception)
    {
        // This method has not flushed anything yet, so the PDF headers and any partial
        // body are discarded before the error text is written; otherwise the message
        // would be delivered as a corrupt PDF with a mismatching Content-Length.
        // NOTE(review): assumes response buffering is enabled (the ASP.NET default).
        response.ClearHeaders();
        response.ClearContent();
        response.ContentType = "text/plain";
        response.Write("文件下载时出现错误!");
    }
    finally
    {
        // End the response; otherwise the page markup would be appended to the
        // download and the file could not be opened.
        response.Flush();
        response.End();
    }
}
7、MVC控制器下调用Demo(步骤4、6 方法封装至帮助类)
/// <summary>
/// Demo controller: converts an HTML template to a watermarked PDF and sends it to
/// the browser as a download.
/// </summary>
public class HomeController : Controller
{
    // NOTE(review): the numeric watermark arguments were lost from the published
    // listing. The values below are placeholders - replace them with the position and
    // size that fit the actual watermark image.
    private const int WatermarkLeft = 0;
    private const int WatermarkTop = 0;
    private const int WatermarkWidth = 100;
    private const int WatermarkHeight = 100;

    //
    // GET: /Home/
    public ActionResult Index()
    {
        // Option 1: load the HTML string from a template file (step 4 method).
        string inpath = Server.MapPath("~/PDFTemplate/test.html");
        string htmlText = HtmlToPdfHelper.GetWebContent(inpath);

        // Option 2: render an MVC view to an HTML string (step 5 method).
        //string htmlText = GetViewHtml(ControllerContext, "Test");

        // Path of the watermark image.
        string picPath = Server.MapPath("~/PDFTemplate/TemplateImg/authentication-iocn.png");

        // Convert the HTML to a PDF and add the watermark.
        byte[] pdfFile = HtmlToPdfHelper.ConvertHtmlTextToPdf(
            htmlText, picPath, WatermarkLeft, WatermarkTop, WatermarkWidth, WatermarkHeight);

        // ConvertHtmlTextToPdf returns null when the input is empty or parsing fails.
        if (pdfFile == null)
        {
            return new HttpStatusCodeResult(500, "PDF generation failed.");
        }

        // Send the file to the client (step 6 method). PdfDownload ends the response
        // itself, so the view returned below is not expected to be rendered.
        HtmlToPdfHelper.PdfDownload(pdfFile);
        return View();
    }

    public ActionResult Test()
    {
        return View();
    }

    /// <summary>
    /// Renders an MVC view to an HTML string.
    /// </summary>
    /// <param name="context">Controller context used to locate and render the view.</param>
    /// <param name="viewName">
    /// View name; when null or empty, the "action" value of the current route is used.
    /// </param>
    /// <returns>The HTML produced by the view.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
    /// <exception cref="InvalidOperationException">No view engine could find the view.</exception>
    public static string GetViewHtml(ControllerContext context, string viewName)
    {
        if (context == null)
        {
            throw new ArgumentNullException("context");
        }

        if (string.IsNullOrEmpty(viewName))
        {
            viewName = context.RouteData.GetRequiredString("action");
        }

        ViewEngineResult viewResult = ViewEngines.Engines.FindPartialView(context, viewName);
        if (viewResult.View == null)
        {
            // Fail with a descriptive error instead of a NullReferenceException further down.
            throw new InvalidOperationException(
                "The view '" + viewName + "' was not found. Searched locations: "
                + string.Join(", ", viewResult.SearchedLocations));
        }

        using (var sw = new StringWriter())
        {
            try
            {
                var viewContext = new ViewContext(context, viewResult.View, context.Controller.ViewData,
                    context.Controller.TempData, sw);
                viewResult.View.Render(viewContext, sw);
            }
            finally
            {
                // Hand the view back to the engine that created it, even if rendering threw.
                viewResult.ViewEngine.ReleaseView(context, viewResult.View);
            }

            return sw.GetStringBuilder().ToString();
        }
    }
}
总结:我理解的解决思路是先将 html 读取并转换成字符串,再通过 itextsharp 转换成 PDF 字节流,传输至客户端,或直接保存至服务器并生成链接供用户下载。(新手上路,不妥之处,欢迎各位大神指教)
以上代码仅满足个人业务逻辑需求,如侵删,谢谢浏览。