C#/VB.NET如何创建、读取PDF文档
这篇文章将为大家详细讲解有关C#/VB.NET如何创建、读取PDF文档,小编觉得挺实用的,因此分享给大家做个参考,希望大家阅读完这篇文章后可以有所收获。
创新互联是一家专注于成都做网站、成都网站制作与策划设计的公司。马鞍山网站建设哪家好?创新互联做网站,专注于网站建设十多年,是网站设计领域的专业建站公司;建站业务涵盖马鞍山等地区。马鞍山做网站价格咨询:18982081108
一、创建PDF文档
C#
using Spire.Pdf;
using Spire.Pdf.Graphics;
using System.Drawing;

namespace CreatePDF_PDF
{
    /// <summary>
    /// Sample program: creates a single-page A4 PDF containing a block of text and a
    /// scaled image, saves it to disk and then opens the saved file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the PDF, writes it to "PDF创建.pdf" in the working directory and opens it.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Single source of truth for the output path, used for both saving and opening.
            const string outputPath = "PDF创建.pdf";

            // The document is released as soon as it has been saved.
            using (PdfDocument document = new PdfDocument())
            {
                // Page margins are given in centimeters and converted to points:
                // 2.54 cm top/bottom, 3.17 cm left/right.
                PdfUnitConvertor unitCvtr = new PdfUnitConvertor();
                PdfMargins margins = new PdfMargins();
                margins.Top = unitCvtr.ConvertUnits(2.54f, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point);
                margins.Bottom = margins.Top;
                margins.Left = unitCvtr.ConvertUnits(3.17f, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point);
                margins.Right = margins.Left;

                // Add one A4 page that uses those margins.
                PdfPageBase page = document.Pages.Add(PdfPageSize.A4, margins);

                // Font and pen used to draw the text.
                // NOTE(review): the `true` argument is presumably the Unicode switch required
                // for the Chinese text — confirm against the Spire.PDF documentation.
                PdfTrueTypeFont font = new PdfTrueTypeFont(new Font("楷体", 11f), true);
                PdfPen pen = new PdfPen(Color.Black);

                // Draw the text at position (15, 13) on the page canvas.
                string text = "《蝶恋花 送春》 \n 楼外垂杨千万缕,欲系青春,少住春还去。犹自风前飘柳絮,随春且看归何处?\n 绿满山川闻杜宇,便作无情,莫也愁人苦。把酒送春春不语,黄昏却下潇潇雨。";
                page.Canvas.DrawString(text, font, pen, 15, 13);

                // Load the image and draw it at 55% of its original size.
                PdfImage image = PdfImage.FromFile("image1.jpg");
                float width = image.Width * 0.55f;
                float height = image.Height * 0.55f;

                // Horizontal offset of the image: one third of the free horizontal space.
                float x = (page.Canvas.ClientSize.Width - width) / 3;
                page.Canvas.DrawImage(image, x, 60, width, height);

                document.SaveToFile(outputPath);
            }

            // Open the saved document with the associated application.
            System.Diagnostics.Process.Start(outputPath);
        }
    }
}
创建结果:
VB.NET
Imports Spire.Pdf
Imports Spire.Pdf.Graphics
Imports System.Drawing

Namespace CreatePDF_PDF
    ''' <summary>
    ''' Sample program: creates a single-page A4 PDF containing a block of text and a
    ''' scaled image, saves it to disk and then opens the saved file.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Builds the PDF, writes it to "PDF创建.pdf" in the working directory and opens it.
        ''' </summary>
        ''' <param name="args">Command-line arguments (unused).</param>
        Private Shared Sub Main(ByVal args As String())
            ' Single source of truth for the output path, used for both saving and opening.
            Const outputPath As String = "PDF创建.pdf"

            ' The document is released as soon as it has been saved.
            Using document As PdfDocument = New PdfDocument()
                ' Page margins are given in centimeters and converted to points:
                ' 2.54 cm top/bottom, 3.17 cm left/right.
                Dim unitCvtr As PdfUnitConvertor = New PdfUnitConvertor()
                Dim margins As PdfMargins = New PdfMargins()
                margins.Top = unitCvtr.ConvertUnits(2.54F, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point)
                margins.Bottom = margins.Top
                margins.Left = unitCvtr.ConvertUnits(3.17F, PdfGraphicsUnit.Centimeter, PdfGraphicsUnit.Point)
                margins.Right = margins.Left

                ' Add one A4 page that uses those margins.
                Dim page As PdfPageBase = document.Pages.Add(PdfPageSize.A4, margins)

                ' Font and pen used to draw the text.
                ' NOTE(review): the True argument is presumably the Unicode switch required
                ' for the Chinese text — confirm against the Spire.PDF documentation.
                Dim font As PdfTrueTypeFont = New PdfTrueTypeFont(New Font("楷体", 11F), True)
                Dim pen As PdfPen = New PdfPen(Color.Black)

                ' Draw the text at position (15, 13) on the page canvas.
                Dim text As String = "《蝶恋花 送春》 " & vbLf & " 楼外垂杨千万缕,欲系青春,少住春还去。犹自风前飘柳絮,随春且看归何处?" & vbLf & " 绿满山川闻杜宇,便作无情,莫也愁人苦。把酒送春春不语,黄昏却下潇潇雨。"
                page.Canvas.DrawString(text, font, pen, 15, 13)

                ' Load the image and draw it at 55% of its original size.
                Dim image As PdfImage = PdfImage.FromFile("image1.jpg")
                Dim width As Single = image.Width * 0.55F
                Dim height As Single = image.Height * 0.55F

                ' Horizontal offset of the image: one third of the free horizontal space.
                Dim x As Single = (page.Canvas.ClientSize.Width - width) / 3
                page.Canvas.DrawImage(image, x, 60, width, height)

                document.SaveToFile(outputPath)
            End Using

            ' Open the saved document with the associated application.
            System.Diagnostics.Process.Start(outputPath)
        End Sub
    End Class
End Namespace
Spire.PDF支持直接生成PDF文档,并可在创建的同时添加文本、图片、图形、水印、表格、页眉页脚、页码等内容。本文的示例代码仅以添加文本和图片为例,其他操作可参见以下博客:
C# 添加PDF水印
C# 创建PDF表格
二、 读取PDF文档
测试文档:
1. 读取PDF文本
1.1 读取全部文本
C#
using Spire.Pdf;
using System;
using System.IO;
using System.Text;

namespace ExtractText_PDF
{
    /// <summary>
    /// Sample program: extracts the text of every page of "sample.pdf" and writes it
    /// to a .txt file, then opens that file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads "sample.pdf", concatenates the text extracted from each page and saves
        /// the result to "获取文本.txt" in the working directory.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Output path, used for both writing and opening the result.
            string fileName = "获取文本.txt";

            StringBuilder content = new StringBuilder();

            // The document is only needed while extracting; release it before writing output.
            using (PdfDocument doc = new PdfDocument())
            {
                doc.LoadFromFile("sample.pdf");

                // Append the text of each page in document order.
                foreach (PdfPageBase page in doc.Pages)
                {
                    content.Append(page.ExtractText());
                }
            }

            File.WriteAllText(fileName, content.ToString());

            // Open the text file with the associated application.
            System.Diagnostics.Process.Start(fileName);
        }
    }
}
读取结果:
VB.NET
Imports Spire.Pdf
Imports System
Imports System.IO
Imports System.Text

Namespace ExtractText_PDF
    ''' <summary>
    ''' Sample program: extracts the text of every page of "sample.pdf" and writes it
    ''' to a .txt file, then opens that file.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Reads "sample.pdf", concatenates the text extracted from each page and saves
        ''' the result to "获取文本.txt" in the working directory.
        ''' </summary>
        ''' <param name="args">Command-line arguments (unused).</param>
        Private Shared Sub Main(ByVal args As String())
            ' Output path, used for both writing and opening the result.
            Dim fileName As String = "获取文本.txt"

            Dim content As StringBuilder = New StringBuilder()

            ' The document is only needed while extracting; release it before writing output.
            Using doc As PdfDocument = New PdfDocument()
                doc.LoadFromFile("sample.pdf")

                ' Append the text of each page in document order.
                For Each page As PdfPageBase In doc.Pages
                    content.Append(page.ExtractText())
                Next
            End Using

            File.WriteAllText(fileName, content.ToString())

            ' Open the text file with the associated application.
            System.Diagnostics.Process.Start(fileName)
        End Sub
    End Class
End Namespace
1.2 读取指定区域文本
C#
using Spire.Pdf;
using System.IO;
using System.Text;
using System.Drawing;

namespace ExtractText1_PDF
{
    /// <summary>
    /// Sample program: extracts the text inside a fixed rectangle on the first page of
    /// "sample.pdf", writes it to a .txt file and opens that file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads "sample.pdf", extracts the text found in the rectangle
        /// (x = 50, y = 50, width = 500, height = 170) of page 1 and saves it,
        /// followed by a line break, to "Extract.txt".
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Output path, used for both writing and opening the result.
            const string outputPath = "Extract.txt";

            string text;

            // The document is only needed while extracting; release it before writing output.
            using (PdfDocument pdf = new PdfDocument())
            {
                pdf.LoadFromFile("sample.pdf");

                // First page of the document.
                PdfPageBase page = pdf.Pages[0];

                // Only text inside this rectangle is returned.
                // NOTE(review): coordinates are presumably in points — confirm with the Spire.PDF docs.
                text = page.ExtractText(new RectangleF(50, 50, 500, 170));
            }

            // AppendLine adds a trailing line terminator after the extracted text.
            StringBuilder sb = new StringBuilder();
            sb.AppendLine(text);
            File.WriteAllText(outputPath, sb.ToString());

            // Open the text file with the associated application.
            System.Diagnostics.Process.Start(outputPath);
        }
    }
}
读取结果:
(此时读取的就只是指定区域内的文本)
VB.NET
Imports Spire.Pdf
Imports System.IO
Imports System.Text
Imports System.Drawing

Namespace ExtractText1_PDF
    ''' <summary>
    ''' Sample program: extracts the text inside a fixed rectangle on the first page of
    ''' "sample.pdf", writes it to a .txt file and opens that file.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Reads "sample.pdf", extracts the text found in the rectangle
        ''' (x = 50, y = 50, width = 500, height = 170) of page 1 and saves it,
        ''' followed by a line break, to "Extract.txt".
        ''' </summary>
        ''' <param name="args">Command-line arguments (unused).</param>
        Private Shared Sub Main(ByVal args As String())
            ' Output path, used for both writing and opening the result.
            Const outputPath As String = "Extract.txt"

            Dim text As String

            ' The document is only needed while extracting; release it before writing output.
            Using pdf As PdfDocument = New PdfDocument()
                pdf.LoadFromFile("sample.pdf")

                ' First page of the document.
                Dim page As PdfPageBase = pdf.Pages(0)

                ' Only text inside this rectangle is returned.
                ' NOTE(review): coordinates are presumably in points — confirm with the Spire.PDF docs.
                text = page.ExtractText(New RectangleF(50, 50, 500, 170))
            End Using

            ' AppendLine adds a trailing line terminator after the extracted text.
            Dim sb As StringBuilder = New StringBuilder()
            sb.AppendLine(text)
            File.WriteAllText(outputPath, sb.ToString())

            ' Open the text file with the associated application.
            System.Diagnostics.Process.Start(outputPath)
        End Sub
    End Class
End Namespace
2. 读取PDF图片
C#
using Spire.Pdf;
using System.Collections.Generic;
using System.Drawing;

namespace ExtractImages_PDF
{
    /// <summary>
    /// Sample program: extracts every image from every page of "sample.pdf", saves them
    /// as numbered PNG files and opens the first one.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Reads "sample.pdf", collects the images of all pages and writes them to
        /// "image1.png", "image2.png", ... in the working directory. If at least one
        /// image was found, "image1.png" is opened afterwards.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            bool anySaved = false;

            // Everything that touches the extracted images happens while the document is
            // still open, then the document is released.
            using (PdfDocument doc = new PdfDocument())
            {
                doc.LoadFromFile("sample.pdf");

                // Images collected from all pages, in page order.
                List<Image> extractedImages = new List<Image>();

                for (int pageIndex = 0; pageIndex < doc.Pages.Count; pageIndex++)
                {
                    PdfPageBase page = doc.Pages[pageIndex];
                    Image[] images = page.ExtractImages();

                    // Pages without images yield nothing to add.
                    if (images != null && images.Length > 0)
                    {
                        extractedImages.AddRange(images);
                    }
                }

                // Save each image under a 1-based file name and release it afterwards.
                for (int i = 0; i < extractedImages.Count; i++)
                {
                    using (Image image = extractedImages[i])
                    {
                        image.Save("image" + (i + 1).ToString() + ".png", System.Drawing.Imaging.ImageFormat.Png);
                    }
                }

                anySaved = extractedImages.Count > 0;
            }

            // Only open the first image when something was actually extracted.
            if (anySaved)
            {
                System.Diagnostics.Process.Start("image1.png");
            }
        }
    }
}
读取结果:
VB.NET
Imports Spire.Pdf
Imports System.Collections.Generic
Imports System.Drawing

Namespace ExtractImages_PDF
    ''' <summary>
    ''' Sample program: extracts every image from every page of "sample.pdf", saves them
    ''' as numbered PNG files and opens the first one.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Reads "sample.pdf", collects the images of all pages and writes them to
        ''' "image1.png", "image2.png", ... in the working directory. If at least one
        ''' image was found, "image1.png" is opened afterwards.
        ''' </summary>
        ''' <param name="args">Command-line arguments (unused).</param>
        Private Shared Sub Main(ByVal args As String())
            Dim anySaved As Boolean = False

            ' Everything that touches the extracted images happens while the document is
            ' still open, then the document is released.
            Using doc As PdfDocument = New PdfDocument()
                doc.LoadFromFile("sample.pdf")

                ' Images collected from all pages, in page order.
                Dim extractedImages As List(Of Image) = New List(Of Image)()

                For pageIndex As Integer = 0 To doc.Pages.Count - 1
                    Dim page As PdfPageBase = doc.Pages(pageIndex)
                    Dim images As Image() = page.ExtractImages()

                    ' Pages without images yield nothing to add.
                    If images IsNot Nothing AndAlso images.Length > 0 Then
                        extractedImages.AddRange(images)
                    End If
                Next

                ' Save each image under a 1-based file name and release it afterwards.
                For i As Integer = 0 To extractedImages.Count - 1
                    Using extracted As Image = extractedImages(i)
                        extracted.Save("image" & (i + 1).ToString() & ".png", System.Drawing.Imaging.ImageFormat.Png)
                    End Using
                Next

                anySaved = extractedImages.Count > 0
            End Using

            ' Only open the first image when something was actually extracted.
            If anySaved Then
                System.Diagnostics.Process.Start("image1.png")
            End If
        End Sub
    End Class
End Namespace
关于“C#/VB.NET如何创建、读取PDF文档”这篇文章就分享到这里了,希望以上内容可以对大家有一定的帮助,使各位可以学到更多知识,如果觉得文章不错,请把它分享出去让更多的人看到。
文章名称:C#/VB.NET如何创建、读取PDF文档
文章出自:http://scpingwu.com/article/ghejgd.html