Yesterday I released a small project I was working on to merge multiple image files into a single, multi-page PDF (one image per page). It wasn't too difficult, yet I had to deal with the following issues:
- Some nasty GDI+ issues when dealing with multi-page TIFF files (read this post for further details on that).
- Some nasty GDI+ issues when trying to resize/resample each image to make it fit the page size of the containing PDF.
Yeah, you can easily guess I really don't like GDI+. Luckily enough, I found a great open-source alternative to deal with these issues: I'm talking about the iTextSharp library, freely available through NuGet or SourceForge, which can flawlessly handle these kinds of tasks.
Using it proved to be really simple; here's what I did to fulfill my specific scenario (WARNING! large amount of source code incoming):
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
using System;
using System.Drawing.Imaging;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;

namespace Ryadel.Components.Media
{
    /// <summary>
    /// Helper methods that build PDF documents with iTextSharp.
    /// </summary>
    public static class PDFHelper
    {
        /// <summary>
        /// Amount subtracted from the output page width and height when scaling an image to fit the page.
        /// </summary>
        private const float ImageFitMargin = 10f;

        /// <summary>
        /// Merge one or more image or document files into a single PDF.
        /// Supported formats: bmp, gif, jpg, jpeg, png, tif, tiff, pdf (including multi-page tiff and pdf files).
        /// Entries with any other extension are skipped.
        /// </summary>
        /// <param name="infoArray">The files to merge, in output order.</param>
        /// <returns>The bytes of the merged PDF. When the input is a single PDF file, its data is returned untouched.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="infoArray"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="infoArray"/> contains a null entry.</exception>
        /// <remarks>
        /// Any exception raised while processing an entry is rethrown with the entry's file name
        /// stored in <c>Exception.Data["FileName"]</c>.
        /// </remarks>
        public static byte[] MergeIntoPDF(params ByteArrayInfo[] infoArray)
        {
            if (infoArray == null)
            {
                throw new ArgumentNullException("infoArray");
            }

            foreach (ByteArrayInfo entry in infoArray)
            {
                if (entry == null)
                {
                    throw new ArgumentException("The file list contains a null entry.", "infoArray");
                }
            }

            // If we do have a single PDF file, return it without doing anything
            if (infoArray.Length == 1 && NormalizeExtension(infoArray[0]) == "pdf")
            {
                return infoArray[0].Data;
            }

            // patch to fix the "PdfReader not opened with owner password" error.
            // ref.: https://stackoverflow.com/questions/17691013/pdfreader-not-opened-with-owner-password-error-in-itext
            // NOTE: this is a static flag, so it stays set after this method returns.
            PdfReader.unethicalreading = true;

            using (Document doc = new Document())
            {
                doc.SetPageSize(PageSize.A4);
                using (var ms = new MemoryStream())
                {
                    using (PdfCopy pdf = new PdfCopy(doc, ms))
                    {
                        doc.Open();
                        foreach (ByteArrayInfo info in infoArray)
                        {
                            try
                            {
                                // Kept from the original implementation.
                                doc.NewPage();

                                switch (NormalizeExtension(info))
                                {
                                    case "bmp":
                                    case "gif":
                                    case "jpg":
                                    case "jpeg":
                                    case "png":
                                        AppendImagePage(pdf, doc.PageSize, info.Data);
                                        break;

                                    case "tif":
                                    case "tiff":
                                        AppendTiffFrames(pdf, doc.PageSize, info.Data);
                                        break;

                                    case "pdf":
                                        using (var reader = new PdfReader(info.Data))
                                        {
                                            // iText page numbers are 1-based.
                                            for (int pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
                                            {
                                                pdf.AddPage(pdf.GetImportedPage(reader, pageNumber));
                                            }
                                            pdf.FreeReader(reader);
                                            reader.Close();
                                        }
                                        break;

                                    default:
                                        // not supported image format:
                                        // skip it (or throw an exception if you prefer)
                                        break;
                                }
                            }
                            catch (Exception e)
                            {
                                e.Data["FileName"] = info.FileName;
                                // "throw;" (not "throw e;") keeps the original stack trace.
                                throw;
                            }
                        }

                        if (doc.IsOpen())
                        {
                            doc.Close();
                        }

                        return ms.ToArray();
                    }
                }
            }
        }

        /// <summary>
        /// Returns the entry's file extension without dots, lower-cased with the invariant culture
        /// so that the comparison does not depend on the current thread culture.
        /// A null extension yields an empty string.
        /// </summary>
        private static string NormalizeExtension(ByteArrayInfo info)
        {
            return (info.FileExtension ?? string.Empty).Trim('.').ToLowerInvariant();
        }

        /// <summary>
        /// Renders a single image into a temporary one-page PDF, scaled to fit
        /// <paramref name="pageSize"/>, and appends that page to <paramref name="pdf"/>.
        /// </summary>
        private static void AppendImagePage(PdfCopy pdf, iTextSharp.text.Rectangle pageSize, byte[] imageData)
        {
            using (var imageDocument = new Document())
            {
                using (var imageMS = new MemoryStream())
                {
                    using (var imageDocumentWriter = PdfWriter.GetInstance(imageDocument, imageMS))
                    {
                        imageDocument.Open();
                        if (imageDocument.NewPage())
                        {
                            var image = iTextSharp.text.Image.GetInstance(imageData);
                            image.Alignment = Element.ALIGN_CENTER;
                            image.ScaleToFit(pageSize.Width - ImageFitMargin, pageSize.Height - ImageFitMargin);
                            if (!imageDocument.Add(image))
                            {
                                throw new InvalidOperationException("Unable to add image to page!");
                            }

                            // Close the temporary document and writer before reading the stream back.
                            imageDocument.Close();
                            imageDocumentWriter.Close();

                            using (PdfReader imageDocumentReader = new PdfReader(imageMS.ToArray()))
                            {
                                var page = pdf.GetImportedPage(imageDocumentReader, 1);
                                pdf.AddPage(page);
                                imageDocumentReader.Close();
                            }
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Appends every frame of a (possibly multi-page) TIFF to <paramref name="pdf"/>, one page per frame.
        /// </summary>
        private static void AppendTiffFrames(PdfCopy pdf, iTextSharp.text.Rectangle pageSize, byte[] tiffData)
        {
            using (var imageStream = new MemoryStream(tiffData))
            {
                using (System.Drawing.Image tiffImage = System.Drawing.Image.FromStream(imageStream))
                {
                    foreach (Guid guid in tiffImage.FrameDimensionsList)
                    {
                        FrameDimension dimension = new FrameDimension(guid);

                        // Ask for the frame count of THIS dimension rather than reusing the first one's.
                        int frameCount = tiffImage.GetFrameCount(dimension);

                        for (int index = 0; index < frameCount; index++)
                        {
                            tiffImage.SelectActiveFrame(dimension, index);
                            using (MemoryStream frameStream = new MemoryStream())
                            {
                                // Saves the currently selected frame on its own.
                                tiffImage.Save(frameStream, ImageFormat.Tiff);
                                AppendImagePage(pdf, pageSize, frameStream.ToArray());
                            }
                        }
                    }
                }
            }
        }
    }
}
This is the code that defines the ByteArrayInfo class, which is used as the input parameter of the above method: as you can easily understand, the main purpose of this class is to feed the MergeIntoPDF method with both the file name and the byte array of each file we want to "merge".
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
namespace Ryadel.Components.Media
{
    /// <summary>
    /// POCO class that stores a byte[] together with other useful information about the data.
    /// </summary>
    public class ByteArrayInfo
    {
        /// <summary>
        /// Creates a new instance and derives <see cref="FileExtension"/> from <paramref name="fileName"/>.
        /// </summary>
        /// <param name="fileData">The raw file content.</param>
        /// <param name="fileName">The file name (e.g. "TestFile.pdf"); may be null.</param>
        public ByteArrayInfo(byte[] fileData, string fileName)
        {
            Data = fileData;
            FileName = fileName;

            // Path.GetExtension returns null for a null path; fall back to an empty extension
            // instead of failing. The invariant culture keeps the result independent of the
            // current thread culture.
            string extension = System.IO.Path.GetExtension(fileName);
            FileExtension = (extension ?? string.Empty).ToLowerInvariant();
        }

        /// <summary>
        /// The raw file content.
        /// </summary>
        public byte[] Data { get; set; }

        /// <summary>
        /// The File Name (e.g. "TestFile.pdf")
        /// </summary>
        public string FileName { get; set; }

        /// <summary>
        /// The File Extension, lower-cased and including the dot (e.g. ".pdf");
        /// empty when the file name is null or has no extension.
        /// </summary>
        public string FileExtension { get; set; }
    }
}
The source code of the MergeIntoPDF method is pretty much self-explanatory. You will notice a large number of nested (and non-nested) using blocks, which often happens when working with GDI+ image types (most of them implement the IDisposable interface, hence we have to manually dispose of them), and also some Bitmap-into-Bitmap transformations which could seem rather odd at first: these are nothing more than attempts to properly deal with GDI+, which otherwise often throws the generic GDI+ error (in some environments). If you are confident you can safely shrink the code, feel free to do that... but be wary of the fact that it could break on other machines!
It's worth noting that the main method returns a byte array, which I needed in my specific scenario because I had to put the resulting PDF file into a DB blob column: you can modify the return value to get the MemoryStream, force an IO write somewhere on your hard drive, or do anything else that might suit you better.
However, that's it for now: happy converting!