Monthly Archives: June 2010

iTextSharp PDF/A with embedded font

Here’s my method for parsing an HTML file and creating a PDF/A archive document from it. It embeds the font by applying a stylesheet, which avoids the error “All the fonts must be embedded. This one isn’t: Helvetica”.

Hope this helps someone.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using iTextSharp.text.pdf;
using iTextSharp.text;
using iTextSharp.text.html.simpleparser;
using System.IO;
 
namespace Umea.se.DataCollector.Utilities
{
    public class PDFConverter
    {
        public static string RenderedFormFileNamePrefix = "WBS ";
 
        /// <summary>
        /// Takes rendered HTML of a form and creates PDF/A 1a file
        /// </summary>
        /// <param name="rendererWebFormHtml">rendered html</param>
        /// <param name="inputDispatchID">used for naming the created file</param>
        /// <returns>PDF Archive file</returns>
        public Umea.se.DataCollector.DataObjectLibrary.File CreatePDFArchive(string rendererWebFormHtml, int inputDispatchID, System.Net.NetworkCredential networkCredential)
        {
            //create stylesheet (used to change font from Helvetica in order to embed font in PDF/A)
            StyleSheet styles = new StyleSheet();
            string fontPath = Environment.GetEnvironmentVariable("SystemRoot") + "\\fonts\\verdana.ttf";
            FontFactory.Register(fontPath);
            styles.LoadTagStyle("body", "face", "Verdana");
 
            //prepare html (clean up som tags for nicer formatting in PDF)                
            string html = rendererWebFormHtml;
            html = RemoveTag(html, "<title>", "</title>");
            html = RemoveTag(html, "<td id=\"ctl00_TableCell4\">", "</td>");
            html = RemoveTag(html, "<h5 id=\"WaitText\">", "</h5>");
            html = RemoveTag(html, "//<![CDATA[", "//]]>");
 
            //convert html string to stream
            byte[] byteArray = Encoding.UTF8.GetBytes(html);
            MemoryStream msHTML = new MemoryStream(byteArray);
 
            //Create empty document
            Document document = new Document(PageSize.A4);
            MemoryStream msPDF = new MemoryStream(); //Memorystream to hold the PDF document
 
            //we create a writer that listens to the document
            PdfWriter writer = PdfWriter.GetInstance(document, msPDF);
            writer.PDFXConformance = PdfWriter.PDFA1B; //set document arhive format
            document.Open();
 
            // Copied verbatim from the itext mailing list, see: 
            // http://article.gmane.org/gmane.comp.java.lib.itext.general/31582/
            PdfDictionary outputIntent = new PdfDictionary(PdfName.OUTPUTINTENT);
            outputIntent.Put(PdfName.OUTPUTCONDITIONIDENTIFIER, new PdfString("sRGB IEC61966-2.1"));
            outputIntent.Put(PdfName.INFO, new PdfString("sRGB IEC61966-2.1"));
            outputIntent.Put(PdfName.S, PdfName.GTS_PDFA1);            
            PdfICCBased ib = new PdfICCBased(ICC_Profile.GetInstance(Properties.Resources.srgb));
            ib.Remove(PdfName.ALTERNATE);
            outputIntent.Put(PdfName.DESTOUTPUTPROFILE, writer.AddToBody(ib).IndirectReference);
            writer.ExtraCatalog.Put(PdfName.OUTPUTINTENTS, new PdfArray(outputIntent));
 
            //parse html
            HTMLWorker htmlWorker = new HTMLWorker(document);
            iTextSharp.text.CustomNetworkCredentials.CustomCredentials = networkCredential;
            System.Collections.Generic.List<IElement> elements;
            elements = HTMLWorker.ParseToList(new StreamReader(msHTML), styles);
            foreach (IElement item in elements)
            {
                document.Add(item);
            }
 
            //close up pdf document
            writer.CreateXmpMetadata();
            document.Close();
 
            //create PDF file object to return
            Umea.se.DataCollector.DataObjectLibrary.File pdfFile = new Umea.se.DataCollector.DataObjectLibrary.File();
            pdfFile.Data = msPDF.ToArray();
            pdfFile.FileID = inputDispatchID;
            pdfFile.Name = RenderedFormFileNamePrefix + inputDispatchID.ToString() + ".pdf";
            pdfFile.Type = "PDF";
 
            //TODO: Add error handling for closing up streams (catch--finally)
            //clean up
            msPDF.Close();
            msPDF.Dispose();
            msHTML.Close();
            msHTML.Dispose();
            writer.Close();
            htmlWorker.Close();
 
            return pdfFile;            
        }
 
        private string RemoveTag(string html, string startTag, string endTag)
        {
            int startPos = html.IndexOf(startTag);
            if (startPos >= 0)
            {
                int endPos = html.IndexOf(endTag, startPos);
                if (endPos > startPos)
                {
                    html = html.Remove(startPos, (endPos + endTag.Length) - startPos);
                    html = RemoveTag(html, startTag, endTag); //Recursive call to remove any duplicate tags
                }
            }
            return html;
        }
    }
}

 

Advertisements