Monthly Archives: June 2010

iTextSharp PDF/A with embedded font

Here’s my method for parsing an HTML file and creating a PDF/A archive document from it. It also embeds the font by applying a stylesheet, which avoids the error “All the fonts must be embedded. This one isn’t: Helvetica”.

Hope this helps someone.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using iTextSharp.text.pdf;
using iTextSharp.text;
using iTextSharp.text.html.simpleparser;
using System.IO;
    /// <summary>
    /// Builds a PDF/A document from rendered HTML using iTextSharp.
    /// </summary>
    public class PDFConverter
        /// <summary>
        /// Prefix placed in front of the dispatch ID when naming the generated PDF file.
        /// </summary>
        public static string RenderedFormFileNamePrefix = "WBS ";
        /// <summary>
        /// Takes the rendered HTML of a form and creates a PDF/A-1b file from it
        /// (the writer's conformance is set to <c>PdfWriter.PDFA1B</c>). An sRGB output
        /// intent is attached and the body font face is switched to Verdana via a stylesheet.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this listing appears to have lost its braces and a few tokens
        /// (the method's return type, the <c>foreach</c> body, and the type in
        /// <c>pdfFile = new;</c>). Restore them from the original source before compiling.
        /// </remarks>
        /// <param name="rendererWebFormHtml">rendered html</param>
        /// <param name="inputDispatchID">used for naming the created file and stored as its FileID</param>
        /// <param name="networkCredential">
        /// assigned to <c>CustomNetworkCredentials.CustomCredentials</c> before the HTML is parsed;
        /// presumably used when the parser fetches resources referenced by the HTML (TODO confirm)
        /// </param>
        /// <returns>PDF Archive file</returns>
        public CreatePDFArchive(string rendererWebFormHtml, int inputDispatchID, System.Net.NetworkCredential networkCredential)
            //create stylesheet (used to change font from Helvetica in order to embed font in PDF/A)
            StyleSheet styles = new StyleSheet();
            // NOTE(review): fontPath is computed but never used in the visible code; a font
            // registration call may have been dropped from this listing - confirm.
            string fontPath = Environment.GetEnvironmentVariable("SystemRoot") + "\\fonts\\verdana.ttf";
            // Set the "face" style of the <body> tag to Verdana.
            styles.LoadTagStyle("body", "face", "Verdana");
            //prepare html (clean up some tags for nicer formatting in PDF):
            //strips the <title> element, the ctl00_TableCell4 cell, the WaitText heading
            //and the //<![CDATA[ ... //]]> sections
            string html = rendererWebFormHtml;
            html = RemoveTag(html, "<title>", "</title>");
            html = RemoveTag(html, "<td id=\"ctl00_TableCell4\">", "</td>");
            html = RemoveTag(html, "<h5 id=\"WaitText\">", "</h5>");
            html = RemoveTag(html, "//<![CDATA[", "//]]>");
            //convert html string to stream (UTF-8 encoded)
            byte[] byteArray = Encoding.UTF8.GetBytes(html);
            MemoryStream msHTML = new MemoryStream(byteArray);
            //Create empty A4 document
            Document document = new Document(PageSize.A4);
            MemoryStream msPDF = new MemoryStream(); //Memorystream to hold the PDF document
            //we create a writer that listens to the document
            PdfWriter writer = PdfWriter.GetInstance(document, msPDF);
            writer.PDFXConformance = PdfWriter.PDFA1B; //set document archive format (PDF/A-1b)
            // Copied verbatim from the itext mailing list (the link is missing from this listing).
            // Builds an sRGB output intent from the embedded srgb resource and registers it
            // in the document catalog.
            PdfDictionary outputIntent = new PdfDictionary(PdfName.OUTPUTINTENT);
            outputIntent.Put(PdfName.OUTPUTCONDITIONIDENTIFIER, new PdfString("sRGB IEC61966-2.1"));
            outputIntent.Put(PdfName.INFO, new PdfString("sRGB IEC61966-2.1"));
            outputIntent.Put(PdfName.S, PdfName.GTS_PDFA1);            
            PdfICCBased ib = new PdfICCBased(ICC_Profile.GetInstance(Properties.Resources.srgb));
            outputIntent.Put(PdfName.DESTOUTPUTPROFILE, writer.AddToBody(ib).IndirectReference);
            writer.ExtraCatalog.Put(PdfName.OUTPUTINTENTS, new PdfArray(outputIntent));
            //parse html
            // NOTE(review): htmlWorker is created but the parse below goes through the static
            // HTMLWorker.ParseToList; the instance is not used in the visible code.
            HTMLWorker htmlWorker = new HTMLWorker(document);
            iTextSharp.text.CustomNetworkCredentials.CustomCredentials = networkCredential;
            System.Collections.Generic.List<IElement> elements;
            elements = HTMLWorker.ParseToList(new StreamReader(msHTML), styles);
            // NOTE(review): the loop body is missing from this listing; presumably each parsed
            // element is added to the document - confirm against the original source.
            foreach (IElement item in elements)
            //close up pdf document
            // NOTE(review): no close statement is visible here, and no document open call is
            // visible above either - confirm both against the original source.
            //create PDF file object to return
            // NOTE(review): the declared type and constructor call were lost in this listing.
   pdfFile = new;
            pdfFile.Data = msPDF.ToArray();
            pdfFile.FileID = inputDispatchID;
            pdfFile.Name = RenderedFormFileNamePrefix + inputDispatchID.ToString() + ".pdf";
            pdfFile.Type = "PDF";
            //TODO: Add error handling for closing up streams (catch--finally)
            //clean up
            // NOTE(review): no cleanup statements are visible; msHTML and msPDF are not
            // disposed in the code shown.
            return pdfFile;            
        private string RemoveTag(string html, string startTag, string endTag)
            int startPos = html.IndexOf(startTag);
            if (startPos >= 0)
                int endPos = html.IndexOf(endTag, startPos);
                if (endPos > startPos)
                    html = html.Remove(startPos, (endPos + endTag.Length) - startPos);
                    html = RemoveTag(html, startTag, endTag); //Recursive call to remove any duplicate tags
            return html;