Imports System Imports System.IO Imports DocumentFormat.OpenXml.Packaging Imports DocumentFormat.OpenXml.Spreadsheet Public Class PWCUtils Private m_PdfFI As FileInfo Private m_BoxingFI As FileInfo Private m_OriginalFile As FileInfo Private m_NewFile As FileInfo Const c_adj_vert As Integer = 1.1 Const c_adj_horz As Integer = 2.8 Public Sub New(ByVal Pdf As FileInfo, ByVal Boxing As FileInfo, ByVal OriginalFile As FileInfo, Optional OutputFile As FileInfo = Nothing) m_PdfFI = Pdf m_BoxingFI = Boxing m_OriginalFile = OriginalFile If OutputFile Is Nothing Then m_NewFile = New FileInfo(OriginalFile.FullName.Replace(".zip", ".pdf")) Else m_NewFile = OutputFile End If End Sub Public Function MakePWCPDF() As FileInfo If m_PdfFI.Exists And m_BoxingFI.Exists Then Dim alBoxes As New ArrayList Dim htMap As New Hashtable ' Load the box data alBoxes = MakeBoxesArray() htMap = LoadPwCMap() AddBoxesToPDF(alBoxes, htMap) End If Return m_NewFile End Function Private Function MakeBoxesArray() As ArrayList Dim BoxingFile As New XmlDocument Dim al As New ArrayList Try With BoxingFile ' Need to set xml resolver to nothing or it will pop errors ' for old xml schemas that were on files.stf.com .XmlResolver = Nothing ' Read the xml into memory .Load(m_BoxingFI.FullName) For Each Box As XmlNode In .GetElementsByTagName("text") Dim bd As New BoxingData bd.BoxName = Box.Attributes("ref").Value bd.Page = CType(Box.ParentNode.Attributes("ref").Value.Remove(0, 2), Integer) 'bd.Page = CType(bd.BoxName.Split("_")(0).Remove(0, 3), Integer) Dim be As XmlElement = Box If be.HasAttribute("justification") Then bd.Justification = Box.Attributes("justification").Value End If If be.HasAttribute("visable") Then bd.Visible = Box.Attributes("visable").Value Else bd.Visible = True End If If be.HasAttribute("case") Then bd.CaseType = Box.Attributes("case").Value End If If be.HasAttribute("insert_commas") Then bd.Commas = Box.Attributes("insert_commas").Value.ToString.Contains("false_") End If If be.HasAttribute("negatives_in_parenthesis") Then bd.Parens = Box.Attributes("negatives_in_parenthesis").Value.ToString.Contains("false_") End If If .GetElementsByTagName("text").Item(0).HasChildNodes Then For Each BoxElement As XmlNode In Box.ChildNodes Select Case BoxElement.Name Case "font" bd.FontName = BoxElement.InnerText Dim xe As XmlElement = BoxElement If xe.HasAttribute("size") Then bd.FontSize = BoxElement.Attributes("size").Value Else bd.FontSize = 12 End If If xe.HasAttribute("style") Then If BoxElement.Attributes("style").Value <> "regular" Then MsgBox(BoxElement.Attributes("style").Value) End If End If Case "bbox" Dim adj_decimal As Integer = 0 If bd.Justification.StartsWith("decimal") Then adj_decimal = 21 End If bd.Left = BoxElement.Attributes("left").Value - c_adj_horz bd.Right = BoxElement.Attributes("right").Value + c_adj_horz + adj_decimal bd.Top = BoxElement.Attributes("top").Value + c_adj_vert bd.Bottom = BoxElement.Attributes("bottom").Value + c_adj_vert End Select Next End If If bd.Visible Then al.Add(bd) End If Next End With Catch ex As Exception DisplayErrorMessage(ex, "PWC Class", System.Reflection.MethodInfo.GetCurrentMethod.Name) End Try Return al End Function Private Function AddBoxesToPDF(ByVal Boxes As ArrayList, ByVal BoxMap As Hashtable) As Boolean Console.WriteLine("Begin adding boxes to PDF") If Boxes.Count > 0 Then Dim bytFile() As Byte ' Read the PDF into memory Try Console.WriteLine("Begin reading box file") Dim fsFile As FileStream fsFile = New FileStream(m_PdfFI.FullName, FileMode.Open) Dim brFile As New BinaryReader(fsFile) brFile.BaseStream.Seek(0, SeekOrigin.Begin) bytFile = brFile.ReadBytes(fsFile.Length) brFile.Close() fsFile.Close() Console.WriteLine("Done reading box file") Catch ex As Exception DisplayErrorMessage(ex, "PWC Class", System.Reflection.MethodInfo.GetCurrentMethod.Name) Exit Function End Try Try Dim fileInput As New IDoc ' Input PDF to repurpose Dim fileOutput As New PDoc ' Repurposed PDF Dim ContainsOCRA As Boolean = False ' Add the boxes to the PDF With fileInput .OpenMem(bytFile) ' If the file has no pages it is likely because it is encrypted If .NumPages = 0 Then ' Something is wrong with this file, so we need to try one more time to convert it Try ' There was an issue with the input pdf so rerun the unlock Dim thisToughPDF As New Utils(m_PdfFI.FullName, m_PdfFI.FullName + "2") thisToughPDF.ProcessUnlock() ' Read the PDF into memory Try bytFile = Nothing Console.WriteLine("Begin reading box file") Dim fsFile As FileStream fsFile = New FileStream(m_PdfFI.FullName + "2", FileMode.Open) Dim brFile As New BinaryReader(fsFile) brFile.BaseStream.Seek(0, SeekOrigin.Begin) bytFile = brFile.ReadBytes(fsFile.Length) brFile.Close() fsFile.Close() Console.WriteLine("Done reading box file") .OpenMem(bytFile) Catch ex As Exception DisplayErrorMessage(ex, "PWC Class", System.Reflection.MethodInfo.GetCurrentMethod.Name) Exit Function End Try Catch ex As Exception Exit Function End Try End If Dim Rotations As New Hashtable For counter As Integer = 1 To fileInput.NumPages .GoPage(counter) Rotations.Add(counter, .GetRotate) Next .GoPage() Dim intFont As Integer = PTFormFontType.PTffCour Dim intJustify As Integer = 0 For Each box As BoxingData In Boxes Select Case box.FontName Case "Arial" intFont = PTFormFontType.PTffHelv Case "Times New Roman" intFont = PTFormFontType.PTffTiRo Case "OCR A Extended", "OCRAExtended" .SetFormFont(PTFormFontType.PTffCustom2, "OCRAExtended") intFont = PTFormFontType.PTffCustom2 ContainsOCRA = True Case Else intFont = PTFormFontType.PTffCour End Select Select Case box.Justification Case "center" intJustify = 1 Case "right", "decimal", "decimal_no_point" intJustify = 2 Case Else intJustify = 0 End Select ' Store the box name Dim strBoxName As String = box.BoxName ' If the key is found, user their name If BoxMap.ContainsKey(strBoxName) Then strBoxName = BoxMap(strBoxName) End If ' Handle mixed rotation If Rotations(box.Page) = 0 Then Console.WriteLine(String.Concat("Adding ", strBoxName)) .AddTextField(strBoxName, "", box.Left, box.Bottom, box.Right - box.Left, box.Top - box.Bottom, box.Page, intFont, box.FontSize, intJustify, , PTAnnotFlags.PTFlagAnnotPrintable,,,, 0) Else Console.WriteLine(String.Concat("Adding ", strBoxName)) .AddTextField(strBoxName, "", Math.Abs(box.Top - 603) + (c_adj_horz * 3), box.Left + c_adj_vert, box.Top - box.Bottom, box.Right - box.Left - (c_adj_vert), box.Page, intFont, box.FontSize, intJustify, , PTAnnotFlags.PTFlagAnnotPrintable, , , Rotations(box.Page), 0) End If Next End With With fileOutput Console.WriteLine(String.Concat("Saving fillable pdf", m_NewFile.FullName)) .[New](m_NewFile.FullName) ' Set PDF attributes .SetPDFVersion("1.6") .SetOpenAction(1, "Window") .SetPageLayout("SinglePage") .SetPageMode("UseNone") '.SetPreserveFontNames(True) '.SetSecurity("vY+S2ltK@m", "", "cied") '' c = changing the document is denied in Acrobat ' a = adding or changing annotations or form fields is denied ' i = disable editing of form fields ' e = disable extraction of text and graphics ' d = disable document assembly (insert pages, rotating, etc) '.SetMetaData("0None") .Attach(fileInput) ' Clean up meta data Dim strKeys As String = fileInput.GetInfoKeys Dim arrKeys2 As Array = strKeys.Split(vbCrLf) Dim strMetaData As String = fileInput.GetMetaData Dim myenum As IEnumerator = arrKeys2.GetEnumerator ' Blank any of the attributes that are set, including non-standard ones. While myenum.MoveNext If myenum.Current.ToString.Length > 0 Then Select Case myenum.Current.ToString.Remove(0, 1) Case "Producer", "ModDate" Case "Creator" .SetAttr(myenum.Current.ToString.Remove(0, 1), "PDFUtils") Case "CreationDate" .SetAttr(myenum.Current.ToString.Remove(0, 1), Date.Now) Case Else fileOutput.SetAttr(myenum.Current.ToString.Remove(0, 1), "") End Select End If End While .SetAttr("Author", "PWC") .SetAttr("Creator", "PDF Utilities") .SetAttr("Keywords", "PWC") .SetAttr("Subject", "PWC Fillable PDF") .SetAttr("Title", m_PdfFI.Name.ToUpper.Replace(".PDF", "")) .AddViewerPreference("PrintScaling", "None", True) .InputCopyAll() '.InputCopyPages() If ContainsOCRA Then Dim fontlist As New ArrayList(fileInput.GetFonts(0).Split(Chr(13))) If Not fontlist.Contains("OCRAExtended") Then MsgBox("The OCR A Extended font must be fully embed. DO NOT use partial embedding." _ , MsgBoxStyle.Exclamation + MsgBoxStyle.MsgBoxSetForeground, "Source PDF Missing Font") End If '.Merge(My.Application.Info.DirectoryPath & "\ocra.pdf") '.InputOpen(My.Application.Info.DirectoryPath & "\ocra.pdf") '.NewPage() '.Page.SetFont("OCR%20A%20Extended", 12) '.Page.PrintText("Mark", 1, 1) End If .Close() End With Catch ex As System.Exception DisplayErrorMessage(ex, "PWC Class", System.Reflection.MethodInfo.GetCurrentMethod.Name) End Try End If End Function Private Function LoadPwCMap() As Hashtable Dim htMap As New Hashtable Dim di As New DirectoryInfo(m_OriginalFile.Directory.FullName) Dim fiMap As New FileInfo(di.FullName) For Each fi As FileInfo In di.GetFiles("Spec-PwCTags_*.xlsx") fiMap = New FileInfo(fi.FullName) Next If fiMap.Exists Then Try Using spreadsheetDocument As SpreadsheetDocument = SpreadsheetDocument.Open(fiMap.FullName, False) Dim workbookPart As WorkbookPart = spreadsheetDocument.WorkbookPart Dim worksheetPart As WorksheetPart = workbookPart.WorksheetParts.First() Dim sheetData As SheetData = worksheetPart.Worksheet.Elements(Of SheetData)().First() For Each r As Row In sheetData.Elements(Of Row)() Dim ours As String = "" Dim theirs As String = "" For Each c As Cell In r.Elements(Of Cell)() Dim id As Integer = -1 Dim ssi As SharedStringItem If (Int32.TryParse(c.InnerText, id)) Then ssi = GetSharedStringItemById(workbookPart, id) If ours = "" Then ours = ssi.Text.Text Else theirs = ssi.Text.Text End If End If Next If ours <> "" And theirs <> "" Then htMap.Add(ours, theirs) End If Next End Using Catch ex As Exception End Try End If Return htMap Throw New NotImplementedException() End Function Public Function GetSharedStringItemById(wp As WorkbookPart, id As Integer) As SharedStringItem Return wp.SharedStringTablePart.SharedStringTable.ElementAt(id) End Function End Class Public Class BoxingData Dim m_Page As Integer Dim m_BoxName As String Dim m_Justification As String Dim m_Case As String Dim m_Commas As Boolean Dim m_Parens As Boolean Dim m_FontName As String Dim m_FontSize As Integer Dim m_left As Single Dim m_top As Single Dim m_right As Single Dim m_bottom As Single Dim m_Visible As Boolean Public Property Page() As Integer Get Return m_Page End Get Set(ByVal value As Integer) m_Page = value End Set End Property Public Property BoxName() As String Get Return m_BoxName End Get Set(ByVal value As String) m_BoxName = value End Set End Property Public Property Justification() As String Get Return m_Justification End Get Set(ByVal value As String) m_Justification = value End Set End Property Public Property CaseType() As String Get Return m_Case End Get Set(ByVal value As String) m_Case = value End Set End Property Public Property Commas() As Boolean Get Return m_Commas End Get Set(ByVal value As Boolean) m_Commas = value End Set End Property Public Property Parens() As Boolean Get Return m_Parens End Get Set(ByVal value As Boolean) m_Parens = value End Set End Property Public Property FontName() As String Get Return m_FontName End Get Set(ByVal value As String) m_FontName = value End Set End Property Public Property FontSize() As Integer Get Return m_FontSize End Get Set(ByVal value As Integer) m_FontSize = value End Set End Property Public Property Left() As Integer Get Return m_left End Get Set(ByVal value As Integer) m_left = value End Set End Property Public Property Right() As Integer Get Return m_right End Get Set(ByVal value As Integer) m_right = value End Set End Property Public Property Top() As Integer Get Return m_top End Get Set(ByVal value As Integer) m_top = value End Set End Property Public Property Bottom() As Integer Get Return m_bottom End Get Set(ByVal value As Integer) m_bottom = value End Set End Property Public Property Visible() As Boolean Get Return m_Visible End Get Set(ByVal value As Boolean) m_Visible = value End Set End Property End Class