BoldonJames · Numpsy · Jul 25, 2023 · Jul 24, 2023 · Jul 25, 2023
diff --git a/dotNET/pdfclown.lib/src/org/pdfclown/tokens/FileParser.cs b/dotNET/pdfclown.lib/src/org/pdfclown/tokens/FileParser.cs
@@ -24,22 +24,18 @@ this list of conditions.
 */
 
 using org.pdfclown.bytes;
-using org.pdfclown.documents;
-using org.pdfclown.files;
 using org.pdfclown.objects;
 using org.pdfclown.util.parsers;
 
 using System;
-using System.Globalization;
-using System.IO;
-using System.Text;
+using System.Text;
 
 namespace org.pdfclown.tokens
-{
-  /**
-    <summary>PDF file parser [PDF:1.7:3.2,3.4].</summary>
-  */
-  public sealed class FileParser
+{
+    /**
+      <summary>PDF file parser [PDF:1.7:3.2,3.4].</summary>
+    */
+    public sealed class FileParser
     : BaseParser
   {
     #region types
@@ -214,18 +210,64 @@ XRefEntry xrefEntry
     /**
       <summary>Retrieves the PDF version of the file [PDF:1.6:3.4.1].</summary>
     */
-    public string RetrieveVersion(
+    public string RetrieveVersion(out long headerOffset
       )
-    {
-      IInputStream stream = Stream;
-      stream.Seek(0);
+    {
+      IInputStream stream = Stream;
+      stream.Seek(0);
+
+      headerOffset = FindTrueHeaderPosition();
+
+      stream.Seek(headerOffset);
+
       string header = stream.ReadString(10);
       if(!header.StartsWith(Keyword.BOF))
         throw new PostScriptParseException("PDF header not found.", this);
 
       return header.Substring(Keyword.BOF.Length,3);
     }
 
+    /**
+      <summary>Locates the real PDF header. Some third-party tools insert a block before the header,
+      although the header should be the first entry in the document, so the stream is scanned
+      from its start for it. The stream position held on entry is restored before returning.</summary>
+      <returns>Header position within the stream, or 0 if the end of the stream is reached without finding one.</returns>
+    */
+    internal long FindTrueHeaderPosition(
+      )
+    {
+      IInputStream stream = Stream;
+      long position = stream.Position; // Remember the caller's position so it can be restored on exit.
+      stream.Seek(0); // Always scan from the very beginning of the stream.
+
+      do
+      {
+        int read = stream.ReadByte();
+        switch (read)
+        {
+          case Symbol.Percent:    // Comment marker: possible first byte of the header.
+            long headerOffset = stream.Position - 1; // Offset of the byte just read (position is now one past it).
+
+            StringBuilder header = new StringBuilder(Keyword.BOF.Length);
+            header.Append(Convert.ToChar(read));
+            header.Append(stream.ReadString(4)); // Candidate = the byte just read plus the next 4 characters.
+            if (!header.ToString().StartsWith(Keyword.BOF))
+            { // NOTE(review): the 4 lookahead characters are presumably consumed by ReadString and never rescanned, so a header starting inside them (e.g. "%%PDF-") would be missed -- confirm.
+              continue; // Not the header: carry on with the next byte.
+            }
+
+            // Header found: restore the caller's position and report where the header starts.
+            stream.Seek(position);
+            return headerOffset;
+
+          case -1:                // EOF: no header found; restore the position and fall back to offset 0.
+            stream.Seek(position);
+            return 0;
+        }
+      }
+      while (true); // Leaves only through one of the returns above.
+    }
+
     /**
       <summary>Retrieves the starting position of the last xref-table section [PDF:1.6:3.4.4].</summary>
     */

diff --git a/dotNET/pdfclown.lib/src/org/pdfclown/tokens/PlainWriter.cs b/dotNET/pdfclown.lib/src/org/pdfclown/tokens/PlainWriter.cs
@@ -67,6 +67,7 @@ protected override void WriteIncremental(
     {
       // 1. Original content (head, body and previous trailer).
       FileParser parser = file.Reader.Parser;
+      long headerOffset = parser.FindTrueHeaderPosition();
       stream.Write(parser.Stream);
 
       // 2. Body update (modified indirect objects insertion).
@@ -118,7 +119,7 @@ in file.IndirectObjects.ModifiedObjects
             AppendXRefEntry(
               xrefSubBuilder,
               indirectObjectEntry.Value.Reference,
-              stream.Length
+              stream.Length - headerOffset
               );
             // Add in-use entry content!
             indirectObjectEntry.Value.WriteTo(stream, file);
@@ -148,7 +149,7 @@ in file.IndirectObjects.ModifiedObjects
       }
 
       // 3. XRef-table last section.
-      long startxref = stream.Length;
+      long startxref = stream.Length - headerOffset;
       stream.Write(xrefBuilder.ToString());
 
       // 4. Trailer.

diff --git a/dotNET/pdfclown.lib/src/org/pdfclown/tokens/Reader.cs b/dotNET/pdfclown.lib/src/org/pdfclown/tokens/Reader.cs
@@ -24,23 +24,18 @@ this list of conditions.
 */
 
 using org.pdfclown.bytes;
-using org.pdfclown.documents;
-using org.pdfclown.files;
 using org.pdfclown.objects;
-using org.pdfclown.util.collections.generic;
 using org.pdfclown.util.parsers;
 
 using System;
 using System.Collections.Generic;
-using System.IO;
-using System.Linq;
 
 namespace org.pdfclown.tokens
-{
-  /**
-    <summary>PDF file reader.</summary>
-  */
-  public sealed class Reader
+{
+    /**
+      <summary>PDF file reader.</summary>
+    */
+    public sealed class Reader
     : IDisposable
   {
     #region types
@@ -116,12 +111,13 @@ public FileParser Parser
     public FileInfo ReadInfo(
       )
     {
-  //TODO:hybrid xref table/stream
-      Version version = Version.Get(parser.RetrieveVersion());
+      //TODO:hybrid xref table/stream
+
+      Version version = Version.Get(parser.RetrieveVersion(out long headerOffset));
       PdfDictionary trailer = null;
       SortedDictionary<int,XRefEntry> xrefEntries = new SortedDictionary<int,XRefEntry>();
       {
-        long sectionOffset = parser.RetrieveXRefOffset();
+        long sectionOffset = parser.RetrieveXRefOffset() + headerOffset;
         while(sectionOffset > -1)
         {
           // Move to the start of the xref section!
@@ -172,7 +168,7 @@ its entries.
                 }
 
                 // Get the indirect object offset!
-                int offset = (int)parser.GetToken(1);
+                int offset = (int)parser.GetToken(1) + (int)headerOffset;
                 // Get the object generation number!
                 int generation = (int)parser.GetToken(1);
                 // Get the usage tag!
@@ -222,7 +218,7 @@ its entries.
 
           // Get the previous xref-table section's offset!
           PdfInteger prevXRefOffset = (PdfInteger)sectionTrailer[PdfName.Prev];
-          sectionOffset = (prevXRefOffset != null ? prevXRefOffset.IntValue : -1);
+          sectionOffset = (prevXRefOffset != null ? prevXRefOffset.IntValue + headerOffset : -1);
         }
       }
       return new FileInfo(version, trailer, xrefEntries);