Buffer non-seekable PDF streams in a temporary file on the file system instead of in memory when extracting text (#16958)

--------- Co-authored-by: Mike Alhayek <[email protected]>
OrchardCMS · Nov 7, 2024 · 5a14fb7 · 5a14fb7
1 parent d08a99d
commit 5a14fb7
Showing 1 changed file with 4 additions and 5 deletions.
diff --git a/src/OrchardCore.Modules/OrchardCore.Media.Indexing.Pdf/Services/PdfMediaFileTextProvider.cs b/src/OrchardCore.Modules/OrchardCore.Media.Indexing.Pdf/Services/PdfMediaFileTextProvider.cs
@@ -11,16 +11,15 @@ public async Task<string> GetTextAsync(string path, Stream fileStream)
         // https://github.com/UglyToad/PdfPig/blob/master/src/UglyToad.PdfPig.Core/StreamInputBytes.cs#L45.
         // Thus if it isn't, which is the case with e.g. Azure Blob Storage, we need to copy it to a new, seekable
         // Stream.
-        MemoryStream seekableStream = null;
+        FileStream seekableStream = null;
         try
         {
             if (!fileStream.CanSeek)
             {
-                // Since fileStream.Length might not be supported either, we can't preconfigure the capacity of the
-                // MemoryStream.
-                seekableStream = new MemoryStream();
-                // While this involves loading the file into memory, we don't really have a choice.
+                seekableStream = new FileStream(Path.GetTempFileName(), FileMode.OpenOrCreate, FileAccess.Write, FileShare.None, 4096, FileOptions.DeleteOnClose);
+
                 await fileStream.CopyToAsync(seekableStream);
+
                 seekableStream.Position = 0;
             }