EVO PDF Logo

Search Text in PDF

EVO PDF Client for .NET Core

The EVO PDF client library allows you to search for text in PDF documents and obtain its location in the PDF pages using the PDF to Text Converter component. The PDF to Text Converter object of type PdfToTextConverter can be initialized with either the TCP/IP address or the HTTP URL of the server, depending on the type of EVO PDF Server you have installed.

PDF to Text Converter Options

The PDF to Text Converter allows you to select the page range in which to search for the text and to specify whether the search is case sensitive or matches whole words only. These features of the PDF to Text Converter are exemplified in the code sample below, which searches for a text in the PDF document and also updates the PDF document to highlight the found text. The full Visual Studio demo project for ASP.NET Core is available in the product package you can download from the website.

Code Sample - Search Text in PDF in ASP.NET with PdfToTextConverter Class

C#
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Mvc;

using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Hosting;

using EvoPdfClient;

namespace FindTextDemo.Controllers
{
    /// <summary>
    /// MVC controller for the "find text" demo: it searches a PDF file for a text
    /// using <c>PdfToTextConverter</c>, highlights every match with a yellow
    /// rectangle and returns the modified PDF as a file download.
    /// </summary>
    public class Find_TextController : Controller
    {
        private readonly IWebHostEnvironment m_hostingEnvironment;

        /// <summary>
        /// Creates the controller.
        /// </summary>
        /// <param name="hostingEnvironment">Hosting environment used to build the demo file path.</param>
        public Find_TextController(IWebHostEnvironment hostingEnvironment)
        {
            m_hostingEnvironment = hostingEnvironment;
        }

        /// <summary>
        /// Shows the demo page and publishes the path of the demo PDF file in
        /// <c>ViewData["DemoFilePath"]</c>.
        /// </summary>
        /// <returns>The default view of this action.</returns>
        public IActionResult Index()
        {
            ViewData["DemoFilePath"] = m_hostingEnvironment.ContentRootPath + "/wwwroot" + "/DemoFiles/Input/Demo.pdf";

            return View();
        }

        /// <summary>
        /// Searches the posted text in the selected PDF file, highlights each
        /// found location and sends the resulting PDF to the browser.
        /// </summary>
        /// <param name="collection">
        /// The posted form. Fields read: textBoxServerIP, textBoxServerPort,
        /// textBoxServicePassword, ServerType, textBoxWebServiceUrl, filePathTextBox,
        /// startPageTextBox, endPageTextBox, textToFindTextBox, caseSensitiveCheckBox
        /// and wholeWordCheckBox.
        /// </param>
        /// <returns>
        /// A PDF file result named after the input file with the "_Highlighted.pdf" suffix.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when no PDF file was chosen, or when the search / highlight step fails;
        /// in the latter case the original failure is available as the inner exception.
        /// </exception>
        [HttpPost]
        public ActionResult FindText(IFormCollection collection)
        {
            // Get the server options
            string serverIP = collection["textBoxServerIP"];
            uint serverPort = uint.Parse(collection["textBoxServerPort"]);
            // A missing form field converts to a null string; normalize it to empty
            // so the length check below cannot throw NullReferenceException
            string servicePassword = (string)collection["textBoxServicePassword"] ?? String.Empty;
            bool useServicePassword = !string.IsNullOrEmpty(servicePassword);
            bool useTcpService = collection["ServerType"] == "radioButtonUseTcpService";
            string webServiceUrl = collection["textBoxWebServiceUrl"];

            // the pdf file to convert
            // NOTE(review): this path comes straight from the posted form and is read
            // from the server file system below - confirm this is acceptable outside a demo
            string pdfFilePath = GetTrimmedFirstValue(collection, "filePathTextBox");
            if (string.IsNullOrEmpty(pdfFilePath))
                throw new Exception("Please choose a PDF file");

            // start page number
            int startPageNumber = int.Parse(GetTrimmedFirstValue(collection, "startPageTextBox"));
            // end page number
            // when it is 0 the extraction will continue up to the end of document
            int endPageNumber = 0;
            string endPageText = GetTrimmedFirstValue(collection, "endPageTextBox");
            if (endPageText != String.Empty)
                endPageNumber = int.Parse(endPageText);

            string outputFileName = System.IO.Path.GetFileNameWithoutExtension(pdfFilePath) + "_Highlighted.pdf";
            Document pdfDocument = null;
            byte[] outPdfBuffer = null;
            try
            {
                // Create the PDF to Text converter object
                PdfToTextConverter pdfToTextConverter = null;
                if (useTcpService)
                    pdfToTextConverter = new PdfToTextConverter(serverIP, serverPort);
                else
                    pdfToTextConverter = new PdfToTextConverter(true, webServiceUrl);

                // Set optional service password
                if (useServicePassword)
                    pdfToTextConverter.ServicePassword = servicePassword;

                pdfToTextConverter.LicenseKey = "ujQlNSAgNSU1IzslNSYkOyQnOywsLCw1JQ==";

                // read the PDF file in a memory buffer
                byte[] sourcePdfBytes = System.IO.File.ReadAllBytes(pdfFilePath);

                // search text in PDF
                // a check box counts as checked when the form posted a value for it
                FindTextLocation[] findTextLocations = pdfToTextConverter.FindText(sourcePdfBytes, collection["textToFindTextBox"],
                            startPageNumber, endPageNumber, collection["caseSensitiveCheckBox"].Count > 0, collection["wholeWordCheckBox"].Count > 0);

                // open the PDF to search in PDF library
                if (useTcpService)
                    pdfDocument = new Document(serverIP, serverPort, servicePassword, pdfFilePath, null);
                else
                    pdfDocument = new Document(true, webServiceUrl, servicePassword, pdfFilePath, null);


                // highlight the found text in PDF
                foreach (FindTextLocation findTextLocation in findTextLocations)
                {
                    RectangleElement highlightRectangle = new RectangleElement(findTextLocation.X, findTextLocation.Y,
                        findTextLocation.Width, findTextLocation.Height);
                    highlightRectangle.BackColor = RgbColor.Yellow;
                    highlightRectangle.Opacity = 50;

                    // PageNumber is presumably 1-based, hence the -1 to index Pages - TODO confirm
                    pdfDocument.Pages[findTextLocation.PageNumber - 1].AddElement(highlightRectangle);
                }

                // Save the modified PDF document in a memory buffer
                outPdfBuffer = pdfDocument.Save();
            }

            catch (Exception ex)
            {
                // The search failed
                // keep the original exception as InnerException so its type and stack trace are not lost
                throw new Exception(String.Format("An error occurred. {0}", ex.Message), ex);
            }
            finally
            {
                // Close the PDF document
                if (pdfDocument != null)
                    pdfDocument.Close();
            }

            // Send the PDF file to browser
            FileResult fileResult = new FileContentResult(outPdfBuffer, "application/pdf");
            fileResult.FileDownloadName = outputFileName;

            return fileResult;
        }

        /// <summary>
        /// Returns the trimmed first value posted for a form field, or an empty
        /// string when the field is missing or has no value.
        /// </summary>
        /// <param name="collection">The posted form.</param>
        /// <param name="key">The form field name.</param>
        /// <returns>The trimmed first value, or <c>String.Empty</c>.</returns>
        private static string GetTrimmedFirstValue(IFormCollection collection, string key)
        {
            var values = collection[key];
            if (values.Count == 0 || values[0] == null)
                return String.Empty;

            return values[0].Trim();
        }
    }
}