SilverStripe\TextExtraction\Extractor\PDFTextExtractor
Text extractor that calls pdftotext to do the conversion.
- Author: mstephens
Synopsis
class PDFTextExtractor
extends FileTextExtractor
{
- // members
- private static $binary_location = NULL;
- private static array $search_binary_locations;
- // Inherited members from FileTextExtractor
- protected static $sorted_extractor_classes;
- // methods
- public boolean isAvailable()
- public boolean supportsExtension()
- public boolean supportsMime()
- protected string bin()
- public string getContent()
- protected string getRawOutput()
- protected string cleanupLigatures()
- // Inherited methods from FileTextExtractor
- protected static array get_extractor_classes()
- protected static FileTextExtractor get_extractor()
- public static FileTextExtractor|null for_file()
- protected static string getPathFromFile()
- public abstract boolean isAvailable()
- public abstract boolean supportsExtension()
- public abstract boolean supportsMime()
- public abstract string getContent()
Hierarchy
Members
private
- $binary_location
—
string
Set to the path of the binary this extractor can execute.
- $search_binary_locations
—
array
Used if binary_location isn't set.
protected
- $sorted_extractor_classes
—
array
Cache of extractor class names, sorted by priority
Methods
protected
- bin() — Accessor to get the location of the binary
- cleanupLigatures() — Removes UTF-8 ligatures.
- getRawOutput() — Invoke pdftotext with the given File object
public
Inherited from SilverStripe\TextExtraction\Extractor\FileTextExtractor
protected
- getPathFromFile() — Some text extractors (like pdftotext) may require a physical file to read from, so write the current file contents to a temp file and return its path
- get_extractor() — Get the text file extractor for the given class
- get_extractor_classes() — Gets the list of prioritised extractor classes
public
- for_file() — Given a File object, decide which extractor instance to use to handle it
- getContent() — Given a File instance, extract the contents as text.
- isAvailable() — Checks if the extractor is supported on the current environment, for example if the correct binaries or libraries are available.
- supportsExtension() — Determine if this extractor supports the given extension.
- supportsMime() — Determine if this extractor supports the given mime type.