hi there
I wonder if there’s a scriptable way to search a PDF for some text and, when the text is found, export the matching page(s) as single PDFs.
Is this even possible?
thanks in advance for your help
kind regards
Marth
hi there
I wonder if there’s a scriptable way to search a PDF for some text and, when the text is found, export the matching page(s) as single PDFs.
Is this even possible?
thanks in advance for your help
kind regards
Marth
Here is a modified version of one of Shane STANLEY’s scripts, which should fit your needs.
use AppleScript version "2.3.1"
use scripting additions
use framework "Foundation"
use framework "Quartz" -- required for PDF stuff
--property theKey : ""
#===== Handlers
-- Writes a new single-page PDF file for every page of the given PDF whose text contains the key string.
-- Parameters:
--   thePath : POSIX path of the source PDF file
--   theKey  : text to look for in each page's text
-- Each output file is named after the source file with "-page NN" inserted before the extension,
-- where NN is the 1-based page number, zero-padded to at least two digits. The padding grows with
-- the page count so that, for example, pages 1 and 101 of a long document get distinct file names.
-- Raises an error when the file cannot be opened as a PDF document.
on splitPDF:thePath forKey:theKey
	set inNSURL to current application's |NSURL|'s fileURLWithPath:thePath
	set thePDFDocument to current application's PDFDocument's alloc()'s initWithURL:inNSURL
	if thePDFDocument is missing value then error ("Unable to open PDF document at " & thePath)
	set theCount to thePDFDocument's pageCount() as integer
	-- Width of the zero-padded page number: as many digits as the page count has, but never fewer than 2
	set padWidth to length of (theCount as text)
	if padWidth < 2 then set padWidth to 2
	-- CAUTION: i counts pages starting from 1, but pageAtIndex: numbers them starting from 0
	repeat with i from 1 to theCount
		set thePDFPage to (thePDFDocument's pageAtIndex:(i - 1))
		set pageNSString to (thePDFPage's |string|())
		-- The page text can come back as missing value; such a page cannot contain the key
		if pageNSString is not missing value then
			if (pageNSString as text) contains theKey then
				set pageSuffix to text (-padWidth) thru -1 of ("000000000" & i)
				set newPath to (its addString:("-page " & pageSuffix) beforeExtensionIn:thePath)
				set outNSURL to (current application's |NSURL|'s fileURLWithPath:newPath)
				set newPDFDoc to current application's PDFDocument's alloc()'s init()
				(newPDFDoc's insertPage:thePDFPage atIndex:0)
				(newPDFDoc's writeToURL:outNSURL)
			end if
		end if
	end repeat
end splitPDF:forKey:
-- Inserts a string in a path just before the file extension.
-- Parameters:
--   extraString : text to insert
--   aPath       : path (text) to modify
-- Returns the new path as text, e.g. "/tmp/a.pdf" with "-x" gives "/tmp/a-x.pdf".
-- When the path has no extension the string is simply appended, without a trailing ".".
on addString:extraString beforeExtensionIn:aPath
	set pathNSString to current application's NSString's stringWithString:aPath
	set theExtension to pathNSString's pathExtension()
	set basePath to pathNSString's stringByDeletingPathExtension()
	if ((theExtension's |length|()) as integer) = 0 then
		-- No extension: avoid producing a dangling "." at the end of the path
		set newNSString to current application's NSString's stringWithFormat_("%@%@", basePath, extraString)
	else
		set newNSString to current application's NSString's stringWithFormat_("%@%@.%@", basePath, extraString, theExtension)
	end if
	return newNSString as text
end addString:beforeExtensionIn:
#===== Caller
-- Ask for the text to search for, then for the PDF to scan, and pass both to the splitter
set keyDialogReply to display dialog "Enter the key to search for:" default answer "Manang Saling"
set theKey to text returned of keyDialogReply
set chosenPDF to choose file with prompt "Choose a PDF file." of type {"PDF"}
set thePath to POSIX path of chosenPDF
its splitPDF:thePath forKey:theKey
You will be prompted to enter the key string to search for and to select the file to search in.
Yvan KOENIG running High Sierra 10.13.6 in French (VALLAURIS, France) lundi 4 novembre 2019 14:00:02
How about this?
-- Created 2017-06-18 by Takaaki Naganoya
-- 2017 Piyomaru Software
use AppleScript version "2.4"
use scripting additions
use framework "Foundation"
use framework "Quartz"
use bPlus : script "BridgePlus"
-- Keywords to look for; list each spelling variant that should count as a hit (case is considered)
set keywordVariants to {"Piyomaru Software", "PIYOMARU Soft"}
-- Let the user pick the PDF to scan
set pickedPDF to choose file of type {"com.adobe.pdf"}
set thePath to POSIX path of pickedPDF
set aRes to my findWordListInPDFContents(thePath, keywordVariants)
--> e.g. {1, 3, 4, 71, 72, 75, 95, 96, 97, 98, 420, 429, 479, 483} -- list of page numbers with a hit
-- Finds the pages of a PDF that contain any of the given keywords.
-- Parameters:
--   thePOSIXPath : POSIX path of the PDF file to scan
--   sList        : list of keyword strings
-- Returns a sorted list of unique page numbers (1-based page positions in the document).
-- Raises an error when the file cannot be opened as a PDF document.
on findWordListInPDFContents(thePOSIXPath as string, sList as list)
	-- Working storage: the per-page text cache and the per-keyword hit lists
	script spdPDF
		property textCache : missing value
		property aList : {}
	end script
	
	-- Build the text search cache from the PDF: one record per page
	set anNSURL to (current application's |NSURL|'s fileURLWithPath:thePOSIXPath)
	set theDoc to current application's PDFDocument's alloc()'s initWithURL:anNSURL
	if theDoc is missing value then error ("Unable to open PDF document at " & thePOSIXPath)
	set theCount to theDoc's pageCount() as integer
	
	set (textCache of spdPDF) to current application's NSMutableArray's new()
	repeat with i from 0 to (theCount - 1)
		set aPage to (theDoc's pageAtIndex:i)
		set tmpStr to (aPage's |string|())
		-- Store an empty string when the page has no text, so the "contains" predicate always sees a string
		if tmpStr is missing value then set tmpStr to ""
		((textCache of spdPDF)'s addObject:{pageIndex:i + 1, pageString:tmpStr})
	end repeat
	
	-- Search the text cache for each keyword
	repeat with s in sList
		set aKeyword to (contents of s) as text
		-- (1) Partial-match search per page. The keyword is passed as a predicate argument rather
		--     than spliced into the format string, so quotes inside the keyword cannot break it.
		set thePredicate to (current application's NSPredicate's predicateWithFormat:"pageString contains %@" argumentArray:{aKeyword})
		set hitRecs to ((textCache of spdPDF)'s filteredArrayUsingPredicate:thePredicate)
		set bRes to (hitRecs's valueForKey:"pageIndex") as list
		
		-- (2) Fallback when no single page matched (e.g. the keyword lies across pages):
		--     search the whole document and record the first page of each selection.
		if bRes = {} then
			set theSels to (theDoc's findString:aKeyword withOptions:0)
			repeat with aSel in theSels
				set firstPage to (aSel's pages()'s objectAtIndex:0)
				-- Use the page's position in the document (same numbering as pageIndex above)
				-- instead of its label, which is not guaranteed to be numeric
				set curPage to ((theDoc's indexForPage:firstPage) as integer) + 1
				if curPage is not in bRes then
					set the end of bRes to curPage
				end if
			end repeat
		end if
		
		set the end of (aList of spdPDF) to bRes
	end repeat
	
	-- 2D list to 1D list conversion (flatten), using BridgePlus
	load framework
	set bList to (current application's SMSForder's arrayByFlattening:(aList of spdPDF)) as list
	
	-- Remove duplicates
	set cList to uniquifyList(bList) of me
	
	-- Sort the 1D list
	set anArray to current application's NSArray's arrayWithArray:cList
	set sortRes1 to (anArray's sortedArrayUsingSelector:"compare:") as list
	
	set (textCache of spdPDF) to "" -- Purge
	set (aList of spdPDF) to {} -- Purge
	
	return sortRes1
end findWordListInPDFContents
-- Filters a list of records with an NSPredicate format string.
-- Parameters:
--   aRecList   : list of records to filter
--   aPredicate : predicate format string, e.g. "pageString contains 'abc'"
-- Returns the matching records as an NSArray (not coerced to an AppleScript list).
on filterRecListByLabel1(aRecList as list, aPredicate as string)
	set sourceArray to current application's NSArray's arrayWithArray:aRecList
	set compiledPredicate to current application's NSPredicate's predicateWithFormat:aPredicate
	return sourceArray's filteredArrayUsingPredicate:compiledPredicate
end filterRecListByLabel1
-- Removes duplicate items from a list.
-- Parameter:
--   aList : list whose duplicates should be dropped
-- Returns an AppleScript list holding each distinct item once (as produced by the
-- "@distinctUnionOfObjects" key-value coding operator).
on uniquifyList(aList as list)
	set sourceArray to current application's NSArray's arrayWithArray:aList
	return (sourceArray's valueForKeyPath:"@distinctUnionOfObjects.self") as list
end uniquifyList
Model: MacBook Pro 2012
AppleScript: 2.7
Browser: Safari 13.0.1
Operating System: macOS 10.14
Thank you very much guys!