Merge PDF contents through AppleScript

This is basically the same, with an additional step that shows several pieces of information about the chosen PDFs in a table. It lets you leave some of those PDFs out (so they won’t be part of the final combined PDF) and reorder them by dragging them up or down, which determines the order in which they are joined in the final combined PDF.
I am sure it can be improved…

Ciao
L.

use AppleScript version "2.4" -- Yosemite (10.10) or later
use scripting additions
use framework "Foundation"
use framework "AppKit" -- for NSImage
use framework "Quartz" -- required for PDF stuff
use script "Myriad Tables Lib" version "1.0.9"

-- Ask for the source files (images and/or PDFs) and for the destination of the combined PDF.
set inFiles to (choose file of type {"public.image", "com.adobe.pdf"} with prompt "Choose your  files (images or PDFs):" with multiple selections allowed) -- is a list of alias
set destPosixPath to POSIX path of (choose file name default name "Combined.pdf" with prompt "Save new PDF to:") -- is a POSIX path

# let's order the files
# get information about contents
-- One list per table column. theFiles is a fifth column that has no heading below;
-- it is only used to map each row back to its file (presumably not displayed -- confirm against the Myriad Tables Lib documentation).
set {theJoin, theNames, theSizes, theKinds, theFiles} to {{}, {}, {}, {}, {}}
tell application "Finder"
	repeat with i in inFiles
		set end of theJoin to true -- every file starts checked
		set end of theNames to displayed name of i
		set end of theSizes to size of i
		set end of theKinds to kind of i
		set end of theFiles to i as alias
	end repeat
end tell
-- convert from "columns" to "rows"
set theData to swap columns and rows in {theJoin, theNames, theSizes, theKinds, theFiles}
-- show table
set myTable to make new table with data theData column headings {"Join", "Name", "Size", "Kind"} with prompt "Choose files to join by checking the checkbox, and the order in which the files willl be joined by dragging them up or down." editable columns {1} with multiple selections allowed
modify table myTable with alternate backgrounds and row dragging
-- modify columns in table myTable date format "d MMM yyyy, H:mm"
modify columns in table myTable columns list {4} head alignment align center
modify columns in table myTable columns list {2} with bold type
set theResult to display table myTable

-- Keep only the rows whose "Join" checkbox (column 1) is still checked,
-- in the order the rows were returned by the table; column 5 holds the file.
set filesToJoin to {}
repeat with aRow in values returned of theResult
	if item 1 of aRow then
		set end of filesToJoin to item 5 of aRow
	end if
end repeat

-- Without this guard the handler would fail on "item 1" of an empty list.
if filesToJoin is {} then
	display alert "Nothing to combine" message "No file was left checked in the table, so no PDF was created."
	error number -128 -- user cancelled
end if

my combineFiles:filesToJoin savingToPDF:destPosixPath
-- Combine the given files into a single PDF.
--   inFiles:       list of files (aliases/file references); PDFs are used as they are,
--                  anything else is converted through pdfDocFromImageURL:
--   destPosixPath: POSIX path of the PDF to write
-- The first file becomes the main document and the pages of the remaining files
-- are appended to it in list order.
-- Raises an error when the list is empty, a file cannot be opened, a PDF is locked,
-- or the combined PDF cannot be written. Returns true on success.
on combineFiles:inFiles savingToPDF:destPosixPath
	if (count of inFiles) is 0 then error "combineFiles: no files were supplied to combine."
	set theDoc to my documentForCombiningFile:(item 1 of inFiles)
	repeat with aFile in (rest of inFiles)
		set newDoc to (my documentForCombiningFile:(contents of aFile))
		-- loop through, moving pages
		set newDocCount to newDoc's pageCount()
		repeat with i from 1 to newDocCount
			set thePDFPage to (newDoc's pageAtIndex:(i - 1)) -- zero-based indexes
			-- inserting at the current page count appends the page to the main PDF
			(theDoc's insertPage:thePDFPage atIndex:(theDoc's pageCount()))
		end repeat
	end repeat
	set outNSURL to current application's |NSURL|'s fileURLWithPath:destPosixPath
	-- save the main PDF; writeToURL: reports failure through its result
	set didWrite to (theDoc's writeToURL:outNSURL) as boolean
	if not didWrite then error "combineFiles: could not write the combined PDF to " & destPosixPath
	return true
end combineFiles:savingToPDF:

-- Private helper: open one file as a PDFDocument, or raise an error explaining why it cannot be used.
on documentForCombiningFile:aFile
	set filePath to POSIX path of aFile
	set inNSURL to current application's |NSURL|'s fileURLWithPath:filePath
	-- compare the extension case-insensitively so "Scan.PDF" is not treated as an image
	if (inNSURL's pathExtension()'s lowercaseString()'s isEqualToString:"pdf") as boolean then
		set aDoc to current application's PDFDocument's alloc()'s initWithURL:inNSURL
	else
		set aDoc to my pdfDocFromImageURL:inNSURL
	end if
	-- initWithURL: gives missing value when the file is not a readable PDF
	if aDoc is missing value then error "combineFiles: could not open " & filePath
	-- a document that is still locked cannot contribute usable pages
	if (aDoc's isLocked()) as boolean then error "combineFiles: " & filePath & " is password protected."
	return aDoc
end documentForCombiningFile:

-- Build a PDFDocument from an image file.
--   inNSURL: NSURL of the image
-- The image is placed in an NSImageView sized to the image, and the view's PDF
-- rendering is turned into a PDFDocument, which is returned.
-- Raises an error when no image can be loaded from the URL.
on pdfDocFromImageURL:inNSURL
	set theImage to current application's NSImage's alloc()'s initWithContentsOfURL:inNSURL
	-- without this check the failure would surface later as an obscure error on |size|()
	if theImage is missing value then error "Could not read an image from " & (inNSURL's |path|() as text)
	set theSize to theImage's |size|()
	set theRect to {{0, 0}, theSize} -- origin at 0,0, same size as the image
	set theImageView to current application's NSImageView's alloc()'s initWithFrame:theRect
	theImageView's setImage:theImage
	set theData to theImageView's dataWithPDFInsideRect:theRect
	return current application's PDFDocument's alloc()'s initWithData:theData
end pdfDocFromImageURL:

The OP has a good solution and I wanted to raise a related matter. I’m rewriting my PDF AppleScripts to use ASObjC–which I’m just learning–and I had a question about error correction.

I ran Shane’s script with 3 PDF files in the source folder and everything worked as expected. To introduce an error, I created a text file in the folder and changed its extension to PDF. I reran Shane’s script and it threw an error:

What is the best way to handle this error? For now, I will include the code that calls the handler in a try statement, but this is a bit cumbersome because the handler is called at numerous points in my script. Thanks.

You can use a try, or a test for missing value:

if theDoc is missing value then -- something went wrong

Thanks Shane. I had this wrong. The line that causes the error is actually the prior line so this can be dealt with something like:

-- List the non-hidden items of dirURL; passing missing value for |error| means no error details are collected.
set fileManager to current application's NSFileManager's defaultManager()
set skipHidden to current application's NSDirectoryEnumerationSkipsHiddenFiles
set theURLs to fileManager's contentsOfDirectoryAtURL:dirURL includingPropertiesForKeys:{} options:skipHidden |error|:(missing value)

-- missing value here means the listing failed: tell the user and stop the script
if theURLs is missing value then
	display alert "The error message"
	error number -128
end if

-- open the first listed item as a PDF document
set firstURL to theURLs's firstObject()
set theDoc to current application's PDFDocument's alloc()'s initWithURL:firstURL

It can be more helpful if you show the relevant error:

-- List the non-hidden items of dirURL; |error|:(reference) makes the call return {result, error}.
set fileManager to current application's NSFileManager's defaultManager()
set skipHidden to current application's NSDirectoryEnumerationSkipsHiddenFiles
set {theURLs, theError} to fileManager's contentsOfDirectoryAtURL:dirURL includingPropertiesForKeys:{} options:skipHidden |error|:(reference)

-- on failure, show the description carried by the error object, then stop the script
if theURLs is missing value then
	set errorText to theError's localizedDescription() as text
	display alert errorText
	error number -128
end if

Thanks Shane. I’ll use that.

This is probably more of a question for Shane, since it involves his code below. I am wondering how the sequencing of the pages is determined. I have experienced mixed results on different machines in terms of which file becomes page 1 vs. page 2. In my situation, I am always dealing with just 2 separate PDFs in the same folder on the desktop, called “View PDFs”. That “View PDFs” folder will always contain two PDFs that are similarly named except for the ending of their filenames. For example, this folder will contain two separate PDFs named as follows:
AD03103_side1.pdf
AD03103_side2.pdf

What I am seeking is a method to always have the file that contains “side1” become page 1, and the file containing “side2” to become page 2. However, I am not sure how to incorporate that logic into the code seen below. I understand this is a lot to ask - so if it cannot be easily done, please ignore.

Thanks,
-Jeff

use scripting additions
use framework "Foundation"
use framework "Quartz" -- required for PDF stuff


-- Source folder and destination file both sit on the desktop; build their HFS paths and convert to POSIX.
set desktopHFS to path to desktop as text
set inFolderPosix to POSIX path of (desktopHFS & "View PDFs")
set destPosixPath to POSIX path of (desktopHFS & "Combined.pdf")
-- combine every file of the folder into the destination PDF
my combineFilesIn:inFolderPosix savingTo:destPosixPath

-- Combine all PDFs found directly inside a folder into one PDF.
--   inFolderPosix: POSIX path of the folder to read
--   destPosixPath: POSIX path of the PDF to write
-- Only files with a "pdf" extension (any case) are used, and they are joined in
-- Finder-like name order, so "…_side1.pdf" always comes before "…_side2.pdf".
-- Raises an error when the folder cannot be listed, holds no PDFs, a PDF cannot
-- be opened, or the result cannot be written. Returns true on success.
on combineFilesIn:inFolderPosix savingTo:destPosixPath
	set dirURL to current application's class "NSURL"'s fileURLWithPath:inFolderPosix
	set {theURLs, theError} to current application's NSFileManager's defaultManager()'s contentsOfDirectoryAtURL:dirURL includingPropertiesForKeys:{} options:(current application's NSDirectoryEnumerationSkipsHiddenFiles) |error|:(reference)
	if theURLs is missing value then error (theError's localizedDescription() as text)
	-- the directory listing may contain other kinds of files: keep PDFs only
	set pdfFilter to current application's NSPredicate's predicateWithFormat:"pathExtension ==[c] 'pdf'"
	set theURLs to theURLs's filteredArrayUsingPredicate:pdfFilter
	-- the listing order is not guaranteed, so sort by file name to get a predictable page order
	set byName to current application's NSSortDescriptor's sortDescriptorWithKey:"lastPathComponent" ascending:true selector:"localizedStandardCompare:"
	set theURLs to theURLs's sortedArrayUsingDescriptors:{byName}
	set urlCount to theURLs's |count|()
	if urlCount is 0 then error "No PDF files were found in " & inFolderPosix
	-- the first PDF becomes the main document
	set theDoc to current application's PDFDocument's alloc()'s initWithURL:(theURLs's firstObject())
	if theDoc is missing value then error "Could not open " & ((theURLs's firstObject())'s |path|() as text) & " as a PDF."
	-- loop through the rest, appending their pages
	repeat with urlIndex from 1 to (urlCount - 1)
		set nextURL to (theURLs's objectAtIndex:urlIndex)
		set newDoc to (current application's PDFDocument's alloc()'s initWithURL:nextURL)
		if newDoc is missing value then error "Could not open " & (nextURL's |path|() as text) & " as a PDF."
		set newDocCount to newDoc's pageCount()
		repeat with pageNumber from 1 to newDocCount
			set thePDFPage to (newDoc's pageAtIndex:(pageNumber - 1)) -- zero-based indexes
			-- inserting at the current page count appends the page
			(theDoc's insertPage:thePDFPage atIndex:(theDoc's pageCount()))
		end repeat
	end repeat
	set outNSURL to current application's class "NSURL"'s fileURLWithPath:destPosixPath
	-- save the new PDF; writeToURL: reports failure through its result
	set didWrite to (theDoc's writeToURL:outNSURL) as boolean
	if not didWrite then error "Could not write the combined PDF to " & destPosixPath
	return true
end combineFilesIn:savingTo:

Jeffkr. You directed your question to Shane but I couldn’t resist responding, just FWIW.

It appears that your source folder contains only two PDF’s and if that’s the case you might want to consider using the Finder. It returns the PDF files generally sorted by name and Finder has a sort function if you’d rather sort in some other manner. Finder can be abysmally slow but in this case it might be worth considering.

Anyways, my suggestion is:

use framework "Foundation"
use framework "Quartz"
use scripting additions

-- HFS paths of the folder holding the PDFs and of the merged file to create, both on the desktop
set desktopPath to path to desktop as text
set sourceFolder to desktopPath & "View PDFs:"
set targetFile to desktopPath & "Combined.pdf"

-- ask the Finder for the folder's files whose name ends with ".pdf", as aliases
tell application "Finder" to set sourceFiles to (files of folder sourceFolder whose name ends with ".pdf") as alias list

my mergeFiles(sourceFiles, targetFile)

-- Merge PDFs into one file.
--   sourceFiles: list of files; the first one is the base document, the others are appended in list order
--   targetFile:  path of the merged PDF (converted with POSIX path before use)
on mergeFiles(sourceFiles, targetFile)
	-- open the first file: it receives the pages of all the others
	set baseURL to current application's |NSURL|'s fileURLWithPath:(POSIX path of item 1 of sourceFiles)
	set mergedDoc to current application's PDFDocument's alloc()'s initWithURL:baseURL
	set insertAt to mergedDoc's pageCount() -- index just past the last page
	repeat with sourceItem in (rest of sourceFiles)
		set nextURL to (current application's |NSURL|'s fileURLWithPath:(POSIX path of sourceItem))
		set nextDoc to (current application's PDFDocument's alloc()'s initWithURL:nextURL)
		set lastPageIndex to (nextDoc's pageCount()) - 1
		-- page indexes are zero-based
		repeat with pageIndex from 0 to lastPageIndex
			set nextPage to (nextDoc's pageAtIndex:pageIndex)
			(mergedDoc's insertPage:nextPage atIndex:insertAt)
			set insertAt to insertAt + 1
		end repeat
	end repeat
	-- write the base document, now holding every page, to the target path
	set outputURL to current application's |NSURL|'s fileURLWithPath:(POSIX path of targetFile)
	(mergedDoc's writeToURL:outputURL)
end mergeFiles

Thank you very much Peavine,
I am pretty sure you are on the right track, however can you assist in modifying the script a bit further so it actually combines the 2 pdfs? It is probably just some syntax error?

use framework "Foundation"
use framework "Quartz"
use scripting additions

-- HFS paths of the source folder and of the merged file, both on the desktop
set desktopPath to path to desktop as text
set sourceFolder to desktopPath & "View PDFs"
set targetFile to desktopPath & "Combined.pdf"

-- collect, as aliases, the files of the folder whose name ends with ".pdf"
tell application "Finder" to set sourceFiles to (files of folder sourceFolder whose name ends with ".pdf") as alias list

mergeFiles(sourceFiles, targetFile)

-- Merge PDFs into one file.
--   sourceFiles: list of files; the first one is the base document, the others are appended in list order
--   targetFile:  path of the merged PDF (converted with POSIX path before use)
-- Raises an error when the list is empty, a file cannot be opened as a PDF,
-- or the merged PDF cannot be written. Returns true on success.
on mergeFiles(sourceFiles, targetFile)
	-- an empty list (for instance a folder without PDFs) would otherwise fail on "item 1"
	if (count of sourceFiles) is 0 then error "mergeFiles: no PDF files were supplied."
	set firstPath to POSIX path of item 1 of sourceFiles
	set firstFile to current application's class "NSURL"'s fileURLWithPath:firstPath
	set firstDoc to current application's PDFDocument's alloc()'s initWithURL:firstFile
	-- initWithURL: gives missing value when the file is not a readable PDF
	if firstDoc is missing value then error "mergeFiles: could not open " & firstPath & " as a PDF."
	repeat with anItem in (rest of sourceFiles)
		set aPath to POSIX path of anItem
		set aFile to (current application's class "NSURL"'s fileURLWithPath:aPath)
		set aDoc to (current application's PDFDocument's alloc()'s initWithURL:aFile)
		if aDoc is missing value then error "mergeFiles: could not open " & aPath & " as a PDF."
		set aDocCount to aDoc's pageCount()
		repeat with i from 1 to aDocCount
			set thePDFPage to (aDoc's pageAtIndex:(i - 1)) -- zero-based indexes
			-- inserting at the current page count appends the page
			(firstDoc's insertPage:thePDFPage atIndex:(firstDoc's pageCount()))
		end repeat
	end repeat
	set mergedFile to current application's class "NSURL"'s fileURLWithPath:(POSIX path of targetFile)
	-- writeToURL: reports failure through its result
	set didWrite to (firstDoc's writeToURL:mergedFile) as boolean
	if not didWrite then error "mergeFiles: could not write the merged PDF to " & (POSIX path of targetFile)
	return true
end mergeFiles

Jeffkr. I just tested my script a second time and it worked fine. I’m new to ASObjC and probably made some rookie mistake. I’m sure Shane will provide a working script as soon as he has the time.

BTW, I made an inconsequential change to my script above and you may want to try it again. I don’t think it will make a difference, though. Also, you may want to see if the following code actually returns the PDF’s:

-- HFS paths of the source folder and of the merged file, both on the desktop
set desktopPath to path to desktop as text
set sourceFolder to desktopPath & "View PDFs:"
set targetFile to desktopPath & "Combined.pdf"

-- the result of this statement shows which PDFs the Finder returns for the folder
tell application "Finder" to set sourceFiles to (files of folder sourceFolder whose name ends with ".pdf") as alias list

Hello.
The colon at the very end of the pathname “View PDFs:” is useless but its availability doesn’t hurt.

I wish to point an important detail: if, at least, one of the original PDFs is protected by a password, you will get no error message but the created combined PDF will be empty.

Yvan KOENIG running High Sierra 10.13.6 in French (VALLAURIS, France) mardi 5 janvier 2021 19:05:08

Thank you VERY much Peavine, your script works extremely well.

Jeffkr. Thanks for letting me know–I’m glad that worked.

Many thanks to all. I searched Google for a similar problem: merging two separate PDF files, with the odd pages in one file and the even pages in the other. Because the resulting file has 500 pages, doing this by hand would have taken far more time than I had available. I therefore ended up here and, thanks to your explanations and many examples, I managed to do it in just a few minutes by slightly modifying Peavine’s script. I want to thank you all for this thread.

use framework "Foundation"
use framework "Quartz"
use scripting additions

-- HFS paths of the source folder and of the merged file, both on the desktop
set desktopPath to path to desktop as text
set sourceFolder to desktopPath & "View PDFs:" -- I have put here 2 files odd.pdf and even.pdf
set targetFile to desktopPath & "Combined.pdf"

-- collect, as aliases, the files of the folder whose name ends with ".pdf"
tell application "Finder" to set sourceFiles to (files of folder sourceFolder whose name ends with ".pdf") as alias list

my mergeFiles(sourceFiles, targetFile)

-- Interleave the pages of two PDFs into one file.
--   sourceFiles: list of files; item 1 must hold the odd pages (1, 3, 5, …) and
--                item 2 the even pages (2, 4, 6, …)
--   targetFile:  path of the merged PDF (converted with POSIX path before use)
-- The pages of the first document are inserted into the second one at indexes
-- 0, 2, 4, …; any pages left over once the second document runs out are appended.
-- Raises an error when fewer than two files are supplied, a file cannot be opened
-- as a PDF, or the merged PDF cannot be written. Returns true on success.
on mergeFiles(sourceFiles, targetFile)
	if (count of sourceFiles) < 2 then error "mergeFiles: two PDF files are required (odd pages, then even pages)."
	set firstFile to current application's class "NSURL"'s fileURLWithPath:(POSIX path of item 1 of sourceFiles)
	set secondFile to current application's class "NSURL"'s fileURLWithPath:(POSIX path of item 2 of sourceFiles)
	set firstDoc to current application's PDFDocument's alloc()'s initWithURL:firstFile
	set secondDoc to current application's PDFDocument's alloc()'s initWithURL:secondFile
	-- initWithURL: gives missing value when the file is not a readable PDF
	if firstDoc is missing value then error "mergeFiles: could not open " & (POSIX path of item 1 of sourceFiles) & " as a PDF."
	if secondDoc is missing value then error "mergeFiles: could not open " & (POSIX path of item 2 of sourceFiles) & " as a PDF."
	set firstDocCount to firstDoc's pageCount()
	set Counter to 0 -- index in secondDoc where the next page of firstDoc goes
	repeat with n from 1 to firstDocCount
		set thePDFPageFromfirstDoc to (firstDoc's pageAtIndex:(n - 1)) -- take the current page, the first index is zero
		-- never insert past the end: when firstDoc has more than one extra page,
		-- the remaining pages are simply appended instead of raising an out-of-bounds exception
		set currentPageCount to secondDoc's pageCount()
		if Counter > currentPageCount then set Counter to currentPageCount
		(secondDoc's insertPage:thePDFPageFromfirstDoc atIndex:Counter)
		set Counter to Counter + 2 -- advance by 2 units (existing page and added page)
	end repeat
	set mergedFile to current application's class "NSURL"'s fileURLWithPath:(POSIX path of targetFile)
	-- secondDoc now holds the interleaved pages; writeToURL: reports failure through its result
	set didWrite to (secondDoc's writeToURL:mergedFile) as boolean
	if not didWrite then error "mergeFiles: could not write the merged PDF to " & (POSIX path of targetFile)
	return true
end mergeFiles

1 Like

I was looking through this thread and there are references to “framework Quartz.” Is that a framework that you download and put in the user/Library folder? I’m still learning and thought of running a few of these PDF scripts as a learning exercise. Thanks.

Homer712. The script should run fine as written–the Quartz framework doesn’t need to be installed.

BTW, you may see a “use framework ‘PDFKit’” statement in scripts that manipulate PDFs and that is already installed and will also work fine (including in Stefano_Monti’s script).

Found 216 individual frameworks folders in System/Library/Frameworks. Quartz was there as well as the PDFKit. What I initially found strange, was that all 216 folders had a date of Sept. 15, 2023, 10:47 PM. Then I looked at some of the Apple applications, and they had the same date/time. Things must get updated during macOS updates I would guess.

I tested Stefano_Monti’s script and it worked great. My source PDFs were two copies of Shane’s ASObjC book (159 pages each), and the merged PDF was created in less than a second.

As part of my ongoing effort to learn the Shortcuts app, I wrote a shortcut that does the same thing, although the shortcut solution differs in that it:

  • prompts the user to select the source PDF files;
  • creates the merged PDF in the same folder as the source PDFs;
  • is slower but only marginally so; and
  • requires macOS Monterey or newer.

Merge PDFs.shortcut (22.7 KB)

1 Like

I want to merge all the PDFs that are open in Preview. Most of them were opened from the web and are not saved on the hard drive. Is there a way to do it, or is it impossible because Apple won’t allow it, just as they don’t allow Preview to open PDF links directly?

Keith. In my testing, PDFs opened in Preview from the internet have a temporary file on the local computer, and these temporary files can be merged and saved. The following worked on my Sonoma computer.

use framework "Foundation"
use framework "PDFKit"
use scripting additions

set targetFile to POSIX path of (path to desktop) & "Merged Files.pdf" -- edit as desired

-- ask Preview for the file path of each open document
tell application "Preview"
	activate
	set documentPaths to path of every document
end tell

-- Keep only real paths: a document with no file behind it may report missing value
-- (assumption to confirm against Preview's scripting dictionary), which the handler cannot turn into a URL.
set thePaths to {}
repeat with aPath in documentPaths
	set aPath to contents of aPath
	if aPath is not missing value then set end of thePaths to aPath
end repeat

-- nothing usable is open: stop instead of trying to write a PDF with no pages
if thePaths is {} then
	display alert "Nothing to merge" message "Preview has no open document with a file path."
	error number -128 -- user cancelled
end if

mergeFiles(thePaths, targetFile)

-- Merge the PDFs at the given paths into a new PDF.
--   theFiles:   list of POSIX paths, merged in list order
--   targetFile: POSIX path of the PDF to write
-- When the target already exists a dialog is shown whose only button is the cancel button.
on mergeFiles(theFiles, targetFile)
	-- start from an empty document and append every page of every source
	set outDoc to current application's PDFDocument's new()
	set nextIndex to outDoc's pageCount()
	repeat with sourcePath in theFiles
		set sourceURL to (current application's |NSURL|'s fileURLWithPath:sourcePath)
		set sourceDoc to (current application's PDFDocument's alloc()'s initWithURL:sourceURL)
		set lastPageIndex to (sourceDoc's pageCount()) - 1
		-- page indexes are zero-based
		repeat with pageIndex from 0 to lastPageIndex
			set sourcePage to (sourceDoc's pageAtIndex:pageIndex)
			(outDoc's insertPage:sourcePage atIndex:nextIndex)
			set nextIndex to nextIndex + 1
		end repeat
	end repeat
	set outURL to (current application's |NSURL|'s fileURLWithPath:targetFile)
	-- check whether something already exists at the target location
	set fileExists to outURL's checkResourceIsReachableAndReturnError:(missing value)
	if fileExists as boolean then
		display dialog "The target file already exists" buttons {"OK"} cancel button 1 default button 1
	end if
	outDoc's writeToURL:outURL
end mergeFiles