Spring Cleaning Duplicates Management

Officially, Spring Cleaning does not work under OS X 10.7, but I’m still using it.

Here are three scripts I use to manage duplicate files.

This script makes a list of files found by the duplicate file finder of Spring Cleaning.


-- Load the shared logging helper and point it at the report file on the desktop.
set log_it to load script file ((path to shared documents as string) & "AppleScript - General Purpose:Tracking Utilities:Log It.scpt")
set log_file_path of log_it to (path to desktop as string) & "Duplicates Report.txt"

-- Run this after Spring Cleaning's Duplicates Finder is finished
-- This creates a Duplicates Report directly from Spring Cleaning
-- Each file result is logged as its path; each category result is logged as the
-- marker line "New Category".

try
	tell application "Spring Cleaning"
		tell search tool "Duplicates Finder"
			set file_count to 1
			set status_count to 1
			-- Allow up to ten minutes for Spring Cleaning to hand over the result items.
			with timeout of 10 * minutes seconds
				set item_list to result item of search results
			end timeout
			repeat with current_result in item_list
				if class of current_result = file result item then
					set file_name to (file of current_result as string)
					tell log_it to add_to_log(file_name)
					-- Speak the running file count once every 100 files as a progress signal.
					if status_count ≥ 100 then
						set status_count to 0
						tell application "System Events" to say file_count
					end if
					set status_count to status_count + 1
					set file_count to file_count + 1
				else if class of current_result = category result item then
					tell log_it to add_to_log("New Category")
				else
					tell log_it to add_to_log("**** Unexpected Class: " & (class of current_result) as string)
				end if
			end repeat
		end tell
	end tell
	say "Finished"
on error error_message
	-- A bare try used to swallow every failure without any feedback.
	-- Announce the problem instead; nothing is written to the report so that
	-- its contents stay limited to paths and marker lines.
	say "Failed: " & error_message
end try

This script will list the number of duplicates in a pair or set of folders using the output of the first script.


-- This reads the Duplicates Report and counts the number of files for every pair or n-tuple of parent folders.
-- A "New Category" line closes one group of duplicates.  The sorted list of the
-- group's folder paths (the "name tuple") is tallied in result_set, and the
-- tallies are written to the log together with the file count of each folder.

set duplicates_file_path_name to (choose file with prompt "Pick the Duplicates Report to use") as string
my add_to_log(duplicates_file_path_name & " " & (current date) & return)

-- Ask for the level before the report is opened, so cancelling cannot leave it open.
-- choose from list returns false on Cancel; treat that as a normal user cancel.
set level_choice to choose from list {0, 1, 2, 3, 4} with title "Tuple Level" with prompt "Select the level above the set folder to collect"
if level_choice is false then error number -128
-- tuple_level is the number of trailing path components counted from the end.
-- NOTE(review): with choice 0 (tuple_level 1) the whole path, file name included,
-- is kept, and the Finder folder lookup further down will fail — confirm whether
-- the offset was meant to be 2.
set tuple_level to 1 + (level_choice as integer)

set duplicates_file_id to open for access file duplicates_file_path_name
set result_set to {}
set name_tuple to missing value
try
	-- The loop has no exit of its own; it always ends through an error (eof when all goes well).
	repeat
		-- Pass the file descriptor itself to read, not its text form.
		set temp_text to read duplicates_file_id until return
		if (count of characters in temp_text) > 0 then
			-- Drop the trailing return.
			set file_path_name to text 1 thru -2 of temp_text
			if file_path_name = "New Category" then
				my increment_tuple_count(name_tuple, result_set)
				set name_tuple to missing value
			else if file_path_name begins with "**** Unexpected Class: " then
				-- Diagnostic line from the report script, not a path: ignore it.
			else
				-- Strip the trailing path components to get the folder to collect.
				set AppleScript's text item delimiters to ":"
				set location_name to (text items 1 thru -tuple_level of file_path_name) as string
				set AppleScript's text item delimiters to ""
				set name_tuple to my add_to_tuple(location_name, name_tuple)
			end if
		end if
	end repeat
on error error_message number error_number
	-- Always release the report, then pass on anything that is not end-of-file.
	close access duplicates_file_id
	set AppleScript's text item delimiters to ""
	if error_number ≠ -39 then error error_message number error_number
end try
-- Tally the last group, which has no "New Category" line after it.
my increment_tuple_count(name_tuple, result_set)

set list_count to count of items of result_set
log (list_count)
my add_to_log((list_count as string) & " Folder sets found" & return)
if list_count > 0 then
	-- Bubble sort of result_set by the first folder path of each tuple.
	-- Each pass only rescans the window between the first and the last swap
	-- of the previous pass; a pass without swaps ends the sort.
	set low_limit to 1
	set high_limit to list_count - 1
	repeat while low_limit > 0
		log (low_limit as string) & "  " & high_limit
		set new_low_limit to 0
		set new_high_limit to 0
		repeat with current_index from low_limit to high_limit
			set temp_item to item current_index of result_set
			if item 1 of name_tuple of temp_item > item 1 of name_tuple of item (current_index + 1) of result_set then
				set item current_index of result_set to item (current_index + 1) of result_set
				set item (current_index + 1) of result_set to temp_item
				-- The first swap of the pass fixes where the next pass starts.
				if new_low_limit = 0 then
					if current_index = 1 then
						set new_low_limit to 1
					else
						set new_low_limit to current_index - 1
					end if
				end if
				-- The last swap of the pass fixes where the next pass stops.
				set new_high_limit to current_index - 1
			end if
		end repeat
		set low_limit to new_low_limit
		set high_limit to new_high_limit
	end repeat
	-- One paragraph per tuple: its count, then one "file count <tab> folder" line per folder.
	repeat with curr_record in result_set
		my add_to_log((tuple_count of curr_record) as string)
		repeat with folder_name in name_tuple of curr_record
			-- The loop variable is a reference to a list item; resolve it before handing it to Finder.
			set folder_path to contents of folder_name
			tell application "Finder" to set folder_count to count of files in folder folder_path
			my add_to_log((folder_count as string) & tab & folder_path)
		end repeat
		my add_to_log("")
	end repeat
else
	my add_to_log("No dups found")
end if

say "Finished"
return

-- Insert location_name into name_tuple, keeping the list in ascending order.
--   location_name: the value to insert.
--   name_tuple: an ascending list, or missing value when no list exists yet.
-- Returns the list; an existing list is extended in place.  A value equal to an
-- existing entry is placed after it.
on add_to_tuple(location_name, name_tuple)
	-- No list yet: the new value is the whole tuple.
	if name_tuple is missing value then return {location_name}
	
	-- Grow the list by one slot, then walk down from the end, moving every
	-- larger entry up one place until the slot for the new value is found.
	set slot to (count of items of name_tuple) + 1
	set end of name_tuple to location_name
	repeat while slot > 1
		set neighbour to item (slot - 1) of name_tuple
		if not (neighbour > location_name) then exit repeat
		set item slot of name_tuple to neighbour
		set slot to slot - 1
	end repeat
	set item slot of name_tuple to location_name
	return name_tuple
end add_to_tuple

-- Record one more occurrence of name_tuple in result_set.
--   name_tuple: a list of folder paths, or missing value (then nothing happens).
--   result_set: list of {name_tuple, tuple_count} records, updated in place.
-- A matching record has its tuple_count raised by one; otherwise a new record
-- with tuple_count 1 is appended.
on increment_tuple_count(name_tuple, result_set)
	if name_tuple is missing value then return
	
	repeat with candidate in result_set
		if name_tuple of candidate = name_tuple then
			set tuple_count of candidate to (tuple_count of candidate) + 1
			return
		end if
	end repeat
	
	-- No existing record matched.
	set end of result_set to {name_tuple:name_tuple, tuple_count:1}
end increment_tuple_count

-- Append the_message, followed by a return, to "Duplicate Folder List.txt" on the desktop.
--   the_message: the text to record.
-- Any error from the write is passed on to the caller after the file has been closed.
on add_to_log(the_message)
	set file_id to open for access file ((path to desktop as string) & "Duplicate Folder List.txt") with write permission
	try
		write the_message & return to file_id starting at eof
		close access file_id
	on error error_message number error_number
		-- Do not leave the log file open when the write fails.
		try
			close access file_id
		end try
		error error_message number error_number
	end try
end add_to_log

This script will delete duplicates using the output of the first script.


-- This reads the Duplicates Report and deletes selected files.

-- Copyright Richard Cohen  (richard.c.cohen@verizon.net)
-- You may modify and use this as you wish so long as the copyright notice is included
-- You may distribute this or derivative works to others so long as you do not charge for this software.

--  It only deletes files contained in the "duplicate folder".
--  It always leaves at least one file from each category.

--  If you set the "duplicate folder" to a root level, there will be little protection, but the last file in a category will be kept.

-- I use this with Spring Cleaning to compare files in two sets of folders and remove duplicates from the older set.
-- I then merge the pruned old folder with the new one either manually or with other tools.

-- WARNING: I use this regularly, but it has not been exhaustively tested.  USE AT YOUR OWN RISK.  I MAKE NO WARRANTIES ABOUT ITS OPERATION.

set duplicates_file_path_name to (choose file with prompt "Pick the Duplicates Report to use") as string
my add_to_log(duplicates_file_path_name & " " & (current date))
my add_to_log("")
-- Speak the running deletion count after this many deletions.
set announcement_frequency to 100
-- One pass over the report per "duplicate folder"; cancel the folder prompt to finish.
repeat
	set deletion_count to 0
	set announcement_count to 0
	-- Per-category state.  Reset for every pass so that nothing carries over from
	-- the previous pass and a report that does not start with "New Category"
	-- cannot run into undefined variables.
	--   one_file_saved: a file outside the duplicate folder was seen in this category.
	--   prior_file_path_name: a file inside the duplicate folder that is held back
	--   until another existing file of the same category turns up.
	set one_file_saved to false
	set prior_file_path_name to missing value
	tell me to activate
	try
		set duplicate_folder_name to (choose folder with prompt "Pick the duplicate folder where deletions are permitted") as string
	on error number -128
		-- User cancelled: leave the loop so that "Finished" is announced.
		exit repeat
	end try
	my add_to_log(return & "Deleting from: " & duplicate_folder_name)
	set duplicates_file_id to open for access file duplicates_file_path_name
	try
		-- The loop has no exit of its own; it always ends through an error (eof when all goes well).
		repeat
			-- Pass the file descriptor itself to read, not its text form.
			set temp_text to read duplicates_file_id until return
			if (count of characters in temp_text) > 0 then
				-- Drop the trailing return.
				set file_path_name to text 1 thru -2 of temp_text
				if file_path_name = "New Category" then
					set one_file_saved to false
					set prior_file_path_name to missing value
				else if file_path_name begins with "**** Unexpected Class: " then
					-- Diagnostic line from the report script, not a path: ignore it.
				else
					with timeout of 5 * minutes seconds
						tell application "Finder" to set file_is_present to exists file file_path_name
						if file_is_present then -- allows a second pass after a crash
							-- Another file of this category exists, so the held-back one can go.
							if prior_file_path_name is not missing value then
								tell application "Finder" to delete file prior_file_path_name
								my add_to_log(prior_file_path_name)
								set prior_file_path_name to missing value
								if announcement_count ≥ announcement_frequency then
									set announcement_count to 0
									say deletion_count as string
								end if
								set deletion_count to deletion_count + 1
								set announcement_count to announcement_count + 1
							end if
							if file_path_name begins with duplicate_folder_name then
								if one_file_saved then
									-- A copy outside the duplicate folder is known to exist.
									tell application "Finder" to delete file file_path_name
									my add_to_log(file_path_name)
									if announcement_count ≥ announcement_frequency then
										set announcement_count to 0
										say deletion_count as string
									end if
									set deletion_count to deletion_count + 1
									set announcement_count to announcement_count + 1
								else
									-- Nothing saved yet: hold this file back for now.
									set prior_file_path_name to file_path_name
								end if
							else
								set one_file_saved to true
							end if
						end if
					end timeout
				end if
			end if
		end repeat
	on error error_message number error_number
		-- Always release the report, then pass on anything that is not end-of-file.
		close access duplicates_file_id
		if error_number ≠ -39 then error error_message number error_number
	end try
	my add_to_log("Total Deleted: " & deletion_count)
	say "Deleted " & deletion_count
end repeat
say "Finished"
return

-- Append the_message, followed by a return, to "Duplicate Deletion Log" on the desktop.
--   the_message: the text to record.
-- Any error from the write is passed on to the caller after the file has been closed.
on add_to_log(the_message)
	set file_id to open for access file ((path to desktop as string) & "Duplicate Deletion Log") with write permission
	try
		write the_message & return to file_id starting at eof
		close access file_id
	on error error_message number error_number
		-- Do not leave the log file open when the write fails.
		try
			close access file_id
		end try
		error error_message number error_number
	end try
end add_to_log