Data sort and concatenate across 2 excel files using 4 search lists

Morning Folks,

this script searches for data from four different category lists {Air, Raw, Water, Soil} in one excel sheet, concatenates the result with matching uncertainty data from another sheet and then copies it all into a new text file for later review. It can work through a user-specified folder of files, although this takes about half an hour to process 14 files each containing 2 sheets of 600 rows. Next I’ll be adding a 3rd sheet containing climate data and would like to call the category lists from a file (for varying scenarios) but for now i am moderately happy with the outcome.

My programming background has so far included a lot of Labview and a bit of mathematical python - this is my first Applescript. I would be really happy to get some feedback

my main goal for using applescript is to speed up pre- and post-processing of environmental assessment data and aerosol/biomass combustion data while still keeping the data in an excel friendly format (important for exchange with colleagues)

cheers

Nic

PS - if you are an enviro scientist, yes the search lists are incomplete at present.






set theFolder to choose folder -- nifty line; asks user to choose folder to work from

tell application "Finder"
	set fileList to every file of theFolder
	
	repeat with filename in fileList -- step through all files in user defined folder
		
		open filename -- note that the file is opened by Finder rather than Excel. this is a quirk I haven't been able to get around.
		
		tell application "Microsoft Excel"
			activate
			activate object sheet "All"
			
			
			tell active sheet
				clear range formats used range
			end tell -- clears formatting of active sheet. reduces formatting issues when concatenating further in the script.
			
			
			set samplerange to range ("A11:L11")
			set info_range to range ("A2:b8")
			select info_range
			
			copy range info_range -- copy and paste data characteristics from the file.
			
			
			tell application "TextEdit"
				activate
				set date_ to (current date) as string
				tell application "System Events"
					tell application "TextEdit" to activate
					tell process "TextEdit" to keystroke date_
					keystroke return
					keystroke return
					delay 1
				end tell --Insert date and time into TextEdit 
				
				tell application "System Events"
					tell process "TextEdit" -- this section pastes the information block at the top of the file
						keystroke "v" using {command down, shift down, option down}
						keystroke return
						keystroke return
					end tell
				end tell
			end tell
			
			tell application "Microsoft Excel"
				activate
				activate object sheet "All"
				copy range range ("A10:l10") -- data_header
				tell application "TextEdit" to activate
				tell application "System Events"
					tell process "TextEdit"
						keystroke "v" using {command down, shift down, option down}
						keystroke tab
						keystroke tab
					end tell
				end tell
			end tell
			
			tell application "Microsoft Excel"
				activate
				activate object sheet "Uncertainty Analysis"
				
				tell active sheet
					clear range formats used range
				end tell -- clears formatting of active sheet.
				
				copy range range ("a1:j1") -- stat_header
				tell application "TextEdit" to activate
				tell application "System Events"
					tell process "TextEdit"
						keystroke "v" using {command down, shift down, option down}
						keystroke return
						keystroke return
					end tell
				end tell
			end tell
			
			tell application "Microsoft Excel"
				activate
				
				-- the following are lists of pollutant species the program should search for. Each list is specific to a particular pollutant class (e.g. Air, Soil...)
				
				set Air to {"Arsenic", "Cadmium", "Chromium", "Carbon Monoxide", "Carbon Dioxide", "Dioxin", "Formaldehyde", "Lead", "Methane, Bio", "Methane, Fossil", "Mercury", "Nitrogen", "Nickel", "NMVOC", "Nitrate", "Pah", "Partic", "Sulfate"}
				set Raw to {"Aluminium", "Chromi"}
				set Water to {"Uranium"}
				set Soil to {"Fluoride", "Chloride", "Chromium"}
				
				set flow_vars to {Air, Raw, Water, Soil}
				
				repeat with current_item in flow_vars
					
					if text of current_item is equal to Raw then
						set filter_cat to "Raw"
					else if text of current_item is equal to Air then
						set filter_cat to "Air"
					else if text of current_item is equal to Water then
						set filter_cat to "Water"
					else if text of current_item is equal to Soil then
						set filter_cat to "Soil"
					end if
					
					-- bug checks
					
					print "*** the filter_Category is '" & filter_cat & "'"
					
					print "*** the current flow variables are " & current_item
					
					--
					
					activate object worksheet "All"
					autofilter range range "A10:L669" field 3 criteria1 filter_cat
					activate object worksheet "Uncertainty Analysis"
					autofilter range range "A1:J669" field 2 criteria1 filter_cat
					
					repeat with flow_item in current_item
						print "*** current search item is " & flow_item
						
						set continueSearch to true
						repeat while continueSearch is true
							set activecell to "B11"
							try
								
								set foundRange1 to (find range "b:b" of worksheet "All" what flow_item)
								print "*** found range in sheet ALL for '" & flow_item & "'"
								set foundValue to value of foundRange1
								print "### range is '" & foundValue & "'"
								
								copy range entire row of foundRange1
								
								tell application "TextEdit" to activate
								tell application "System Events"
									tell process "TextEdit"
										keystroke "v" using {command down, shift down, option down}
										keystroke tab
										keystroke tab
									end tell
								end tell
								
								activate object worksheet "Uncertainty Analysis"
								set activecell to "a1"
								set foundRange2 to (find range "a:a" of worksheet "Uncertainty Analysis" what foundValue)
								print "*** found range in sheet UNCERTAINTY for '" & foundValue & "'"
								
								
								copy range entire row of foundRange2
								
								tell application "TextEdit" to activate
								tell application "System Events"
									tell process "TextEdit"
										keystroke "v" using {command down, shift down, option down}
										keystroke return
									end tell
								end tell
								
								delete entire row of foundRange1
								delete entire row of foundRange2
								
							on error
								print "*** " & flow_item & " not found!"
								set continueSearch to false
								
							end try
							
						end repeat
					end repeat
				end repeat
			end tell
			
			
			
			--
		end tell
		
		
		
		
		
		tell application "Finder" to set the clipboard to name of (info for (filename as alias))
		
		tell application "TextEdit"
			activate
			
			-- new bit	- replaces mu with u to save issues with formatting later
			
			tell application "System Events"
				tell application "TextEdit" to activate
				tell process "TextEdit" to keystroke "a" using (command down)
				keystroke "f" using (command down)
				keystroke "µ"
				keystroke tab
				keystroke "u"
				keystroke tab
				keystroke tab
				keystroke tab
				keystroke tab
				keystroke space
				
			end tell
			
			
			
			
			
			
			
			
			save document "Untitled" in file {"Macintosh HD:Sima_data:processing_tests:" & (the clipboard) & ".txt"}
			
		end tell
		
		
		tell application "TextEdit"
			
			quit without saving
		end tell
		
		tell application "Microsoft Excel"
			
			close active workbook without saving
		end tell
		
		
		
		
	end repeat
end tell

--useful bit of routine

--tell application "TextEdit"
--	tell text of document of front window
--		repeat with P from (count of paragraphs) to 1 by -1
--			if characters of paragraph P is {linefeed} then delete paragraph P
--		end repeat
--	end tell
--end tell