Here is the new and improved script.
It takes 24 seconds to process 108 database rows by average of 10 columns with a 108 row Line with an average of 10-12 columns for a total of 133 resulting Big List rows, as there as some unique database and some line rows. Less than 50% of the time it took before.
Changes versus the original:
Processes the copy function as a huge range for each of database and line spreadsheets
Fixes two bugs… if unique rows at end of database or end of line spreadsheets they were missed.
Added the millisec timing functions to see what processes take how much time.
Found that the conversion of clipboard (in Excel) to variable in AS takes a long time. Maybe someone else has a resolution of this.
Cannot use the bulk entry of Big List values as each row may have a different number of columns and any blank columns in a huge value function entry over multiple rows causes a problem.
-------------------
--notes: assumes there are no duplicated part number rows in data file or duplicate rows in line file
-- written for Excel 2008 on Snow Leopard
-- depends on three files: Database, Line, and Big List
-- case is ignored here, need to modify if case is significant to part numbers or colors
-- assumes it is ok to sort the Big List for ease of reviewing (and procesing, actually)
-- Database can have unique part numbers (not in Line file) and vice versa
-- database and Line rows can have diferent colors in them, they are combined into the BL
-- there are several options below one can or must set
-- assumes there are no blank rows in database or line spreadsheets
-- The code assumes the database and Line may or may not be sorted by part number already, so it tells Excel to sort
-- if there is a missing color value in the middle of a line or database row, this blank is eliminated when converting to Big List
-- no header row is used here, so do not include header row in the rows to be processed
-- Database and Line cells must be in 'text' format, including the numbers, pre-format Big List to 'Text' format as well
---------- Adjust these to meet your needs
property DoTimingCheck : true -- this turns on timing reports to log, slows script a little (very little)
property skipUniqueLineColors : false --if there is a color value in Line that does not show in database, add or skip
property addNewLinePartNumbers : true -- if there are new Line part numbers (not in database) then add to big list or ignore
property BLFileName : "Big List.xls" --name of the database file
property BLFilePath : "Users:papa:Downloads:" & BLFileName
property BLSheetName : "Big List" --worksheet name
property sortBLColors : true -- this is the value that really counts for sorting, not the database or line
property firstBLCol : "A" --where will big list start (column) (don't need end cell as it is created as we go)
property lastDataCol : "AL" -- adjusted later on
property lastDataRow : 300 -- adjuated later on
property lastLineCol : "AL" -- adjuated later on
property lastLineRow : 300 -- adjuated later on
property rightBLCol : "AL" -- adjuated later on
property lastBLRow : 300 -- this is adjusted later...
property getMillisec : missing value
property myRecord : {}
---------- Adjust these values as well to customize for your use
global currentBLRow -- what row will be copied to
set DataFileName to "Database.xls" --name of the database file
set DataFilePath to "Users:papa:Downloads:" & DataFileName
set DataSheetName to "Database file.txt" --worksheet name
set LineFileName to "Line.xls" --name of the Line info file
set LineFilePath to "Users:papa:Downloads:" & LineFileName
set LineSheetName to "Line file.txt" --worksheet name
set firstDataRow to 1
set firstLineRow to 1
set firstDataCol to "A"
set sortDataCol to "A:A" --the column to sort the worksheet on (part numbers) and to use for comparing
set firstLineCol to "A"
set sortLineCol to "A:A" --the column to sort the worksheet on (part numbers) and to use for comparing
set rangeDataCol to "A:A" --firstDataCol & firstDataRow & ":" & firstDataCol & lastDataRow
set rangeLineCol to "A:A" --firstLineCol & firstLineRow & ":" & firstLineCol & lastLineRow
set startDataRangeCell to firstDataCol & firstDataRow --the top left cell of the database info
set startLineRangeCell to firstLineCol & firstLineRow --the top left cell of the Line info
set endDataRangeCell to lastDataCol & lastDataRow
set endLineRangeCell to lastLineCol & lastLineRow
set FilesNotOpen to false --are the files not loaded already?
set firstBLRow to 1 --where will Big List start (row)
set sortBLCol to "A:A" --column to sort the Big List on (part numbers)
set leftBLCol to "A"
set currentBLRow to firstBLRow
set startBLRangeCell to firstBLCol & firstBLRow --top left in BL range
set endBLRangeCell to rightBLCol & lastBLRow -- adjust as needed, bottom right cell in range
------ timing routines
script timeTools
on firstMillisec()
local overhead
my realgetMillisec() -- gets stuff loaded into mem for later, this first call takes longer time.
set my getMillisec to my realgetMillisec
set _overhead to -(my realgetMillisec()) + 2 * (my realgetMillisec()) -- Nigel Garvey
return _overhead
end firstMillisec
on realgetMillisec()
local res
set res to do shell script "/Library/UnixApps/timetools -ums"
return res
end realgetMillisec
on run
set my getMillisec to my realgetMillisec --firstMillisec, crashes if I use it with firstMillisec...???
end run
end script
-----
if FilesNotOpen is true then -- can modify to check if files open, if not then load
tell application "Microsoft Excel"
open DataFilePath
open LineFilePath
open BLFilePath -- or create the workbook, if does not exist yet
end tell
end if
-----
-----
if DoTimingCheck is true then
tell timeTools to run
set t to timeTools's getMillisec()
log "Start timing, files loaded..."
end if
----
my SortSheet(DataFileName, DataSheetName, startDataRangeCell, endDataRangeCell, sortDataCol, "Data")
my SortSheet(LineFileName, LineSheetName, startLineRangeCell, endLineRangeCell, sortLineCol, "Line")
if DoTimingCheck is true then
set res to ((timeTools's getMillisec()) - t) / 1000
log res & " Worksheets sorted by Excel..."
set lastTime to res
end if
----
set rangeDataCol to firstDataCol & firstDataRow & ":" & firstDataCol & lastDataRow
set rangeLineCol to firstLineCol & firstLineRow & ":" & firstLineCol & lastLineRow
----
set DataCompareCol to GetFirstCol(DataFileName, DataSheetName, rangeDataCol) -- gets the part number column
set LineCompareCol to GetFirstCol(LineFileName, LineSheetName, rangeLineCol) -- gets the part number column
if DoTimingCheck is true then
set res to ((timeTools's getMillisec()) - t) / 1000
log res - lastTime & " Got 'A' column ranges..."
set lastTime to res
end if
----
set LenDataColumnA to (lastDataRow - firstDataRow) + 1 --takes the large number (from global calculation) and brings it into accuracy, faster than 'length of'
set LenLineColumnA to (lastLineRow - firstLineRow) + 1
set lastBLRow to LenDataColumnA + LenLineColumnA -- the max number of rows expected in the Big List, adjusted later
set endBLRangeCell to rightBLCol & lastBLRow
if DoTimingCheck is true then
set res to ((timeTools's getMillisec()) - t) / 1000
log res - lastTime & " Got length 'A' columns..."
set lastTime to res
end if
----
-- these grab the values in the ranges into variables
set theDataRange to GetTheRanges(DataFileName, DataSheetName, startDataRangeCell, lastDataCol, firstDataRow, lastDataRow)
set theLineRange to GetTheRanges(LineFileName, LineSheetName, startLineRangeCell, lastLineCol, firstLineRow, lastLineRow)
if DoTimingCheck is true then
set res to ((timeTools's getMillisec()) - t) / 1000
log res - lastTime & " Move both ranges into variables"
set lastTime to res
end if
----
set BigList to DoComparisonMerge(theDataRange, DataCompareCol, theLineRange, LineCompareCol)
-- primary handler that does the bulk of the work or farms out to subroutines
if DoTimingCheck is true then
set res to ((timeTools's getMillisec()) - t) / 1000
log res - lastTime & " Done parsing data from 2 lists to one Big List..."
set lastTime to res
end if
----
ProcessResult(BigList) -- sends the result to Excel by row as row length varies, each row sorted by color
if DoTimingCheck is true then
set res to ((timeTools's getMillisec()) - t) / 1000
log res - lastTime & " Passing Big List to spreadsheet completed..."
set lastTime to res
end if
----
SortSheet(BLFileName, BLSheetName, startBLRangeCell, endBLRangeCell, sortBLCol, "BL") -- sorts BL by pn
if DoTimingCheck is true then
set res to ((timeTools's getMillisec()) - t) / 1000
log res - lastTime & " Big List worksheet sorted..."
end if
----
-- ========== this sorts a worksheet into part number ascending order
on SortSheet(XLFileName, XLSheetName, startRangeCell, endRangeCell, XLSortCol, TypeFile)
tell application "Microsoft Excel"
tell workbook XLFileName
tell worksheet XLSheetName
set UsedRange to used range
set LastCell to last cell of used range
set theLastRow to first row index of LastCell
set theRightCol to ASCII character ((ASCII number ("A")) - 1 + (first column index of LastCell))
if TypeFile is "Data" then
set lastDataCol to theRightCol
set lastDataRow to theLastRow
else if TypeFile is "Line" then
set lastLineCol to theRightCol
set lastLineRow to theLastRow
else
set rightBLCol to theRightCol
set lastBLRow to theLastRow
end if
sort UsedRange key1 (range XLSortCol) orientation sort columns order1 sort ascending without header
end tell
end tell
end tell
end SortSheet
-- ========== get the first colum (part numbers) to use for comparing
on GetFirstCol(XLFileName, XLSheetName, XLSortCol)
set LineResult1 to {}
tell application "Microsoft Excel"
tell workbook XLFileName
tell worksheet XLSheetName
set LineResult to the value of range XLSortCol as list --get whole range at once into variable (is a list of 1 item lists)
end tell --converted from a copy to clipboard command as took 5 seconds
end tell
end tell
repeat with counter1 from 1 to count of LineResult
set end of LineResult1 to item 1 of (item counter1 of LineResult) -- converts to a list of items
end repeat
return LineResult1
end GetFirstCol
-- ========== compare the line versus database info and use this to get data into rows for big list
on DoComparisonMerge(theDataRange, DataCompareCol, theLineRange, LineCompareCol)
-- now adds rows faster but requires a sort by Excel at the end due to unique Line rows that may come in at randowm
set ResultingList to {}
set lengthDataColumn to length of DataCompareCol
repeat with DataCounter from 1 to lengthDataColumn -- Data A col contents to check vs Line A col
set DataItem to item DataCounter of my DataCompareCol --take Data pn from left column
if DataItem ≠"" then
if LineCompareCol contains DataItem then -- check shows there is a match, so process with Line data
set lengthLineColumn to length of LineCompareCol
set IncludeDataRow to true
set IncludeLineRow to true
set theResult to ProcessRows(DataItem, lengthLineColumn, DataCounter, IncludeDataRow, IncludeLineRow, theDataRange, theLineRange, DataCompareCol, LineCompareCol)
else -- means there is not a match, so save unique data row
set IncludeDataRow to true
set IncludeLineRow to false
set theResult to ProcessRows(DataItem, lengthLineColumn, DataCounter, IncludeDataRow, IncludeLineRow, theDataRange, theLineRange, DataCompareCol, LineCompareCol)
end if
end if
set the end of ResultingList to theResult
end repeat -- now do the unique Line rows left, if wanted
if addNewLinePartNumbers is true then -- if the user wants to include unique line part numbers then check to see if there are any
set lengthLineColumn to length of LineCompareCol
if lengthLineColumn > 0 then -- means there are some unique line part numbers
repeat with LineCounter from 1 to lengthLineColumn
set LineItem to item LineCounter of LineCompareCol
if LineItem ≠"" then
set IncludeDataRow to false
set IncludeLineRow to true
set theResult to ProcessRows(LineItem, lengthLineColumn, DataCounter, IncludeDataRow, IncludeLineRow, theDataRange, theLineRange, DataCompareCol, LineCompareCol)
set the end of ResultingList to theResult
end if
end repeat
end if
end if
return ResultingList --the Big List precursor
end DoComparisonMerge
-- ========== eliminate blanks from a row
on DropBlanks(RowData)
set RowResult to {}
repeat with counter1 from 1 to length of RowData
if item counter1 of RowData ≠"" then
set end of RowResult to item counter1 of RowData as string
end if
end repeat
return RowResult -- this Data row has no blanks, including no blanks for status (NEW), which are added later
end DropBlanks
-- ========== compress line row to only cells with color data and associated status
on DoCompresLineRow(RowLine)
set RowResult to {}
set end of RowResult to item 1 of RowLine -- the part number
repeat with counter1 from 2 to length of RowLine by 2
if item counter1 of RowLine ≠"" then
if counter1 + 1 > length of RowLine then
set LineColorStat to "" -- covers case if the Line row filled cells = length but last cell is color, not status as it was blank
else
set LineColorStat to item (counter1 + 1) of RowLine
end if
set end of RowResult to item counter1 of RowLine as string
set end of RowResult to LineColorStat
end if -- cannot do an exit repeat (after 2 or more blanks) as unsure if there is allowed to be any blankcolor cells in the midst of a row
end repeat
return RowResult -- no blank cells for colors, yet there are for status
end DoCompresLineRow
-- ========== a sort routine based on work from someone else
on SwapSort(tempList) -- by default ignores case, change if you want to use lower case letters in part numbers
set lengthList to length of tempList
if lengthList < 2 then return tempList
set swapDone to true
repeat while swapDone
set swapDone to false
repeat with counter1 from 1 to lengthList - 1
if item counter1 of tempList > item (counter1 + 1) of tempList then
set tempSwapItem to item counter1 of tempList
set item counter1 of tempList to item (counter1 + 1) of tempList
set item (counter1 + 1) of tempList to tempSwapItem
set swapDone to true
end if
end repeat
set lengthList to lengthList - 1
end repeat
return tempList
end SwapSort
-- ========== a sort routine based on work from someone else that does the color/status pair at once
on SwapSortPairs(tempList) -- by default ignores case, change if you want to use lower case letters in part numbers
set lengthList to length of tempList
if lengthList < 3 then return tempList
if lengthList mod 2 = 1 then
set lengthList to lengthList + 1
set the end of tempList to ""
end if
set swapDone to true
repeat while swapDone
set swapDone to false
repeat with counter1 from 1 to (lengthList - 2) by 2
if item counter1 of tempList > item (counter1 + 2) of tempList then
set tempSwapColorItem to item counter1 of tempList
set tempSwapStatusItem to item (counter1 + 1) of tempList
set item counter1 of tempList to item (counter1 + 2) of tempList
set item (counter1 + 1) of tempList to item (counter1 + 3) of tempList
set item (counter1 + 2) of tempList to tempSwapColorItem
set item (counter1 + 3) of tempList to tempSwapStatusItem
set swapDone to true
end if
end repeat
set lengthList to lengthList - 2
end repeat
return tempList
end SwapSortPairs
-- ========== grab the ranges of cells in one copy action to cut down time
on GetTheRanges(theFileName, theSheetName, startRangeCell, theLastCol, firstRow, lastRow) -- gets whole Data range
set theRangeList to {}
tell application "Microsoft Excel"
tell workbook theFileName
tell worksheet theSheetName
set theRangeList to the value of range (startRangeCell & ":" & theLastCol & lastRow) as list
--gets rid of the copy to clipboard activity, returns a list of lists, each sub-list is a row, commas already in place. perfect.
return theRangeList
end tell
end tell
end tell
end GetTheRanges
-- ========== process the selected row for a match, if data get line match
on ProcessRows(RowItem, lengthLineColumn, DataCounter, DataRowBoolean, LineRowBoolean, theDataRange, theLineRange, DataCompareCol, LineCompareCol) --booleans signify row is included in processing
if DataRowBoolean is true then -- if this includes processing a data row...
if item 1 of (item DataCounter of my theDataRange) = RowItem then
set theDataRow to item DataCounter of my theDataRange -- get the full Data row contents
end if
set theDataRow to DropBlanks(theDataRow) -- remove the blanks
set thePN to item 1 of theDataRow -- the part number
set colorList to the rest of theDataRow -- the color cells remaining
set theDataRow to SwapSort(colorList) -- color cells now in order
set beginning of theDataRow to thePN -- add part number back in
set theDataRow to InsertStat(theDataRow)
else
set theDataRow to "" -- means no data row was included in the processing
end if
if LineRowBoolean is true then -- if includes the processing of a line row
set theLineRowNumber to GetMatchRow(RowItem, lengthLineColumn, LineCompareCol) -- find the Line row
set theLineRow to item theLineRowNumber of my theLineRange -- get the row's list
set theLineRow to DoCompresLineRow(theLineRow) -- eliminate any trailing blank cells
set thePN to item 1 of theLineRow
set colorList to rest of theLineRow
set theLineRow to SwapSortPairs(colorList)
set beginning of theLineRow to thePN
else
set theLineRow to "" -- means no line row was processed
end if
set theResultRow to MergeRows(theDataRow, theLineRow)
if sortBLColors is true then --sort row's colors, keeping associated status fields with them
set thePN to item 1 of theResultRow
set colorList to rest of theResultRow
set theResultRow to SwapSortPairs(colorList)
set beginning of theResultRow to thePN
end if
if DataRowBoolean is true then -- if this includes processing a data row...
set item DataCounter of my theDataRange to ""
set item DataCounter of DataCompareCol to ""
end if
if LineRowBoolean is true then -- if includes the processing of a line row
set item theLineRowNumber of my theLineRange to ""
set item theLineRowNumber of LineCompareCol to ""
end if
return theResultRow
end ProcessRows
-- ========== this inserts status cells into database records that were not found in line file
on InsertStat(tempList)
set lengthList to the length of tempList
if lengthList < 3 then return tempList
set tempList1 to {}
set end of tempList1 to item 1 of tempList as string
set end of tempList1 to item 2 of tempList as string
repeat with counter1 from 3 to lengthList
set end of tempList1 to ""
set end of tempList1 to item counter1 of tempList as string
end repeat
set end of tempList1 to ""
return tempList1
end InsertStat
-- ========== get the matching line row identity, includes Lion in the Desert trick
on GetMatchRow(RowItem, lengthLineColumn, LineCompareCol) --
set startPos to 1
set endPos to lengthLineColumn -- although this does not match the row numbers from Excel, need these as offsets to the start of the list
repeat
repeat with LineRowPointer from startPos to endPos
if RowItem = item LineRowPointer of LineCompareCol then return LineRowPointer
if RowItem = item endPos of LineCompareCol then return endPos
if RowItem ≤ item (((endPos - startPos) div 2) + startPos) of LineCompareCol then
set endPos to ((endPos - startPos) div 2) + 1 + startPos
set startPos to startPos + 1 -- can increment as we just checked the startPos for a match
exit repeat
else
set startPos to ((endPos - startPos) div 2) + 1 + startPos
exit repeat
end if
end repeat
end repeat
end GetMatchRow
-- ========== merge the two row lists into one that is sorted, if wanted, by color
on MergeRows(theDataRow, theLineRow)
set newBLRowList to {}
if theLineRow ≠"" then
set end of newBLRowList to item 1 of theLineRow -- place part number in the new row's list as item 1
else
set end of newBLRowList to item 1 of theDataRow -- place part number in the new row's list as item 1
end if
if theDataRow ≠"" then
set dataColorListing to the rest of theDataRow -- only the data colors are in this list
set lengthDataColorListing to length of dataColorListing
else
set dataColorListing to {} -- no data row
end if
if theLineRow ≠"" then
set lineColorListing to the rest of theLineRow -- only the line colors (& their status) are in this list
set lengthLineColorListing to length of lineColorListing -- processes unique color listings from database
else
set lineColorListing to {} -- no line row
end if
if theDataRow ≠"" then
repeat with dataColorPointer from 1 to lengthDataColorListing by 2 --by 2 because it has blank status cell values in list
if lineColorListing does not contain item dataColorPointer of dataColorListing or theLineRow is "" then -- color unique to database
set the end of newBLRowList to item dataColorPointer of dataColorListing
set the end of newBLRowList to "" -- no status associated with database colors, so add a blank cell
set item dataColorPointer of dataColorListing to "" -- this color is done so make blank
end if
end repeat
end if
if theLineRow ≠"" then
repeat with lineColorPointer from 1 to lengthLineColorListing by 2 -- processes unique color listings from line
if dataColorListing does not contain item lineColorPointer of lineColorListing or theDataRow is "" then --this is a color unique to line
if skipUniqueLineColors is false then -- this means add the unique line color items to the new row
set the end of newBLRowList to item lineColorPointer of lineColorListing
if lineColorPointer + 1 > lengthLineColorListing then
set LineColorStat to "" -- covers case if the Line row filled cells = length but last cell is color, not status as it was blank
else
set LineColorStat to item (lineColorPointer + 1) of lineColorListing
end if
set the end of newBLRowList to LineColorStat -- add status associated with line color, blank or "NEW"
end if
set item lineColorPointer of lineColorListing to "" -- this color is done so make blank
if lineColorPointer + 1 ≤ lengthLineColorListing then
set item (lineColorPointer + 1) of lineColorListing to "" -- this status is done so make blank
end if
else -- what remians is the Line colors that are matches of database row colors, so copy line ones with status
set the end of newBLRowList to item lineColorPointer of lineColorListing
if lineColorPointer + 1 > lengthLineColorListing then
set LineColorStat to "" -- covers case if the Line row filled cells = length but last cell is color, not status as it was blank
else
set LineColorStat to item (lineColorPointer + 1) of lineColorListing
end if
set the end of newBLRowList to LineColorStat -- add status associated with line color, blank or "NEW"
set item lineColorPointer of lineColorListing to "" -- this color is done so make blank
if lineColorPointer + 1 ≤ lengthLineColorListing then
set item (lineColorPointer + 1) of lineColorListing to "" -- this status is done so make blank
end if
end if
end repeat
end if
return newBLRowList
end MergeRows
-- ========== takes result and sends to Excel by item (which is a list in itself)
on ProcessResult(BigList)
set LengthBL to length of BigList
set firstRangeAddrCol to firstBLCol
repeat with counter1 from 1 to LengthBL
set BLItem to item counter1 of BigList
set LengthBLItem to the length of BLItem
set secondRangeAddrCol to ConvertToASCII(LengthBLItem)
set destRange to firstRangeAddrCol & currentBLRow & ":" & secondRangeAddrCol & currentBLRow
InsertValueToRow(destRange, BLItem)
end repeat
end ProcessResult
-- ========== paste into BL
on InsertValueToRow(destRange, tempList)
tell application "Microsoft Excel"
tell workbook BLFileName
tell worksheet BLSheetName
set destRange to range (destRange)
set value of destRange to tempList as list
set currentBLRow to currentBLRow + 1
end tell
end tell
end tell
end InsertValueToRow
-- ========== convert numeric to alpha for column identifier
on ConvertToASCII(numericValue)
set lowerLetterNum to numericValue mod 26
if lowerLetterNum > 0 then
set lowerLetterNum to lowerLetterNum - 1
else
set lowerLetterNum to 26
end if
set lowerLetterAlpha to ASCII character ((ASCII number ("A")) + lowerLetterNum)
set upperLetterAlpha to ""
if numericValue > 26 then
set upperLetterNum to numericValue div 26
set upperLetterNum to upperLetterNum - 1
set upperLetterAlpha to ASCII character ((ASCII number ("A")) + upperLetterNum)
end if
set AlphaCol to upperLetterAlpha & lowerLetterAlpha
return AlphaCol
end ConvertToASCII