Sunday, December 15, 2019

#1 2018-12-29 04:58:31 pm

Nigel Garvey
From:: Warwickshire, England
Registered: 2002-11-20
Posts: 5119

CSV text to AppleScript list

As the thread subject suggests, the handler in this script produces an AppleScript list from CSV text. It does largely the same as an old vanilla one I revised at the beginning of this year, but uses ASObjC and regex and is considerably less fiddly and a bit faster. Assumptions and parameters are detailed in the comments.


(* Assumes that the CSV text follows the RFC 4180 convention:
   Records are delimited by CRLF line breaks (but LFs or CRs are OK too here).
   The last record in the text may or may not be followed by a line break.
   All the records have the same number of fields.
   Fields in the same record are separated by commas (but a different separator can be specified here with an optional parameter).
   The last field in a record is /not/ by a field separator.
   Any field value may be enclosed in double-quotes and /must/ be if its value contains line breaks, separator characters, or double-quote characters.
   Double-quote characters within quoted fields are escaped as pairs of double-quotes.
   Trailing or leading spaces in unquoted fields are included in the field values (but can be trimmed here with the optional 'trimming' parameter).
   Spaces (or anything else!) outside the quotes of quoted fields are not allowed (but situations with spaces outside of quotes can be rescued here with the 'trimming' parameter).
   No other variations are currently supported. *)


on demo()
   set CSVText to (read (choose file of type {"csv"} default location (path to desktop)) as «class utf8»)
   CSVToList(CSVText, {}) -- Same as CSVToList(CSVText, {separator:",", trimming:false})
end demo

(* Return a list of lists from a CSV text.
       CSVText: The CSV text. Can be text, NSString, or NSMutableString.
       implementation: A record with optional 'separator' and 'trimming' properties. 'separator' specifies the field separator (default = ","), while 'trimming' indicates whether or not to trim leading and trailing spaces from field values (default = false).

on CSVToList(CSVText, implementation)
   script o
       use AppleScript version "2.4" -- Yosemite (10.10) or later
       use framework "Foundation"
       property allFields : missing value
       property output : {}
       on CSVToList()
           -- Sort out the field separator and trimming mode.
           set {separator:fieldSeparator, trimming:trimming} to implementation & {separator:",", trimming:false}
           -- Get an NSMutableString version of the CSV text and strip any trailing line breaks from it.
           set |⌘| to current application
           set CSVString to (|⌘|'s class "NSMutableString"'s stringWithString:(CSVText))
           set regexSearch to |⌘|'s NSRegularExpressionSearch
           tell CSVString to replaceOccurrencesOfString:("\\R++\\Z") withString:("") options:(regexSearch) range:({0, its |length|()})
           -- If the very first field's empty, insert an additional field separator at the beginning to make the field visible to regex.
           set testLength to 10
           tell (CSVString's |length|()) to if (it < testLength) then set testLength to it
           if ((CSVString's rangeOfString:(fieldSeparator & "|\\R") options:(regexSearch) range:({0, testLength}))'s location is 0) then tell CSVString to insertString:(fieldSeparator) atIndex:(0)
           -- Get all matches for a regex having capture groups for the field separator, record separator, or text start before each field and for the field itself.
           if (trimming) then
               set fieldPattern to "(" & fieldSeparator & "|\\R|\\A) *+(\"(?:[^\"]|\"\")*+\"|(?:[^ [:cntrl:]" & fieldSeparator & "]| *+(?!" & fieldSeparator & "|\\R|\\Z))*+) *+"
               set fieldPattern to "(" & fieldSeparator & "|\\R|\\A)(\"(?:[^\"]|\"\")*+\"|[^[:cntrl:]" & fieldSeparator & "]*+)"
           end if
           set fieldRegex to (|⌘|'s class "NSRegularExpression"'s regularExpressionWithPattern:(fieldPattern) options:(0) |error|:(missing value))
           set CSVRange to {0, CSVString's |length|()}
           set fieldMatches to fieldRegex's matchesInString:(CSVString) options:(0) range:(CSVRange)
           -- Find out the number of fields per record by counting the matches before the first whose capture group 1 represents a line break.
           set fieldsPerRecord to (count fieldMatches) -- (In case there's only one record.)
           set counter to 0
           repeat with thisMatch in fieldMatches
               set group1Range to (thisMatch's rangeAtIndex:(1))
               if ((CSVString's rangeOfString:("\\R") options:(regexSearch) range:(group1Range)) is group1Range) then
                   set fieldsPerRecord to counter
                   exit repeat
               end if
               set counter to counter + 1
           end repeat
           -- Replace every match with character id 1 and the field value.
           tell fieldRegex to replaceMatchesInString:(CSVString) options:(0) range:(CSVRange) withTemplate:((character id 1) & "$2")
           -- Delete all field-enclosing double-quotes.
           tell CSVString to replaceOccurrencesOfString:("(?<=\\u0001)\"|\"(?=\\u0001)") withString:("") options:(regexSearch) range:({0, its |length|()})
           -- Delete all double-quote-escaping double-quotes.
           tell CSVString to replaceOccurrencesOfString:("\"\"") withString:("\"") options:(0) range:({0, its |length|()})
           -- Get an AS list of the field values as AS texts.
           set allFields to (CSVString's componentsSeparatedByString:(character id 1)) as list
           -- Transfer the values to the output list in lists of a record's worth at a time.
           repeat with i from 2 to (count allFields) by fieldsPerRecord
               set end of my output to my allFields's items i thru (i + fieldsPerRecord - 1)
           end repeat
           return output
       end CSVToList
   end script
   return o's CSVToList()
end CSVToList




Board footer

Powered by FluxBB

RSS (new topics) RSS (active topics)