Here are 3.5 versions to compare performance.
All of them are applied to a data set matching what was described in the original message.
Using a script object is an alternative way to take advantage of references.
Old-fashioned AppleScript
use AppleScript version "2.4"
use framework "Foundation"
use scripting additions
-- Container script object: the working lists live in its properties and
-- are read and written by the main code as "o's <property>".
script o
	-- One list of extracted tokens per paragraph
	property fullResults : {}
	-- Tokens extracted from the paragraph currently being processed
	property partialResults : {}
	-- The paragraphs of the source text
	property myStrings : {}
end script
-- Build one long line made of 30 numbered sentences, each holding four [[...]] tokens
set sampleLine to ""
repeat with n from 1 to 30
	set sentence to "John is [[a boy" & n & "]] and Mary is [[a girl" & n & "]], they are [[brother" & n & "]] and [[sister" & n & "]] "
	set sampleLine to sampleLine & sentence
end repeat
-- Collect 500 copies of that line and join them with linefeeds
set lineCopies to {}
repeat 500 times
	set end of lineCopies to sampleLine
end repeat
set mySource to my recolle(lineCopies, linefeed)
-- The data to scan is ready: announce the start, then time the extraction
tell me to say "Go"
set startDate to current application's NSDate's |date|()
set o's fullResults to {}
set o's myStrings to paragraphs of mySource
-- Both bracket pairs act as delimiters; text items 2, 4, 6, … are the ones collected
set text item delimiters to {"[[", "]]"}
repeat with lineIndex from 1 to (count o's myStrings)
	set o's partialResults to {}
	set pieces to text items of (item lineIndex of o's myStrings)
	repeat with pieceIndex from 2 to (count pieces) by 2
		set end of o's partialResults to item pieceIndex of pieces
	end repeat
	set end of o's fullResults to o's partialResults
end repeat
set text item delimiters to {""}
log o's fullResults
-- timeIntervalSinceNow is negated so the elapsed time is reported as a positive number
set elapsedSeconds to -(startDate's timeIntervalSinceNow()) as real
"That took " & elapsedSeconds & " seconds."
--> "That took 0,874303936958 seconds."
#=====
(*
Joins the items of the list l into a single text, inserting d between items.
AppleScript's text item delimiters are set to d only for the duration of the
coercion and are put back afterwards, including when the coercion fails
(the error is then re-raised to the caller).
*)
on recolle(l, d)
	local oTIDs, t
	set oTIDs to AppleScript's text item delimiters
	try
		set AppleScript's text item delimiters to d
		set t to l as text
	on error errMsg number errNum
		-- Do not leave the global delimiters clobbered when the coercion fails
		set AppleScript's text item delimiters to oTIDs
		error errMsg number errNum
	end try
	set AppleScript's text item delimiters to oTIDs
	return t
end recolle
#=====
My attempts to use regex
use AppleScript version "2.4"
use framework "Foundation"
use scripting additions
-- Script object holding the lists the main code works on; they are always
-- reached through "o's <property>".
script o
	property partialResults : {} -- declared for parity with the other versions of the script
	property fullResults : {} -- one list of extracted tokens per paragraph
	property myStrings : {} -- the paragraphs of the source text
end script
-- Assemble a long line: 30 numbered sentences with four [[...]] tokens each
set longLine to ""
repeat with sentenceNumber from 1 to 30
	set longLine to longLine & "John is [[a boy" & sentenceNumber & "]] and Mary is [[a girl" & sentenceNumber & "]], they are [[brother" & sentenceNumber & "]] and [[sister" & sentenceNumber & "]] "
end repeat
-- Put 500 copies of it in a list, then glue the list with linefeeds
set allLines to {}
repeat with copyNumber from 1 to 500
	set end of allLines to longLine
end repeat
set mySource to my recolle(allLines, linefeed)
-- The data to scan is ready: announce the start, then time the regex-based extraction
tell me to say "Go"
set startDate to current application's NSDate's |date|()
set o's fullResults to {}
set o's myStrings to paragraphs of mySource
repeat with anItem in o's myStrings
	-- Every "[[...]]" occurrence found in the current paragraph, brackets included
	set bracketedTokens to (its findPattern:"\\[\\[.+?\\]\\]" inString:(anItem as string))
	-- Glue the matches with linefeeds, strip both bracket pairs in one pass, then split again
	set gluedTokens to my recolle(bracketedTokens, linefeed)
	set bareTokens to my supprime(gluedTokens, {"[[", "]]"})
	set end of o's fullResults to my decoupe(bareTokens, linefeed)
end repeat
log o's fullResults
"That took " & (-(startDate's timeIntervalSinceNow()) as real) & " seconds."
-- "That took 10,530691981316 seconds." if I disable the call to my decoupe.
-- "That took 10,77609705925 seconds." if I enable the call to my decoupe.
#=====
(*
Returns, as a list of AppleScript texts, every substring of theString matched
by the regular expression thePattern (whole matches, not capture groups).
The expression is compiled with the "dot matches line separators" and
"anchors match lines" options. An empty list is returned when nothing matches.
Raises an error carrying the NSError description when thePattern cannot be
compiled, instead of failing later on a missing value.
*)
on findPattern:thePattern inString:theString
	set theNSString to current application's NSString's stringWithString:theString
	set theOptions to ((current application's NSRegularExpressionDotMatchesLineSeparators) as integer) + ((current application's NSRegularExpressionAnchorsMatchLines) as integer)
	-- Passing "reference" for the error parameter makes the call return {result, NSError}
	set {theRegEx, theError} to current application's NSRegularExpression's regularExpressionWithPattern:thePattern options:theOptions |error|:(reference)
	if theRegEx is missing value then error "findPattern: invalid pattern: " & (theError's localizedDescription() as text)
	-- The search range covers the whole string, measured with the NSString's own length
	set theFinds to theRegEx's matchesInString:theNSString options:0 range:{location:0, |length|:theNSString's |length|()}
	set theResult to {} -- we will add to this
	repeat with i from 1 to count of theFinds
		set theRange to (item i of theFinds)'s range()
		set end of theResult to (theNSString's substringWithRange:theRange) as string
	end repeat
	return theResult
end findPattern:inString:
#=====
(*
Splits the text t into a list of text items, using d as the delimiter(s).
AppleScript's text item delimiters are set to d only while splitting and are
put back afterwards, including when the split fails (the error is re-raised).
*)
on decoupe(t, d)
	local oTIDs, l
	set oTIDs to AppleScript's text item delimiters
	try
		set AppleScript's text item delimiters to d
		set l to text items of t
	on error errMsg number errNum
		-- Restore the caller's delimiters before reporting the failure
		set AppleScript's text item delimiters to oTIDs
		error errMsg number errNum
	end try
	set AppleScript's text item delimiters to oTIDs
	return l
end decoupe
#=====
(*
Glues the items of the list l into one text, with d inserted between items.
The caller's text item delimiters are saved first and restored in every case:
after a successful coercion, and also before re-raising the error when the
list cannot be coerced to text.
*)
on recolle(l, d)
	local oTIDs, t
	set oTIDs to AppleScript's text item delimiters
	try
		set AppleScript's text item delimiters to d
		set t to l as text
	on error errMsg number errNum
		-- A failed coercion must not leave d installed as the global delimiter
		set AppleScript's text item delimiters to oTIDs
		error errMsg number errNum
	end try
	set AppleScript's text item delimiters to oTIDs
	return t
end recolle
#=====
(*
Replaces every occurrence of d1 by d2 in the text t and returns the new text.
Works by splitting t on d1 and joining the pieces with d2. The caller's text
item delimiters are restored afterwards, including when the split or the join
fails (the error is then re-raised).
*)
on remplace(t, d1, d2)
	local oTIDs, l
	set oTIDs to AppleScript's text item delimiters
	try
		set AppleScript's text item delimiters to d1
		set l to text items of t
		set AppleScript's text item delimiters to d2
		set t to l as text
	on error errMsg number errNum
		-- Neither d1 nor d2 may remain installed as the global delimiter
		set AppleScript's text item delimiters to oTIDs
		error errMsg number errNum
	end try
	set AppleScript's text item delimiters to oTIDs
	return t
end remplace
#=====
(*
Removes every occurrence of d from the text t and returns the new text.
d may be a single text or a list of texts, as accepted by text item delimiters.
Works by splitting t on d and joining the pieces with an empty delimiter.
The caller's text item delimiters are restored afterwards, including when the
operation fails (the error is then re-raised).
*)
on supprime(t, d)
	local oTIDs, l
	set oTIDs to AppleScript's text item delimiters
	try
		set AppleScript's text item delimiters to d
		set l to text items of t
		set AppleScript's text item delimiters to ""
		set t to l as text
	on error errMsg number errNum
		-- Put the caller's delimiters back before reporting the failure
		set AppleScript's text item delimiters to oTIDs
		error errMsg number errNum
	end try
	set AppleScript's text item delimiters to oTIDs
	return t
end supprime
#=====
An enhanced version of StefanK’s script
use AppleScript version "2.4"
use framework "Foundation"
use scripting additions
-- Holder for the three working lists; the main code addresses them as
-- properties of this script object ("o's <property>").
script o
	-- The paragraphs of the source text
	property myStrings : {}
	-- Tokens captured in the paragraph currently being processed
	property partialResults : {}
	-- One list of captured tokens per paragraph
	property fullResults : {}
end script
-- Prepare one long line: 30 numbered sentences, four [[...]] tokens in each
set oneLine to ""
repeat with k from 1 to 30
	set piece to "John is [[a boy" & k & "]] and Mary is [[a girl" & k & "]], they are [[brother" & k & "]] and [[sister" & k & "]] "
	set oneLine to oneLine & piece
end repeat
-- Duplicate the line 500 times and join the copies with linefeeds
set manyLines to {}
repeat with copyIndex from 1 to 500
	set end of manyLines to oneLine
end repeat
set mySource to my recolle(manyLines, linefeed)
-- The data to scan is ready: announce the start, then time the regex-based extraction
tell me to say "Go"
set startDate to current application's NSDate's |date|()
set o's fullResults to {}
set o's myStrings to paragraphs of mySource
-- Two constants defined only once, outside the loop
set pattern to "\\[{2}([^]]+)]{2}"
set regex to (current application's NSRegularExpression's regularExpressionWithPattern:pattern options:0 |error|:(missing value))
repeat with theString in o's myStrings
	set cocoaString to (current application's NSString's stringWithString:theString)
	-- Search the NSString itself and size the range with its own length: the match
	-- ranges are applied to cocoaString below, and NSString lengths are counted in
	-- UTF-16 units, which can differ from AppleScript's character count.
	set matches to (regex's matchesInString:cocoaString options:0 range:{location:0, |length|:cocoaString's |length|()})
	set o's partialResults to {}
	repeat with aMatch in matches
		-- Range 1 is the capture group, i.e. the token without its brackets
		set end of o's partialResults to (cocoaString's substringWithRange:(aMatch's rangeAtIndex:1)) as text
	end repeat
	set end of o's fullResults to o's partialResults
end repeat
log o's fullResults
"That took " & (-(startDate's timeIntervalSinceNow()) as real) & " seconds."
--> "That took 10,742474913597 seconds." (measured with the original range computation)
#=====
(*
Returns the items of the list l joined into one text, separated by d.
AppleScript's text item delimiters are changed only for the duration of the
coercion; the previous value is reinstated whether the coercion succeeds or
fails, and a failure is re-raised to the caller.
*)
on recolle(l, d)
	local oTIDs, t
	set oTIDs to AppleScript's text item delimiters
	try
		set AppleScript's text item delimiters to d
		set t to l as text
	on error errMsg number errNum
		-- Reinstate the saved delimiters before passing the error on
		set AppleScript's text item delimiters to oTIDs
		error errMsg number errNum
	end try
	set AppleScript's text item delimiters to oTIDs
	return t
end recolle
#=====
The regex versions take more than 10 times as long as the old-fashioned AppleScript version.
Yvan KOENIG running High Sierra 10.13.6 in French (VALLAURIS, France) dimanche 21 juin 2020 22:55:37