Hello.
This is for fun but can be useful now and then! Like if you want to compute the readability index, or format every start of a sentence with some markup of some kind.
It should support most western languages well, since I use Satimage.osax for detecting uppercase; it is at least no worse than most uppercase functions.
This is not written with performance in mind, and it won't be until I have the need for it, which I don't see happening anytime soon, really.
Caveats
Doesn't work well with uppercase figure denotations, like fig. A. This holds for footnotes and endnotes as well, should you use an abbreviation in front of them.
The workaround is to either lowercase the denotations, or spell them out fully: figure A.
Edit
Removed Norwegian comments, and unnecessary log statements.
-- This code is ©McUsr 2012, you are not allowed to post it standalone elsewhere without permission, but you may use it as a part of your own work. Preferably, post that here as well!
-- Title handed to checkforOsax, which uses it as the title of its dialog.
property scripttitle : "everySentence Driver"
-- Demo driver: splits a hard-coded sample text into clauses, one paragraph at a time.
-- The collected result is not returned; the handler only logs "done" when finished.
on run
-- nonAlnums is shared with chIsntAlnum, which reads it through the same global.
global nonAlnums
-- NOTE(review): some entries below look mis-encoded (possibly typographic quotes
-- in the original post) — confirm against the author's source before relying on them.
set nonAlnums to {" ", " ", "!", "@", "#", "$", "%", "^", "&", "*", "(", ")", "-", "_", "=", "+", "[", "]", "}", ";", ":", "'", ",", "<", ".", ">", "/", "?", "`", "~", ".", "«", "»", "â", "â", "š", "ÂŽ", "|", "\\", "§"}
-- Empty delimiters so that character lists coerced with "as text" are joined
-- without any separator.
set AppleScript's text item delimiters to ""
set {fullSentences, allClauses} to {{}, {}}
set thetext to "Here's to the crazy ones. This particular sample is illustrated by fig. 1, it clearly shows the correlation between figs and almonds as christmas snacks. The rebels? The troublemakers. The round pegs in the square holes. It all happened around 200 b.c. The ones who see things differently! They're not fond of rules. And they have no respect for the status quo! You can quote them, disagree with them, glorify or vilify them. One of the most infamous acronyms, to my knowledge is d.i.y."
-- When the scripting addition is missing, open the local scripting additions
-- folder and the downloads folder in Finder, then stop with error -128.
if not (checkforOsax by "Satimage.osax" against "This script requires satimage.osax You can download Satimage.osax from here:" from "http://www.satimage.fr/software/en/downloads/downloads_companion_osaxen.html" for my scripttitle) then
tell application "Finder"
open folder (path to scripting additions folder from local domain)
open folder (path to downloads folder)
activate
end tell
error number -128
end if
set thePars to every paragraph of thetext
-- Each paragraph's clause list is appended as ONE item, so allClauses ends up
-- as a list of lists (one inner list per paragraph).
repeat with aPar in thePars
set end of allClauses to everySentence from aPar with clauses
-- set end of fullSentences to everySentence from aPar without clauses
end repeat
log "done"
end run
to everySentence from aParagraph given clauses:clauses
	-- Splits aParagraph into sentences, or into clauses when `clauses` is true.
	--
	-- Parameters:
	--   aParagraph : the text of one paragraph.
	--   clauses    : boolean. false -> return the list of sentences;
	--                true -> return one flat list of clauses, where every sentence is
	--                further cut at "," ";" and ":".
	--
	-- How it works: the paragraph is reversed and scanned for ".", "!" and "?".
	-- A period only ends a sentence when, in the original text, it is followed by a
	-- space and then by a character that is alphanumeric, not a number, and
	-- uppercase; any other period is treated as part of an abbreviation and skipped.
	-- "!" and "?" always end a sentence. The recorded offsets are then used to slice
	-- the original paragraph. Pieces are sliced as-is; whitespace is not trimmed.
	--
	-- Depends on the sibling handlers min3, chIsntAlnum, isnumber and isUCAS, and on
	-- AppleScript's text item delimiters being "" (the run handler sets this) so that
	-- character lists coerced "as text" are joined without separators.
	--
	-- NOTE(review): the boundary arithmetic (factor / ofs) is kept exactly as it was;
	-- it appears to place the space between sentences inconsistently — confirm before
	-- relying on exact whitespace in the returned pieces.
	local tt, ct, ofa, ofb, ofc, ofs, factor, isAbbrev, rparagraph, tmp_ofs, ofs_saved, aSentence, Sentences, revSentence, aClause, theClauses, tailText, nextCh
	set tt to {}
	set rparagraph to reverse of every character of aParagraph as text
	set ct to count rparagraph
	set ofa to offset of "." in rparagraph
	set ofb to offset of "!" in rparagraph
	set ofc to offset of "?" in rparagraph
	-- min3 takes exactly one list parameter; a second argument makes the call fail.
	set ofs to min3({ofa, ofb, ofc})
	set end of tt to {(ct - ofs + 1)}
	set factor to 0
	set isAbbrev to false
	set ofs_saved to 0
	repeat
		-- Build the not-yet-scanned part of the reversed text once per iteration.
		set tailText to (characters (ofs + 1) through -1 of rparagraph as text)
		set ofa to offset of "." in tailText
		set ofb to offset of "!" in tailText
		set ofc to offset of "?" in tailText
		set tmp_ofs to min3({ofa, ofb, ofc})
		-- Leave before probing neighbouring characters: with no terminator left
		-- there is nothing to classify, and the probe indexes would be meaningless.
		if tmp_ofs is 0 then exit repeat -- we're done
		set isAbbrev to false
		-- if the period, is at the end of an abbrev or acronym then
		-- we must set the offset aside, until we have found the end of it
		if tmp_ofs = ofa then -- check for abbreviation
			-- A period counts as an abbreviation unless every test below passes.
			-- an abbreviation is broken by a space, and an uppercase char.
			set isAbbrev to true
			if character (tmp_ofs + ofs - 1) of rparagraph as text = space then
				set nextCh to character (tmp_ofs + ofs - 2) of rparagraph
				if not chIsntAlnum(nextCh) then
					if not isnumber(nextCh) then
						if isUCAS(nextCh) then set isAbbrev to false
					end if
				end if
			end if
		end if
		if not isAbbrev then
			set ofs to ofs_saved + tmp_ofs
			set factor to factor + ofs + 1
			set ofs to factor
			set end of tt to {ct - factor + 2}
			set ofs_saved to 0
		else
			-- Remember how far we skipped so the next real boundary is measured
			-- from the last accepted one.
			set ofs_saved to ofs_saved + tmp_ofs
			set ofs to ofs + tmp_ofs
		end if
	end repeat
	set end of tt to {0}
	-- end parsing a paragraph into sentences, we'll now construct sentences by the
	-- offsets acquired.
	set tt to reverse of tt
	set Sentences to {}
	repeat with i from 1 to ((get count tt) - 1)
		set aSentence to characters (((item i of tt) + 1) as number) thru ((item ((i + 1)) of tt) as number) of aParagraph as text
		copy aSentence to end of Sentences
	end repeat
	if not clauses then return Sentences
	-- Clause mode: cut every sentence at "," ";" and ":" with the same
	-- reversed-scan technique (no abbreviation handling is needed here).
	set theClauses to {}
	repeat with aSentence in Sentences
		set ofs to 0
		set factor to 0
		set tt to {}
		set ct to (get count aSentence)
		set revSentence to reverse of every character of aSentence as text
		repeat
			set tailText to (characters (ofs + 1) through -1 of revSentence as text)
			set ofa to offset of "," in tailText
			set ofb to offset of ";" in tailText
			set ofc to offset of ":" in tailText
			set ofs to min3({ofa, ofb, ofc})
			if ofs is 0 then exit repeat
			set factor to factor + ofs + 1
			set ofs to factor
			set end of tt to {ct - factor + 2}
		end repeat
		if tt is not {} then
			set tt to {ct} & tt
			set end of tt to {0}
			set tt to reverse of tt
			repeat with i from 1 to ((get count tt) - 1)
				set aClause to characters (((item i of tt) + 1) as number) thru ((item ((i + 1)) of tt) as number) of aSentence as text
				copy aClause to end of theClauses
			end repeat
		else -- no clauses
			copy contents of aSentence to end of theClauses
		end if
	end repeat
	return theClauses
end everySentence
to chIsntAlnum(ach)
	-- True when the first character of ach is listed in the global nonAlnums
	-- (i.e. it is treated as non-alphanumeric), false otherwise.
	global nonAlnums
	return (nonAlnums contains (first character of ach))
end chIsntAlnum
on isnumber(aStr)
	-- Reports whether aStr can be coerced to a number.
	-- The coercion itself is the test: any error means "not a number".
	try
		get aStr as number
	on error
		return false
	end try
	return true
end isnumber
on isUCAS(ch) -- Satimage.osax
	-- True when the first character of ch equals the uppercased form of ch,
	-- compared case-sensitively. `uppercase` is the Satimage.osax command.
	local looksUpper
	set looksUpper to false
	considering case
		if first character of ch = (uppercase ch) then set looksUpper to true
	end considering
	return looksUpper
end isUCAS
on min(a, b)
	-- Returns the smaller of a and b; when neither is smaller, b is returned.
	if a < b then return a
	return b
end min
on min3(l)
	-- returns least number above zero
	-- Looks at the first three items of l only; values that are not above zero
	-- are ignored, and 0 is returned when none of the three qualifies.
	local smallest, idx, candidate
	set smallest to 0
	repeat with idx from 1 to 3
		set candidate to item idx of l
		if candidate > 0 then
			if smallest is 0 then
				-- first usable value seen so far
				set smallest to candidate
			else
				set smallest to min(smallest, candidate)
			end if
		end if
	end repeat
	return smallest
end min3
to checkforOsax by OsaxName against eMsg from dlUrl for scripttitle
	-- http://macscripter.net/viewtopic.php?id=39190
	-- Checks whether the scripting addition named OsaxName exists in the local or
	-- the user scripting additions folder.
	-- Returns true when it is found. Otherwise shows a dialog with eMsg (titled
	-- scripttitle, with dlUrl as the editable default answer), opens dlUrl in
	-- Safari if the user presses "Go", and returns false.
	local systemAdditions, userAdditions, isInstalled, dialogReply, openDownloadPage
	set systemAdditions to path to scripting additions folder from local domain as text
	set userAdditions to path to scripting additions folder from user domain as text
	tell application "System Events"
		if exists file (systemAdditions & OsaxName) then
			set isInstalled to true
		else
			set isInstalled to (exists file (userAdditions & OsaxName))
		end if
	end tell
	if isInstalled then return true
	
	set openDownloadPage to false
	-- Pressing the cancel button ("Ok") raises an error; the try block swallows it
	-- so that openDownloadPage simply stays false.
	try
		tell application "SystemUIServer"
			activate
			set dialogReply to (display dialog eMsg with title scripttitle default answer dlUrl buttons {"Go", "Ok"} cancel button 2 default button 1 with icon 2)
			set openDownloadPage to ((button returned of dialogReply) = "Go")
		end tell
	end try
	if openDownloadPage then
		tell application "Safari"
			activate
			open location dlUrl
		end tell
	end if
	return false
end checkforOsax