Hi everyone,
Here’s my first attempt at a useful AppleScript. (Useful for me anyway.) If you use a Kindle you can log in at http://kindle.amazon.com/ with your Amazon login and see all of the notes you’ve created and passages you’ve highlighted while reading with your Kindle or the Kindle application on a computer, iPhone, iPad, etc.
The script works by parsing the HTML source from the web page that displays the annotations for an individual book. If you have OmniOutliner Professional it will create a new document and add the annotations. If you don’t, it will write the annotations to an OPML file for importing into any other outlining app. If you have the non-Pro version of OmniOutliner, you’ll have to change the “tell” in the MakeOmniDocument subroutine.
This script requires the Satimage Scripting Addition to do some regular expressions. You can download that here: http://www.satimage.fr/software/en/downloads/downloads_companion_osaxen.html
I’d be happy to get feedback. The HTML parsing, in particular, is a bit kludgy, but it works. I hope someone else finds it useful.
-Tim
-- NoteScraper v.0.5 by Tim Wilson <wilson@visi.com>
-- Applescript to convert the HTML output from Amazon's Kindle web page to an OmniOutliner Pro file
-- or generic OPML XML file.
global outlinerApp
global bookTitle

-- Detect an installed OmniOutliner by bundle id. outlinerApp stays "" when none is
-- found, which makes BuildOutline fall back to OPML output.
-- The result of `exists` is tested explicitly, so the check is correct whether Finder
-- reports a missing application by returning false or by raising an error (the `try`
-- covers the latter).
set outlinerApp to ""
tell application "Finder"
    -- Same precedence as before: plain OmniOutliner first, then OmniOutliner Professional.
    repeat with candidateID in {"com.omnigroup.OmniOutliner3", "com.omnigroup.OmniOutlinerPro3"}
        set bundleID to contents of candidateID
        try
            if (exists application file id bundleID) then
                set outlinerApp to bundleID
                exit repeat
            end if
        end try
    end repeat
end tell
--set outlinerApp to "" -- Uncomment to test OPML output

BuildOutline()
on BuildOutline()
    -- Entry point: read the Kindle notes page that is frontmost in Safari, parse and
    -- sort its annotations, then hand them to OmniOutliner or to the OPML writer.
    tell application "Safari"
        set pageName to name of document 1
        -- Only per-book pages are accepted; the "Daily Refresh" page is rejected.
        if not ((pageName starts with "Amazon Kindle:") and (pageName is not "Amazon Kindle: Daily Refresh")) then
            display alert "Invalid Kindle page" message "Make sure your Kindle notes and highlights are on the front Safari page." buttons {"OK"} default button "OK" as warning
            return
        end if
        set kindleHTML to the source of document 1
    end tell

    -- Parse the page, then order the annotations by their Kindle location
    set theItems to my SortItems(my ParseKindlePage(kindleHTML))

    -- Choose the output format from the application detected at startup
    if outlinerApp is in {"com.omnigroup.OmniOutliner3", "com.omnigroup.OmniOutlinerPro3"} then
        my MakeOmniDocument(theItems)
    else
        my MakeOPML(theItems)
    end if
end BuildOutline
on ParseKindlePage(kindleHTML)
    (*
    Parse the source of a Kindle "notes and highlights" page.

    parameters - kindleHTML [text]: the HTML source of the page
    returns [list]: one record per annotation, highlights first and then notes:
        note:      {annotationType:"note", location, theURL, content, context}
        highlight: {annotationType:"highlight", location, theURL, content, highlightNote}
    side effect: sets the global bookTitle from the name of Safari's front document.

    Requires the Satimage scripting addition for `find text ... with regexp`.
    *)

    -- Book title: the page name minus its 15-character "Amazon Kindle: " prefix, cut at
    -- the first colon. The same document that BuildOutline validated is read here, and a
    -- `text` range is used so the result does not depend on the current text item
    -- delimiters (as "characters ... as string" would).
    tell application "Safari"
        set pageName to the name of document 1
    end tell
    set bookTitle to text 16 thru -1 of pageName
    if bookTitle contains ":" then
        set bookTitle to text 1 thru ((offset of ":" in bookTitle) - 1) of bookTitle
    end if

    -- Retrieve all of the Kindle highlights and notes on the page
    set theNotes to getHTMLElement(kindleHTML, "<div class='highlightRow noteOnly'>", "</div>", true)
    set theHighlights to getHTMLElement(kindleHTML, "<div class=\"highlightRow personalHighlight\">", "</div>
</div>", true)

    set theItems to {}

    -- Each list is walked on its own, so the annotation type is known from the list a
    -- snippet came from rather than by searching for its text in theNotes.
    repeat with snippet in theHighlights
        set snippetText to contents of snippet
        set theURL to find text "kindle:(.*)location=[0-9]*" in snippetText with regexp and string result
        set kindleLocation to (find text "[0-9]*$" in theURL with regexp and string result) as integer
        set highlightText to getHTMLElement(snippetText, "<span class=\"highlight\">", "</span>", true) as string
        set highlightNote to getHTMLElement(snippetText, "<span class=\"noteContent\">", "</span>", true) as string
        if highlightNote is "><" then set highlightNote to "" -- work around a quirky parsing problem
        set end of theItems to {annotationType:"highlight", location:kindleLocation, theURL:theURL, content:highlightText, highlightNote:highlightNote}
    end repeat

    repeat with snippet in theNotes
        set snippetText to contents of snippet
        set theURL to find text "kindle:(.*)location=[0-9]*" in snippetText with regexp and string result
        set kindleLocation to (find text "[0-9]*$" in theURL with regexp and string result) as integer
        set noteContext to getHTMLElement(snippetText, "<span class=\"context\">", "</span>", true) as string
        set noteText to getHTMLElement(snippetText, "<span class=\"noteContent\">", "</span>", true) as string
        set end of theItems to {annotationType:"note", location:kindleLocation, theURL:theURL, content:noteText, context:noteContext}
    end repeat

    return theItems
end ParseKindlePage
on SortItems(theItems)
    (*
    Sort annotation records into ascending order of their `location` property.

    parameters - theItems [list]: records that each have a `location` property
    returns [list]: the same list, reordered in place

    A simple exchange sort: after pass i, item i holds the smallest remaining location.
    The per-comparison debug `log` of the whole list has been removed.
    *)
    set itemCount to count of theItems
    repeat with i from 1 to itemCount - 1
        repeat with j from i + 1 to itemCount
            if location of item j of theItems < location of item i of theItems then
                set temp to item i of theItems
                set item i of theItems to item j of theItems
                set item j of theItems to temp
            end if
        end repeat
    end repeat
    return theItems
end SortItems
on MakeOPML(theItems)
    (*
    Write the annotations to an OPML file chosen by the user.

    parameters - theItems [list]: annotation records as built by ParseKindlePage
    Notes become one outline row each; highlights are wrapped in &quot; entities and
    followed by " Note: ..." when they carry a note. The Kindle location is appended to
    every row in parentheses. The default file name is the global bookTitle & ".opml".
    *)
    set opmlHead to "<?xml version=\"1.0\" encoding=\"utf-8\"?>
<opml version=\"1.0\">
<head>
<title></title>
<expansionState></expansionState>
</head>
<body>" & return
    set opmlFooter to " </body>" & return & "</opml>"
    set opml to opmlHead
    repeat with theItem in theItems
        -- Check each item to determine if it's a highlight or a note.
        set locationSuffix to " (" & (location of theItem) & ")\"/>" & return
        set itemText to my opmlEscapeAttribute(content of theItem)
        if annotationType of theItem is "note" then
            set opml to opml & " <outline text=\"" & itemText & locationSuffix
        else -- It must be a highlight
            if (highlightNote of theItem) is not "" then -- There's an accompanying note
                set opml to opml & " <outline text=\"&quot;" & itemText & "&quot; Note: " & my opmlEscapeAttribute(highlightNote of theItem) & locationSuffix
            else
                set opml to opml & " <outline text=\"&quot;" & itemText & "&quot;" & locationSuffix
            end if
        end if
    end repeat
    set opml to opml & opmlFooter

    set fileHandle to choose file name with prompt ("Choose a file name: ") default name (bookTitle & ".opml")
    set fileRef to open for access fileHandle with write permission
    try
        set eof fileRef to 0 -- truncate any existing file
        write opml to fileRef as «class utf8»
        close access fileRef
    on error errorMessage number errorNumber
        -- Don't leave the file open if the write fails, then report the original error
        try
            close access fileRef
        end try
        error errorMessage number errorNumber
    end try
end MakeOPML

on opmlEscapeAttribute(rawText)
    -- Escape the characters that would terminate or corrupt a double-quoted XML attribute.
    -- NOTE(review): "&" is left alone because the text is cut out of HTML source and is
    -- presumably already entity-encoded; escaping it would double-encode - confirm.
    set escapedText to rawText as text
    set escapedText to my opmlReplaceText(escapedText, "<", "&lt;")
    set escapedText to my opmlReplaceText(escapedText, ">", "&gt;")
    set escapedText to my opmlReplaceText(escapedText, "\"", "&quot;")
    return escapedText
end opmlEscapeAttribute

on opmlReplaceText(sourceText, searchText, replacementText)
    -- Replace every occurrence of searchText in sourceText, restoring the caller's
    -- text item delimiters afterwards.
    set savedDelimiters to AppleScript's text item delimiters
    set AppleScript's text item delimiters to searchText
    set textPieces to text items of sourceText
    set AppleScript's text item delimiters to replacementText
    set replacedText to textPieces as text
    set AppleScript's text item delimiters to savedDelimiters
    return replacedText
end opmlReplaceText
on MakeOmniDocument(theItems)
    -- Create a new OmniOutliner document and add one row per annotation.
    -- Notes become a row of the form: text (location)
    -- Highlights become a row of the form: "text" (location), and any note attached
    -- to the highlight is stored as the row's note and shown expanded.
    -- The application name in the "tell" below must match the installed version
    -- ("OmniOutliner Professional" or "OmniOutliner").
    tell application "OmniOutliner Professional"
        set outlineDoc to make new document at beginning of documents
        set status visible of outlineDoc to false -- hide checkboxes

        repeat with annotation in theItems
            set outlineRow to make new row at the end of children of outlineDoc
            set locationSuffix to " (" & (location of annotation) & ")"

            if annotationType of annotation is "note" then
                set topic of outlineRow to (content of annotation) & locationSuffix
            else
                set topic of outlineRow to "\"" & (content of annotation) & "\"" & locationSuffix
                if (highlightNote of annotation) is not "" then
                    set note of outlineRow to (highlightNote of annotation)
                    set note expanded of outlineRow to true
                end if
            end if
        end repeat
    end tell
end MakeOmniDocument
to getHTMLElement(someText, openTag, closeTag, contentsOnly)
    -- Source for this function from http://discussions.apple.com/message.jspa?messageID=11182225
    (*
    Return a list of the specified HTML element in someText.

    parameters - someText [mixed]: the text to look at (coerced to text)
        openTag [text]: the opening tag (the ending ">" will be searched for if the tag is incomplete)
        closeTag [text]: the closing tag (the tag should be complete when returning the element)
        contentsOnly [boolean]: true returns just the contents, false returns the entire element
    returns [list]: a list of the HTML elements found - {} if none

    An element with no contents yields "" when contentsOnly is true. Without the
    explicit check the range would run backwards, AppleScript would swap its bounds,
    and the result would be the "><" straddling the two tags.
    Unexpected errors are reported with an alert and the elements found so far are returned.
    *)
    set someText to someText as text
    set textLength to count someText
    set closeLength to count closeTag
    set currentOffset to 0 -- the current offset in the text buffer
    set elementList to {} -- the list of elements found
    try
        repeat while currentOffset is less than textLength
            set currentOffset to currentOffset + 1
            set here to offset of openTag in (text currentOffset thru -1 of someText) -- start of opening tag
            if here is 0 then exit repeat -- not found
            set currentOffset to currentOffset + here
            set currentTag to currentOffset - 1 -- mark the start of the element
            if openTag does not end with ">" then -- find the close of the tag
                set here to offset of ">" in (text (currentOffset - 1) thru -1 of someText) -- end of opening tag
                if here is 0 then exit repeat -- not found
                set currentOffset to currentOffset + here - 1
            else
                set currentOffset to currentOffset + (count openTag) - 1
            end if
            -- currentOffset is now the first character after the opening tag; if the tag
            -- ran to the very end of the text there is no room left for a closing tag
            if currentOffset is greater than textLength then exit repeat
            set here to currentOffset
            set there to offset of closeTag in (text currentOffset thru -1 of someText) -- end tag
            if there is 0 then exit repeat -- not found
            set currentOffset to currentOffset + there + closeLength - 2
            set there to currentOffset -- last character of the closing tag
            if contentsOnly then -- add the element contents
                set contentEnd to there - closeLength
                if contentEnd is less than here then
                    set the end of elementList to "" -- empty element
                else
                    set the end of elementList to text here thru contentEnd of someText
                end if
            else -- add the complete element (tags and contents)
                set the end of elementList to text currentTag thru there of someText
            end if
        end repeat
    on error errorMessage number errorNumber
        -- -128 (user cancelled) and -1711 are ignored silently
        if errorNumber is not in {-128, -1711} then
            activate me
            display alert "Error " & (errorNumber as string) message errorMessage as warning buttons {"OK"} default button "OK"
        end if
    end try
    return elementList
end getHTMLElement
Model: 15" MacBook Pro
AppleScript: 2.1.2
Browser: Safari 533.16
Operating System: Mac OS X (10.6)