Archive Instapaper RSS Feed to Evernote – BETA

Description

This is an early beta version of a script which imports items from an Instapaper RSS feed directly into a selected Evernote notebook (the default is called "Instapaper Feed"). Before you begin, you'll need to copy and paste the URL of the RSS feed from the Instapaper website into the script. One place you can find the address is in the FOLDER TOOLS section of your Instapaper webpage: <img src="http://media.veritrope.com/wp-content/uploads/2011/01/INSTAPAPER-RSS.jpg" alt="Location of the RSS feed link in the FOLDER TOOLS section of Instapaper" /> Once you run it, the AppleScript parses the RSS feed for your folder. For each feed item, if there isn't already a note in your Evernote notebook with the same title, it creates a new note in Evernote which contains an "Instapaper-ed Version" of the item, preserving as much data as possible along the way (Description, Source URL, and Creation Date). I've also put in some basic error checking in case of "rate throttling", but didn't get to test that thoroughly.

The Code

(*
http://veritrope.com
Archive Instapaper RSS Feed to Evernote -- BETA
Version 0.8
January 23, 2011
Project Status, Latest Updates, and Comments Collected at:
http://veritrope.com/code/archive-instapaper-rss-feed-to-evernote

THIS SCRIPT ARCHIVES THE 25 MOST RECENT ITEMS
FROM YOUR INSTAPAPER FOLDER'S RSS FEED.

JUST ADD THE URL FOR THE RSS FEED BELOW!
*)


--EDIT THIS LINE TO ADD URL BEFORE RUNNING SCRIPT!
--(the main program hands this value to curl to download the feed)
property theRSSfeed : "http://www.instapaper.com/....."

--IF YOU WANT TO CHANGE THE NAME OF THE IMPORT FOLDER, YOU CAN DO SO HERE
--(notebook_List creates this notebook when Evernote does not already have it)
property evNotebook : "Instapaper Feed"

--OTHER PROPERTIES (DO NOT CHANGE)
--EVTitles : filled by the main program with the note titles returned by ev_title_List
--inHTML   : set by makeEvHTML to the page text returned by process_Link
--the_code : set by process_Link to the status code it reads from the curl output
property EVTitles : []
property inHTML : ""
property the_code : ""

(*
======================
/// MAIN PROGRAM
======================
*)

--REMEMBER THE CURRENT TEXT ITEM DELIMITERS
--(the parsing handlers change them; they are restored once parsing is done)
set astid to AppleScript's text item delimiters

--GET THE RSS FEED
--(quoted form stops the shell from interpreting "&", "?" or spaces in the URL)
set theFeed to do shell script "curl " & quoted form of theRSSfeed

--PARSE THE XML
--(the folder title is optional: a feed without that exact tag sequence must not abort the run)
set IP_folder to ""
set IP_folderTitles to my parsecode(theFeed, "<channel><title>", "</title>")
if IP_folderTitles is not {} then set IP_folder to item 1 of IP_folderTitles
set IP_items to my parsecode(theFeed, "<item>", "</item>")

--BUILD THE DATA ARRAY (Thanks RIRedinPA!)
set IP_articles to {}
repeat with x from 1 to count of every item of IP_items
    set IP_item to item x of IP_items
    --cleantags resets the delimiters to astid after every extraction
    set item_name to my cleantags(IP_item, "<title>", "</title>", astid)
    set item_description to my cleantags(IP_item, "<description>", "</description>", astid)
    set item_link to my cleantags(IP_item, "<link>", "</link>", astid)
    set item_date to my cleantags(IP_item, "<pubDate>", "</pubDate>", astid)
    copy {name:item_name, description:item_description, URL:item_link, date:item_date} to end of IP_articles
end repeat

--PUT THE DELIMITERS BACK THE WAY WE FOUND THEM
set AppleScript's text item delimiters to astid

--ANY EXISTING ITEMS IN NOTEBOOK?
--(make sure the notebook exists, then read its note titles exactly once)
my notebook_List(evNotebook)
set EVTitles to my ev_title_List(evNotebook)

--PROCESS THE DATA FROM THE RSS FEED
repeat with IP_article in IP_articles
    set evName to name of IP_article
    set rawURL to URL of IP_article as text
    set encURL to my encodedURL(rawURL)
    set evURL to "http://www.instapaper.com/text?u=" & encURL
    set item_date to date of IP_article
    set evDate to my convertInstaDate(item_date)
    set evDesc to description of IP_article
    
    --DOES ITEM ALREADY EXIST?
    if EVTitles does not contain evName then
        set newNote to my makeEvHTML(rawURL, evURL, evName, evNotebook, evDate, evDesc)
        --remember the title so a second feed item with the same title is not archived twice
        set end of EVTitles to evName
    end if
end repeat


(*
======================================
/// INSTAPAPER-RELATED SUBROUTINES
=======================================
*)


--PARSE CODE
--Returns a list holding the text found between every opentag/closetag pair in code.
--  code     : the text to search
--  opentag  : text that marks the start of each element
--  closetag : text that marks the end of each element
--Any text before the first opentag is ignored. A list with no entries is returned
--when opentag never occurs. AppleScript's text item delimiters are restored to the
--value they had on entry, including when an error is raised.
on parsecode(code, opentag, closetag)
    set savedDelimiters to AppleScript's text item delimiters
    set itemlist to {}
    try
        --split on the opening tag; the first piece is whatever precedes the first tag
        set AppleScript's text item delimiters to opentag
        set taglist to every text item of code
        set childtaglist to {}
        if (count of taglist) > 1 then set childtaglist to items 2 thru -1 of taglist
        
        --each remaining piece starts right after an opening tag: keep everything up to the closing tag
        set AppleScript's text item delimiters to closetag
        repeat with thisitem in childtaglist
            set thisChunk to contents of thisitem
            if thisChunk is "" then
                --an empty piece (two opening tags back to back) has no text items to read
                copy "" to end of itemlist
            else
                copy text item 1 of thisChunk to end of itemlist
            end if
        end repeat
    on error errmsg number errnum
        set AppleScript's text item delimiters to savedDelimiters
        error errmsg number errnum
    end try
    set AppleScript's text item delimiters to savedDelimiters
    return itemlist
end parsecode

--CLEAN TAGS
--Returns the text between the first opentag and the closetag that follows it in thisitem.
--  thisitem : the text to search
--  opentag  : text that marks the start of the wanted value
--  closetag : text that marks the end of the wanted value
--  astid    : value AppleScript's text item delimiters are set to before returning
--When the tag cannot be found a dialog reports the problem and "" is returned, so the
--caller always receives text. The delimiters are reset to astid on both paths.
on cleantags(thisitem, opentag, closetag, astid)
    try
        set AppleScript's text item delimiters to opentag
        set rawitem to text item 2 of thisitem
        set AppleScript's text item delimiters to closetag
        set cleanitem to text item 1 of rawitem
    on error errmsg
        -->reset the delimiters before handing control to the user
        set AppleScript's text item delimiters to astid
        display dialog "Could not clean the " & opentag & return & return & errmsg
        return ""
    end try
    -->reset the delimiters
    set AppleScript's text item delimiters to astid
    return cleanitem
end cleantags

--CONVERT INSTAPAPER DATE/TIME FORMATS
--Turns a feed date such as "Sun, 23 Jan 2011 18:30:05 GMT" into an AppleScript date.
--  item_date : text whose words are, in order: weekday, day of month, abbreviated
--              English month name, year, hour (24-hour clock), minute, second, zone
--Returns a date carrying those calendar and clock values. The weekday and the zone
--are not used, so no time-zone conversion is applied.
--Raises an error when the month abbreviation is not recognised.
--The date is assembled field by field instead of being parsed from a string, so the
--result does not depend on the user's date format settings and 12:xx stays at noon.
on convertInstaDate(item_date)
    set monthNames to {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}
    set monthConstants to {January, February, March, April, May, June, July, August, September, October, November, December}
    
    set iDate to (second word of item_date) as integer
    set iMonth to third word of item_date
    set iYear to (fourth word of item_date) as integer
    set iHour to (fifth word of item_date) as integer
    set iMin to (sixth word of item_date) as integer
    set iSec to (seventh word of item_date) as integer
    
    --look up the month constant that matches the abbreviation
    set monthIndex to 0
    repeat with m from 1 to 12
        if (item m of monthNames) is iMonth then
            set monthIndex to m
            exit repeat
        end if
    end repeat
    if monthIndex is 0 then error "Unrecognized month \"" & iMonth & "\" in date: " & item_date
    
    set theDate to current date
    --park the day on the 1st so changing the month or year cannot roll over into the next month
    set day of theDate to 1
    set year of theDate to iYear
    set month of theDate to item monthIndex of monthConstants
    set day of theDate to iDate
    set time of theDate to (iHour * hours) + (iMin * minutes) + iSec
    return theDate
end convertInstaDate

--URL ENCODE
--Returns rawURL percent-encoded by PHP's rawurlencode().
--  rawURL : the text to encode
--The URL is handed to php as a command-line argument (read back through $argv[1])
--instead of being pasted into the PHP source, so quotes or other special characters
--in the URL cannot break or alter the command. "--" ends php's own option parsing.
--Requires a php executable on the shell's PATH.
--NOTE(review): confirm php is installed on the target macOS version.
on encodedURL(rawURL)
    set scpt to "php -r 'echo rawurlencode($argv[1]);' -- " & quoted form of rawURL
    return do shell script scpt
end encodedURL

(*
======================================
/// EVERNOTE-RELATED SUBROUTINES
======================================
*)


--MAKE THE NOTE
--Downloads the text version of an article and stores it as a new Evernote note.
--  rawURL     : original article address, saved as the note's source URL
--  evURL      : address of the page to download (fetched through process_Link)
--  evName     : title for the new note
--  evNotebook : name of the notebook that receives the note
--  evDate     : creation date to stamp on the note
--  evDesc     : description text placed above the article body
--Returns the note created by Evernote.
--Also stores the downloaded page in the inHTML property.
on makeEvHTML(rawURL, evURL, evName, evNotebook, evDate, evDesc)
    set inHTML to my process_Link(evURL)
    --keep only what lies between the story container and the closing html tag
    --(this also discards the response headers that process_Link includes)
    set storyHTML to my extractBetween(inHTML, "<div id=\"story\">", "</html>") as text
    set evHTML to "<html>" & evDesc & "<hr />" & storyHTML
    tell application "Evernote"
        set newNoteHTML to create note with html evHTML title evName notebook evNotebook created evDate
        set source URL of newNoteHTML to rawURL
    end tell
    return newNoteHTML
end makeEvHTML

--PROCESS WORKING URL
--Downloads evURL with curl and returns the response text (headers included, because
--of curl's -i flag).
--  evURL : address of the page to download
--When the first status line reports 400 the download is retried once after a
--five second pause. The status code that was read is stored in the the_code
--property (0 when no status line could be read).
--Raises an error when curl itself fails, so the caller is not handed an undefined value.
--Because -L follows redirects, the status inspected belongs to the first response.
on process_Link(evURL)
    --quoted form stops the shell from interpreting "?", "&" or "%" in the URL
    set fetchCommand to "curl -siL " & quoted form of evURL
    try
        set the_page to do shell script fetchCommand
    on error errmsg number errnum
        error "Could not download " & evURL & return & return & errmsg number errnum
    end try
    
    --the status line looks like "HTTP/1.1 200 OK" or "HTTP/2 200": the code is the
    --second space-separated field whatever the length of the protocol version
    set savedDelimiters to AppleScript's text item delimiters
    set the_code to 0
    try
        set AppleScript's text item delimiters to space
        set the_code to (text item 2 of (paragraph 1 of the_page)) as integer
    end try
    set AppleScript's text item delimiters to savedDelimiters
    
    --ERROR CHECKING
    --RATE LIMIT EXCEEDED
    if the_code = 400 then
        delay 5
        set the_page to do shell script fetchCommand
    end if
    return the_page
end process_Link

--GET LIST OF TITLES IN NOTEBOOK
--Makes sure the notebook exists (notebook_List creates it when missing) and returns
--the list of note titles reported by ev_title_List.
--  evNotebook : name of the Evernote notebook
--notebook_List is called as a plain statement because it returns no value when the
--notebook already exists; both handlers address Evernote themselves, so no tell
--block is needed here.
on getEvernoteList(evNotebook)
    my notebook_List(evNotebook)
    return my ev_title_List(evNotebook)
end getEvernoteList

--EVERNOTE NOTEBOOK SELECTION/CREATION SUBROUTINE
--Brings Evernote to the front and creates the notebook named evNotebook unless a
--notebook with that name is already present.
--  evNotebook : name of the notebook that must exist
on notebook_List(evNotebook)
    tell application "Evernote"
        activate
        --ask Evernote for all notebook names in one request
        set knownNames to name of every notebook
        --ALREADY THERE? THEN NOTHING TO DO
        if knownNames contains evNotebook then return
        set createNotebook to create notebook evNotebook
    end tell
end notebook_List

--TITLES IN EVERNOTE NOTEBOOK LIST SUBROUTINE
--Brings Evernote to the front and returns a list with the title of every note found
--by the search query  notebook:"<evNotebook>" .
--  evNotebook : name of the notebook to search
on ev_title_List(evNotebook)
    tell application "Evernote"
        activate
        set listofNotes to {} (*PREPARE TO GET EVERNOTE'S LIST OF NOTES *)
        --the notebook name is wrapped in escaped double quotes so names containing spaces work
        set allNotes to (find notes "notebook:\"" & evNotebook & "\" ")
        repeat with currentNote in allNotes
            set currentNoteName to (the title of currentNote)
            copy currentNoteName to the end of listofNotes
        end repeat
    end tell
    return listofNotes
end ev_title_List

(*
======================================
/// UTILITY SUBROUTINES
=======================================
*)

--REPLACE SUBROUTINE
--Returns theString with every occurrence of theOriginalString replaced by theNewString.
--theString is handed back untouched when theOriginalString does not occur in it.
--AppleScript's text item delimiters are put back to their previous value before returning.
on replaceString(theString, theOriginalString, theNewString)
    set od to AppleScript's text item delimiters
    --split on the text to remove ...
    set AppleScript's text item delimiters to theOriginalString
    set pieces to text items of theString
    if (count of pieces) > 1 then
        --... and glue the pieces back together with the replacement in between
        set AppleScript's text item delimiters to theNewString
        set theString to pieces as string
    end if
    set AppleScript's text item delimiters to od
    return theString
end replaceString

-- EXTRACTION SUBROUTINE
-- Returns the part of SearchText that follows the last occurrence of startText and
-- precedes the next occurrence of endText. Matching ignores case and diacriticals.
-- AppleScript's text item delimiters are put back to their previous value before returning.
on extractBetween(SearchText, startText, endText)
    ignoring case and diacriticals
        set previousDelimiters to AppleScript's text item delimiters
        -- everything after the final start marker
        set AppleScript's text item delimiters to startText
        set afterStart to text of text item -1 of SearchText
        -- ... cut off at the first end marker
        set AppleScript's text item delimiters to endText
        set wantedText to text of text item 1 of afterStart
        set AppleScript's text item delimiters to previousDelimiters
        return wantedText
    end ignoring
end extractBetween

--SORT SUBROUTINE
--Returns a new list holding the items of my_list, coerced to text, in ascending order.
--  my_list : list of values that can be coerced to text
--Selection sort: every pass picks the smallest item whose position has not been used yet.
--"Nothing picked yet" is tracked with a position of 0 instead of an empty string, so
--lists that contain "" are sorted correctly.
on simple_sort(my_list)
    set index_list to {}
    set sorted_list to {}
    set item_count to number of items in my_list
    repeat item_count times
        set low_item to ""
        set low_item_index to 0
        repeat with i from 1 to item_count
            if i is not in index_list then
                set this_item to item i of my_list as text
                --first candidate of this pass, or smaller than the best one so far
                if low_item_index is 0 or this_item comes before low_item then
                    set low_item to this_item
                    set low_item_index to i
                end if
            end if
        end repeat
        set end of sorted_list to low_item
        set end of index_list to low_item_index
    end repeat
    return sorted_list
end simple_sort