Monday, February 4, 2008

Goodbye netflix, hello greencine

I got sufficiently frustrated with Netflix's new releases page that I have now left Netflix. There have been many complaints about this new feature, and they seem to have largely fallen on deaf ears. My last act as a Netflix member was to gather my ratings and queue entries. Netflix does not make that easy. The queue is the easier of the two. I don't know if the queue is filled dynamically, but it doesn't matter. Select some text in the queue page that includes some of the films in your queue. Then right-click in Firefox, and select View Selection Source. Select everything in the source window that pops up, copy it, paste it into Emacs, and a little regexp search-and-replace later (or a keyboard macro or two later) you have everything from your queue in a nice list. I haven't done this yet, but I'm quite certain this is going to be easy.

Here's what you need for the more complicated task of grabbing your netflix ratings:
  1. Firefox.
  2. Greasemonkey. I thought this was a gimmick until I actually tried it.
  3. A bit of hacking.
I started off with the Getflix Grabber by Anthony Lieuallen. This script is a lot more complex than what I wanted: it sets up a server-side PHP backend with a MySQL database to store the ratings and movie details. All I wanted was the movie name and rating, so that I could recreate the data in my new movie rental site, GreenCine. So all I had to do was to produce a version of Getflix Grabber that didn't put everything into a database, and didn't grab any additional movie details.

So instead I had the script open a second Firefox window, and write all the titles and ratings into that. For some reason, opening another tab and writing into it didn't work. I had to go back into the tabbed browsing preferences, change the setting so that links open in a new window, then execute the script. Maybe that's just a quirk of the Mac version of Firefox, but that doesn't make the problem any less annoying.

The script puts a pair of Start/Stop buttons at the bottom of the page. Hit Start, and it will gather all the ratings. Keep an eye on it, though: after getting all the ratings it seems to start getting recommendations. You might want to kill the script when it hits that point.

You can't just save the page when you're done gathering ratings either. The source of the page is empty, since all the content is dynamically generated. Instead, once again, select something on the page, view selection source, and save the contents of the page by copy/pasting to your favorite editor.

So the next task now is to put all the ratings and my queue into GreenCine. I suspect I may have to do that from scratch, since GreenCine is a much less likely target for scripts.

Here's the source code of my version of Getflix Grabber:

// ==UserScript==
// @name GetFlix Grabber
// @namespace http://sfmishras.com/getflix
// @description Grab all the data about your NetFlix ratings, pass it to the GetFlix analyzer.
// @include http://www.netflix.com/*
// ==/UserScript==

////////////////////////////////////////////////////////////////////////////////

// Set up the UI
//
// The Start/Stop actions are offered twice: as Greasemonkey menu commands and
// as a pair of buttons inside a bordered bar appended to the end of the page.

GM_registerMenuCommand('Start GetFlix', startGetFlix);
GM_registerMenuCommand('Stop GetFlix', stopGetFlix);

// Build one control button showing `caption` that runs `onClick` when pressed.
function makeControlButton(caption, onClick) {
  var btn=document.createElement('button');
  btn.setAttribute('style', 'margin: 0.5em 1em; vertical-align: middle;');
  btn.appendChild(document.createTextNode(caption));
  btn.addEventListener('click', onClick, true);
  return btn;
}

var button1=makeControlButton('Start', startGetFlix);
var button2=makeControlButton('Stop', stopGetFlix);

// Container bar: label first, then the two buttons, attached last to <body>.
var menu=document.createElement('div');
menu.setAttribute('style', 'text-align: center; border: 10px solid #B9090B;');
menu.appendChild(document.createTextNode('GetFlix:'));
menu.appendChild(button1);
menu.appendChild(button2);
document.body.appendChild(menu);

////////////////////////////////////////////////////////////////////////////////

// Output window

// Handle of the window that receives the results, and the <table> inside it.
// Both are assigned by setupOutput() and read by saveRating().
var output;
var ratingsTable;

/**
 * Open a new browser window and seed it with an empty results table whose
 * header row has two columns, "Name" and "Rating".
 *
 * On success the file-level `output` and `ratingsTable` variables refer to the
 * new window and its table. They are left untouched when the window cannot be
 * opened.
 *
 * @throws {Error} when window.open() returns no window (for example because a
 *     pop-up blocker refused it), instead of failing later with an opaque
 *     "output is null" TypeError.
 */
function setupOutput() {
  var win = window.open();
  if (!win) {
    throw new Error('GetFlix: could not open the output window (is a pop-up blocker active?)');
  }

  var doc = win.document;
  var table = doc.createElement('table');
  doc.body.appendChild(table);

  var header = doc.createElement('tr');
  table.appendChild(header);

  var labels = ['Name', 'Rating'];
  for (var i = 0; i < labels.length; i++) {
    var th = doc.createElement('th');
    th.appendChild(doc.createTextNode(labels[i]));
    header.appendChild(th);
  }

  // Publish only once the table is fully built.
  output = win;
  ratingsTable = table;
}

// Start function

/**
 * Begin a fresh grab: open the output window, then restart the action queue
 * with a single task that fetches ratings page 1.
 *
 * Any run that is already scheduled is cancelled first. Without that, pressing
 * Start twice would leave two runQueue timer chains alive, and stopGetFlix()
 * could only ever cancel the most recent one.
 */
function startGetFlix() {
  setupOutput();
  // Cancel a previous run's pending tick; clearTimeout(null) is a no-op.
  clearTimeout(actionTimer);
  actionTimer=null;
  // init a single-task queue
  actionQueue=[ ['getRatingsPage', 1] ];
  // and start the queue running!
  runQueue();
}

// Stop function

// Halt processing: discard every pending action and cancel the scheduled
// runQueue tick, leaving the timer handle reset to null.
function stopGetFlix() {
  actionQueue=[];
  clearTimeout(actionTimer);
  actionTimer=null;
}

////////////////////////////////////////////////////////////////////////////////

// To control execution speed

// Base delay between queue ticks, and the fraction of that delay by which a
// single tick may randomly deviate in either direction.
var niceness=150;
var nicefact=0.33;

// Return a random delay drawn from
// [niceness - niceness*nicefact, niceness + niceness*nicefact).
function getNice() {
  var spread=niceness*nicefact;
  var lowest=niceness-spread;
  var highest=niceness+spread;

  return lowest + Math.random()*(highest-lowest);
}

////////////////////////////////////////////////////////////////////////////////

// Run the queue

// Handle of the most recently scheduled runQueue timeout, and the FIFO list of
// pending actions. Each action is an array: [name, argument, ...].
var actionTimer=null;
var actionQueue=[];

// One tick of the queue runner: schedule the next tick after a getNice()
// delay, then take the oldest action (if any) and dispatch it by name.
// Actions with an unrecognised name are logged and otherwise ignored.
function runQueue() {
  actionTimer=setTimeout(runQueue, getNice());

  var next=actionQueue.shift();
  if (!next) return;

  var kind=next[0];
  console.log('Queue length: '+actionQueue.length+'. Running action '+kind+'.');

  if (kind==='getRatingsPage') {
    getRatingsPage(next[1]);
  } else if (kind==='parseRatings') {
    parseRatingsPage(next[1], next[2]);
  } else if (kind==='saveRating') {
    saveRating(next[1]);
  }
}
////////////////////////////////////////////////////////////////////////////////

/**
 * Request one page of the "MoviesYouveSeen" listing and, when the response
 * arrives, queue its HTML for parsing as a 'parseRatings' action.
 *
 * @param {number|string} pagenum Page to fetch. It is parsed as a base-10
 *     integer once, and that integer is used both in the URL and in the
 *     queued action, so the follow-up "page + 1" stays numeric.
 */
function getRatingsPage(pagenum) {
  var page=parseInt(pagenum, 10);
  var url='http://www.netflix.com/MoviesYouveSeen?pageNum='+page;
  console.log('Fetch:', url);
  GM_xmlhttpRequest({
    'method':'GET',
    'url':url,
    'onload':function(xhr) {
      actionQueue.push(['parseRatings', page, xhr.responseText]);
    },
    // Report a failed request instead of dropping it silently; nothing is
    // queued, so the run simply does not advance past this page.
    'onerror':function(xhr) {
      console.error('Fetch failed for '+url+' (status '+(xhr && xhr.status)+').');
    }
  });
}

////////////////////////////////////////////////////////////////////////////////

/**
 * Pull every rated title out of one page of ratings HTML and queue a
 * 'saveRating' action for each; if the page shows a "Next" control, also queue
 * a fetch of the following page.
 *
 * The HTML is cut at each 'addlk' marker, so each chunk after the first is
 * expected to hold one title. A chunk in which any field fails to match is
 * logged and skipped.
 *
 * @param {number|string} num  Number of the page that `text` came from.
 * @param {string}        text Raw HTML of that page.
 */
function parseRatingsPage(num, text) {
  var ratings=text.split('addlk');
  ratings.shift(); // get rid of the HTML before the first one

  // Index-bounded loop: an empty chunk must not end the scan early.
  for (var i=0; i<ratings.length; i++) {
    var rating=ratings[i];
    try {
      // NOTE(review): the closing `</` of the title and mpaa patterns was lost
      // in the published listing; restored here by analogy with the genre
      // pattern. Confirm against real page markup.
      var detail={
        'id':rating.match(/movieid=([0-9]+)/)[1],
        'title':rating.match(/"list-title">(.*?)</)[1],
        'year':rating.match(/"list-titleyear"> \(([0-9]+)\)/)[1],
        'mpaa':rating.match(/"list-mpaa">(.+?)</)[1],
        'genre':rating.match(/"list-genre">(.+?)</)[1],
        'rating':rating.match(/([.0-9]+) Stars/)[1]
      };

      actionQueue.push(['saveRating', detail]);
    } catch (e) {
      console.debug('Couldn\'t parse item '+i+' because:');
      console.error(e);
    }
  }

  if (text.match(/alt="Next"/)) {
    // Force numeric addition so a string page number cannot become "21".
    actionQueue.push(['getRatingsPage', parseInt(num, 10)+1]);
  }
}

////////////////////////////////////////////////////////////////////////////////

// Append one result row to the table in the output window: a cell with the
// title followed by a cell with the rating. Only the 'title' and 'rating'
// entries of `detail` are read.
function saveRating(detail) {
  var cellTexts=[detail['title'], detail['rating']];
  var doc=output.document;
  var row=doc.createElement('tr');

  for (var i=0; i<cellTexts.length; i++) {
    var cell=doc.createElement('td');
    row.appendChild(cell);
    cell.appendChild(doc.createTextNode(cellTexts[i]));
  }

  ratingsTable.appendChild(row);
}

////////////////////////////////////////////////////////////////////////////////