#!/usr/bin/env ruby
#
# Scrapes a fixed list of BBC iPlayer category listing pages with Hpricot and
# writes the programmes found to an RSS 2.0 file.
#
# Usage: pass the output path as the first argument; when omitted the feed is
# written to 'iplayer-feed.rss'.

require 'rubygems'
require 'open-uri'
require 'hpricot'
require "activesupport"
require 'cgi'

# Request headers sent with every page fetch.
@headers = {
  'User-Agent' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12',
  'Accept' => 'image/png,*/*;q=0.5',
  'Accept-Language' => 'en-us,en;q=0.5',
  'Accept-Charset' => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
  'Referer' => 'http://www.bbc.co.uk/iplayer/'
}

# Accumulates one hash per scraped programme (see scrape_page for the keys).
@programs = []

# Fetches +url+ and every following results page (reached through the
# '#pager-next' link), appending one hash per result to @programs with the
# keys :src, :name, :img_src, :title, :description and :time_left.
#
# url - String URL of the first results page. The argument is not modified.
#
# A result that raises while being parsed (for example because an expected
# element is missing) is skipped with a warning on stderr; the remaining
# results on the page are still collected. Errors raised while fetching or
# parsing a page itself are not rescued here.
#
# Returns nil.
def scrape_page(url)
  while url
    # Decode HTML-escaped ampersands in the URL without mutating the caller's
    # string. NOTE(review): the previous code replaced "&" with "&" in place,
    # which is a no-op; presumably the "&amp;" entity was lost -- confirm.
    url = url.gsub('&amp;', '&')
    # sleep(4 + (rand() * 4)) # Being subtle...
    doc = Hpricot(open(url, @headers))

    (doc / '#results div.result').each do |program|
      begin
        link = program.at('h3 a.resultlink')
        @programs << {
          :src => 'http://www.bbc.co.uk' + link['href'],
          :name => link.inner_html,
          :img_src => program.at('a.resultlink img')['src'],
          # Keep only the text after the last '|' in the title element.
          :title => program.at('.resultSynopsis .title').inner_text.split('|').last.strip,
          :description => program.at('.resultSynopsis .description').inner_text,
          :time_left => program.at('.episode .available').inner_html.strip
        }
      rescue StandardError => e
        # Best effort: one malformed result must not discard the rest.
        warn "Skipping a result on #{url}: #{e.class}: #{e.message}"
      end
    end

    # Continue with the next page when the pager offers one; stop otherwise.
    pn = doc.at('#pager-next')
    url = pn ? 'http://www.bbc.co.uk' + pn['href'] : nil
  end
end

# Changes yesterday:
#
# date = Time.now.yesterday
# iplayer_date = date.strftime("%d-%m")
# morning = "http://www.bbc.co.uk/iplayer/last7days/?filter=txdate%3A#{iplayer_date}&filter=txslot%3Amorning&scope=iplayerlast7days"
# afternoon = "http://www.bbc.co.uk/iplayer/last7days/?filter=txdate%3A#{iplayer_date}&filter=txslot%3Aafternoon&scope=iplayerlast7days"
# evening = "http://www.bbc.co.uk/iplayer/last7days/?filter=txdate%3A#{iplayer_date}&filter=txslot%3Aevening&scope=iplayerlast7days"
#
# [afternoon, evening].each do |url|
#   scrape_page(url)
# end

# Category listing pages to scrape, one per genre.
urls = [
  'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200017&order=date&scope=iplayercategories', # Drama - Classic & Period
  'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200018&order=date&scope=iplayercategories', # Drama - Crime
  'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200032&order=date&scope=iplayercategories', # Drama - Scifi fantasy
  'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A100002PT004&order=date&scope=iplayercategories', # Comedy - Chat shows
  'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200009&order=date&scope=iplayercategories', # Comedy - Satire
  'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200011&order=date&scope=iplayercategories', # Comedy - Sketch
  'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200047&order=date&scope=iplayercategories', # Factual - Cars & Motors
  'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200055&order=date&scope=iplayercategories', # Factual - History
  'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200045&order=date&scope=iplayercategories', # Factual - Life stories
  #'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200058&order=date&scope=iplayercategories', # Factual - Politics
  'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200059&order=date&scope=iplayercategories', # Factual - Science, Nature & Environment
  'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200060&order=date&scope=iplayercategories', # Factual - Travel
  'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A100003&order=date&start=1&scope=iplayercategories', # Drama
]

urls.each { |u| scrape_page(u) }

# Drop identical entries collected more than once across the listings.
@programs.uniq!

@programs.each do |program|
  # NOTE(review): the item body template is a single blank line in this copy
  # of the file; confirm whether markup is meant to go here.
  program[:body] = <<-EOF

  EOF

  # Publication time is taken as (7 - time_left) days ago. time_left goes
  # through String#to_i, so text without a leading number counts as 0.
  program[:published_at] = (7 - program[:time_left].to_i).days.ago
end

# Build the RSS 2.0 document: channel metadata followed by one item per
# scraped programme.
xml = Builder::XmlMarkup.new()
xml.instruct!
xml.rss "version" => "2.0", "xmlns:dc" => "http://purl.org/dc/elements/1.1/" do
  xml.channel do
    xml.pubDate CGI.rfc1123_date(Time.now)
    xml.title "BBC iPlayer Feed"
    xml.link "http://feeds.feedburner.com/iplayer"
    xml.description "Big British Castle's iPlayer's RSS Feeed"

    @programs.each do |program|
      xml.item do
        xml.title("#{program[:name]} - #{program[:title]}")
        xml.description program[:body]
        xml.guid program[:src]
        xml.pubDate CGI.rfc1123_date(program[:published_at])
        xml.link program[:src]
      end
    end
  end
end

# Write the feed to the path given as the first argument (or the default).
File.open(File.expand_path(ARGV[0] || 'iplayer-feed.rss'), 'w') { |f| f.write xml.target! }