#!/usr/bin/env ruby require 'rubygems' require 'open-uri' require 'hpricot' require "activesupport" require 'cgi' @headers = { 'User-Agent' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12', 'Accept' => 'image/png,*/*;q=0.5', 'Accept-Language' => 'en-us,en;q=0.5', 'Accept-Charset' => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Referer' => 'http://www.bbc.co.uk/iplayer/' } @programs = [] def scrape_page(url) url.gsub!("&", "&") # sleep(4 + (rand() * 4)) # Being subtle... doc = Hpricot(open(url, @headers)) begin (doc/'#results div.result').each do |program| @programs << { :src => 'http://www.bbc.co.uk' + program.at('h3 a.resultlink')['href'], :name => program.at('h3 a.resultlink').inner_html, :img_src => program.at('a.resultlink img')['src'], :title => program.at('.resultSynopsis .title').inner_text.split('|').last.strip, :description => program.at('.resultSynopsis .description').inner_text, :time_left => program.at('.episode .available').inner_html.strip } end rescue end if pn = doc.at('#pager-next') scrape_page('http://www.bbc.co.uk' + pn['href']) end end # Changes yesterday: # # date = Time.now.yesterday # iplayer_date = date.strftime("%d-%m") # morning = "http://www.bbc.co.uk/iplayer/last7days/?filter=txdate%3A#{iplayer_date}&filter=txslot%3Amorning&scope=iplayerlast7days" # afternoon = "http://www.bbc.co.uk/iplayer/last7days/?filter=txdate%3A#{iplayer_date}&filter=txslot%3Aafternoon&scope=iplayerlast7days" # evening = "http://www.bbc.co.uk/iplayer/last7days/?filter=txdate%3A#{iplayer_date}&filter=txslot%3Aevening&scope=iplayerlast7days" # # [afternoon, evening].each do |url| # scrape_page(url) # end urls = [ 'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200017&order=date&scope=iplayercategories', # Drama - Classic & Period 'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200018&order=date&scope=iplayercategories', # Drama - Crime 'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200032&order=date&scope=iplayercategories', # Drama - Scifi fantasy 'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A100002PT004&order=date&scope=iplayercategories', # Comedy - Chat shows 'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200009&order=date&scope=iplayercategories', # Comedy - Satire 'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200011&order=date&scope=iplayercategories', # Comedy - Sketch 'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200047&order=date&scope=iplayercategories', # Factual - Cars & Motors 'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200055&order=date&scope=iplayercategories', # Factual - History 'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200045&order=date&scope=iplayercategories', # Factual - Life stories #'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200058&order=date&scope=iplayercategories', # Factual - Politics 'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200059&order=date&scope=iplayercategories', # Factual - Science, Nature & Environment 'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A200060&order=date&scope=iplayercategories', # Factual - Travel 'http://www.bbc.co.uk/iplayer/categories/?filter=category%3A100003&order=date&start=1&scope=iplayercategories', # Drama ] urls.each {|u| scrape_page(u) } @programs.uniq! @programs.each do |program| program[:body] =<<-EOF