\*?([^<]+)\s*\([^)]+\)(.*)%
# movie name rating start1 start2 times
cur_normal_days = normal_days
cur_bold_days = bold_days & cur_normal_days
cur_star_days = star_days & cur_normal_days
cur_paren_days = paren_days & cur_normal_days
state=2
movie_name = $1
rest=$2
if(rest =~ %r%(.*)(
)(.*)$%)
extra_info=$1
br=$2
time_info=$3
found_break=true
elsif(rest =~ %r%(.*)(DTS|THX)(\s*(?:[0-9].*)?)$%)
extra_info=$1
br=$2
time_info=$3
found_break=true
else
extra_info=rest
br=""
time_info=""
found_break=false
end
had_star = (movie_name =~ /^([^*]*)\s*(?:\*)?\s*$/)
if(had_star)
movie_name = $1
end
movie_name = title_case(movie_name)
if(found_break)
raw_time_data = time_info
else
raw_time_data = ""
end
end
when 2
emit_movie=false
if(found_break)
case line
when %r|^(.+)
\s*$|
print "extra data foundbr\n" if ($debug)
raw_time_data += $1+","
when /^\s*$/
emit_movie=true
when %r|^(.+)\s*$|
print "extra data found\n" if ($debug)
raw_time_data += $1
end
else
case line
when %r|^.*
(.*)
\s*$|
print "extra data foundbr afb\n" if ($debug)
raw_time_data += $1+","
when %r|^.*
(.*)\s*$|
print "extra data found afb\n" if ($debug)
found_break=true
raw_time_data += $1
when /^\s*$/
emit_movie=true
end
end
if (emit_movie)
state = 1
raw_time_data = " "+raw_time_data
print "raw_time_data |"+raw_time_data+"|\n" if($debug)
raw_time_data.gsub!(%r|(<[^>]*>)|," \\1 ")
#print "raw_time_data |"+raw_time_data+"|\n" if($debug)
#raw_time_data.gsub!(Regexp.new("(\[^<]*"+$month_regexp.source+"[^<]*)<\b>\) "),",DATES:\\1,")
#print "raw_time_data |"+raw_time_data+"|\n" if($debug)
#raw_time_data.gsub!(%r|\(([^)]+-[^)]+)\) |,",DATES:\\1,")
print "raw_time_data |"+raw_time_data+"|\n" if($debug)
raw_time_data.gsub!(%r|([^ /]+/[^-]+-[^/]+/[^) ]+) |,",DATES:\\1,")
print "raw_time_data |"+raw_time_data+"|\n" if($debug)
raw_time_data.gsub!(%r|(?!,DATES:)([A-Za-z]+-[A-Za-z]+) |,",DAYS:\\1,")
print "raw_time_data |"+raw_time_data+"|\n" if($debug)
raw_time_data.gsub!(%r|(?!,DATES:)\(([A-Za-z]+(?:-[A-Za-z]+)?)\)|,",DAYS:\\1,")
print "raw_time_data |"+raw_time_data+"|\n" if($debug)
raw_time_data.gsub!(%r|(?!,DATES:)\(([A-Za-z]+\s*&\s*[A-Za-z]+)\)|,",DAYS2:\\1,")
print "raw_time_data |"+raw_time_data+"|\n" if($debug)
raw_time_data.gsub!(%r|?br>|,"")
print "raw_time_data |"+raw_time_data+"|\n" if($debug)
raw_times = raw_time_data.split(',')
raw_times.reject! {|rt| rt=~/^\s*$/}
raw_times = raw_times.collect {|rt|
rt.split('<')
}
raw_times.flatten!
raw_times = raw_times.collect {|rt|
rt.split('>')
}
raw_times.flatten!
raw_times = raw_times.collect {|rt|
(" "+rt+" ").split('(').join("|(|").split("|")
}
raw_times.flatten!
raw_times = raw_times.collect {|rt|
(" "+rt+" ").split(')').join("|)|").split("|")
}
raw_times.flatten!
raw_times.each {|rt|
rt.strip!
}
raw_times = raw_times.reject {|rt|
rt==""
}
print "raw_times |"+raw_times.join('|')+"|\n" if($debug)
# should now have just times and b|/b stuff
bold = false
paren = false
cts = ConvertTimeState.new()
times = raw_times.collect {|rt|
case rt
when %r|^DATES:(.*)$|
begin
dates = $1.split("-")
date1 = dates[0].split("/")
month1 = date1[0].to_i()
day1 = date1[1].to_i()
date2 = dates[1].split("/")
month2 = date2[0].to_i()
day2 = date2[1].to_i()
day_of_week1 = day_of_week($year,month1,day1)
day_of_week2 = day_of_week($year,month1,day2)
cur_normal_days = (day_of_week1..day_of_week2).to_a()
cur_bold_days = bold_days & cur_normal_days
cur_star_days = star_days & cur_normal_days
cur_paren_days = paren_days & cur_normal_days
cts = ConvertTimeState.new()
nil
end
when %r|^DAYS:(.*)$|
begin
days_of_week = $1.split("-")
if(days_of_week.size < 2)
days_of_week[1] = days_of_week[0]
end
day_of_week1 = days_of_week[0].strip
day_of_week1.upcase!
day_of_week1 = day_of_week1[0,3]
day_of_week1 = $days.index(day_of_week1)
day_of_week2 = days_of_week[1].strip
day_of_week2.upcase!
day_of_week2 = day_of_week2[0,3]
day_of_week2 = $days.index(day_of_week2)
cur_normal_days = (day_of_week1..day_of_week2).to_a()
cur_bold_days = bold_days & cur_normal_days
cur_star_days = star_days & cur_normal_days
cur_paren_days = paren_days & cur_normal_days
cts = ConvertTimeState.new()
nil
end
when %r|^DAYS2:(.*)$|
begin
days_of_week = $1.split("&")
#print "days_of_week |#{days_of_week.join('|')}|\n"
if(days_of_week.size < 2)
days_of_week[1] = days_of_week[0]
end
day_of_week1 = days_of_week[0].strip
day_of_week1.upcase!
day_of_week1 = day_of_week1[0,3]
day_of_week1 = $days.index(day_of_week1)
day_of_week2 = days_of_week[1].strip
day_of_week2.upcase!
day_of_week2 = day_of_week2[0,3]
day_of_week2 = $days.index(day_of_week2)
cur_normal_days = [day_of_week1,day_of_week2]
cur_bold_days = bold_days & cur_normal_days
cur_star_days = star_days & cur_normal_days
cur_paren_days = paren_days & cur_normal_days
cts = ConvertTimeState.new()
nil
end
when %r|^[bB]$|
begin
bold=true
nil
end
when %r|^/[bB]$|
begin
bold=false
nil
end
when %r|^\($|
begin
paren=true
nil
end
when %r|^\)$|
begin
paren=false
nil
end
else
ok_time = (rt =~ /^\s*(?:DTS|THX)?([^*]*)(\*?)\s*$/)
time_str = $1
star = $2
#paren = false
#if(time_str =~ /^\s*\((.*)\)\s*$/)
# paren = true
# time_str = $1
# ok_time = (time_str =~ /^\s*(\d+):?(\d*|OO)\s?([aApP]?[mM]?)\s*$/)
#end
if(!ok_time)
time_str = rt
star=""
#paren=false
end
time = convert_str_to_time_state(time_str,cts)
print "bold #{bold} star #{star} paren #{paren}\n" if ($debug)
if(bold)
day = cur_bold_days
if(day.nil? || day==false)
day = cur_normal_days
time.note = (time.note ? time.note : "") + "ERROR:bold day but no bold day header"
end
else
if(star!="")
day = cur_star_days
if(day.nil? || day==false)
day = cur_normal_days
time.note = (time.note ? time.note : "") + "ERROR:star day but no star day header"
end
elsif(paren)
day = cur_paren_days
if(day.nil? || day==false)
day = cur_normal_days
time.note = (time.note ? time.note : "") + "ERROR:paren day but no paren day header"
end
else
day = cur_normal_days
end
end
MyTimeRaw.new(time,day)
end
}
print "times\n" if($debug)
p times if($debug)
times = times.reject {|g|
g==nil
}
movie_is_new = true
old_movie = theater.movies_raw.find {|movie_raw|
movie_raw.name == movie_name
}
if(old_movie)
old_movie.times_raw += times
movie = old_movie
movie_is_new = false
else
movie = MovieRaw.new
movie.name = movie_name
#movie.times_raw= times
movie.times_raw = times
end
# collapse day sets for times that are equal
movie.times_raw.sort!
new_times = []
last_time = nil
movie.times_raw.each {|tr|
if(!last_time.nil? && last_time.time == tr.time)
last_time.days += tr.days
else
if(last_time)
last_time.days.sort!
new_times.push last_time
end
last_time = tr
end
}
p last_time if($debug)
if(last_time)
last_time.days.sort!
new_times.push last_time
end
print "new times\n" if($debug)
p new_times if($debug)
movie.times_raw = new_times
if(movie_is_new)
theater.movies_raw= theater.movies_raw + [movie];
end
end
when 3
if (line =~ /For the week of ([^,]*)(?:, 200[1-9])?/)
dates = $1.split('-')
if(dates.size == 2)
date = dates[0]
start_date_month = start_date_day = -1
end_date_month = end_date_day = -1
if(date =~ Regexp.new("^\s*("+$month_regexp.source+"\S*)\s+([0-9]+)\s*$"))
#print "$&='#{$&}'\n"
start_date_month = $1.downcase
start_date_month = start_date_month[0,3]
start_date_month.capitalize!
print "month '#{start_date_month}' day '#{$2}'\n" if($debug)
start_date_month = $months.index(start_date_month)
start_date_month+=1 if(start_date_month)
start_date_day = $2.to_i
print "month #{start_date_month} day #{start_date_day}\n" if($debug)
end
date = dates[1]
if(date =~ Regexp.new("^\s*("+$month_regexp.source+"\S*)\s+([0-9]+)\s*$"))
#print "$&='#{$&}'\n"
end_date_month = $1.downcase
end_date_month = end_date_month[0,3]
end_date_month.capitalize!
print "month '#{end_date_month}' day '#{$2}'\n" if($debug)
end_date_month = $months.index(end_date_month)
end_date_month+=1 if(end_date_month)
end_date_day = $2.to_i
print "month #{end_date_month} day #{end_date_day}\n" if($debug)
elsif(date =~ /^\s*([0-9]+)\s*$/)
end_date_month = start_date_month
end_date_day = $1.to_i
print "month #{end_date_month} day #{end_date_day}\n" if($debug)
end
if(start_date_month != -1 && end_date_month != -1)
start_overlap = (start_date_month < $end_month ||
(start_date_month == $end_month &&
start_date_day <= $end_day))
start_before = (start_date_month < $start_month ||
(start_date_month == $start_month &&
start_date_day <= $start_day))
end_overlap = (end_date_month > $start_month ||
(end_date_month == $start_month &&
end_date_day >= $start_day))
end_after = (end_date_month > $end_month ||
(end_date_month == $end_month &&
end_date_day >= $end_day))
if(start_before && end_after)
theater.uptodate = 1
elsif (start_overlap && end_overlap)
theater.uptodate = sprintf("times for %d/%d to %d/%d",
start_date_month,start_date_day,
end_date_month, end_date_day)
start_day_of_week=day_of_week($year,start_date_month,start_date_day)
end_day_of_week =day_of_week($year,end_date_month, end_date_day)
days_total = (start_day_of_week .. end_day_of_week).to_a()
normal_days &= days_total
cur_normal_days = normal_days
cur_bold_days = bold_days & cur_normal_days
cur_star_days = star_days & cur_normal_days
cur_paren_days = paren_days & cur_normal_days
else
theater.uptodate = -1
end
else
# missing start or end date
if(theater.movies_raw.size > 0)
theater.uptodate = "Couldn't parse dates\n"
else
theater.uptodate = -1
end
end
else
# wrong number of dates in range
if(theater.movies_raw.size > 0)
theater.uptodate = "Wrong number of dates\n"
else
theater.uptodate = -1
end
end
state = 0
end
end
end
ensure
data.close
end
# never found dates
if(theater.uptodate.nil?)
if(theater.movies_raw.size > 0)
theater.uptodate = "Couldn't find dates\n"
else
theater.uptodate = -1
end
end
theater
end
# Read times from Yahoo source.
#
# Scans each file in source_files line by line with a small state machine
# and accumulates the movies and show times it finds into one TheaterRaw.
#
# source_files:: list of paths to open; the file at index i is tagged with
#                day number i % 7.
# theater_name:: name compared against the theater name captured in state 2.
#
# Returns the TheaterRaw. Its uptodate field is set from the days collected
# in days_covered:
#   1            when they join to exactly "0,1,2,3,4,5,6" and at least one
#                movie was collected,
#   -1           when they join to that string but no movie was collected,
#                or when no days were collected at all,
#   "[d,d,...]"  (a String) in every other case.
#
# May call exit(2) from state 2 when the theater tag cannot be found.
#
# NOTE(review): several regexp literals below (and the "replace & with &"
# comment) look as if HTML tag text was stripped out of them at some point;
# they are documented here as written -- confirm against a pristine copy.
def read_times_yahoo(source_files,theater_name)
#print "parsing yahoo data from [#{source_files.join(' ')}]\n"
days_covered = []
theater = TheaterRaw.new
theater.movies_raw = []
# Most recently looked-up existing movie (and its times) with the same name
# as the movie currently being parsed; nil when the movie is new.
old_movie = nil
old_times = nil
source_files.each_index {|i|
found_a_tag = false
theater_regexp = nil
source_file=source_files[i]
# Every time parsed from this file is attributed to this single day.
days = [(i)%7]
# keeps track of whether any movies had actual times for this theater
found_real_times = false
#print "parsing yahoo data from #{source_file}\n"
# NOTE(review): Kernel#open is used here, not File.open -- confirm the paths
# are trusted, since Kernel#open gives special meaning to a leading "|".
data = open(source_file);
# Stays nil unless state 3 parses a date inside the $start/$end window.
date_day_of_week = nil
state = 2
begin
times = []
while(line=data.gets)
# chop! removes the last character unconditionally (normally the newline).
line.chop!
print "state #{state} line '#{line}'\n" if ($debug)
# states:
# 0=looking for movie section
# 1=looking for movie
# 2=looking for theater accelerator
# 3=looking for date
case state
when 0
# Wait for the line matching the regexp built in state 2.
if (line =~ theater_regexp)
state=1
times=[]
end
when 1
case line
# NOTE(review): as written this empty pattern matches every line, so the
# movie branch below can never be reached; the literal presumably held the
# markup that ends the movie section -- confirm.
when //
state=0
# $1 becomes the movie name, $2 the raw show-time text for that movie.
when %r|([^<]+).*
.*]* size="-1">(.*)|
state=1
movie_name = $1
raw_time_data = $2
# Turn "Title, The" into "The Title".
if(movie_name=~/(.*), The/)
movie_name="The "+$1
end
movie_name = title_case(movie_name)
# Look for a movie of the same name already collected from an earlier file.
old_movie = theater.movies_raw.find {|m| m.name==movie_name}
old_times = old_movie ? old_movie.times_raw : nil
# Ask the block after this case statement to process raw_time_data.
emit_movie=true
end
when 2
# NOTE(review): this pattern has a single capture group yet $2 is read
# below, and as written it matches every line; it presumably matched a
# link carrying the theater tag ($1) and name ($2) -- confirm.
if(line =~ %r|([^<]*)|)
found_a_tag = true
if(theater_regexp.nil?)
name = $2
if(name == theater_name)
theater_tag = $1
# NOTE(review): theater_tag is never used and the regexp source is an empty
# string; it was presumably built from theater_tag -- confirm.
theater_regexp=Regexp.new("")
state = 3
end
else
state = 3
end
# Give up once at least one tag line has been seen and this second pattern
# matches. NOTE(review): the pattern is empty as written -- confirm what
# markup it was meant to detect.
elsif(line=~%r|| && found_a_tag)
print "Couldn't find tag for theater name '#{theater_name}', aborting\n"
exit(2)
end
when 3
# $1 is the "month day" text before the comma, $2 a year 2001-2009.
if(line =~ %r|]*>([^,]+),\s*(200[1-9])\s*|)
date = $1
year = $2.to_i()
# NOTE(review): inside this double-quoted string "\s" and "\S" are
# string escapes rather than regexp classes -- confirm the pattern that
# actually reaches Regexp.new is the intended one.
if(date =~ Regexp.new("^\s*("+$month_regexp.source+"\S*)\s+([0-9]+)\s*$"))
#print "$&='#{$&}'\n"
# Normalise the month to a capitalised three-letter form for the
# lookup in $months.
date_month = $1.downcase
date_month = date_month[0,3]
date_month.capitalize!
print "month '#{date_month}' day '#{$2}'\n" if($debug)
date_month = $months.index(date_month)
# Convert the 0-based index into a 1-based month number when found.
date_month+=1 if(date_month)
date_day = $2.to_i
print "month #{date_month} day #{date_day}\n" if($debug)
date_day_of_week = day_of_week(year,date_month,date_day)
# Keep the day of week only when the date lies within
# $start_month/$start_day .. $end_month/$end_day (inclusive).
if(($start_month < date_month ||
($start_month == date_month && $start_day <= date_day)) &&
($end_month > date_month ||
($end_month == date_month && $end_day >= date_day)))
print "days_covered #{days_covered.join(' ')}\n" if($debug)
else
date_day_of_week = nil
end
end
state = 0
end
end
# Runs on the same line that set emit_movie in state 1: clean up the raw
# time text, convert it to times and merge them into the theater.
if (emit_movie)
emit_movie=false
state = 1
print "raw_time_data |"+raw_time_data+"|\n" if $debug
# remove html anchors, saving text of link
raw_time_data.gsub!(%r|]*>([^<>]*)|,"\\1")
print "raw_time_data |"+raw_time_data+"|\n" if $debug
# remove non-breakable spaces and parenthesis
raw_time_data.gsub!(/ |\(|\)/,"")
print "raw_time_data |"+raw_time_data+"|\n" if $debug
# replace & with & (a no-op as written; NOTE(review): the pattern was
# presumably the HTML entity for an ampersand -- confirm)
raw_time_data.gsub!(/&/,"&")
print "raw_time_data |"+raw_time_data+"|\n" if $debug
# remove asterisks
#raw_time_data.gsub!(/\*\*\*\*\*/,"")
raw_time_data.gsub!(/\*/,"")
print "raw_time_data |"+raw_time_data+"|\n" if $debug
# remove "Theater has not yet announced times."
raw_time_data.gsub!(/Theater has not yet announced times\./,"")
print "raw_time_data |"+raw_time_data+"|\n" if $debug
# replace html tags with spaces
raw_time_data.gsub!(/<[^>]*>/," ")
print "raw_time_data |"+raw_time_data+"|\n" if $debug
# remove "Starts on weekday, month day Click for weekday's Showtimes (& Tickets)?
raw_time_data.gsub!(/^\s*Starts on .*Click for .* Showtimes\s*(&\s*Tickets)?\s*$/,"")
print "raw_time_data |"+raw_time_data+"|\n" if $debug
# Individual times are comma separated; only exactly-empty pieces are
# dropped (whitespace-only pieces are kept).
raw_times = raw_time_data.split(',')
raw_times = raw_times.reject {|rt|
rt==""
}
print "raw_times |"+raw_times.join('|')+"|\n" if $debug
# should now have just times and b|/b stuff
bold = false
times = []
cts = ConvertTimeState.new()
if(raw_times.size > 0)
found_real_times = true
end
# A time the movie already has gains this file's day; any other time
# becomes a new MyTimeRaw for this day.
raw_times.each {|rt|
time = convert_str_to_time_state(rt,cts)
old_time = old_times ? old_times.find {|t| t.time==time} : nil
if(old_time)
old_time.days += days
old_time.days.sort!
else
times.push MyTimeRaw.new(time,days)
end
}
print "times\n" if($debug)
p times if($debug)
# Register a new movie, or append the new times to the existing one;
# either way the movie's times end up sorted by time.
if(!old_movie)
movie = MovieRaw.new
movie.name = movie_name
movie.times_raw = times
movie.times_raw.sort! {|a,b|
a.time <=> b.time
}
theater.movies_raw= theater.movies_raw + [movie];
else
old_movie.times_raw += times
old_movie.times_raw.sort! {|a,b|
a.time <=> b.time
}
end
end
end
ensure
# Close the file even if parsing raised.
data.close
end
# date_day_of_week may still be nil here (no date line matched, or the date
# was outside the window); nil then joins as an empty entry further down.
if(found_real_times)
days_covered.push date_day_of_week
end
}
# The comparison is on the joined string, so the days must appear exactly
# once each and in the order 0..6 to count as fully covered.
days_covered_str = "["+days_covered.join(',')+"]"
if(days_covered_str == "[0,1,2,3,4,5,6]")
if(theater.movies_raw != [])
theater.uptodate = 1
else
theater.uptodate = -1
end
elsif(days_covered_str == "[]")
theater.uptodate = -1
else
theater.uptodate = days_covered_str
end
theater
end
# Read times from Carmike source.
#
# Builds one file name per entry of $days (source_file_base + lower-cased
# day name + ".html"), parses each file with HTMLTree::XMLParser and merges
# the show times it finds into a single TheaterRaw.
#
# source_file_base:: path prefix shared by the per-day HTML files.
#
# Returns the TheaterRaw. Its uptodate field is set from the days whose
# dates fell inside $start_date..$end_date:
#   1            when they join to exactly "0,1,2,3,4,5,6" and at least one
#                movie was collected,
#   -1           when they join to that string but no movie was collected,
#                or when no days were covered at all,
#   "[d,d,...]"  (a String) in every other case.
#
# Raises InternalError when the date printed in a file falls on a different
# day of the week than the file's position in $days.
def read_times_carmike(source_file_base)
  theater = TheaterRaw.new
  theater.movies_raw = []
  source_files = $days.collect { |day|
    source_file_base + day.downcase + ".html"
  }
  days_covered = []
  source_files.each_index { |i|
    source_file = source_files[i]
    #print "parsing carmike data from #{source_file}\n"
    # Slurp the whole file. The block form closes the handle as soon as the
    # read finishes instead of leaving one descriptor open per day file.
    data = File.open(source_file) { |f| f.gets(nil) }
    parser = HTMLTree::XMLParser.new(false, true)
    parser.feed(data)
    parser.close()
    doc = parser.document
    #doc.write($stdout,1); print "\n"
    # Everything of interest hangs off the parent of the "Showtimes" anchor.
    movie_root = doc.root.elements["//a[@name='Showtimes']"]
    movie_root = movie_root.parent
    #movie_root.write($stdout,1); print "\n"
    date_str = get_all_text(movie_root.elements["./div/strong"])
    print "date #{date_str}\n" if($debug)
    date = MyDate.new(date_str)
    p date if($debug)
    print date.date.to_s,"\n" if($debug)
    day_of_week = date.day_of_week
    # The i-th file is expected to describe the i-th day of the week.
    if(day_of_week != i)
      raise InternalError.new("day of week #{day_of_week} and file name do not match #{source_file} (#{i})")
    end
    days = [day_of_week]
    # Files dated outside the requested window contribute nothing.
    if($start_date <= date.date && date.date <= $end_date)
      times_root = movie_root.elements["table"]
      begin; times_root.write($stdout,1); print "\n"; end if($debug)
      times_root.each_element("./tr") { |tr|
        # Only rows that carry a movie title are processed.
        movie_name_el = tr.elements["./td/table//strong"]
        next if movie_name_el.nil?

        movie_name = get_all_text(movie_name_el)
        movie_name = title_case(movie_name)
        # Look for a movie of the same name collected from an earlier file.
        old_movie = theater.movies_raw.find { |m| m.name == movie_name }
        old_times = old_movie ? old_movie.times_raw : nil
        movie_times_el = tr.get_elements("./td/font")[1]
        movie_times_raw = get_all_text_arr(movie_times_el)
        print "movie_name #{movie_name}\n" if($debug)
        print "movie_times_raw #{movie_times_raw.join('|')}\n" if($debug)
        times = []
        cts = ConvertTimeState.new()
        # A time the movie already has gains this file's day; any other
        # time becomes a new MyTimeRaw for this day.
        movie_times_raw.each { |rt|
          time = convert_str_to_time_state(rt, cts)
          old_time = old_times ? old_times.find { |t| t.time == time } : nil
          if(old_time)
            old_time.days += days
            old_time.days.sort!
          else
            times.push MyTimeRaw.new(time, days)
          end
        }
        # Register a new movie, or append to the existing one; either way
        # the movie's times end up sorted by time.
        if(!old_movie)
          movie = MovieRaw.new
          movie.name = movie_name
          movie.times_raw = times
          movie.times_raw.sort! { |a, b| a.time <=> b.time }
          theater.movies_raw += [movie]
        else
          old_movie.times_raw += times
          old_movie.times_raw.sort! { |a, b| a.time <=> b.time }
        end
      }
      days_covered += days
    end
  }
  # The comparison is on the joined string, so the days must appear exactly
  # once each and in the order 0..6 to count as fully covered.
  days_covered_str = "[" + days_covered.join(',') + "]"
  if(days_covered_str == "[0,1,2,3,4,5,6]")
    if(theater.movies_raw != [])
      theater.uptodate = 1
    else
      theater.uptodate = -1
    end
  elsif(days_covered_str == "[]")
    theater.uptodate = -1
  else
    theater.uptodate = days_covered_str
  end
  theater
end
#def find_seperate_root(el1,el2)
# els1 = []
# while(el1!=nil)
# els1.push el1
# el1 = el1.parent
# end
# els1.reverse!
#
# els2 = []
# while(el2!=nil)
# els2.push el2
# el2 = el2.parent
# end
# els2.reverse!
#
# el_idx=0
# while(el_idx