#!/usr/bin/ruby #$DEBUG = true require 'html/tree' require 'html/xmltree' require 'rexml/document' require 'date' require 'parsedate' $debug=false $start_month= 1 $start_day = 17 $end_month = 1 $end_day = 23 $year=2003 $start_date = Date.new($year,$start_month,$start_day) $end_date = Date.new($year,$end_month, $end_day ) $CUR_YEAR = 2002 # length of shortest movie name that doesn't fit in the space usually allotted $LONG_MOVIE_NAME = 33 # length of shortest movie times that doesn't fit in the space usually allotted $LONG_MOVIE_TIMES = 83 # directory that script was run from $base_dir = File.dirname($0) def usage(exit_val) print "grab_data.rb <--grab|----format>\n" exit exit_val end $MODE_GRAB = 0 $MODE_FORMAT = 1 $mode = ARGV.shift if($mode == "--grab") $mode = $MODE_GRAB elsif($mode == "--format") $mode = $MODE_FORMAT else usage(1) end $movie_name_translations = Hash.new # collapse list by replacing consecutive values with a range of values def collapse_ranges(arr) arr_brief=[] i=0 while(i attr_accessor :weight # real def initialize() @sort = true end def Theater::make_from_intermediate_data(data) data = data.split("\n") line = data[0] if(line !~ /(.*)\[(.*)\]/) raise ArgumentError.new("unable to find theater name and source in '#{line}'") end theater = Theater.new() theater.sort = false theater.uptodate = 1 theater.name = $1 theater.source = $2 theater.movies = [] theater.sort = true if(theater.name != "CMU") line_idx=1 notes = [] while(line_idx < data.size) line = data[line_idx] case line when /^note:(.*)/ notes.push $1 when /|/ theater.movies.push Movie.make_from_intermediate_data(line,notes) notes = [] end line_idx += 1 end theater end def movies_from_raw(movies_raw) @movies = movies_raw.collect \ { |movie_raw| movie = Movie.new movie.name = movie_raw.name movie.times = movie_raw.times_raw.collect \ { |time_raw| time = MovieTime.new time.time = time_raw.time time.day_class = match_day_class(time_raw.days) time } movie.notes = movie_raw.notes movie } end def print_formatted(out) printf(out,"===#{@name}[#{@source}]\n") if(@uptodate.is_a?(String)) printf(out,"Up to date status:#{@uptodate}\n") else if(@uptodate == -1) printf(out,"Source not up to date\n") return end end if(@sort) ms = movies.sort {|ma,mb| ma.name =~ /^(The |A )?(.*)$/ sort_name_a = $2 mb.name =~ /^(The |A )?(.*)$/ sort_name_b = $2 sort_name_a <=> sort_name_b } else ms = movies end ms.each {|movie| movie.print_formatted(out) } end def print_intermediate(out) printf(out,"===#{@name}[#{@source}]\n") if(@uptodate.is_a?(String)) printf(out,"Uptodate:#{@uptodate}\n") else if(@uptodate == -1) printf(out,"Uptodate:Source not up to date\n") return end end if(@sort) ms = movies.sort {|ma,mb| ma.name =~ /^(The |A )?(.*)$/ sort_name_a = $2 mb.name =~ /^(The |A )?(.*)$/ sort_name_b = $2 sort_name_a <=> sort_name_b } else ms = movies end ms.each {|movie| if(movie.notes && movie.notes.size > 0) p movie.notes if($debug) printf(out,"note:%s\n",movie.notes.join("\nnote:")) end printf(out,"%s|",movie.name) count = movie.times.size symbol_count = 0 spec_count = 0 mtf=movie.times.collect {|time| time.intermediate_s() } printf(out,"%s\n",mtf.join(',')) } end def print_movies2_ht(out) if(@movies.size > 0) ms = [] if(@sort) ms = movies.sort {|ma,mb| ma.name =~ /^(The |A )?(.*)$/ sort_name_a = $2 mb.name =~ /^(The |A )?(.*)$/ sort_name_b = $2 sort_name_a <=> sort_name_b } else ms = movies end ms.each {|movie| movie.print_movies2_ht(out)} else printf(out,"No movies this week.\n") end end def create_xml() xtheater = REXML::Element.new("theater") xtheater.attributes["id"] = @name xtheater.attributes["src"] = @source if(@uptodate.is_a?(String)) xuptodate = REXML::Element.new("uptodate") xuptodate.text = @uptodate xtheater.elements << xuptodate else if(@uptodate == -1) xuptodate = REXML::Element.new("uptodate") xuptodate.text = "Source not up to date" xtheater.elements << xuptodate return xtheater end end if(@sort) ms = movies.sort {|ma,mb| ma.name =~ /^(The |A )?(.*)$/ sort_name_a = $2 mb.name =~ /^(The |A )?(.*)$/ sort_name_b = $2 sort_name_a <=> sort_name_b } else ms = movies end ms.each {|movie| xmovie = REXML::Element.new("movie") if(movie.notes && movie.notes.size > 0) p movie.notes if($debug) xnotes = REXML::Element.new("notes") xnotes.text = movie.notes.join("\n") xmovie.elements << xnotes end xmovie.attributes["id"]=movie.name count = movie.times.size symbol_count = 0 spec_count = 0 xtimes = REXML::Element.new("times") times_arr = movie.times.collect {|mtime| mtime.xml_s } xtimes.text = times_arr.join(",") xmovie.elements << xtimes xtheater.elements << xmovie } xtheater end end class Movie attr_accessor :name # string attr_accessor :times # array attr_accessor :notes # array def initialize() @notes = [] end def Movie::make_from_intermediate_data(line,notes) if(line !~ /([^|]*)\|([^|]*)/) raise ArgumentError.new("unable to find movie name and times in '#{line}'\n") end movie_name = $1 times = $2 if(times.nil?) times = "" end movie = Movie.new() movie.notes = notes movie.name = movie_name movie.times = [] times = times.split(/,/) movie.times = times.collect {|time| MovieTime.make_from_intermediate_data(time) } movie.times.compact! movie end def print_formatted(out) if(@notes && @notes.size > 0) p @notes if($debug) printf(out,"%s\n",@notes.join("\n")) end printf(out,"$|%s|RAMR|",@name) count = @times.size symbol_count = 0 spec_count = 0 @times.each {|time| if(time.day_class.class_name=="special") spec_count+=1 elsif(time.day_class.class_name!="") symbol_count+=1 end } #print "count #{count} symbols #{symbol_count} spec #{spec_count}\n" day_first_format = (symbol_count + 2*spec_count >= count) if(day_first_format) # calculate day first format, pick prettier format day_classes = [[0,1,2,3,4,5,6]] @times.each {|time| time_days = time.day_class.days new_day_classes = [] day_classes.each {|dc| overlap = time_days & dc if(overlap.size > 0 && overlap != dc) noverlap = dc - overlap new_day_classes += [noverlap, overlap] else new_day_classes.push dc end } day_classes = new_day_classes } day_classes.sort! {|dca,dcb| dca[0]<=>dcb[0] } #print "day_classes #{(day_classes.collect{|dc| dc.join(',')}).join(':')}\n" day_times = day_classes.collect {|day_class| dc_times = [] @times.each {|time| if((time.day_class.days & day_class).size > 0) dc_times.push time.time end } if(dc_times.size > 0) [day_class, dc_times] else nil end } day_times.compact! out_str = (day_times.collect {|dt| DayClass::days_to_str(dt[0]) + " " + \ (dt[1].collect {|t| t.movie_s()}).join(', ') }).join("\n^|")+"\n" end mtf=@times.collect {|time| time.to_s() } if(day_first_format) printf(out,"%s",out_str) else printf(out,"%s\n",mtf.join(', ')) end end def print_movies2_ht(out) notes_str = "" if(@notes && @notes.size > 0) p @notes if($debug) notes_str = sprintf("[%s]",@notes.join("][")) end if(@name.size < $LONG_MOVIE_NAME) printf(out,"$|%s|RAMR|",@name) else printf(out,"$|%s|RAMR|\n^|",@name) end count = @times.size symbol_count = 0 spec_count = 0 @times.each {|time| if(time.day_class.class_name=="special") spec_count+=1 elsif(time.day_class.class_name!="") symbol_count+=1 end } #print "count #{count} symbols #{symbol_count} spec #{spec_count}\n" day_first_format = (symbol_count + 2*spec_count >= count) if(day_first_format) # calculate day first format, pick prettier format day_classes = [[0,1,2,3,4,5,6]] @times.each {|time| time_days = time.day_class.days new_day_classes = [] day_classes.each {|dc| overlap = time_days & dc if(overlap.size > 0 && overlap != dc) noverlap = dc - overlap new_day_classes += [noverlap, overlap] else new_day_classes.push dc end } day_classes = new_day_classes } day_classes.sort! {|dca,dcb| dca[0]<=>dcb[0] } #print "day_classes #{(day_classes.collect{|dc| dc.join(',')}).join(':')}\n" day_times = day_classes.collect {|day_class| dc_times = [] @times.each {|time| if((time.day_class.days & day_class).size > 0) dc_times.push time.time end } if(dc_times.size > 0) [day_class, dc_times] else nil end } day_times.compact! out_str = (day_times.collect {|dt| DayClass::days_to_str(dt[0]) + " " + \ (dt[1].collect {|t| t.movie_s()}).join(', ') }).join(" #{notes_str}\n^|")+" #{notes_str}\n" end mtf=@times.collect {|time| time.to_s() } if(day_first_format) printf(out,"%s",out_str) else mtf_str = "" mtf_idx = 0 while(mtf_idx < mtf.size) new_mtf_str = nil if(mtf_str == "") new_mtf_str = mtf[mtf_idx] else new_mtf_str = mtf_str + ", " + mtf[mtf_idx] end if(new_mtf_str.size >= $LONG_MOVIE_TIMES-notes_str.size) printf(out,"%s #{notes_str}\n",mtf_str) mtf_str = "^|"+mtf[mtf_idx] else mtf_str = new_mtf_str end mtf_idx += 1 end if(mtf_str != "^|") printf(out,"%s #{notes_str}\n",mtf_str) end end end # attr_accessor :link end class MyTime attr_accessor :hour # int, 24 hour style number attr_accessor :min # int attr_accessor :note # string def MyTime::make_from_s(t,pm_start=2,pm_end=11) note = nil good_time = ok_time = (t =~ /^\s*(\d+)[:;]?(\d*|OO)\s?([aApP]?[mM]?)\s*$/) if(!ok_time) ok_time = (t =~ /(\d+)[:;]?(\d*|OO)\s?([aApP]?[mM]?)/) end if(ok_time) print "$1 #{$1} $2 #{$2} $3 #{$3}\n" if ($debug) hour = $1.to_i() pm = false if(hour==0 || hour>12) pm = hour>12 hour %= 12 else hour %= 12 pm = (pm_start <= pm_end) ? (pm_start..pm_end)===hour : !((pm_end+1..pm_start-1)===hour); end min = $2 if(min == "OO") min = "00" # protect against the bad typist end min = (min!="" ? $2.to_i() : 0) ampm_des = $3 if(ampm_des!=nil) if(ampm_des=~/[Aa][Mm]?/) pm = false; elsif(ampm_des=~/[Pp][Mm]?/) pm = true end end if(pm) hour = (hour % 12) + 12; else hour = hour % 12; end if(min % 5 != 0) note=sprintf("ERROR:suspicious time:'%s'",t) #print(note,"\n"); min -= min % 5 end if(!good_time) note=sprintf("ERROR:partial parse: of '%s' as %d:%02d",t,hour,min) #print (note,"\n") end else note = sprintf("ERROR:bad time:%s",t) #print(note,"\n") hour=6 min=99 end time=MyTime.new(hour,min) time.note = note time end def MyTime::make_from_intermediate_data(time_str) notes = [] # remove parse comments while(time_str =~ /(.*)\{([^{])*\}(.*)/) time_str = $1 + $3 end # collect edit comments while(time_str =~ /(.*)\[([^[]*)\](.*)/) notes.push $2 time_str = $1 + $3 end time = MyTime::make_from_s(time_str) if(!time.note.nil?) return nil end if(notes && notes.size>0) time.note = notes.join('][') else time.note = nil end time end def initialize(hour,min) @hour = hour @min = min @note = nil end def ==(x) @hour==x.hour && \ @min ==x.min && \ @note==x.note end def <=>(x) res = (((@hour+24-4)%24)<=>((x.hour+24-4)%24)) return res if(res!=0) res = @min <=> x.min return res if(res!=0) if(@note.nil?) if(x.note) return -1 else return 0 end else if(x.note) return @note<=>x.note else return 1 end end end def to_s @hour.to_s()+":"+sprintf("%02d",@min.to_s())+(@note.nil? ? "" : @note) end def movie_s ((@hour+11)%12+1).to_s()+":"+sprintf("%02d",@min.to_s())+\ (@note ? "["+@note+"]" : "") #hour.to_s()+":"+sprintf("%02d",min.to_s()) end def intermediate_s ((@hour+11)%12+1).to_s()+":"+sprintf("%02d",@min.to_s()) + (@hour<12 ? "a" : "p") + (@note ? "{"+@note+"}" : "") end def xml_s ext = "a" eff_hour = @hour if(@hour >= 12) ext = "p" eff_hour -= 12 end if(eff_hour == 0) eff_hour = 12 end sprintf("%d:%02d%s",eff_hour,@min,ext) end end class MovieTime attr_accessor :time # MyTime attr_accessor :day_class # DayClass def MovieTime::make_from_intermediate_data(time_str) movie_time = MovieTime.new() time_raw = MyTimeRaw::make_from_intermediate_data(time_str) if(time_raw.nil?) return nil end movie_time.time = time_raw.time movie_time.day_class = match_day_class(time_raw.days) movie_time end def ==(x) (time==x.time && day_class==x.day_class) end def <=>(x) res = (time <=> x.time) if(res != 0) res else day_class.days <=> x.day_class.days end end def to_s @time.movie_s()+@day_class.to_s() end def intermediate_s if(day_class.days == [0,1,2,3,4,5,6]) @time.intermediate_s() else @time.intermediate_s() + "(" + day_class.days.collect{|d| $day_abbrev[d]}.join('') + ")" end end def xml_s day_str = "" if(day_class.days != [0,1,2,3,4,5,6]) day_str = "[" + (day_class.days.collect {|dn| $day_abbrev[dn]}).join('') + "]" end note_str = "" if(!@time.note.nil?) note_str = "{"+@time.note+"}" end @time.xml_s()+day_str+note_str end end class DayClass attr_accessor :class_name attr_accessor :days attr_accessor :symbol def ==(x) (class_name == x.class_name && \ days == x.days && \ symbol = x.symbol) end # convert list of days to a string suitable for the web page def DayClass::days_to_str(days) days_brief = collapse_ranges(days) except_days=[0,1,2,3,4,5,6]-days except_days_brief = collapse_ranges(except_days) if(days_brief.size <= except_days_brief.size) day_str= (days_brief.collect {|d| if(d.is_a?(Range)) $days[d.first]+"-"+$days[d.last] else $days[d] end }).join(',') else day_str="except "+ (except_days_brief.collect {|d| if(d.is_a?(Range)) $days[d.first]+"-"+$days[d.last] else $days[d] end }).join(',') end day_str end def to_s if(@class_name!="special") @symbol else "(" + DayClass::days_to_str(@days) + ")" end end end class TheaterRaw attr_accessor :name # string attr_accessor :movies_raw # array attr_accessor :uptodate # int/string (1=yes, string=partial/unknown, -1=no) def initialize() @uptodate = "unknown dates" end def debug_print @movies_raw.each { |m| m.debug_print() } end end class MovieRaw attr_accessor :name # string attr_accessor :times_raw # array attr_accessor :notes # array def initialize @notes = [] end def debug_print print "movie name '#{@name}'\n" @times_raw.each { |t| t.debug_print } end end class MyTimeRaw attr_accessor :time # class MyTime attr_accessor :days # array def initialize(time,days) @time = time @days = days end def MyTimeRaw::make_from_intermediate_data(time_str) days = [0,1,2,3,4,5,6] if(time_str =~ /(.*)\((.*)\)(.*)/) days_str = $2 time_str = $1 + $3 days = [] days_str.each_byte {|c| days.push $day_abbrev.index(c.chr) } end time = MyTime::make_from_intermediate_data(time_str) if(time.nil?) return nil end MyTimeRaw.new(time,days) end def <=>(x) res = (time <=> x.time) if(res != 0) res else days <=> days end end def debug_print print " time ",@time," days ",@days,"\n" end end # state for conversion of a series of strings to times class ConvertTimeState # the first hour that should be considered a PM (0-11) attr_accessor :pm_start # the last hour that should be considered a PM (0-11) attr_accessor :pm_end def initialize() @pm_start=0 @pm_end=9 end end # convert a string to a time and update the conversion state appropriately def convert_str_to_time_state(t,cts) time=MyTime::make_from_s(t,cts.pm_start,cts.pm_end) if((12..23)===time.hour) # last was PM cts.pm_start = time.hour-12 cts.pm_end = 11 else #last was AM end time end $day_classes=[] day_class = DayClass.new day_class.class_name = "Late show" day_class.days = [FRI,SAT] day_class.symbol = "*" $day_classes += [day_class] day_class = DayClass.new day_class.class_name = "Matine" day_class.days = [SAT,SUN] day_class.symbol = "+" $day_classes += [day_class] #day_class = DayClass.new #day_class.class_name = "Late show special" #day_class.days = [FRI,SAT,SUN] #day_class.symbol = "%" #$day_classes += [day_class] # #day_class = DayClass.new #day_class.class_name = "Matine special" #day_class.days = [SAT,SUN,MON] #day_class.symbol = "=" #$day_classes += [day_class] day_class = DayClass.new day_class.class_name = "" day_class.days = [0,1,2,3,4,5,6] day_class.symbol = "" $day_classes += [day_class] # Find the day class that matches the array of days # Create a new day class if necessary def match_day_class(days) $day_classes.each {|dc| if(dc.days==days) return dc end } dc=DayClass.new dc.class_name="special" dc.days=days dc.symbol="" $day_classes += [dc] return dc end # read movie times from the data for CMU off of afs def read_times_cmu theater = TheaterRaw.new theater.movies_raw = [] data = open("times_cmu_cmu.txt"); first = true movie_start = times_start = loc_start = 0 begin while(line=data.gets) line.chop! if(first) line =~ /(\d{2})-(\d{2}) ([^|]+)\|([^|]+)\|([^|]+)/ print [$1, $2, $3, $4, $5].join(':'),"\n" if ($debug) movie_start = 5+1 times_start = movie_start + $3.length + 1 loc_start = times_start + $4.length + 1 first = false else date = line[0..(movie_start-1)] month=date[0..1].to_i day =date[3..4].to_i if(!(month > $start_month || (month == $start_month && day >= $start_day))) next end if(month > $end_month || (month == $end_month && day > $end_day)) next end day = day_of_week($year,month,day) name = line[movie_start..(times_start-1)].strip times = line[times_start..(loc_start-1) ].split() times = times.collect {|t| MyTime::make_from_s(t) } #times.each {|t| if(t !~ /:/) # t.concat(":00") # end} #times.each {|t| t.concat("pm")} loc = line[loc_start..-1].strip if ($debug) print line,"\n","date #{date} #{$days[day]} name #{name} loc #{loc}\n" print "times"+ (times.collect {|t| t.to_s() }).join(' ') end movie = MovieRaw.new movie.name = name #movie.times_raw= times movie.times_raw= times.collect {|t| MyTimeRaw.new(t,[day])} theater.movies_raw= theater.movies_raw + [movie]; end end ensure data.close end theater.uptodate = 1 theater end # strings that should be lowercased in titles $lower_strings=["and","the","of","in","is","as","a","an","with","to"] # Title case the given string (destructive) # Also canonicalizes the movie name # Returns new string def title_case(string) string.gsub!("(.*), [tT]he","The \\1") string.gsub!("(.*), [aA](n)?","A\\2 \\1") string_parts=string.split(' ') string_parts.each {|sp| sp.downcase! sp[0..0]=sp[0..0].upcase } if(string_parts.length > 1) string_parts[1..-1].each {|sp| if($lower_strings.find {|s| s == sp.downcase }) sp.downcase! end } end last_colon = false string_parts.each {|sp| if(last_colon) sp[0..0]=sp[0..0].upcase end last_colon = (sp =~ /:$/) } name=string_parts.join(' ') new_name=$movie_name_translations[name] if(!new_name.nil?) if($debug) print "translating '#{name}' to '#{new_name}'\n" end name=new_name end name end # Read times from a City Paper source def read_times_cp(source_file,bold_days_override,star_days_override,paren_days_override) bold_days = bold_days_override star_days = star_days_override paren_days = paren_days_override #print "parsing cp data from #{source_file}\n" theater = TheaterRaw.new theater.movies_raw = [] theater.uptodate = nil data = open(source_file); state = 3 normal_days = [0,1,2,3,4,5,6] cur_normal_days = [0,1,2,3,4,5,6] begin times = [] found_break=false while(line=data.gets) line.chop! print "state #{state} fb #{found_break} line '#{line}'\n" if ($debug) # states: # 0=looking for movie section # 1=looking for movie # 2=looking for additional movie times # 3=looking for play dates case state when 0 new_days = false case line when %r%(?:Times|Late times|Late shows) in (.*) (?:(?:are\s*(?:matinees)?\s*(?:showing|for))|show(?:\s*on)?) ([^<]*)% type = $1 days = $2 days.gsub!(/only/,"") case type when /bold/i type = "bold" when /\(\s?\)/ type = "paren" end new_days = true when %r%\*\s*(?:Late|Matinees) (?:times for|shows|show) ([^<]*)% type = "star" days = $1 days.gsub!(/only/,"") new_days = true when // state=1 times=[] end if(new_days) days = days.split("&") days = days.collect {|day| day.split(",")} days.flatten! days = days.collect {|day| day.split("/")} days.flatten! days = days.collect {|day| day.split(" and ")} days.flatten! days.each {|day| day.strip!} print "days=|#{days.join('|')}|\n" if($debug) days = days.collect {|day| aday = [day] aday = aday.collect {|ad| ad.split("-")} aday.flatten! aday = aday.collect {|ad| ad.split("thru")} aday.flatten! aday = aday.collect {|ad| ad.split("through")} aday.flatten! aday.each {|ad| ad.strip!} print "aday=|#{aday.join('|')}|\n" if($debug) aday = aday.collect {|ad| ad.upcase! ad = ad[0,3] ad = $days.index(ad) } print "aday = [#{aday.join(',')}], aday[0]=#{aday[0]}, aday[-1]=#{aday[-1]}\n" if($debug) aday = (aday[0]..aday[-1]).to_a() } days.flatten! if(type == "bold") if(bold_days_override.nil?) bold_days = days cur_bold_days = bold_days & cur_normal_days print "changing bold days to [#{bold_days.join(',')}]\n" if($debug) end elsif(type == "paren") if(paren_days_override.nil?) paren_days = days cur_paren_days = paren_days & cur_normal_days print "changing paren days to [#{paren_days.join(',')}]\n" if($debug) end elsif(type == "star") if(star_days_override.nil?) star_days = days cur_star_days = star_days & cur_normal_days print "changing star days to [#{star_days.join(',')}]\n" if($debug) end end end when 1 case line when %r%

\*?([^<]+)\s*\([^)]+\)(.*)% # movie name rating start1 start2 times cur_normal_days = normal_days cur_bold_days = bold_days & cur_normal_days cur_star_days = star_days & cur_normal_days cur_paren_days = paren_days & cur_normal_days state=2 movie_name = $1 rest=$2 if(rest =~ %r%(.*)(
)(.*)$%) extra_info=$1 br=$2 time_info=$3 found_break=true elsif(rest =~ %r%(.*)(DTS|THX)(\s*(?:[0-9].*)?)$%) extra_info=$1 br=$2 time_info=$3 found_break=true else extra_info=rest br="" time_info="" found_break=false end had_star = (movie_name =~ /^([^*]*)\s*(?:\*)?\s*$/) if(had_star) movie_name = $1 end movie_name = title_case(movie_name) if(found_break) raw_time_data = time_info else raw_time_data = "" end end when 2 emit_movie=false if(found_break) case line when %r|^(.+)
\s*$| print "extra data foundbr\n" if ($debug) raw_time_data += $1+"," when /^\s*$/ emit_movie=true when %r|^(.+)\s*$| print "extra data found\n" if ($debug) raw_time_data += $1 end else case line when %r|^.*
(.*)
\s*$| print "extra data foundbr afb\n" if ($debug) raw_time_data += $1+"," when %r|^.*
(.*)\s*$| print "extra data found afb\n" if ($debug) found_break=true raw_time_data += $1 when /^\s*$/ emit_movie=true end end if (emit_movie) state = 1 raw_time_data = " "+raw_time_data print "raw_time_data |"+raw_time_data+"|\n" if($debug) raw_time_data.gsub!(%r|(<[^>]*>)|," \\1 ") #print "raw_time_data |"+raw_time_data+"|\n" if($debug) #raw_time_data.gsub!(Regexp.new("(\[^<]*"+$month_regexp.source+"[^<]*)<\b>\) "),",DATES:\\1,") #print "raw_time_data |"+raw_time_data+"|\n" if($debug) #raw_time_data.gsub!(%r|\(([^)]+-[^)]+)\) |,",DATES:\\1,") print "raw_time_data |"+raw_time_data+"|\n" if($debug) raw_time_data.gsub!(%r|([^ /]+/[^-]+-[^/]+/[^) ]+) |,",DATES:\\1,") print "raw_time_data |"+raw_time_data+"|\n" if($debug) raw_time_data.gsub!(%r|(?!,DATES:)([A-Za-z]+-[A-Za-z]+) |,",DAYS:\\1,") print "raw_time_data |"+raw_time_data+"|\n" if($debug) raw_time_data.gsub!(%r|(?!,DATES:)\(([A-Za-z]+(?:-[A-Za-z]+)?)\)|,",DAYS:\\1,") print "raw_time_data |"+raw_time_data+"|\n" if($debug) raw_time_data.gsub!(%r|(?!,DATES:)\(([A-Za-z]+\s*&\s*[A-Za-z]+)\)|,",DAYS2:\\1,") print "raw_time_data |"+raw_time_data+"|\n" if($debug) raw_time_data.gsub!(%r||,"") print "raw_time_data |"+raw_time_data+"|\n" if($debug) raw_times = raw_time_data.split(',') raw_times.reject! {|rt| rt=~/^\s*$/} raw_times = raw_times.collect {|rt| rt.split('<') } raw_times.flatten! raw_times = raw_times.collect {|rt| rt.split('>') } raw_times.flatten! raw_times = raw_times.collect {|rt| (" "+rt+" ").split('(').join("|(|").split("|") } raw_times.flatten! raw_times = raw_times.collect {|rt| (" "+rt+" ").split(')').join("|)|").split("|") } raw_times.flatten! raw_times.each {|rt| rt.strip! } raw_times = raw_times.reject {|rt| rt=="" } print "raw_times |"+raw_times.join('|')+"|\n" if($debug) # should now have just times and b|/b stuff bold = false paren = false cts = ConvertTimeState.new() times = raw_times.collect {|rt| case rt when %r|^DATES:(.*)$| begin dates = $1.split("-") date1 = dates[0].split("/") month1 = date1[0].to_i() day1 = date1[1].to_i() date2 = dates[1].split("/") month2 = date2[0].to_i() day2 = date2[1].to_i() day_of_week1 = day_of_week($year,month1,day1) day_of_week2 = day_of_week($year,month1,day2) cur_normal_days = (day_of_week1..day_of_week2).to_a() cur_bold_days = bold_days & cur_normal_days cur_star_days = star_days & cur_normal_days cur_paren_days = paren_days & cur_normal_days cts = ConvertTimeState.new() nil end when %r|^DAYS:(.*)$| begin days_of_week = $1.split("-") if(days_of_week.size < 2) days_of_week[1] = days_of_week[0] end day_of_week1 = days_of_week[0].strip day_of_week1.upcase! day_of_week1 = day_of_week1[0,3] day_of_week1 = $days.index(day_of_week1) day_of_week2 = days_of_week[1].strip day_of_week2.upcase! day_of_week2 = day_of_week2[0,3] day_of_week2 = $days.index(day_of_week2) cur_normal_days = (day_of_week1..day_of_week2).to_a() cur_bold_days = bold_days & cur_normal_days cur_star_days = star_days & cur_normal_days cur_paren_days = paren_days & cur_normal_days cts = ConvertTimeState.new() nil end when %r|^DAYS2:(.*)$| begin days_of_week = $1.split("&") #print "days_of_week |#{days_of_week.join('|')}|\n" if(days_of_week.size < 2) days_of_week[1] = days_of_week[0] end day_of_week1 = days_of_week[0].strip day_of_week1.upcase! day_of_week1 = day_of_week1[0,3] day_of_week1 = $days.index(day_of_week1) day_of_week2 = days_of_week[1].strip day_of_week2.upcase! day_of_week2 = day_of_week2[0,3] day_of_week2 = $days.index(day_of_week2) cur_normal_days = [day_of_week1,day_of_week2] cur_bold_days = bold_days & cur_normal_days cur_star_days = star_days & cur_normal_days cur_paren_days = paren_days & cur_normal_days cts = ConvertTimeState.new() nil end when %r|^[bB]$| begin bold=true nil end when %r|^/[bB]$| begin bold=false nil end when %r|^\($| begin paren=true nil end when %r|^\)$| begin paren=false nil end else ok_time = (rt =~ /^\s*(?:DTS|THX)?([^*]*)(\*?)\s*$/) time_str = $1 star = $2 #paren = false #if(time_str =~ /^\s*\((.*)\)\s*$/) # paren = true # time_str = $1 # ok_time = (time_str =~ /^\s*(\d+):?(\d*|OO)\s?([aApP]?[mM]?)\s*$/) #end if(!ok_time) time_str = rt star="" #paren=false end time = convert_str_to_time_state(time_str,cts) print "bold #{bold} star #{star} paren #{paren}\n" if ($debug) if(bold) day = cur_bold_days if(day.nil? || day==false) day = cur_normal_days time.note = (time.note ? time.note : "") + "ERROR:bold day but no bold day header" end else if(star!="") day = cur_star_days if(day.nil? || day==false) day = cur_normal_days time.note = (time.note ? time.note : "") + "ERROR:star day but no star day header" end elsif(paren) day = cur_paren_days if(day.nil? || day==false) day = cur_normal_days time.note = (time.note ? time.note : "") + "ERROR:paren day but no paren day header" end else day = cur_normal_days end end MyTimeRaw.new(time,day) end } print "times\n" if($debug) p times if($debug) times = times.reject {|g| g==nil } movie_is_new = true old_movie = theater.movies_raw.find {|movie_raw| movie_raw.name == movie_name } if(old_movie) old_movie.times_raw += times movie = old_movie movie_is_new = false else movie = MovieRaw.new movie.name = movie_name #movie.times_raw= times movie.times_raw = times end # collapse day sets for times that are equal movie.times_raw.sort! new_times = [] last_time = nil movie.times_raw.each {|tr| if(!last_time.nil? && last_time.time == tr.time) last_time.days += tr.days else if(last_time) last_time.days.sort! new_times.push last_time end last_time = tr end } p last_time if($debug) if(last_time) last_time.days.sort! new_times.push last_time end print "new times\n" if($debug) p new_times if($debug) movie.times_raw = new_times if(movie_is_new) theater.movies_raw= theater.movies_raw + [movie]; end end when 3 if (line =~ /For the week of ([^,]*)(?:, 200[1-9])?/) dates = $1.split('-') if(dates.size == 2) date = dates[0] start_date_month = start_date_day = -1 end_date_month = end_date_day = -1 if(date =~ Regexp.new("^\s*("+$month_regexp.source+"\S*)\s+([0-9]+)\s*$")) #print "$&='#{$&}'\n" start_date_month = $1.downcase start_date_month = start_date_month[0,3] start_date_month.capitalize! print "month '#{start_date_month}' day '#{$2}'\n" if($debug) start_date_month = $months.index(start_date_month) start_date_month+=1 if(start_date_month) start_date_day = $2.to_i print "month #{start_date_month} day #{start_date_day}\n" if($debug) end date = dates[1] if(date =~ Regexp.new("^\s*("+$month_regexp.source+"\S*)\s+([0-9]+)\s*$")) #print "$&='#{$&}'\n" end_date_month = $1.downcase end_date_month = end_date_month[0,3] end_date_month.capitalize! print "month '#{end_date_month}' day '#{$2}'\n" if($debug) end_date_month = $months.index(end_date_month) end_date_month+=1 if(end_date_month) end_date_day = $2.to_i print "month #{end_date_month} day #{end_date_day}\n" if($debug) elsif(date =~ /^\s*([0-9]+)\s*$/) end_date_month = start_date_month end_date_day = $1.to_i print "month #{end_date_month} day #{end_date_day}\n" if($debug) end if(start_date_month != -1 && end_date_month != -1) start_overlap = (start_date_month < $end_month || (start_date_month == $end_month && start_date_day <= $end_day)) start_before = (start_date_month < $start_month || (start_date_month == $start_month && start_date_day <= $start_day)) end_overlap = (end_date_month > $start_month || (end_date_month == $start_month && end_date_day >= $start_day)) end_after = (end_date_month > $end_month || (end_date_month == $end_month && end_date_day >= $end_day)) if(start_before && end_after) theater.uptodate = 1 elsif (start_overlap && end_overlap) theater.uptodate = sprintf("times for %d/%d to %d/%d", start_date_month,start_date_day, end_date_month, end_date_day) start_day_of_week=day_of_week($year,start_date_month,start_date_day) end_day_of_week =day_of_week($year,end_date_month, end_date_day) days_total = (start_day_of_week .. end_day_of_week).to_a() normal_days &= days_total cur_normal_days = normal_days cur_bold_days = bold_days & cur_normal_days cur_star_days = star_days & cur_normal_days cur_paren_days = paren_days & cur_normal_days else theater.uptodate = -1 end else # missing start or end date if(theater.movies_raw.size > 0) theater.uptodate = "Couldn't parse dates\n" else theater.uptodate = -1 end end else # wrong number of dates in range if(theater.movies_raw.size > 0) theater.uptodate = "Wrong number of dates\n" else theater.uptodate = -1 end end state = 0 end end end ensure data.close end # never found dates if(theater.uptodate.nil?) if(theater.movies_raw.size > 0) theater.uptodate = "Couldn't find dates\n" else theater.uptodate = -1 end end theater end # Read times from Yahoo source def read_times_yahoo(source_files,theater_name) #print "parsing yahoo data from [#{source_files.join(' ')}]\n" days_covered = [] theater = TheaterRaw.new theater.movies_raw = [] old_movie = nil old_times = nil source_files.each_index {|i| found_a_tag = false theater_regexp = nil source_file=source_files[i] days = [(i)%7] # keeps track of whether any movies had actual times for this theater found_real_times = false #print "parsing yahoo data from #{source_file}\n" data = open(source_file); date_day_of_week = nil state = 2 begin times = [] while(line=data.gets) line.chop! print "state #{state} line '#{line}'\n" if ($debug) # states: # 0=looking for movie section # 1=looking for movie # 2=looking for theater accelerator # 3=looking for date case state when 0 if (line =~ theater_regexp) state=1 times=[] end when 1 case line when // state=0 when %r|([^<]+).*
.*]* size="-1">(.*)| state=1 movie_name = $1 raw_time_data = $2 if(movie_name=~/(.*), The/) movie_name="The "+$1 end movie_name = title_case(movie_name) old_movie = theater.movies_raw.find {|m| m.name==movie_name} old_times = old_movie ? old_movie.times_raw : nil emit_movie=true end when 2 if(line =~ %r|([^<]*)|) found_a_tag = true if(theater_regexp.nil?) name = $2 if(name == theater_name) theater_tag = $1 theater_regexp=Regexp.new("") state = 3 end else state = 3 end elsif(line=~%r|| && found_a_tag) print "Couldn't find tag for theater name '#{theater_name}', aborting\n" exit(2) end when 3 if(line =~ %r|]*>([^,]+),\s*(200[1-9])\s*|) date = $1 year = $2.to_i() if(date =~ Regexp.new("^\s*("+$month_regexp.source+"\S*)\s+([0-9]+)\s*$")) #print "$&='#{$&}'\n" date_month = $1.downcase date_month = date_month[0,3] date_month.capitalize! print "month '#{date_month}' day '#{$2}'\n" if($debug) date_month = $months.index(date_month) date_month+=1 if(date_month) date_day = $2.to_i print "month #{date_month} day #{date_day}\n" if($debug) date_day_of_week = day_of_week(year,date_month,date_day) if(($start_month < date_month || ($start_month == date_month && $start_day <= date_day)) && ($end_month > date_month || ($end_month == date_month && $end_day >= date_day))) print "days_covered #{days_covered.join(' ')}\n" if($debug) else date_day_of_week = nil end end state = 0 end end if (emit_movie) emit_movie=false state = 1 print "raw_time_data |"+raw_time_data+"|\n" if $debug # remove html anchors, saving text of link raw_time_data.gsub!(%r|]*>([^<>]*)|,"\\1") print "raw_time_data |"+raw_time_data+"|\n" if $debug # remove non-breakable spaces and parenthesis raw_time_data.gsub!(/ |\(|\)/,"") print "raw_time_data |"+raw_time_data+"|\n" if $debug # replace & with & raw_time_data.gsub!(/&/,"&") print "raw_time_data |"+raw_time_data+"|\n" if $debug # remove asterisks #raw_time_data.gsub!(/\*\*\*\*\*/,"") raw_time_data.gsub!(/\*/,"") print "raw_time_data |"+raw_time_data+"|\n" if $debug # remove "Theater has not yet announced times." raw_time_data.gsub!(/Theater has not yet announced times\./,"") print "raw_time_data |"+raw_time_data+"|\n" if $debug # replace html tags with spaces raw_time_data.gsub!(/<[^>]*>/," ") print "raw_time_data |"+raw_time_data+"|\n" if $debug # remove "Starts on weekday, month day Click for weekday's Showtimes (& Tickets)? raw_time_data.gsub!(/^\s*Starts on .*Click for .* Showtimes\s*(&\s*Tickets)?\s*$/,"") print "raw_time_data |"+raw_time_data+"|\n" if $debug raw_times = raw_time_data.split(',') raw_times = raw_times.reject {|rt| rt=="" } print "raw_times |"+raw_times.join('|')+"|\n" if $debug # should now have just times and b|/b stuff bold = false times = [] cts = ConvertTimeState.new() if(raw_times.size > 0) found_real_times = true end raw_times.each {|rt| time = convert_str_to_time_state(rt,cts) old_time = old_times ? old_times.find {|t| t.time==time} : nil if(old_time) old_time.days += days old_time.days.sort! else times.push MyTimeRaw.new(time,days) end } print "times\n" if($debug) p times if($debug) if(!old_movie) movie = MovieRaw.new movie.name = movie_name movie.times_raw = times movie.times_raw.sort! {|a,b| a.time <=> b.time } theater.movies_raw= theater.movies_raw + [movie]; else old_movie.times_raw += times old_movie.times_raw.sort! {|a,b| a.time <=> b.time } end end end ensure data.close end if(found_real_times) days_covered.push date_day_of_week end } days_covered_str = "["+days_covered.join(',')+"]" if(days_covered_str == "[0,1,2,3,4,5,6]") if(theater.movies_raw != []) theater.uptodate = 1 else theater.uptodate = -1 end elsif(days_covered_str == "[]") theater.uptodate = -1 else theater.uptodate = days_covered_str end theater end # Read times from Carmike source def read_times_carmike(source_file_base) theater = TheaterRaw.new theater.movies_raw = [] source_files = $days.collect {|day| source_file_base + day.downcase + ".html" } days_covered = [] source_files.each_index {|i| source_file=source_files[i] #print "parsing carmike data from #{source_file}\n" data = File.open(source_file).gets(nil); parser = HTMLTree::XMLParser.new(false,true) parser.feed(data) parser.close() doc = parser.document #doc.write($stdout,1); print "\n" movie_root = doc.root.elements["//a[@name='Showtimes']"] movie_root = movie_root.parent #movie_root.write($stdout,1); print "\n" date_str = get_all_text(movie_root.elements["./div/strong"]) print "date #{date_str}\n" if($debug) date = MyDate.new(date_str) p date if($debug) print date.date.to_s,"\n" if($debug) day_of_week = date.day_of_week if(day_of_week != i) raise InternalError.new("day of week #{day_of_week} and file name do not match #{source_file} (#{i})") end days = [day_of_week] if($start_date <= date.date && date.date <= $end_date) times_root = movie_root.elements["table"] begin; times_root.write($stdout,1); print "\n"; end if($debug) times_root.each_element("./tr") { |tr| if(!(movie_name_el=tr.elements["./td/table//strong"]).nil?) movie_name = get_all_text(movie_name_el) movie_name = title_case(movie_name) old_movie = theater.movies_raw.find {|m| m.name==movie_name} old_times = old_movie ? old_movie.times_raw : nil movie_times_el = tr.get_elements("./td/font")[1] movie_times_raw = get_all_text_arr(movie_times_el) print "movie_name #{movie_name}\n" if($debug) print "movie_times_raw #{movie_times_raw.join('|')}\n" if($debug) times = [] cts = ConvertTimeState.new() movie_times_raw.each {|rt| time = convert_str_to_time_state(rt,cts) old_time = old_times ? old_times.find {|t| t.time==time} : nil if(old_time) old_time.days += days old_time.days.sort! else times.push MyTimeRaw.new(time,days) end } if(!old_movie) movie = MovieRaw.new movie.name = movie_name movie.times_raw = times movie.times_raw.sort! {|a,b| a.time <=> b.time } theater.movies_raw += [movie]; else old_movie.times_raw += times old_movie.times_raw.sort! {|a,b| a.time <=> b.time } end end } days_covered += days end } days_covered_str = "["+days_covered.join(',')+"]" if(days_covered_str == "[0,1,2,3,4,5,6]") if(theater.movies_raw != []) theater.uptodate = 1 else theater.uptodate = -1 end elsif(days_covered_str == "[]") theater.uptodate = -1 else theater.uptodate = days_covered_str end theater end #def find_seperate_root(el1,el2) # els1 = [] # while(el1!=nil) # els1.push el1 # el1 = el1.parent # end # els1.reverse! # # els2 = [] # while(el2!=nil) # els2.push el2 # el2 = el2.parent # end # els2.reverse! # # el_idx=0 # while(el_idx0) times.push text[pos..loc-1] end pos=loc if(loc!=nil) spos=loc+1 if(loc!=nil) end times.push text[pos..-1] #print "mid1 '#{times.join('%')}'\n" times.collect! {|t| t.split('#')} #print "mid2 '#{times.join('%')}'\n" times.flatten! #print "mid3 '#{times.join('%')}'\n" times.each {|t| t.strip!} #print "mid4 '#{times.join('%')}'\n" times.collect! {|t| t=="" ? nil : t} #print "mid5 '#{times.join('%')}'\n" times.compact! #print "mid6 '#{times.join('%')}'\n" # join adjacent days with a - between them i=0 while(i 1) for i in (0..(t2.size-2)) t2[i]+="pm" end end t2[-1].strip! t2.collect! {|tt| tt=="" ? nil : tt} t2.compact! t2 } times.flatten! #print "mid8 '#{times.join('%')}'\n" # split text where there are @ symbols times.collect! {|t| t2=(t+" ").split(/@/) if(t2.size > 1) for i in (1..(t2.size-1)) t2[i]="@"+t2[i] end end t2[-1].strip! t2.collect! {|tt| tt=="" ? nil : tt} t2.compact! t2 } times.flatten! times.each {|t| t.strip!} #print "mid9 '#{times.join('%')}'\n" # join text when a day is followed by a month i=0 while(i 2) note=sprintf("ERROR:didn't understand dates '%s'",dates_arr.join('-')) notes.push note #print(note,"\n") dates_arr[2..-1]=nil elsif(dates_arr.size == 1) dates_arr.push dates_arr[0] end index = 0 times_days = dates_arr.collect { |date| orig_date = date day_of_week = nil date_month = nil date_day = nil overlap = true if(date =~ Regexp.new("^\s*("+$day_regexp.source+"\S*)\s*,?")) length = $&.size day_of_week = $1.upcase day_of_week = day_of_week[0,3] day_of_week = $days.index(day_of_week) date = date[length..-1] end #if(date =~ Regexp.new("^\s*("+$month_regexp.source+"\S*)\s*(\d+)")) if(date =~ Regexp.new("^\s*("+$month_regexp.source+"\S*)\s+([0-9]+)")) #print "$&='#{$&}'\n" length = $&.size date_month = $1.downcase date_month = date_month[0,3] date_month.capitalize! print "month '#{date_month}' day '#{$2}'\n" if($debug) date_month = $months.index(date_month) date_month+=1 if(date_month) date_day = $2.to_i print "month #{date_month} day #{date_day}\n" if($debug) date_day_of_week = day_of_week($year,date_month,date_day) if(date_day_of_week != day_of_week) note=sprintf("ERROR:Day of week doesn't match date for '%s' "+ "(%d vs. %d)",orig_date,day_of_week,date_day_of_week); notes.push note #print(note,"\n"); end if(index==0) if(date_month < $start_month || (date_month==$start_month && date_day<$start_day)) date_month = $start_month date_day = $start_day date_day_of_week = day_of_week($year,date_month,date_day) end overlap = (date_month < $end_month || (date_month==$end_month && date_day<=$end_day)) else if(date_month > $end_month || (date_month==$end_month && date_day>$end_day)) date_month = $end_month date_day = $end_day date_day_of_week = day_of_week($year,date_month,date_day) end overlap = (date_month > $start_month || (date_month==$start_month && date_day>=$start_day)) end day_of_week = date_day_of_week date = date[length..-1] end if(date=~/(\S+)/) first_part_of_date = orig_date[0,orig_date.length-date.length] note=sprintf("ERROR:unrecognized part[] of date '%s[%s]'",first_part_of_date,date) notes.push(note) #print(note,"\n") end if(overlap && !day_of_week) note=sprintf("ERROR:couldn't detmerine day of week for '%s'",orig_date) notes.push note #print(note,"\n") day_of_week = 6 end index+=1 #print "day of week is '#{day_of_week}'\n" if(overlap) day_of_week else -1 end } #print "days are [#{days.join(',')}]\n" if($debug) if(times_days.member?(-1)) ddays = [] else ddays = (times_days[0]..times_days[-1]).to_a end print "ddays are [#{ddays.join(',')}]\n" if($debug) ddays } big_dates_arr.flatten! big_dates_arr.sort! days = big_dates_arr print "days are [#{days.join(',')}]\n" if($debug) convert_time_state = ConvertTimeState.new else # parse times #print "found times days=[#{days.collect{|d|d.to_s}.join(',')}]\n"; times = this_times_text.split(/@|,/) times.each {|t| t.strip!} times.reject! {|t| t==""} times.each {|time| time_struct = convert_str_to_time_state(time,convert_time_state) if(days.size > 0) times_arr.push [time_struct, days] end } end end times_arr.sort! {|atd,btd| res=(atd[0]<=>btd[0]) if(res!=0) res else if(atd[1][0]!=nil && btd[1][0]!=nil) atd[1][0]<=>btd[1][0] else 0 end end } times_arr_idx=0 while(times_arr_idx+1 $end_month || (month2 == $end_month && day2 > $end_day)) month2 = $end_month day2 = $end_day end start_overlap = (month1 < $end_month || (month1 == $end_month && day1 <= $end_day)) start_equal = (month1 == $start_month && day1 == $start_day) end_overlap = (month2 > $start_month || (month2 == $start_month && day2 >= $start_day)) end_equal = (month2 == $end_month && day2 == $end_day) if(start_equal && end_equal) uptodate = 1 elsif (start_overlap && end_overlap) uptodate = sprintf("times for %d/%d to %d/%d", month1,day1, month2,day2) else uptodate = -1 end end end ensure if(file) file.close() end end #print "parsing filmmakers data from #{source_file}\n" #print "snarfing file\n" file = File.new(source_file) data = file.gets(nil) file.close() #print "subbing\n" # remove crap data.gsub!(%r%%,""); data.gsub!(/[ \t]+/," "); #data.gsub!(/[ \t]+\n/,""); if($debug) dfile = File.new("fm.html","w") printf(dfile,"%s",data) dfile.close() end html_parser = HTMLTree::XMLParser.new(false,true) html_parser.feed(data) doc = html_parser.document if($debug) print "\ndoc\n" doc.write($stdout,0) print "\n" end harris_a_node = doc.root.elements["//a[@name='harris']"] movie_root = harris_a_node.parent.parent print "\nmr\n" if($debug) movie_root.write($stdout,0) if($debug) print "\n" if($debug) s_no_theater,s_found_harris,s_found_regent,s_found_melwood \ = (0..4).to_a() ms_no_movie,ms_movie \ = (0..1).to_a() theaters = [] movie = nil state = s_no_theater mini_state = ms_no_movie theater = nil end_of_movies = false movie_root.elements.each("*") {|el| a_theater = nil h_theater = nil if(el.attributes["name"] == "harris") h_theater = el else h_theater = el.elements["a[@name='harris']"] end a_theater = h_theater if h_theater r_theater = nil if(el.attributes["name"] == "regent") r_theater = el else r_theater = el.elements["a[@name='regent']"] end a_theater = r_theater if r_theater m_theater = nil if(el.attributes["name"] == "melwood") m_theater = el else m_theater = el.elements["a[@name='melwood']"] end m_theater = el.elements["a[@name='melwood']"] a_theater = m_theater if m_theater if(a_theater) ntheater = TheaterRaw.new ntheater.name = a_theater.attributes["name"] print "theater #{ntheater.name}\n" if($debug) case ntheater.name when /harris/ state=s_found_harris when /regent/ state=s_found_regent when /melwood/ state=s_found_melwood end theater.movies_raw.push movie if(movie!=nil) movie = nil theaters.push theater if(theater!=nil) theater = ntheater theater.movies_raw = [] mini_state = ms_no_movie end u_gallery = el.elements["*//u"] if(u_gallery) u_gallery_text = get_all_text(u_gallery) print "u_gallery = ",u_gallery_text,"\n" if($debug) if(u_gallery_text =~ /photography/) theater.movies_raw.push movie if(movie!=nil) movie = nil theaters.push theater if(theater!=nil) theater = nil mini_state = ms_no_movie state=s_no_theater end_of_movies = true print "found end of movies\n" if($debug) end end if(!end_of_movies) i_movie = el.elements[".//font[@size='6']//i"] i_movie = el.elements[".//i//font[@size='6']"] if(!i_movie) i_movie = el.elements[".//font[@size='7']//i"] if(!i_movie) i_movie = el.elements[".//i//font[@size='7']"] if(!i_movie) i_movie_text = get_all_text(i_movie) if(theater && !i_movie.nil? && i_movie_text!=nil) theater.movies_raw.push movie if(movie!=nil) print "i_movie\n" if($debug) i_movie.write($stdout) if($debug) movie_name = i_movie_text movie_name.strip! movie_name = title_case(movie_name) print "movie found #{movie_name}\n" if($debug) movie = MovieRaw.new movie.name = movie_name movie.times_raw = [] end el.elements.each(".//b/font[@size='4']") {|r_time| text = [] size = 0 text_node = r_time.get_text while(text_node) text.push text_node.to_s() if(text_node.kind_of? REXML::Text) text_node = text_node.next_sibling end movie.times_raw += text if(movie) } end } theater.movies_raw.push movie if(theater!=nil && movie!=nil) theaters.push theater if(theater!=nil) theaters.each {|theater| theater.movies_raw = theater.movies_raw.collect {|movie_raw| vals = convert_filmmakers_raw_to_times(movie_raw.times_raw) times = vals[0] notes = vals[1] movie_raw.times_raw = times movie_raw.notes = notes if(times.size > 0 || notes.size > 0) movie_raw else nil end } theater.movies_raw.reject! {|movie_raw| movie_raw.nil? } if(uptodate) theater.uptodate = uptodate end } theaters end yahoo_movie_times = \ [ "times_yahoo_fri.html",\ "times_yahoo_sat.html",\ "times_yahoo_sun.html",\ "times_yahoo_mon.html",\ "times_yahoo_tue.html",\ "times_yahoo_wed.html",\ "times_yahoo_thu.html",\ ] # format for day overrides is replace nil with [FRI,SAT,TUE,WED] or whatever days you need in order # starting with FRI # cm debug weight theater name # fm debug weight theater name anchor name # cp debug weight theater name source file name overrides for bold days, star days, () days # yh debug weight theater name yahoo theater name # ck debug weight theater name theater_parms=\ [ \ ["cm", false, 1.0, "CMU", [] ],\ ["yh", false, 1.1, "Manor", ["Cinemagic Manor Theatre" ] ],\ ["yh", false, 1.1, "Squill", ["Cinemagic Squirrel Hill" ] ],\ ["fm", false, 1.1, "Harris", ["harris" ] ],\ ["yh", false, 1.0, "Harris", ["Harris Theatre" ] ],\ ["fm", false, 1.1, "Melwood", ["melwood"] ],\ ["yh", false, 1.0, "Melwood", ["Melwood Screening Room" ] ],\ ["fm", false, 1.1, "Regent", ["regent" ] ],\ ["yh", false, 1.0, "Regent", ["Regent Square Theatre" ] ],\ ["cp", false, 1.1, "Waterworks", ["times_cp_waterworks.html" , nil, nil, nil] ],\ ["yh", false, 1.0, "Waterworks", ["Waterworks Cinemas"] ],\ ["cp", false, 1.1, "LoewsWater", ["times_cp_loewsWATER.html" , nil, nil, nil] ],\ ["yh", false, 1.0, "LoewsWater", ["Loews Waterfront Theatre" ] ],\ ["yh", false, 1.0, "Denis", ["Cinemagic Denis"] ],\ ["ck", false, 2.3, "Galleria6", ["times_carmike_galleria6_"] ], \ ["yh", false, 1.1, "Galleria6", ["Carmike Galleria 6"] ],\ ["cp", false, 1.0, "Galleria6", ["times_cp_carmikeGAL.html" , nil, nil, nil] ],\ ["ck", false, 2.3, "Southland9", ["times_carmike_southland9_"] ], \ ["yh", false, 1.1, "Southland9", ["Carmike Southland 9"] ],\ ["cp", false, 1.0, "Southland9", ["times_cp_carmikeSOUTH.html" , nil, nil, nil] ],\ ["cp", false, 1.1, "Destinta20BV", ["times_cp_destinta20B.html" , nil, nil, nil] ],\ ["yh", false, 1.0, "Destinta20BV", ["Destinta Theatres - Chartiers Valley 20"] ],\ ["yh", false, 1.1, "ShowcaseEast", ["Showcase Cinemas Pittsburgh East" ] ],\ ["cp", false, 1.0, "ShowcaseEast", ["times_cp_showcaseE.html" , nil, nil, nil] ],\ ["yh", false, 1.1, "ShowcaseWest", ["Showcase Cinemas Pittsburgh West"] ],\ ["cp", false, 1.0, "ShowcaseWest", ["times_cp_showcaseW.html" , nil, nil, nil] ],\ ["yh", false, 1.1, "ShowcaseNorth", ["Showcase Cinemas Pittsburgh North"] ],\ ["cp", false, 1.0, "ShowcaseNorth", ["times_cp_showcaseN.html" , nil, nil, nil] ],\ ["yh", false, 1.1, "Northway8", ["Northway Mall Cinemas 8"] ],\ ["cp", false, 1.0, "Northway8", ["times_cp_northway8.html", nil, nil, nil] ],\ #["yh", false, 1.0, "Bellevue", ["Cinemagic Bellevue"] ],\ #dead? ["cp", false, 1.1, "Destinta22NV", ["times_cp_destinta22NV.html" , nil, nil, nil] ],\ ["yh", false, 1.0, "Destinta22NV", ["Destinta Theatres - Plaza East 22"] ],\ ["ck", false, 2.3, "Carmike10", ["times_carmike_carmike10_"] ], \ ["cp", false, 1.1, "Carmike10", ["times_cp_carmike10.html" , nil, nil, nil] ],\ ["yh", false, 1.0, "Carmike10", ["Carmike 10 - Pittsburgh"] ],\ ["ck", false, 2.3, "MaxiSavers", ["times_carmike_maxisavers12_"] ], \ ["cp", false, 1.1, "MaxiSavers", ["times_cp_carmikeMAX.html" , nil, nil, nil] ],\ ["yh", false, 1.0, "MaxiSavers", ["Carmike Maxi Saver 12" ] ],\ ["yh", false, 1.0, "CargSciCtr", ["Omnimax Theatre - Carnegie Science Center" ] ],\ ] fm_theaters = nil # merge movie times from all sources for the movie name indicated def merge_movies(gtheaters, movie_name) movie_common = Movie.new movie_common.name = movie_name movie_common.times = [] movie_common.notes = [] print "merging movie '#{movie_common.name}'\n" if($debug) theater_movies = gtheaters.collect {|theater| tmovie = theater.movies.find {|tmovie| tmovie.name == movie_name } if(tmovie) [theater,tmovie] else [theater,nil] end } theater_movies.each {|tm| if(tm[1]) tm[1].times.sort! {|a,b| a<=>b } end } if($debug) print "theater_movies\n" p theater_movies.collect{|tm| tm[1]} end prev_time = nil # calculate common votes_for = 0.0 votes_against = 0.0 theater_movies.each_with_index {|tm,idx| weight = tm[0].weight if(tm[1]) votes_for += weight else votes_against += weight end } if(votes_for >= votes_against) # movie voted in begin first_times = theater_movies.collect {|tm| if(tm[1]) if(prev_time) tm[1].times.find {|time| (prev_time <=> time) == -1 } else tm[1].times[0] end else nil end } first_times.reject! {|time| time.nil?} times_left = (first_times.size != 0) if(times_left) first_times.sort! {|a,b| a<=>b} first_time = first_times[0] prev_time = first_time print "first_time='#{first_time}'\n" if($debug) weight_times = theater_movies.collect {|tm| [tm[0].weight, if(tm[1]) time = tm[1].times.find {|t| t==first_time} else nil end ] } p times if($debug) votes_for = 0.0 votes_against = 0.0 for i in 0..(weight_times.size()-1) weight = weight_times[i][0] if(weight_times[i][1]) votes_for += weight else votes_against += weight end end print "votes for:#{votes_for} against:#{votes_against}\n" if($debug) if(votes_for >= votes_against) print "adding '#{first_time}' to common times\n" if($debug) movie_common.times.push first_time end end end while(times_left) else # movie voted down movie_common = nil end # calculate diffs movie_diffs = [] theater_movies.each {|tm| if(tm[1]) movie_diff = Movie.new movie_diff.name = movie_name movie_diff.times = [] movie_diff.notes = [tm[0].source+":adds"] movie_diff.notes += tm[1].notes if(movie_common) movie_diff.times = tm[1].times - movie_common.times if($debug) print "this times\n" p tm[1].times print "common times\n" p movie_common.times print "diff times\n" p movie_diff.times end else movie_diff.notes.push "entire movie" movie_diff.times = tm[1].times end if(movie_diff.times.size > 0 || tm[1].notes.size > 0) movie_diffs.push(movie_diff) end end if(movie_common) movie_diff = Movie.new movie_diff.name = movie_name movie_diff.times = [] movie_diff.notes = [tm[0].source+":dels"] if(tm[1]) movie_diff.notes += tm[1].notes movie_diff.times = movie_common.times - tm[1].times if(movie_diff.times.size > 0) movie_diffs.push(movie_diff) end if($debug) print "common times\n" p movie_common.times print "this times\n" p tm[1].times print "diff times\n" p movie_diff.times end else movie_diff.notes.push "entire movie" movie_diffs.push(movie_diff) end end } [movie_common, movie_diffs ] end # merge data from different theater sources # generates merged version and differences between versions def merge_theaters(theaters) gtheaters = theaters.reject {|theater| theater.uptodate == -1 } p gtheaters if($debug) movies = gtheaters.collect {|theater| theater.movies } movies = movies.flatten movie_names = movies.collect {|movie| movie.name } movie_names.sort! {|a,b| a <=> b } movie_names.uniq! theater_common = Theater.new() theater_common.source = (gtheaters.collect {|theater| theater.source}).join(",") theater_common.uptodate = 1 theater_common.name = theaters[0].name theater_common.movies = [] theater_common.weight = 1 theater_diff = Theater.new() theater_diff.source = "diff" theater_diff.uptodate = 1 theater_diff.name = theaters[0].name theater_diff.movies = [] theater_diff.weight = 1 movie_names.each {|movie_name| vals=merge_movies(gtheaters,movie_name) movies_common = vals[0] movies_diffs = vals[1] if(movies_common) theater_common.movies.push movies_common end if(!movies_diffs.nil?) theater_diff.movies += movies_diffs end } [theater_common, theater_diff] end if($mode == $MODE_GRAB) # read config file config_file = File.new("movie.cfg") config_data = config_file.gets(nil) config_file.close() config_doc = REXML::Document.new config_data config_doc.elements.each("translations/translate") { |trans| print "adding translation from '#{trans.attributes["pre"]}' to '#{trans.attributes["post"]}'\n" $movie_name_translations[trans.attributes["pre"]] = trans.attributes["post"] } config_doc = nil config_data = nil theater = nil theaters = theater_parms.collect {|tp| print "parsing theater #{tp[3]}:#{tp[0]}\n" old_debug=$debug $debug = tp[1] parser_args = tp[4] case tp[0] when "ck" theater_raw=read_times_carmike(parser_args[0]) theater_raw.debug_print() if ($debug) theater = Theater.new theater.movies_from_raw(theater_raw.movies_raw) theater.uptodate = theater_raw.uptodate when "cm" theater_raw=read_times_cmu theater_raw.debug_print() if ($debug) theater = Theater.new theater.movies_from_raw(theater_raw.movies_raw) theater.sort = false theater.uptodate = theater_raw.uptodate when "cp" theater_raw=read_times_cp(parser_args[0],parser_args[1],parser_args[2],parser_args[3]) theater_raw.debug_print() if ($debug) theater = Theater.new theater.movies_from_raw(theater_raw.movies_raw) theater.uptodate = theater_raw.uptodate when "yh" theater_raw=read_times_yahoo(yahoo_movie_times,parser_args[0]) theater_raw.debug_print() if ($debug) theater = Theater.new theater.movies_from_raw(theater_raw.movies_raw) theater.uptodate = theater_raw.uptodate when "fm" if(!fm_theaters) fm_theaters_raw=read_times_filmmakers("times_filmmakers.html","times_filmmakers.txt") fm_theaters = fm_theaters_raw.collect {|theater_raw| #theater_raw.debug_print() if ($debug) theater = Theater.new theater.name = theater_raw.name theater.movies_from_raw(theater_raw.movies_raw) theater.uptodate = theater_raw.uptodate theater } end theater_name = parser_args[0] theater = fm_theaters.find {|th| th.name == theater_name } end $debug=old_debug theater.name = tp[3] theater.source = tp[0] theater.weight = tp[2] theater } # output parsed data out_file_ind = File.open("output_ind.old","w") theaters.each {|theater| print "outputting theater #{theater.name}:#{theater.source}\n" if($debug) theater.print_formatted(out_file_ind) } out_file_ind.close() # output parsed data in intermediate format out_file_ind = File.open("output_ind","w") theaters.each {|theater| print "outputting theater #{theater.name}:#{theater.source}\n" if($debug) theater.print_intermediate(out_file_ind) } out_file_ind.close() #$debug=true print "about to merge theaters\n" # merge theater data from different sources merged_theaters=[] i=0 while(i