# FR statistics script.
# The script performs the obvious task of reading elements.xml and
# printing the number of items in each category to be reviewed on
# the given date (default: tomorrow).
# However, it also provides a statistics history.  It does this by
# always noting the maximum number of items to be reviewed on the
# given date, and if that maximum be the greatest recorded so far,
# it overwrites the historical data.  Thus, to ensure that the
# history is always accurate, the script should be run immediately
# after reviews have been completed on a given day and FR has been
# closed.
# Optional arguments:
# 	-a n	show statistics n days ahead (default 1, i.e., tomorrow)
# 	-s	summarise, i.e., don't show categories of <1% of total
# 	-p x	like -s, but with the cut-off given as a fraction x of
# 		the total instead of 1%
# Since n may be negative, one can specify a negative argument to -a
# to see how many reviews were performed in the past.
# Comments etc. to michael at the domain of landcroft dot co
# dot uk

require "rexml/document"
require "rexml/streamlistener"
require "time"

SecsInDay = 24 * 60 * 60

StatsDir = "stats/"

# Add up every value held in the_hash and return the sum.
# An empty hash yields 0.
def hash_total(the_hash)
	the_hash.values.inject(0) {|running, count| running + count }
end

# Print the per-category figures followed by a TOTAL line.
#   total    -- overall count for the day, used for the cut-off and the
#               TOTAL line
#   the_hash -- category => count for the day
#   totals   -- category => number of items in that category overall
#   day_num  -- day number echoed on the TOTAL line
# A category gets its own row (count, overall items, count as a
# percentage of the overall items, name) only when its count exceeds
# total / $show_minor; all other counts are lumped into "oddments".
def show_figures(total, the_hash, totals, day_num)
	cutoff = total / $show_minor
	minor_sum = 0
	the_hash.each do |name, n|
		# Too small to list individually: just remember how many there were
		unless n > cutoff
			minor_sum += n
			next
		end
		share = 100.0 * n / totals[name]
		print format("%5.0d%6.0d%6.2f ", n, totals[name], share), name, "\n"
	end
	print format("%5.0d TOTAL (%d oddments) on day %d\n", total, minor_sum, day_num)
end

# Stream listener that tallies, per category, the items due on the
# requested day and the items overall while elements.xml is parsed,
# and reports the figures once </fullrecall> is reached.
# Relies on SecsInDay, StatsDir, $advance, hash_total and show_figures
# being defined by the surrounding script.
class Listener

	include REXML::StreamListener

	# Start with no open categories and empty tallies; both tallies
	# answer 0 for a category that has not been seen yet.
	def initialize(*argument)
		@category = [ ]
		@count = Hash.new(0)
		@total = Hash.new(0)
	end

	# Called for every opening tag; attributes is indexed by attribute name.
	def tag_start(name, attributes)
		case name
		when "fullrecall"
			@version = attributes["core_version"].to_i
			unless [10, 12].include?(@version)
				warn "Unknown version;  results not guaranteed"
			end
			# Day wanted, counted from the day of time_of_start:
			# today's (local) day number, less the start day, plus
			# the requested advance.
			current = Time.now.getlocal
			today = (current.to_i + current.gmt_offset) / SecsInDay
			start_day = attributes["time_of_start"].to_i / SecsInDay
			@reqd_day = today - start_day + $advance
		when "q", "a", "Q", "A"
			# nothing
		when "category"
			# Version 12 nests <category> elements; keep the current path
			if @version == 12 then
				@category << attributes["name"]
			end
		when "item"
			# The category and the due-day attribute are spelt differently
			# in the two known versions; for any other version both stay nil.
			category, time_attr =
				case @version
				when 10
					[attributes["ctgr"], "tm_t_rpt"]
				when 12
					[@category.join(" / "), "tmtrpt"]
				end
			if attributes[time_attr].to_i == @reqd_day then
				@count[category] += 1
			end
			@total[category] += 1
		else
			warn "Unexpected tag <" + name + ">"
		end
	end

	# Called for every closing tag.  Leaving a category pops it off the
	# path; leaving <fullrecall> prints the figures and, when today's
	# figures beat the recorded ones, rewrites the history file.
	def tag_end(name)
		case name
		when "category"
			if @version == 12 then
				@category.pop
			end
		when "fullrecall"
			# Pull historical figures, if possible
			stats_file = StatsDir + @reqd_day.to_s
			historical = { }
			# File.exist? rather than File.exists?: the latter no longer
			# exists in Ruby 3.2 and later
			if File.exist?(stats_file) then
				# Block form so the file is closed as soon as it is read
				File.open(stats_file, "r") {|history|
					history.each {|line|
						parts = line.split(/:/, 2)
						# Skip lines that are not of the form "count:category"
						next if parts.length < 2
						historical[parts[1].chomp] = parts[0].to_i
					}
				}
			end
			hist_total = hash_total(historical)
			cnt_total = hash_total(@count)
			if hist_total >= cnt_total then
				# More from history --- use historical figures
				show_figures(hist_total, historical, @total, @reqd_day)
			else
				# More from current --- use current figures
				show_figures(cnt_total, @count, @total, @reqd_day)
				# First run: make sure there is somewhere to write to
				Dir.mkdir(StatsDir) unless File.directory?(StatsDir)
				# Block form so the figures are flushed and the file closed
				File.open(stats_file, "w") {|writeback|
					@count.each_pair {|category, count|
						writeback.print count, ":", category, "\n"
					}
				}
			end
		end
	end
end

# Defaults, overridden by the command-line switches below.
# $advance is the day offset added when the listener works out the day
# to report on; $show_minor is the divisor show_figures applies to the
# total to obtain its cut-off.
# NOTE(review): with the default of 1.0 the cut-off equals the total
# itself, so per-category rows look unreachable unless -s or -p is
# given --- confirm this is intended.
$advance = 1
$show_minor = 1.0

# Consume the arguments left to right; unrecognised ones are ignored.
until ARGV.empty?
	option = ARGV.shift
	if option == '-a'
		# -a n: report n days ahead
		$advance = ARGV.shift.to_i
	elsif option == '-s'
		# -s: cut-off at 1/100 of the total
		$show_minor = 100.0
	elsif option == '-p'
		# -p x: cut-off at fraction x of the total
		$show_minor = 1.0 / ARGV.shift.to_f
	end
end

# Stream elements.xml through the listener, which prints the report.
source = File.new("elements.xml", "r")
REXML::Parsers::StreamParser.new(source, Listener.new).parse
