# Markov-chain text generator.
#
# Usage: ruby <script> [-w=WEIGHT] [-num=WORDS] [-cfg=FILE] FILE...
#
#   -w=WEIGHT   weight applied to every input file named after this option
#   -num=WORDS  approximate number of words to generate
#   -cfg=FILE   read further whitespace-separated arguments from FILE

# Prefix table mapping each run of lead words to the weighted set of words
# that were seen following it.
class MarkovDB
  # A word is a run of letters/digits, optionally joined by single inner
  # apostrophes (so "don't" is one word).
  WORD_PATTERN = /(?:(?:[a-zA-Z0-9]\'[a-zA-Z0-9])|[a-zA-Z0-9])+/

  # leadcount:: number of words that make up a prefix (must be >= 1).
  def initialize(leadcount = 2)
    raise ArgumentError, "leadcount must be at least 1" if leadcount < 1

    @prefixes = {}
    @leadcount = leadcount
    reset_context
  end

  # Forgets the lead words carried over from previously added text, so the
  # next chunk starts a fresh prefix.
  def reset_context
    @w_lc = 0
    @w_lead = []
  end

  # Feeds every line of +filename+ into the table with the given weight.
  def add_file(filename, weight)
    File.open(filename, "rb") do |f|
      f.each_line { |line| add_chunk_weight(line, weight) }
    end
  end

  # Adds +text+ with a weight of 1.
  def add_chunk(text)
    add_chunk_weight(text, 1)
  end

  # Splits +text+ into lower-cased words and records each word as a suffix
  # of the preceding lead words, adding +weight+ to its count. The lead
  # context persists between calls until reset_context is invoked.
  def add_chunk_weight(text, weight)
    text.scan(WORD_PATTERN).each do |raw|
      word = raw.downcase
      if @w_lc < @leadcount
        # Still collecting the very first prefix.
        @w_lead[@w_lc] = word
        @w_lc += 1
      else
        (@prefixes[@w_lead.join(" ")] ||= MarkovPrefix.new).add_word(word, weight)
        # Slide the prefix window forward by one word.
        @w_lead.shift
        @w_lead << word
      end
    end
  end

  # Generates at least +num+ words of text. Every time the chain starts or
  # restarts from a randomly chosen prefix, the prefix is emitted preceded by
  # a "*** " marker. Each emitted word is followed by a single space.
  #
  # Returns the generated String, or nil when no prefixes have been recorded.
  def generate(num)
    return nil if @prefixes.empty?

    keys = @prefixes.keys
    accum = "".dup
    generated = 0
    lead = []

    while generated < num
      if lead.empty?
        # (Re)seed the chain from a uniformly chosen prefix.
        key = keys[rand(keys.size)]
        lead = key.split
        accum << "*** " << key << " "
        generated += @leadcount
      end

      suffixes = @prefixes[lead.join(" ")]
      if suffixes.nil?
        # Dead end: nothing ever followed this prefix, so reseed next round.
        lead = []
      else
        word = suffixes.get_word
        lead.shift
        lead << word
        accum << word << " "
        generated += 1
      end
    end

    accum
  end

  # One line per prefix: "<prefix>: <suffix list>\n".
  def to_s
    @prefixes.map { |prefix, suffixes| "#{prefix}: #{suffixes}\n" }.join
  end
end

# Weighted bag of the words observed after one particular prefix.
class MarkovPrefix
  def initialize
    @total_weight = 0
    @suffix_table = Hash.new(0)
    @sorted_table = []
    @dirty = false
  end

  # Adds +weight+ to the count recorded for +word+.
  def add_word(word, weight)
    @suffix_table[word] += weight
    @total_weight += weight
    @dirty = true
  end

  # Rebuilds the sorted [word, weight] list used for sampling.
  def resort
    @sorted_table = @suffix_table.sort
    @dirty = false
  end

  # Returns a word chosen at random with probability proportional to its
  # weight. When no entry can be selected by weight (for example the total
  # weight is zero) the last word in sort order is returned instead; nil is
  # returned only when no words have been added.
  def get_word
    resort if @dirty
    return nil if @sorted_table.empty?

    if @total_weight > 0
      num = rand(@total_weight)
      @sorted_table.each do |word, weight|
        num -= weight
        return word if num < 0
      end
    end

    # Weighted pick impossible; fall back deterministically rather than
    # leaking the table itself to the caller.
    @sorted_table.last[0]
  end

  # "word(weight) " for every recorded suffix.
  def to_s
    @suffix_table.map { |word, weight| "#{word}(#{weight}) " }.join
  end
end

# An input file name paired with the weight its words should receive.
class FileWeightPair
  attr_reader :weight, :inputfile

  def initialize(wrd, wt)
    @weight = wt
    @inputfile = wrd
  end
end

# Accumulates command-line arguments: options of the form -name=value and
# plain input file names.
class CommandLineMuncher
  OPTION_PATTERN = /-([a-zA-Z]+)=([a-zA-Z0-9._]+)/

  def initialize
    @files = []
    @weight = 100
    @words = 1000
  end

  # Processes a single argument.
  #
  # * "-w=N"      sets the weight used for files that follow
  # * "-num=N"    sets the number of words to generate
  # * "-cfg=FILE" munches every whitespace-separated word in FILE
  # * anything else that is not blank is recorded as an input file with the
  #   current weight
  #
  # Arguments that start with "-" but do not have the -name=value shape are
  # reported on stderr and skipped; unrecognised option names are ignored.
  def munch(arg)
    # String#[] with an Integer index returns a String on Ruby >= 1.9, so the
    # leading dash is tested with start_with? rather than a character code.
    unless arg.start_with?("-")
      @files << FileWeightPair.new(arg, @weight) unless arg.strip.empty?
      return
    end

    match = OPTION_PATTERN.match(arg)
    if match.nil?
      warn "Ignoring malformed option: #{arg}"
      return
    end

    value = match[2]
    case match[1]
    when "w"   then @weight = value.to_i
    when "num" then @words = value.to_i
    when "cfg" then munch_config(value)
    end
  end

  # Array of FileWeightPair in the order the files were given.
  def getfiles
    @files
  end

  # Number of words requested for generation.
  def numwords
    @words
  end

  private

  # Treats every whitespace-separated word in the file at +path+ as an
  # argument.
  def munch_config(path)
    File.open(path) do |f|
      f.each_line do |line|
        line.split(/[\s]/).each { |word| munch(word) }
      end
    end
  end
end

# Run the generator only when this file is executed directly.
if __FILE__ == $0
  database = MarkovDB.new
  cmd = CommandLineMuncher.new
  ARGV.each { |arg| cmd.munch(arg) }
  cmd.getfiles.each { |data| database.add_file(data.inputfile, data.weight) }
  puts database.generate(cmd.numwords)
end