-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclassifier.rb
More file actions
38 lines (33 loc) · 1020 Bytes
/
classifier.rb
File metadata and controls
38 lines (33 loc) · 1020 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
require 'ngram'
require 'csv'
require 'nbayes'
# Guesses a country label from a full name and a sex label from a first name.
#
# Two independent NBayes models are trained once, at construction time, from a
# CSV file. Both models are fed lower-cased character trigrams produced by
# NGram, and the classify_* methods tokenise their input the same way so that
# lookups hit the vocabulary seen during training.
class NameClassifier
  # Matches a name made of a leading part that ends in a capital letter, one
  # space, and a trailing part that starts with a capital followed by a
  # lower-case letter (for example "SMITH John"). The second capture is what
  # the sex model is trained on.
  NAME_PATTERN = /(^.*[A-Z]) ([A-Z][a-z].*)/.freeze

  # Number of best-scoring countries reported by #classify_country.
  TOP_COUNTRIES = 5

  # Trains both models from a CSV file.
  #
  # The first row is discarded (presumably a header). For every other row,
  # column 0 is the name, column 1 the label for the sex model and column 2
  # the label for the country model.
  #
  # @param csv [String] path of the training file
  def initialize(csv)
    @nbayes_s = NBayes::Base.new
    @nbayes_c = NBayes::Base.new
    @ngram = NGram.new({ :size => 3, :padchar => ' ' })

    # Block form closes the file even when a row raises part-way through.
    CSV.open(csv, 'r') do |reader|
      reader.shift
      reader.each { |row| train_row(row) }
    end
  end

  # Scores a full name against the country model.
  #
  # @param fullname [String] name to classify; case is ignored
  # @return [Hash] at most TOP_COUNTRIES entries, best first, mapping each
  #   label to its score formatted with seven decimals (a String)
  def classify_country(fullname)
    # Assumes NBayes returns a Hash-like label => numeric score collection.
    scores = @nbayes_c.classify(trigrams(fullname))
    best = scores.sort_by { |_label, score| -score }.first(TOP_COUNTRIES)
    Hash[best.map { |label, score| [label, format('%.7f', score)] }]
  end

  # Scores a first name against the sex model.
  #
  # @param firstname [String] first name to classify; case is ignored
  # @return [Hash] the result object produced by NBayes, with every score
  #   replaced in place by a three-decimal String
  def classify_sex(firstname)
    scores = @nbayes_s.classify(trigrams(firstname))
    # Updated in place on purpose: the very object NBayes returned is handed
    # back, so anything NBayes attached to it stays available to callers.
    scores.each { |label, score| scores[label] = format('%.3f', score) }
  end

  private

  # Feeds one CSV row to the country model and, when a first name can be
  # extracted from the name column, to the sex model as well.
  def train_row(row)
    name = row[0]
    return unless name # blank CSV lines produce rows without a name

    @nbayes_c.train(trigrams(name), row[2])

    # Apostrophes are dropped before matching, so "O'NEIL Mary" is matched
    # as "ONEIL Mary".
    match = NAME_PATTERN.match(name.delete("'"))
    @nbayes_s.train(trigrams(match[2]), row[1]) if match
  end

  # Lower-cases the text and returns its trigrams as one flat array. Used for
  # both training and classification so the two always agree.
  def trigrams(text)
    @ngram.parse(text.downcase).flatten
  end
end