Interpret correctly the signs in the affinity matrix

This commit is contained in:
Dimitri Lozeve 2020-02-22 17:28:23 +01:00
parent 34d9cd000d
commit 6d30b54570

View file

@@ -31,27 +31,28 @@ function loadclassification()::Classification
clf clf
end end
function loadcostsdf()::DataFrame function loadaffinitiesdf()::DataFrame
df = CSV.read("data/associations.csv", copycols=true) df = CSV.read("data/associations.csv", copycols=true)
colnames = String.(names(df)) colnames = String.(names(df))
colnames = Symbol.(Unicode.normalize.(colnames, casefold=true, stripmark=true)) colnames = Symbol.(Unicode.normalize.(colnames, casefold=true, stripmark=true))
rename!(df, colnames) rename!(df, colnames)
df.name = colnames[2:end] df.name = colnames[2:end]
# df = coalesce.(df, 0.0) # df = coalesce.(df, 0.0)
@info "loaded cost matrix for $(size(df, 1)) plants" @info "loaded affinity matrix for $(size(df, 1)) plants"
df df
end end
function computecost(plant1::Symbol, plant2::Symbol, costs_df::DataFrame, classification::Classification)::Float64 "Compute the cost between two plants, using their families if necessary."
function computecost(plant1::Symbol, plant2::Symbol, affinities_df::DataFrame, classification::Classification)::Float64
@debug "computecost($plant1, $plant2)" @debug "computecost($plant1, $plant2)"
if plant1 in names(costs_df) && plant2 in names(costs_df) if plant1 in names(affinities_df) && plant2 in names(affinities_df)
cost = costs_df[costs_df.name .== plant1, plant2][1] affinity = affinities_df[affinities_df.name .== plant1, plant2][1]
else else
cost = missing affinity = missing
end end
if !ismissing(cost) if !ismissing(affinity)
return cost return -affinity
end end
parent1 = getfirstparent(plant1, classification) parent1 = getfirstparent(plant1, classification)
@@ -60,24 +61,25 @@ function computecost(plant1::Symbol, plant2::Symbol, costs_df::DataFrame, classi
return 0.0 return 0.0
end end
@debug "computecost($(parent1.name), $(parent2.name))" @debug "computecost($(parent1.name), $(parent2.name))"
if parent1.name in names(costs_df) && parent2.name in names(costs_df) if parent1.name in names(affinities_df) && parent2.name in names(affinities_df)
cost = costs_df[costs_df.name .== parent1.name, parent2.name][1] affinity = affinities_df[affinities_df.name .== parent1.name, parent2.name][1]
end end
if !ismissing(cost) if !ismissing(affinity)
return cost return -affinity
end end
return 0.0 return 0.0
end end
function costsmatrix(plants::Vector{Symbol}, costs_df::DataFrame, classification::Classification)::Matrix{Float64} "Compute the costs matrix for all plants"
[computecost(plant1, plant2, costs_df, classification) for plant1 in plants, plant2 in plants] function costsmatrix(plants::Vector{Symbol}, affinities_df::DataFrame, classification::Classification)::Matrix{Float64}
[computecost(plant1, plant2, affinities_df, classification) for plant1 in plants, plant2 in plants]
end end
function loadcosts() function loadcosts()
plants = loadplants() plants = loadplants()
clf = loadclassification() clf = loadclassification()
costs_df = loadcostsdf() affinities_df = loadaffinitiesdf()
costs = costsmatrix(plants.name, costs_df, clf) costs = costsmatrix(plants.name, affinities_df, clf)
end end