83 lines
2.5 KiB
Julia
83 lines
2.5 KiB
Julia
using Logging
|
|
using Unicode
|
|
|
|
using DataFrames
|
|
using CSV
|
|
using JSON
|
|
|
|
"""
    loadplants()::DataFrame

Read the plant table from `data/plants.csv` and return it as a `DataFrame` whose
`name` column has been converted to `Symbol`s.

The number of rows read is logged at `Info` level.
"""
function loadplants()::DataFrame
    # `CSV.File` + `DataFrame` replaces the single-argument `CSV.read(path)` form,
    # which recent CSV.jl releases reject because no sink type is given.
    plants = DataFrame(CSV.File("data/plants.csv"))
    @info "loaded $(size(plants, 1)) plants"
    # Downstream code works with plant names as Symbols.
    plants.name = Symbol.(plants.name)
    return plants
end
|
|
|
|
"""
    loadgarden(plants::Vector{Symbol})::Tuple{Matrix{Int}, Matrix{Bool}}

Read the garden layout from `data/garden.csv` and return `(garden, mask)`.

# Arguments
- `plants`: known plant names; a cell is matched against `String.(plants)`.

# Returns
- `garden`: for every cell, the index in `plants` of the cell's text after Unicode
  normalization (case folding and mark stripping), or `0` when nothing matches.
- `mask`: `true` for every cell whose raw text is exactly `"empty"`. The comparison
  happens before normalization, so it is case-sensitive.

Missing cells are treated as the empty string. The garden size is logged at `Info`
level.
"""
function loadgarden(plants::Vector{Symbol})::Tuple{Matrix{Int}, Matrix{Bool}}
    # `CSV.File` + `DataFrame` replaces the single-argument `CSV.read(path)` form,
    # which recent CSV.jl releases reject because no sink type is given.
    raw = DataFrame(CSV.File("data/garden.csv"))
    cells = coalesce.(raw, "")

    # `Matrix(df)` is the supported conversion; `convert(Matrix, df)` is deprecated.
    mask = Matrix(cells .== "empty")

    normalized = Unicode.normalize.(cells, casefold=true, stripmark=true)
    # NOTE(review): the plant names are compared as-is (not normalized) — this
    # presumably relies on `plants` already being lowercase/unaccented; confirm.
    indices = indexin(Matrix(normalized), String.(plants))
    # `indexin` yields `nothing` for cells that match no plant; encode those as 0.
    garden = replace(indices, nothing => 0)

    @assert size(garden) == size(mask)
    @info "loaded garden of size $(size(garden))"
    return garden, mask
end
|
|
|
|
"""
    loadclassification()::Classification

Parse `data/classification.json` and wrap the parsed content in a `Classification`.

The `type` field of the result is reported at `Debug` level.
"""
function loadclassification()::Classification
    classification = Classification(JSON.parsefile("data/classification.json"))
    @debug "loaded classification of type $(classification.type)"
    return classification
end
|
|
|
|
"""
    loadcostsdf()::DataFrame

Read the association table from `data/associations.csv`.

Column names are Unicode-normalized (case folding and mark stripping) and converted
to `Symbol`s. The `name` column is then set to the normalized names of columns
`2:end`, so row `i` is labelled with the name of column `i + 1`.

Cells that are `missing` in the file are left as `missing`; no default is
substituted here. The number of rows is logged at `Info` level.
"""
function loadcostsdf()::DataFrame
    # `CSV.File` + `DataFrame` replaces the sink-less `CSV.read(path; copycols=true)`
    # form, which recent CSV.jl releases reject because no sink type is given.
    df = DataFrame(CSV.File("data/associations.csv"); copycols=true)

    headers = String.(names(df))
    colnames = Symbol.(Unicode.normalize.(headers, casefold=true, stripmark=true))
    rename!(df, colnames)

    # Row labels come from the header: this requires exactly one row per column
    # after the first, otherwise the assignment fails on the length mismatch.
    df.name = colnames[2:end]

    @info "loaded cost matrix for $(size(df, 1)) plants"
    return df
end
|
|
|
|
# Look up the cost stored in `costs_df` at the row labelled `rowname` (via the `name`
# column) and the column `colname`. Returns `missing` when either name is not a
# column of the table, when no row carries that label, or when the cell is `missing`.
function _lookupcost(costs_df, rowname, colname)
    # `names` returns Symbols or Strings depending on the DataFrames.jl version;
    # normalizing to Symbols keeps the membership test valid in both cases.
    known = Symbol.(names(costs_df))
    (rowname in known && colname in known) || return missing
    matches = costs_df[costs_df.name .== rowname, colname]
    # Guard against an empty selection instead of failing on `matches[1]`.
    return isempty(matches) ? missing : first(matches)
end

"""
    computecost(plant1, plant2, costs_df, classification)::Float64

Return the association cost between `plant1` and `plant2`.

The cost is resolved in this order:
1. the non-missing value stored in `costs_df` for `(plant1, plant2)`;
2. otherwise the non-missing value stored for the pair formed by the `name` of the
   objects returned by `getfirstparent` for each plant;
3. otherwise `0.0` (also when either `getfirstparent` call returns `nothing`).

# Arguments
- `plant1`, `plant2`: plant names; `plant1` selects the row, `plant2` the column.
- `costs_df`: table with a `name` column of row labels and one column per name.
- `classification`: passed through to `getfirstparent`.
"""
function computecost(
    plant1::Symbol,
    plant2::Symbol,
    costs_df::DataFrame,
    classification::Classification,
)::Float64
    @debug "computecost($plant1, $plant2)"
    cost = _lookupcost(costs_df, plant1, plant2)
    ismissing(cost) || return cost

    # No explicit entry for the pair itself: fall back to the pair of first parents.
    parent1 = getfirstparent(plant1, classification)
    parent2 = getfirstparent(plant2, classification)
    (isnothing(parent1) || isnothing(parent2)) && return 0.0

    @debug "computecost($(parent1.name), $(parent2.name))"
    cost = _lookupcost(costs_df, parent1.name, parent2.name)
    return ismissing(cost) ? 0.0 : cost
end
|
|
|
|
"""
    costsmatrix(plants, costs_df, classification)::Matrix{Float64}

Build the square matrix of pairwise costs for `plants`: entry `[i, j]` is
`computecost(plants[i], plants[j], costs_df, classification)`.
"""
function costsmatrix(
    plants::Vector{Symbol},
    costs_df::DataFrame,
    classification::Classification,
)::Matrix{Float64}
    n = length(plants)
    costs = Matrix{Float64}(undef, n, n)
    # Columns in the outer loop, rows in the inner loop: the matrix is filled in
    # column-major order.
    for (j, colplant) in enumerate(plants), (i, rowplant) in enumerate(plants)
        costs[i, j] = computecost(rowplant, colplant, costs_df, classification)
    end
    return costs
end
|
|
|
|
"""
    loadcosts()

Load the plant table, the classification and the association table (in that order),
then return the pairwise cost matrix computed by `costsmatrix` for the plants'
`name` column.
"""
function loadcosts()
    planttable = loadplants()
    classification = loadclassification()
    associations = loadcostsdf()
    return costsmatrix(planttable.name, associations, classification)
end
|