Compute costs matrix from phylogenetic tree and reorganise sources

This commit is contained in:
Dimitri Lozeve 2020-02-22 16:41:25 +01:00
parent c117b9cccb
commit 34d9cd000d
5 changed files with 136 additions and 215 deletions

View file

@ -1,52 +0,0 @@
name,Ail,Asperge,Ciboulette,Echalote,Oignon,Poireau,Fève,Haricot,Pois,Aneth,Carotte,Celeri,Cerfeuil,Fenouil,Panais,Persil,Aubergine,Poivron,Pomme de terre,Tomate,Chou,Chou rave,Chou rouge,Navet,Radis,Concombre,Cornichon,Cucurbita,Courgette,Melon,Artichaut,Basilic,Bette,Betterave,Capucine,Chicoree,Cresson,Epinard,Fraisier,Laitue,Maïs,Menthe,Oeillet d'inde,Sarriette,Sauge,Scorsonère,Souci,Tétragone,Tournesol,Vigne,Lavande
Ail,,,,,1,,-1,-1,-1,1,1,1,1,1,1,1,-1,-1,1,1,,,,,,,,,,,,,,1,,,,,1,1,,,,,,,1,,,,
Asperge,,,,,,1,-1,1,-1,1,1,1,1,1,1,1,-1,-1,-1,1,,,,,,1,1,,,,,,,,,,,,,,,,,,,,,,,,
Ciboulette,,,,,,,-1,-1,-1,1,1,1,1,1,1,1,-1,-1,-1,-1,,,,,,,,1,1,,,,,,,,,,,,,,,,,,,,,,
Echalote,,,,,-1,,-1,-1,-1,1,1,1,1,1,1,1,-1,-1,-1,-1,,,,,,,,1,1,,,,,,,,,,1,,,,,,,,,,,,
Oignon,1,,,-1,,-1,-1,-1,-1,1,1,1,1,1,1,1,-1,-1,-1,1,-1,,,,,1,1,1,1,,1,,1,1,,,,,1,1,,,,,,1,1,,,,
Poireau,,1,,,-1,1,-1,-1,-1,1,1,1,1,1,1,1,-1,-1,-1,1,,,,,,,,,,1,,,-1,,,,,1,1,1,,,,,,1,1,,,,
Feve,-1,-1,-1,-1,-1,-1,,,,,,,,,1,,,,,,1,1,1,1,1,1,1,1,1,1,,,,,,,,,,,,,,,,,,,,,
Haricot,-1,1,-1,-1,-1,-1,,1,-1,,1,1,,,1,,1,,1,,1,1,1,1,1,1,1,1,1,1,,,1,1,,,,1,1,1,2,,,,,,,1,,,
Pois,-1,-1,-1,-1,-1,-1,,-1,1,,1,1,,,,,,,1,-1,1,1,1,1,1,1,1,1,1,1,,,,,,,,,,1,,,,,,,,,,,
Aneth,1,1,1,1,1,1,,,,1,1,1,1,1,1,1,,,,,1,1,1,1,1,,,,,,,,,,,,,,,1,,,,,,,,,,,
Carotte,1,1,1,1,1,1,,1,1,1,1,1,1,1,1,1,,,,1,1,1,1,1,1,,,,,,,,1,-1,,1,,1,,1,,-1,,,,1,,,,,
Celeri,1,1,1,1,1,1,,1,1,1,1,1,1,1,1,1,,,,1,1,1,1,1,1,,,,,,,,1,,,,,,,,-1,,,,,,,,,,
Cerfeuil,1,1,1,1,1,1,,,,1,1,1,1,1,1,1,,,,,1,1,1,1,-1,,,,,,,,,,,,,,,1,,,,,,,,,,,
Fenouil,1,1,1,1,1,1,,,,1,1,1,1,1,1,1,,,,-1,1,1,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,
Panais,1,1,1,1,1,1,1,1,,1,1,1,1,1,1,1,,,,1,1,1,1,1,1,,,,,,,,,,,,,,,-1,,,,,,,,,,,
Persil,1,1,1,1,1,1,,,,1,1,1,1,1,1,1,,,,1,1,1,1,1,1,,,,,,,,,,,,,,1,-1,,,,,,,,,,,
Aubergine,-1,-1,-1,-1,-1,-1,,1,,,,,,,,,,1,-1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Poivron,-1,-1,-1,-1,-1,-1,,,,,,,,,,,1,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Pomme de terre,1,-1,-1,-1,-1,-1,,1,1,,,,,,,,-1,,,-1,,,,,-1,,,,-1,,,,,,,,,,,,1,,,,,,1,,,,
Tomate,1,1,-1,-1,-1,1,,,-1,,1,1,,-1,1,1,,1,-1,1,1,,-1,,1,,,,,,,1,,-1,,,,1,,1,1,,,,,,1,1,,,
Chou,,,,,-1,,1,1,1,1,1,1,1,1,1,1,,,,1,,,,,,1,1,,,,1,,,,,,,1,-1,1,,,,1,,,,,,,
Chou rave,,,,,,,1,1,1,1,1,1,1,1,1,1,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,1,,,,,
Chou rouge,,,,,,,1,1,1,1,1,1,1,1,1,1,,,,-1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Navet,,,,,,,1,1,1,1,1,1,1,1,1,1,,,,,,,,,,,,,,,,,1,,,,,1,,,,,,,,,,,,,
Radis,,,,,,,1,1,1,1,1,1,-1,1,1,1,,,-1,1,,,,,,-1,-1,,,,1,,1,,,,1,1,,1,,,,,,1,,,,,
Concombre,,1,,,1,,1,1,1,,,,,,,,,,,,1,,,,-1,,,-1,-1,,,1,,,,,,,,1,1,,1,,,,,,,,
Cornichon,,1,,,1,,1,1,1,,,,,,,,,,,,1,,,,-1,,,-1,-1,,,1,,,,,,,,1,1,,1,,,,,,,,
Cucurbita,,,1,1,1,,1,1,1,,,,,,,,,,,,,,,,,-1,-1,,,,,,,,,,,,,,1,,,,,,,,,,
Courgette,,,1,1,1,,1,1,1,,,,,,,,,,-1,,,,,,,-1,-1,,,,,,,,,,,,,,,,,,,,,1,,,
Melon,,,,,,1,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,1,1,,,,,,,,,,
Artichaut,,,,,1,,1,1,1,,,,,,,,,,,,1,,,,1,,,,,,,,,,,,,,,1,,,,,,,,,,,
Basilic,,,,,,,1,1,1,,,,,,,,,,,1,,,,,,1,1,,,,,,,,,,,,,,,,,,,,,,,,
Bette,,,,,1,-1,1,1,,,1,1,,,,,,,,,,1,,1,1,,,,,,,,,,,,,,,1,,,,,,,,,,,
Betterave,1,,,,1,,1,1,,,-1,,,,,,,,,-1,,,,,,,,,,,,,,,,,,1,,1,-1,,,,,,,,,,
Capucine,,,,,,,1,1,1,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,
Chicorée,,,,,,,1,1,1,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,
Cresson,,,,,,,1,1,1,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,
Epinard,,,,,,1,1,1,1,,1,,,,,,,,,1,1,,,1,1,,,,,,,,,1,,,,,1,1,,,,,,,,,,,
Fraisier,1,,,1,1,1,1,1,1,,,,,,,1,,,,,-1,,,,,,,,,,,,,,,,,1,,1,,,,,1,,,,,,
Laitue,1,,,,1,1,1,1,1,1,1,,1,,-1,-1,,,,1,1,,,,1,1,1,,,1,1,,1,1,,,,1,1,1,,,,,,1,,,-1,,
Maïs,,,,,,,1,2,1,,,-1,,,,,,,1,1,,,,,,1,1,1,,1,,,,-1,,,,,,,,,,,,,,,,,
Menthe,,,,,,,1,1,1,,-1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Oeillet d'inde,,,,,,,1,1,1,,,,,,,,,,,,,,,,,1,1,,,,,,,,,,,,,,,,,,,,,,,,
Sarriette,,,,,,,1,1,1,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Sauge,,,,,,,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,
Scorsonère,,,,,1,1,1,1,1,,1,,,,,,,,,,,1,,,1,,,,,,,,,,,,,,,1,,,,,,,,,,,
Souci,1,,,,1,1,1,1,1,,,,,,,,,,1,1,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,
Tétragone,,,,,,,1,1,1,,,,,,,,,,,1,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,
Tournesol,,,,,,,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-1,,,,,,,,,,,
Vigne,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Lavande,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1 name Ail Asperge Ciboulette Echalote Oignon Poireau Fève Haricot Pois Aneth Carotte Celeri Cerfeuil Fenouil Panais Persil Aubergine Poivron Pomme de terre Tomate Chou Chou rave Chou rouge Navet Radis Concombre Cornichon Cucurbita Courgette Melon Artichaut Basilic Bette Betterave Capucine Chicoree Cresson Epinard Fraisier Laitue Maïs Menthe Oeillet d'inde Sarriette Sauge Scorsonère Souci Tétragone Tournesol Vigne Lavande
2 Ail 1 -1 -1 -1 1 1 1 1 1 1 1 -1 -1 1 1 1 1 1 1
3 Asperge 1 -1 1 -1 1 1 1 1 1 1 1 -1 -1 -1 1 1 1
4 Ciboulette -1 -1 -1 1 1 1 1 1 1 1 -1 -1 -1 -1 1 1
5 Echalote -1 -1 -1 -1 1 1 1 1 1 1 1 -1 -1 -1 -1 1 1 1
6 Oignon 1 -1 -1 -1 -1 -1 1 1 1 1 1 1 1 -1 -1 -1 1 -1 1 1 1 1 1 1 1 1 1 1 1
7 Poireau 1 -1 1 -1 -1 -1 1 1 1 1 1 1 1 -1 -1 -1 1 1 -1 1 1 1 1 1
8 Feve -1 -1 -1 -1 -1 -1 1 1 1 1 1 1 1 1 1 1 1
9 Haricot -1 1 -1 -1 -1 -1 1 -1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1
10 Pois -1 -1 -1 -1 -1 -1 -1 1 1 1 1 -1 1 1 1 1 1 1 1 1 1 1 1
11 Aneth 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
12 Carotte 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -1 1 1 1 -1 1
13 Celeri 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -1
14 Cerfeuil 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -1 1
15 Fenouil 1 1 1 1 1 1 1 1 1 1 1 1 1 -1 1 1 1 1 1
16 Panais 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -1
17 Persil 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -1
18 Aubergine -1 -1 -1 -1 -1 -1 1 1 -1
19 Poivron -1 -1 -1 -1 -1 -1 1 1
20 Pomme de terre 1 -1 -1 -1 -1 -1 1 1 -1 -1 -1 -1 1 1
21 Tomate 1 1 -1 -1 -1 1 -1 1 1 -1 1 1 1 -1 1 1 -1 1 1 -1 1 1 1 1 1
22 Chou -1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -1 1 1
23 Chou rave 1 1 1 1 1 1 1 1 1 1 1 1
24 Chou rouge 1 1 1 1 1 1 1 1 1 1 -1
25 Navet 1 1 1 1 1 1 1 1 1 1 1 1
26 Radis 1 1 1 1 1 1 -1 1 1 1 -1 1 -1 -1 1 1 1 1 1 1
27 Concombre 1 1 1 1 1 1 -1 -1 -1 1 1 1 1
28 Cornichon 1 1 1 1 1 1 -1 -1 -1 1 1 1 1
29 Cucurbita 1 1 1 1 1 1 -1 -1 1
30 Courgette 1 1 1 1 1 1 -1 -1 -1 1
31 Melon 1 1 1 1 1 1 1
32 Artichaut 1 1 1 1 1 1 1
33 Basilic 1 1 1 1 1 1
34 Bette 1 -1 1 1 1 1 1 1 1 1
35 Betterave 1 1 1 1 -1 -1 1 1 -1
36 Capucine 1 1 1 1
37 Chicorée 1 1 1 1 1
38 Cresson 1 1 1 1
39 Epinard 1 1 1 1 1 1 1 1 1 1 1 1
40 Fraisier 1 1 1 1 1 1 1 1 -1 1 1 1
41 Laitue 1 1 1 1 1 1 1 1 1 -1 -1 1 1 1 1 1 1 1 1 1 1 1 1 1 -1
42 Maïs 1 2 1 -1 1 1 1 1 1 1 -1
43 Menthe 1 1 1 -1
44 Oeillet d'inde 1 1 1 1 1
45 Sarriette 1 1 1 1
46 Sauge 1 1 1 1
47 Scorsonère 1 1 1 1 1 1 1 1 1
48 Souci 1 1 1 1 1 1 1 1 1
49 Tétragone 1 1 1 1 1
50 Tournesol 1 1 1 -1
51 Vigne
52 Lavande

View file

@ -1,51 +0,0 @@
Ail
Asperge
Ciboulette
Echalote
Oignon
Poireau
Feve
Haricot
Pois
Aneth
Carotte
Celeri
Cerfeuil
Fenouil
Panais
Persil
Aubergine
Poivron
Pomme de terre
Tomate
Chou
Chou rave
Chou rouge
Navet
Radis
Concombre
Cornichon
Cucurbita
Courgette
Melon
Artichaut
Basilic
Bette
Betterave
Capucine
Chicorée
Cresson
Epinard
Fraisier
Laitue
Maïs
Menthe
Oeillet d'inde
Sarriette
Sauge
Scorsonère
Souci
Tétragone
Tournesol
Vigne
Lavande

View file

@ -1,15 +1,10 @@
module GardenOptim module GardenOptim
using Logging using Logging
using Unicode
using DataFrames
using DocStringExtensions using DocStringExtensions
using CSV
using JSON
using Tables
export loadclassification, loadplants, loadgarden, loadcosts export loadplants, loadgarden, loadclassification, loadcosts
export update!, randomgardenevolution!, outputgarden export update!, randomgardenevolution!, outputgarden
@template (FUNCTIONS, METHODS, MACROS) = @template (FUNCTIONS, METHODS, MACROS) =
@ -18,112 +13,8 @@ export update!, randomgardenevolution!, outputgarden
$(DOCSTRING) $(DOCSTRING)
""" """
mutable struct Classification include("classification.jl")
type::Symbol include("loaddata.jl")
name::Symbol
bio::String
children::Vector{Classification}
parent::Classification
function Classification(classif::Dict{String, Any})
children = [Classification(d) for d in get(classif, "children", [])]
type = Symbol(Unicode.normalize(classif["type"], casefold=true, stripmark=true))
name = Symbol(Unicode.normalize(classif["name"], casefold=true, stripmark=true))
classif = new(type, name, get(classif, "bio", ""), children)
for child in children
child.parent = classif
end
classif
end
end
"""
Parse `data/classification.json` and build the `Classification` tree from it.

A debug-level log entry reports the type of the resulting root node, which is
then returned.
"""
function loadclassification()
    rawtree = JSON.parsefile("data/classification.json")
    root = Classification(rawtree)
    @debug "loaded classification of type $(root.type)"
    return root
end
"""
Read `data/plants.csv` and return it as a `DataFrame`.

The number of rows is logged at info level, and the `name` column is replaced
by the same values converted to `Symbol`s.
"""
function loadplants()::DataFrame
    table = CSV.read("data/plants.csv")
    nplants = size(table, 1)
    @info "loaded $(nplants) plants"
    # Names are handled as symbols by the functions that consume this table.
    table.name = Symbol.(table.name)
    return table
end
"""
Read `data/garden.csv` and return `(garden, mask)`.

- `garden` holds, for every cell of the file, the position of the cell's text
  in `plants`, or `0` when the text is not one of `plants`.
- `mask` is `true` exactly where the cell's text is `"empty"`.

Missing cells are treated as empty strings.
"""
function loadgarden(plants::Vector{String})::Tuple{Matrix{Int}, Matrix{Bool}}
    rawgrid = CSV.read("data/garden.csv")
    filled = coalesce.(rawgrid, "")
    mask = convert(Matrix, filled .== "empty")
    cells = convert(Matrix, filled)
    # `indexin` yields `nothing` for unknown names; encode those as 0.
    indices = replace(indexin(cells, plants), nothing => 0)
    @assert size(indices) == size(mask)
    @info "loaded garden of size $(size(indices))"
    return indices, mask
end
"""
Read `data/associations.csv` and return it as a `DataFrame` with normalised
labels.

Column names are case-folded and stripped of diacritics, then converted to
`Symbol`s. The `name` column is overwritten with those normalised column names
(all but the first), so row labels and column labels agree. Empty cells are not
replaced here.
"""
function loadcosts()::DataFrame
    associations = CSV.read("data/associations.csv", copycols=true)
    rawnames = String.(names(associations))
    normalised = [
        Symbol(Unicode.normalize(label, casefold=true, stripmark=true)) for
        label in rawnames
    ]
    rename!(associations, normalised)
    # The first column is the label column itself; the rest are plant names.
    associations.name = normalised[2:end]
    @info "loaded cost matrix for $(size(associations, 1)) plants"
    return associations
end
# function loadcosts()::Matrix{Float64}
# df = CSV.read("data/costs.csv")
# df = coalesce.(df, 0) # replace missing values by 0
# costs = convert(Matrix, df[:, 2:end])
# @info "loaded cost matrix of size $(size(costs))"
# # ensure the matrix is symmetric: keep the max of itself and its transpose
# costs = Float64.(max.(costs, permutedims(costs)))
# end
"""
Search the tree rooted at `classification` depth-first for the node called
`name` and return that node's `parent` field.

Returns `nothing` when no node in the tree has that name. Reading `parent` on
a node whose parent was never assigned raises `UndefRefError`.
"""
function getparent(name::Symbol, classification::Classification)
    if classification.name == name
        return classification.parent
    end
    for subtree in classification.children
        found = getparent(name, subtree)
        isnothing(found) || return found
    end
    return nothing
end
"""
Return the association cost between `plant1` and `plant2`, falling back on
their ancestors in `classification` when no direct value is recorded.

The lookup proceeds as follows:

1. If `costs` has a row labelled `plant1` and a column `plant2` holding a
   non-missing value, that value is returned.
2. Otherwise the parent of `plant1` is tried against `plant2` (recursively).
3. If that yields nothing, `plant1` is tried against the parent of `plant2`.

Returns `missing` when none of these produce a value.

NOTE(review): the previous revision declared `::Float64` while also executing
`return missing`, which can only fail at the conversion. The return type is
widened here so the "unknown" case is representable; confirm callers handle
`missing`.
"""
function computecost(costs::DataFrame, plant1::Symbol, plant2::Symbol, classification::Classification)::Union{Float64, Missing}
    # Name of the direct parent of `plant`, or `nothing` when `plant` is not in
    # the tree or is a node without an assigned parent (for which `getparent`
    # raises `UndefRefError`). Any other exception is propagated.
    function parentname(plant::Symbol)
        try
            node = getparent(plant, classification)
            return isnothing(node) ? nothing : node.name
        catch err
            err isa UndefRefError || rethrow()
            return nothing
        end
    end

    @debug "$plant1 and $plant2"
    row = findfirst(isequal(plant1), costs.name)
    # `:name` is the label column, not a cost column.
    if !isnothing(row) && plant2 != :name && plant2 in propertynames(costs)
        cost = costs[row, plant2]
        ismissing(cost) || return cost
    else
        @debug "$plant1 and $plant2 not in costs"
    end
    @debug "missing"

    # Climb from the first plant; keep the result instead of discarding it.
    ancestor1 = parentname(plant1)
    if !isnothing(ancestor1)
        cost = computecost(costs, ancestor1, plant2, classification)
        ismissing(cost) || return cost
    end

    # Then climb from the second plant.
    ancestor2 = parentname(plant2)
    if !isnothing(ancestor2)
        return computecost(costs, plant1, ancestor2, classification)
    end
    return missing
end
"Return a random index to be filled from the garden mask." "Return a random index to be filled from the garden mask."
function randomindex(mask::Matrix{Bool})::Int function randomindex(mask::Matrix{Bool})::Int

50
src/classification.jl Normal file
View file

@ -0,0 +1,50 @@
using Unicode
"""
Node of a classification tree.

Fields:
- `type`: kind of node, taken from the `"type"` entry (case-folded, diacritics
  stripped).
- `name`: node name, taken from the `"name"` entry and normalised the same way.
- `bio`: contents of the optional `"bio"` entry, `""` when absent.
- `children`: sub-nodes built recursively from the optional `"children"` entry.
- `parent`: back-reference assigned by the parent's constructor; it stays
  unassigned on the node the tree was built from.
"""
mutable struct Classification
    type::Symbol
    name::Symbol
    bio::String
    children::Vector{Classification}
    parent::Classification

    function Classification(classif::Dict{String, Any})
        # Children are built first so that they can be linked back below.
        subtrees = Classification[
            Classification(entry) for entry in get(classif, "children", [])
        ]
        normalised(key) = Symbol(
            Unicode.normalize(classif[key], casefold=true, stripmark=true)
        )
        node = new(
            normalised("type"), normalised("name"), get(classif, "bio", ""), subtrees
        )
        for subtree in subtrees
            subtree.parent = node
        end
        return node
    end
end
"""
Write a one-line summary of `clf` to `io`, of the form
`Classification(<type> <name> (<bio>) with <n> children)`.

The `(<bio>)` part is omitted when `bio` is empty, and the singular "child" is
used when there are fewer than two children.
"""
function Base.show(io::IO, clf::Classification)
    nchildren = length(clf.children)
    if nchildren < 2
        childrentext = " with $(nchildren) child"
    else
        childrentext = " with $(nchildren) children"
    end
    biotext = isempty(clf.bio) ? "" : " ($(clf.bio))"
    # Write to the supplied stream, not stdout, so that `sprint`, `repr`,
    # string interpolation and redirected output all see the text.
    print(io, "Classification(", clf.type, " ", clf.name, biotext, childrentext, ")")
end
"""
Search the tree rooted at `classification` depth-first for the node called
`name`, then climb from it and return its highest ancestor below the node
named `:god`.

If the matched node has no assigned parent (it is the node the tree was built
from), or if the climb reaches a parentless node without meeting `:god`, the
last node reached is returned instead of raising `UndefRefError`.

Returns `nothing` when no node in the tree is called `name`.
"""
function getfirstparent(name::Symbol, classification::Classification)
    if classification.name == name
        node = classification
        # `parent` is left unassigned on the top node, so check it before use.
        while isdefined(node, :parent) && node.parent.name != :god
            node = node.parent
        end
        return node
    end
    for child in classification.children
        found = getfirstparent(name, child)
        if !isnothing(found)
            return found
        end
    end
    return nothing
end

83
src/loaddata.jl Normal file
View file

@ -0,0 +1,83 @@
using Logging
using Unicode
using DataFrames
using CSV
using JSON
"""
Read `data/plants.csv` and return it as a `DataFrame`.

The number of rows is logged at info level, and the `name` column is replaced
by the same values converted to `Symbol`s.
"""
function loadplants()::DataFrame
    table = CSV.read("data/plants.csv")
    nplants = size(table, 1)
    @info "loaded $(nplants) plants"
    # Names are handled as symbols by the functions that consume this table.
    table.name = Symbol.(table.name)
    return table
end
"""
Read `data/garden.csv` and return `(garden, mask)`.

- `mask` is `true` exactly where the cell's text, before normalisation, is
  `"empty"`.
- `garden` holds, for every cell, the position of the cell's normalised text
  (case-folded, diacritics stripped) among the string forms of `plants`, or
  `0` when it matches none of them.

Missing cells are treated as empty strings.
"""
function loadgarden(plants::Vector{Symbol})::Tuple{Matrix{Int}, Matrix{Bool}}
    rawgrid = CSV.read("data/garden.csv")
    filled = coalesce.(rawgrid, "")
    mask = convert(Matrix, filled .== "empty")
    normalised = Unicode.normalize.(filled, casefold=true, stripmark=true)
    plantlabels = String.(plants)
    # `indexin` yields `nothing` for unknown names; encode those as 0.
    indices = replace(indexin(convert(Matrix, normalised), plantlabels), nothing => 0)
    @assert size(indices) == size(mask)
    @info "loaded garden of size $(size(indices))"
    return indices, mask
end
"""
Parse `data/classification.json` and build the `Classification` tree from it.

A debug-level log entry reports the type of the resulting root node, which is
then returned.
"""
function loadclassification()::Classification
    rawtree = JSON.parsefile("data/classification.json")
    root = Classification(rawtree)
    @debug "loaded classification of type $(root.type)"
    return root
end
"""
Read `data/associations.csv` and return it as a `DataFrame` with normalised
labels.

Column names are case-folded and stripped of diacritics, then converted to
`Symbol`s. The `name` column is overwritten with those normalised column names
(all but the first), so row labels and column labels agree. Empty cells are not
replaced here.
"""
function loadcostsdf()::DataFrame
    associations = CSV.read("data/associations.csv", copycols=true)
    rawnames = String.(names(associations))
    normalised = [
        Symbol(Unicode.normalize(label, casefold=true, stripmark=true)) for
        label in rawnames
    ]
    rename!(associations, normalised)
    # The first column is the label column itself; the rest are plant names.
    associations.name = normalised[2:end]
    @info "loaded cost matrix for $(size(associations, 1)) plants"
    return associations
end
# Value stored in `costs_df` at the row labelled `row` (matched against the
# `name` column) and the column `col`, or `missing` when either label is
# unknown. The `:name` column holds labels, not costs, so it is never looked up.
function _lookupcost(costs_df::DataFrame, row::Symbol, col::Symbol)
    col === :name && return missing
    # `propertynames` yields symbols, whereas `names` yields strings on some
    # DataFrames versions, which would make a `Symbol` membership test
    # silently false.
    col in propertynames(costs_df) || return missing
    idx = findfirst(isequal(row), costs_df.name)
    isnothing(idx) && return missing
    return costs_df[idx, col]
end

"""
Return the association cost between `plant1` and `plant2`.

The value recorded in `costs_df` for the pair is used when present. Otherwise
both plants are replaced by the node returned by `getfirstparent` and the
lookup is repeated once with those names. `0.0` is returned when neither lookup
yields a value, or when either plant cannot be found in `classification`.

The lookup is directional: the row is `plant1` and the column is `plant2`.
"""
function computecost(plant1::Symbol, plant2::Symbol, costs_df::DataFrame, classification::Classification)::Float64
    @debug "computecost($plant1, $plant2)"
    cost = _lookupcost(costs_df, plant1, plant2)
    ismissing(cost) || return cost

    parent1 = getfirstparent(plant1, classification)
    parent2 = getfirstparent(plant2, classification)
    if isnothing(parent1) || isnothing(parent2)
        return 0.0
    end

    @debug "computecost($(parent1.name), $(parent2.name))"
    # Unknown pairs are neutral.
    return coalesce(_lookupcost(costs_df, parent1.name, parent2.name), 0.0)
end
"""
Build the square matrix of pairwise costs for `plants`.

Entry `[i, j]` is `computecost(plants[i], plants[j], costs_df, classification)`.
"""
function costsmatrix(plants::Vector{Symbol}, costs_df::DataFrame, classification::Classification)::Matrix{Float64}
    n = length(plants)
    costs = Matrix{Float64}(undef, n, n)
    # Column by column, so the rows vary fastest as in a 2-d comprehension.
    for (j, plant2) in enumerate(plants), (i, plant1) in enumerate(plants)
        costs[i, j] = computecost(plant1, plant2, costs_df, classification)
    end
    return costs
end
"""
Load the plants, the classification and the association table from disk, in
that order, and return the cost matrix computed for the plants' `name` column.
"""
function loadcosts()
    planttable = loadplants()
    taxonomy = loadclassification()
    associations = loadcostsdf()
    return costsmatrix(planttable.name, associations, taxonomy)
end