!sfacts evaluate_fit \
    --num-format '%.5f' \
    --scores mgen_error fwd_genotype_error rev_genotype_error bc_error unifrac_error entropy_error \
    --outpath sim.eval_all_fits.tsv \
    sim.world.nc \
    sim.filt.ss-0.fit.world.nc \
    sim.filt.ss-0.fit2.world.nc \
    sim.filt.fit3.world.nc
!column -t sim.eval_all_fits.tsv

score               sim.world.nc  sim.filt.ss-0.fit.world.nc  sim.filt.ss-0.fit2.world.nc  sim.filt.fit3.world.nc
mgen_error          0.01831       0.01824                     0.01804                      0.01766
fwd_genotype_error  0.00000       0.00000                     0.00000                      0.00000
rev_genotype_error  0.00000       0.00001                     0.00177                      0.00196
bc_error            0.00000       0.00618                     0.00833                      0.00833
unifrac_error       0.00000       0.04412                     0.04649                      0.06635
entropy_error       0.00000       0.02890                     0.04393                      0.04393


import sfacts as sf
import xarray as xr

sim = sf.World.load('sim.world.nc')
fit = sf.World.load('sim.filt.ss-0.fit.world.nc')
fit = sf.World(fit.data.assign_coords(position=fit.position))

sim = sim.sel(position=fit.position, sample=fit.sample)


mgen_error = sf.evaluation.metagenotype_error2(fit, discretized=True)
fwd_genotype_error = sf.evaluation.discretized_weighted_genotype_error(sim, fit)
rev_genotype_error = sf.evaluation.discretized_weighted_genotype_error(fit, sim)
bc_error = sf.evaluation.braycurtis_error(sim, fit)
unifrac_error = sf.evaluation.unifrac_error(sim, fit)
entropy_error = sf.evaluation.community_entropy_error(sim, fit)


sf.plot.plot_community(
    sim,
    col_linkage_func=lambda w: w.metagenotype.linkage('sample'),
    row_linkage_func=lambda w: w.genotype.linkage('strain'),
    col_colors_func=lambda w: xr.Dataset(dict(
        mg=mgen_error[1],
        bc=bc_error[1],
        uf=unifrac_error[1],
        ent=entropy_error[1].abs(),
    )),
    row_colors_func=lambda w: xr.Dataset(dict(
        fwd=fwd_genotype_error[1],
    )),
)
sf.plot.plot_community(
    fit,
    col_linkage_func=lambda w: w.metagenotype.linkage('sample'),
    row_linkage_func=lambda w: w.genotype.linkage('strain'),
    col_colors_func=lambda w: xr.Dataset(dict(
        mg=mgen_error[1],
        bc=bc_error[1],
        uf=unifrac_error[1],
        ent=entropy_error[1].abs(),
    )),
    row_colors_func=lambda w: xr.Dataset(dict(
        rev=rev_genotype_error[1],
    )),
)
sf.plot.plot_metagenotype(
    fit,
    col_linkage_func=lambda w: w.metagenotype.linkage('sample'),
    row_linkage_func=lambda w: w.metagenotype.linkage('position'),
    col_colors_func=lambda w: xr.Dataset(dict(
        mg=mgen_error[1],
        bc=bc_error[1],
        uf=unifrac_error[1],
        ent=entropy_error[1].abs(),
    )),
    row_colors_func=lambda w: xr.Dataset(dict(
        _=w.metagenotype.mean_depth("position"),
    )),
)
sf.plot.plot_genotype(
    fit,
    row_linkage_func=lambda w: w.genotype.linkage('strain'),
    col_linkage_func=lambda w: w.metagenotype.linkage('position'),
    row_colors_func=lambda w: xr.Dataset(dict(
        rev=rev_genotype_error[1],
    )),
)
sf.plot.plot_genotype(
    sim,
    row_linkage_func=lambda w: w.genotype.linkage('strain'),
    col_linkage_func=lambda w: w.metagenotype.linkage('position'),
    row_colors_func=lambda w: xr.Dataset(dict(
        fwd=fwd_genotype_error[1],
    )),
)

<seaborn.matrix.ClusterGrid at 0x145d46f90>


import scipy as sp
import matplotlib.pyplot as plt

mg_dist = sim.metagenotype.pdist()
uf_dist = fit.unifrac_pdist()

plt.scatter(sp.spatial.distance.squareform(mg_dist), sp.spatial.distance.squareform(uf_dist))
sp.stats.spearmanr(sp.spatial.distance.squareform(mg_dist), sp.spatial.distance.squareform(uf_dist))

SignificanceResult(statistic=0.5460012636245404, pvalue=4.081769548834536e-96)

Fit Evaluation¶

Epilogue: Selecting hyperparameters¶