!sfacts nmf_init \
    --verbose \
    --num-strains 15 \
    --random-seed 0 \
    sim.filt.ss-0.mgen.nc sim.filt.ss-0.approx-nmf.world.nc

2023-05-19 15:26:49,890 Input metagenotype shapes: Frozen({'sample': 50, 'position': 500, 'allele': 2}).


!sfacts clust_init \
    --verbose \
    --num-strains 15 \
    sim.filt.ss-0.mgen.nc sim.filt.ss-0.approx-clust.world.nc

2023-05-19 15:26:54,002 Input metagenotype shapes: Frozen({'sample': 50, 'position': 500, 'allele': 2}).
/Users/byronsmith/anaconda3/envs/StrainFacts-dev2/lib/python3.11/site-packages/sklearn/cluster/_agglomerative.py:983: FutureWarning: Attribute `affinity` was deprecated in version 1.2 and will be removed in 1.4. Use `metric` instead
  warnings.warn(
2023-05-19 15:26:54,055 Clustering approximated 15 strains.


import sfacts as sf

approx_nmf = sf.World.load('sim.filt.ss-0.approx-nmf.world.nc')
approx_clust = sf.World.load('sim.filt.ss-0.approx-clust.world.nc')

sf.plot.plot_community(
    approx_nmf,
    col_linkage_func=lambda w: w.metagenotype.linkage("sample"),
)
sf.plot.plot_community(
    approx_clust,
    col_linkage_func=lambda w: w.metagenotype.linkage("sample"),
)

<seaborn.matrix.ClusterGrid at 0x14cfd05d0>


sf.plot.plot_genotype(
    approx_nmf,
    col_linkage_func=lambda w: w.metagenotype.linkage("position"),
)
sf.plot.plot_genotype(
    approx_clust,
    col_linkage_func=lambda w: w.metagenotype.linkage("position"),
)

<seaborn.matrix.ClusterGrid at 0x14d579010>


!sfacts fit \
    --verbose \
    --init-from sim.filt.ss-0.approx-nmf.world.nc --init-vars genotype \
    --num-strains 15 \
    --random-seed 0 \
    sim.filt.ss-0.mgen.nc sim.filt.ss-0.fit2.world.nc

2023-05-19 15:27:04,876 Initialization data shapes: Frozen({'strain': 15, 'sample': 50, 'position': 500, 'allele': 2}).
2023-05-19 15:27:04,876 Initializing ['genotype'] with provided values.
2023-05-19 15:27:04,877 START: Fitting 15 strains with data shape Frozen({'sample': 50, 'position': 500, 'allele': 2}).
  1%|| 9206/1000000 [00:49<1:28:11, 187.25it/s, NLP=+1.11e+05, delta=+0.00e+00, lag100=+0.00e+00, lr=+7.63e-07]
2023-05-19 15:27:54,064 Converged: NLP=1.11281e+05
/Users/byronsmith/Projects/StrainFacts/sfacts/model_zoo/components.py:310: UserWarning: Using LogTriangle as an approximation for random sampling. This is probably a bad idea.
  warnings.warn(
2023-05-19 15:27:54,086 END: Fitting 15 strains with data shape Frozen({'sample': 50, 'position': 500, 'allele': 2}). (took 49 seconds)
2023-05-19 15:27:54,093 Average metagenotype error: 0.018038189722351845


fit = sf.World.load('sim.filt.ss-0.fit.world.nc')

sf.plot.plot_community(
    fit,
    col_linkage_func=lambda w: w.metagenotype.linkage("sample"),
    row_linkage_func=lambda w: w.genotype.linkage("strain")
)
sf.plot.plot_genotype(
    fit,
    row_linkage_func=lambda w: w.genotype.linkage("strain")
)

<seaborn.matrix.ClusterGrid at 0x14d997190>


!sfacts fit_geno \
    --verbose \
    --hyperparameters gamma_hyper=1.01 \
    --chunk-size=250 \
    --random-seed=0 \
    sim.filt.ss-0.fit2.world.nc sim.filt.ss-0.mgen.nc sim.filt.ss-0.fit3.geno.nc

2023-05-19 15:27:59,790 Selecting genotype block 0 as [0, 500) from 500 positions.
2023-05-19 15:27:59,791 START: Fitting genotype for 500 positions.
2023-05-19 15:27:59,791 Conditioned on provided community with 15 strains and 50 samples.
2023-05-19 15:27:59,791 Iteratively fitting genotype by chunks.
2023-05-19 15:27:59,791 START: Chunk [0, 250).
  0%|| 0/1000000 [00:00<?, ?it/s]/Users/byronsmith/Projects/StrainFacts/sfacts/model_zoo/components.py:310: UserWarning: Using LogTriangle as an approximation for random sampling. This is probably a bad idea.
  warnings.warn(
  1%|| 5971/1000000 [00:22<1:02:31, 264.98it/s, NLP=+5.45e+04, delta=+0.00e+00, lag100=+0.00e+00, lr=+7.63e-07]
2023-05-19 15:28:22,345 Converged: NLP=5.44817e+04
/Users/byronsmith/Projects/StrainFacts/sfacts/model_zoo/components.py:310: UserWarning: Using LogTriangle as an approximation for random sampling. This is probably a bad idea.
  warnings.warn(
2023-05-19 15:28:22,361 END: Chunk [0, 250). (took 23 seconds)
2023-05-19 15:28:22,362 START: Chunk [250, 500).
  0%|| 0/1000000 [00:00<?, ?it/s]/Users/byronsmith/Projects/StrainFacts/sfacts/model_zoo/components.py:310: UserWarning: Using LogTriangle as an approximation for random sampling. This is probably a bad idea.
  warnings.warn(
  1%|| 5722/1000000 [00:20<1:00:06, 275.68it/s, NLP=+5.45e+04, delta=+0.00e+00, lag100=+0.00e+00, lr=+7.63e-07]
2023-05-19 15:28:43,120 Converged: NLP=5.44825e+04
/Users/byronsmith/Projects/StrainFacts/sfacts/model_zoo/components.py:310: UserWarning: Using LogTriangle as an approximation for random sampling. This is probably a bad idea.
  warnings.warn(
2023-05-19 15:28:43,134 END: Chunk [250, 500). (took 21 seconds)
2023-05-19 15:28:43,135 START: Concatenating chunks.
2023-05-19 15:28:43,138 END: Concatenating chunks. (took 0 seconds)
2023-05-19 15:28:43,138 END: Fitting genotype for 500 positions. (took 43 seconds)


!sfacts fit_geno \
    --verbose \
    --hyperparameters gamma_hyper=1.01 \
    --chunk-size=250 \
    --random-seed=0 \
    sim.filt.ss-0.fit2.world.nc sim.filt.ss-1.mgen.nc sim.filt.ss-1.fit3.geno.nc

2023-05-19 15:28:48,305 Selecting genotype block 0 as [0, 499) from 499 positions.
2023-05-19 15:28:48,306 START: Fitting genotype for 499 positions.
2023-05-19 15:28:48,306 Conditioned on provided community with 15 strains and 50 samples.
2023-05-19 15:28:48,306 Iteratively fitting genotype by chunks.
2023-05-19 15:28:48,306 START: Chunk [0, 250).
  0%|| 0/1000000 [00:00<?, ?it/s]/Users/byronsmith/Projects/StrainFacts/sfacts/model_zoo/components.py:310: UserWarning: Using LogTriangle as an approximation for random sampling. This is probably a bad idea.
  warnings.warn(
  1%|| 5955/1000000 [00:21<1:00:43, 272.82it/s, NLP=+5.46e+04, delta=+0.00e+00, lag100=+0.00e+00, lr=+7.63e-07]
2023-05-19 15:29:10,154 Converged: NLP=5.45907e+04
/Users/byronsmith/Projects/StrainFacts/sfacts/model_zoo/components.py:310: UserWarning: Using LogTriangle as an approximation for random sampling. This is probably a bad idea.
  warnings.warn(
2023-05-19 15:29:10,171 END: Chunk [0, 250). (took 22 seconds)
2023-05-19 15:29:10,171 START: Chunk [250, 499).
  0%|| 0/1000000 [00:00<?, ?it/s]/Users/byronsmith/Projects/StrainFacts/sfacts/model_zoo/components.py:310: UserWarning: Using LogTriangle as an approximation for random sampling. This is probably a bad idea.
  warnings.warn(
  1%|| 5958/1000000 [00:21<58:54, 281.23it/s, NLP=+5.42e+04, delta=+0.00e+00, lag100=+0.00e+00, lr=+7.63e-07]
2023-05-19 15:29:31,359 Converged: NLP=5.42109e+04
/Users/byronsmith/Projects/StrainFacts/sfacts/model_zoo/components.py:310: UserWarning: Using LogTriangle as an approximation for random sampling. This is probably a bad idea.
  warnings.warn(
2023-05-19 15:29:31,377 END: Chunk [250, 499). (took 21 seconds)
2023-05-19 15:29:31,377 START: Concatenating chunks.
2023-05-19 15:29:31,381 END: Concatenating chunks. (took 0 seconds)
2023-05-19 15:29:31,381 END: Fitting genotype for 499 positions. (took 43 seconds)


!sfacts concat_geno \
    --metagenotype sim.filt.mgen.nc \
    --community sim.filt.ss-0.fit2.world.nc \
    --outpath sim.filt.fit3.world.nc \
    sim.filt.ss-0.fit3.geno.nc sim.filt.ss-1.fit3.geno.nc

Advanced Model Fitting¶