dadi_two_epoch / app_natparams.py
RyanGutenkunst's picture
Add version featuring natural parameters, and update .gitignore
7f5bef2
import gzip, os, tempfile
import dadi, dadi.Demes, matplotlib.pyplot as plt, numpy as np
import demesdraw, gradio as gr
import app
output_yaml = """description: Single-population bottleneck demography.
time_units: generations
post_decline_fraction: ratio_of_modern_individuals_to_ancient_individuals
parameters:
generations: {0:.1f}
post_decline_fraction: {1:.3f}
"""
def dadi_main_natparams(log10Na, log10Nc, log10Tgen, mu, L, fname, state=None):
Na, Nc, Tgen = 10**log10Na, 10**log10Nc, 10**log10Tgen
nu = Nc/Na
log10T = np.log10(Tgen/(2*Na))
theta = 4 * mu * L * Na
results = list(app.dadi_main(np.log10(nu), log10T, theta, mu, L, fname, state))
results[1] = str(int(Na))
results[2] = str(int(Nc))
results[3] = str(int(Tgen))
del results[4]
return results
description = """This app uses [dadi](https://dadi.readthedocs.io) to simulate two-epoch demographic histories for comparison with population genomic data.
The two-epoch model posits that the population size changed instantaneously at some time in the past.
The parameter *Na* is the ancestral effective population size.
The parameter *Nc* is the contemporary effective population size.
The parameter *Tgen* is number of generations in the past at which the size change happened.
Use the sliders to adjust the values of *Na*, *Nc*, and *Tgen*.
Note that the slides adjust the log10 values of the parameters, because they can vary over orders of magnitude in real data.
Notice how the allele frequency spectrum changes.
(Parameterizations with small *Nc/Na* and large *Tgen* can take a long time to compute.)
The *Na*, *Nc*, and *Tgen* fields report the non-log10 values of those parameters.
Also plotted is a [demesdraw](https://grahamgower.github.io/demesdraw) representation of the demographic model.
You can upload a file containing population genomic data to compare your models with. (First, click the "X" button to clear the *Data file* field.)
If you upload a VCF file (.vcf or .vcf.gz), the app will lump all samples in that file into a single population to analyze.
(The dadi VCF parser is not fast, so be patient when loading VCF data.)
You can also upload a single-population dadi frequency spectrum (.fs) file.
The preloaded data are from the [2024 GHIST bottleneck challenge](https://www.synapse.org/Synapse:syn51614781/wiki/626440).
You can also download a YAML file with your current parameter settings, for easy submission to the [GHIST](https://ghi.st) bottleneck demographic history challenges.
"""
article = app.article
# Set up Gradio
twoepoch_natparams = gr.Interface(
fn=dadi_main_natparams,
inputs=[gr.Slider(0, 6, 0.001, randomize=True, label='log10(Na)'),
gr.Slider(0, 6, 0.001, randomize=True, label='log10(Nc)'),
gr.Slider(0, 6, 0.001, randomize=True, label='log10(Tgen)'),
gr.Number(label="Mutation rate (mu)", value=1.26e-8),
gr.Number(label="Sequence length (L)", value=1e8),
gr.File('GHIST_2024_bottleneck.fs', label="Data file (.vcf, .vcf.gz, or .fs)"),
gr.State(value=None)],
outputs=[gr.Plot(),
gr.Textbox(label="Na (Ancestral effective population size)", lines=1),
gr.Textbox(label="Nc (Contemporary effective population size)", lines=1),
gr.Textbox(label="T (in generations)", lines=1),
gr.Plot(),
gr.DownloadButton(label='Download YAML'),
gr.State()],
clear_btn=None,
live=True,
title="Two-epoch Demographic History Modeling with dadi",
description=description,
article=article,
flagging_mode="never",
)
if __name__ == "__main__":
twoepoch_natparams.launch() # Uncomment this line to run the Gradio interface directly