From b36a9cea73cc95e9fb76e37b3ea36e92959bc6fc Mon Sep 17 00:00:00 2001 From: "Nicholas H.Tollervey" Date: Fri, 29 May 2026 15:47:10 +0100 Subject: [PATCH 1/2] Add PyScript examples for msprime Generated by apply_llm_response.py from prompts/msprime/response.toml. Examples included: - ancestry_basics: Simulating ancestry - recombination_and_mutations: Recombination and mutations - demography_two_populations: Demography: two populations Generated-By: apply_llm_response.py --- examples/msprime/README.md | 18 +++++ examples/msprime/ancestry_basics/code.py | 44 +++++++++++ examples/msprime/ancestry_basics/config.toml | 1 + examples/msprime/ancestry_basics/setup.py | 45 +++++++++++ .../demography_two_populations/code.py | 77 +++++++++++++++++++ .../demography_two_populations/config.toml | 1 + .../demography_two_populations/setup.py | 24 ++++++ examples/msprime/order.json | 5 ++ .../recombination_and_mutations/code.py | 70 +++++++++++++++++ .../recombination_and_mutations/config.toml | 1 + .../recombination_and_mutations/setup.py | 24 ++++++ 11 files changed, 310 insertions(+) create mode 100644 examples/msprime/README.md create mode 100644 examples/msprime/ancestry_basics/code.py create mode 100644 examples/msprime/ancestry_basics/config.toml create mode 100644 examples/msprime/ancestry_basics/setup.py create mode 100644 examples/msprime/demography_two_populations/code.py create mode 100644 examples/msprime/demography_two_populations/config.toml create mode 100644 examples/msprime/demography_two_populations/setup.py create mode 100644 examples/msprime/order.json create mode 100644 examples/msprime/recombination_and_mutations/code.py create mode 100644 examples/msprime/recombination_and_mutations/config.toml create mode 100644 examples/msprime/recombination_and_mutations/setup.py diff --git a/examples/msprime/README.md b/examples/msprime/README.md new file mode 100644 index 0000000..e780b80 --- /dev/null +++ b/examples/msprime/README.md @@ -0,0 +1,18 @@ +# msprime Examples + +Each sub-directory contains a self-contained example. The order in +which the examples are to appear is specified in `order.json` (an +array of directory names in the expected order). + +In each example directory you'll find: + +* `config.toml` - must conform to the specification outlined here: + https://docs.pyscript.net/latest/user-guide/configuration/ This is + parsed and ultimately turned into a JSON representation as part of + the package's API object. +* `setup.py` - Python code for contextual and environmental setup, + NOT SEEN BY THE END USER, but is run before the `code.py` code is + evaluated. Allows us to create useful (IPython) shims, avoid + repeating boilerplate and whatnot. +* `code.py` - the actual code added to the editor which forms the + practical example of using the package. diff --git a/examples/msprime/ancestry_basics/code.py b/examples/msprime/ancestry_basics/code.py new file mode 100644 index 0000000..3310a7f --- /dev/null +++ b/examples/msprime/ancestry_basics/code.py @@ -0,0 +1,44 @@ +""" +A first look at msprime: simulating the genealogy of a sample. + +msprime simulates the ancestral history of a sample of chromosomes +drawn from a population. The result is a "tree sequence" -- a compact +representation of all the genealogical trees along a chromosome. + +Docs: https://tskit.dev/msprime/docs/stable/ +""" +from IPython.core.display import display, HTML + +heading("A small coalescent simulation") +note( + "We sample 6 diploid individuals (so 12 chromosomes) from a " + "population of effective size 10,000 and simulate their shared " + "ancestry back to a common ancestor." +) + +ancestry = msprime.sim_ancestry( + samples=6, + population_size=10_000, + random_seed=42, +) + +note(f"Tree sequence summary:") +display(HTML(f"
{ancestry}
"), append=True) + +# With no recombination there is a single tree spanning the whole +# sequence. Pull it out and look at it. +tree = ancestry.first() +note( + f"Number of trees: {ancestry.num_trees}. " + f"Time to most recent common ancestor (TMRCA): " + f"{tree.tmrca(0, 1):.0f} generations " + f"(for samples 0 and 1)." +) + +heading("The genealogy as text") +note( + "Tskit can render trees as ASCII art. Leaves are present-day " + "samples; internal nodes are inferred ancestors with their " + "ages in generations." +) +display(HTML(f"
{tree.draw_text()}
"), append=True) diff --git a/examples/msprime/ancestry_basics/config.toml b/examples/msprime/ancestry_basics/config.toml new file mode 100644 index 0000000..f064b8e --- /dev/null +++ b/examples/msprime/ancestry_basics/config.toml @@ -0,0 +1 @@ +packages = ["msprime", "matplotlib", "numpy"] diff --git a/examples/msprime/ancestry_basics/setup.py b/examples/msprime/ancestry_basics/setup.py new file mode 100644 index 0000000..3405b9f --- /dev/null +++ b/examples/msprime/ancestry_basics/setup.py @@ -0,0 +1,45 @@ +""" +Shim IPython's display API onto PyScript so example code written in a +Jupyter/IPython idiom runs unmodified in the browser. +""" + +import sys +import types +import js +from pyscript import window, HTML, display as _display + +js.alert = window.alert + + +def display(*args, **kwargs): + return _display( + *args, **kwargs, target=__pyscript_display_target__, + ) + + +ipython = types.ModuleType("IPython") +core = types.ModuleType("IPython.core") +core_display = types.ModuleType("IPython.core.display") +core_display.display = display +core_display.HTML = HTML +ipython.core = core +core.display = core_display +ipython.get_ipython = lambda: None +ipython.display = core_display +sys.modules["IPython"] = ipython +sys.modules["IPython.core"] = core +sys.modules["IPython.core.display"] = core_display +sys.modules["IPython.display"] = core_display + + +def heading(text, level=2): + display(HTML(f"{text}"), append=True) + + +def note(text): + display(HTML(f"

{text}

"), append=True) + + +import numpy as np +import matplotlib.pyplot as plt +import msprime diff --git a/examples/msprime/demography_two_populations/code.py b/examples/msprime/demography_two_populations/code.py new file mode 100644 index 0000000..695b55a --- /dev/null +++ b/examples/msprime/demography_two_populations/code.py @@ -0,0 +1,77 @@ +# --------------------------------------------------------------------- +# Building a demographic model: two populations that split in the past. +# --------------------------------------------------------------------- + +heading("Two populations diverging from a common ancestor") +note( + "We define a Demography with two present-day populations, A and B, " + "that split from an ancestral population ANC 2,000 generations ago. " + "Then we sample 20 diploids from each and compare their genetic " + "diversity (pi) and divergence (Fst)." +) + +demography = msprime.Demography() +demography.add_population(name="A", initial_size=10_000) +demography.add_population(name="B", initial_size=5_000) +demography.add_population(name="ANC", initial_size=10_000) +demography.add_population_split( + time=2_000, derived=["A", "B"], ancestral="ANC", +) + +note("Population table from the demography:") +display(HTML(f"
{demography.debug()}
"), append=True) + +ancestry = msprime.sim_ancestry( + samples={"A": 20, "B": 20}, + demography=demography, + sequence_length=2_000_000, + recombination_rate=1e-8, + random_seed=99, +) +mutated = msprime.sim_mutations(ancestry, rate=1e-8, random_seed=99) + +# Sample sets, expressed as lists of sample node IDs per population. +samples_A = mutated.samples(population=0) +samples_B = mutated.samples(population=1) + +# Diversity within each population, and Fst between them. +pi_A = mutated.diversity(sample_sets=samples_A) +pi_B = mutated.diversity(sample_sets=samples_B) +fst = mutated.Fst(sample_sets=[samples_A, samples_B]) + +note( + f"Variant sites: {mutated.num_sites:,}. " + f"Population A pi = {pi_A:.5f}, " + f"Population B pi = {pi_B:.5f}, " + f"Fst(A, B) = {fst:.4f}." +) + +heading("Allele frequency spectrum, per population") +note( + "The site frequency spectrum (SFS) counts variants by how many " + "samples carry the derived allele. Population A has more " + "diversity, so its spectrum extends further to the right." +) + +afs_A = mutated.allele_frequency_spectrum( + sample_sets=[samples_A], polarised=True, span_normalise=False, +) +afs_B = mutated.allele_frequency_spectrum( + sample_sets=[samples_B], polarised=True, span_normalise=False, +) + +# Drop the monomorphic (0 and n) bins for plotting. +freqs_A = afs_A[1:-1] +freqs_B = afs_B[1:-1] +x = np.arange(1, len(freqs_A) + 1) + +fig, ax = plt.subplots(figsize=(9, 4)) +width = 0.4 +ax.bar(x - width / 2, freqs_A, width, color="steelblue", label="A") +ax.bar(x + width / 2, freqs_B, width, color="indianred", label="B") +ax.set_title("Site frequency spectrum") +ax.set_xlabel("Derived allele count in sample") +ax.set_ylabel("Number of variant sites") +ax.legend() +fig.tight_layout() +display(fig, append=True) diff --git a/examples/msprime/demography_two_populations/config.toml b/examples/msprime/demography_two_populations/config.toml new file mode 100644 index 0000000..f064b8e --- /dev/null +++ b/examples/msprime/demography_two_populations/config.toml @@ -0,0 +1 @@ +packages = ["msprime", "matplotlib", "numpy"] diff --git a/examples/msprime/demography_two_populations/setup.py b/examples/msprime/demography_two_populations/setup.py new file mode 100644 index 0000000..fc9530d --- /dev/null +++ b/examples/msprime/demography_two_populations/setup.py @@ -0,0 +1,24 @@ +"""Lighter setup for later cells: same names, no IPython shim.""" +import js +from pyscript import window, HTML, display as _display + +js.alert = window.alert + + +def display(*args, **kwargs): + return _display( + *args, **kwargs, target=__pyscript_display_target__, + ) + + +def heading(text, level=2): + display(HTML(f"{text}"), append=True) + + +def note(text): + display(HTML(f"

{text}

"), append=True) + + +import numpy as np +import matplotlib.pyplot as plt +import msprime diff --git a/examples/msprime/order.json b/examples/msprime/order.json new file mode 100644 index 0000000..0d06b5f --- /dev/null +++ b/examples/msprime/order.json @@ -0,0 +1,5 @@ +[ + "ancestry_basics", + "recombination_and_mutations", + "demography_two_populations" +] diff --git a/examples/msprime/recombination_and_mutations/code.py b/examples/msprime/recombination_and_mutations/code.py new file mode 100644 index 0000000..34f076b --- /dev/null +++ b/examples/msprime/recombination_and_mutations/code.py @@ -0,0 +1,70 @@ +# --------------------------------------------------------------------- +# Adding recombination and layering mutations onto the genealogy. +# --------------------------------------------------------------------- + +heading("Recombination breaks the chromosome into pieces") +note( + "With recombination, different segments of the chromosome have " + "different genealogical trees. We simulate a 1 Mb chromosome " + "for 10 diploid samples." +) + +ancestry = msprime.sim_ancestry( + samples=10, + population_size=10_000, + sequence_length=1_000_000, + recombination_rate=1e-8, + random_seed=7, +) + +note( + f"Trees along the 1 Mb chromosome: " + f"{ancestry.num_trees}." +) + +# Each tree spans an interval. Show the first few. +spans = [] +for tree in ancestry.trees(): + spans.append((tree.interval.left, tree.interval.right, tree.num_edges)) + if len(spans) >= 5: + break + +note("First five trees (interval and edge count):") +rows = "".join( + f"{l:,.0f}{r:,.0f}{e}" + for l, r, e in spans +) +display(HTML( + "" + "" + f"{rows}
leftrightedges
" +), append=True) + +heading("Sprinkle mutations onto the genealogy") +note( + "sim_mutations adds neutral mutations along the branches at a " + "given per-site, per-generation rate. The result is genetic " + "variation we can analyze." +) + +mutated = msprime.sim_mutations( + ancestry, + rate=1e-8, + random_seed=7, +) + +note( + f"Variant sites generated: " + f"{mutated.num_sites}." +) + +# Plot the distribution of variant positions along the chromosome. +positions = np.array([site.position for site in mutated.sites()]) + +fig, ax = plt.subplots(figsize=(9, 3)) +ax.hist(positions, bins=40, color="seagreen", edgecolor="white") +ax.set_title("Distribution of mutations along the 1 Mb chromosome") +ax.set_xlabel("Position (bp)") +ax.set_ylabel("Number of variant sites") +fig.tight_layout() +display(fig, append=True) diff --git a/examples/msprime/recombination_and_mutations/config.toml b/examples/msprime/recombination_and_mutations/config.toml new file mode 100644 index 0000000..f064b8e --- /dev/null +++ b/examples/msprime/recombination_and_mutations/config.toml @@ -0,0 +1 @@ +packages = ["msprime", "matplotlib", "numpy"] diff --git a/examples/msprime/recombination_and_mutations/setup.py b/examples/msprime/recombination_and_mutations/setup.py new file mode 100644 index 0000000..fc9530d --- /dev/null +++ b/examples/msprime/recombination_and_mutations/setup.py @@ -0,0 +1,24 @@ +"""Lighter setup for later cells: same names, no IPython shim.""" +import js +from pyscript import window, HTML, display as _display + +js.alert = window.alert + + +def display(*args, **kwargs): + return _display( + *args, **kwargs, target=__pyscript_display_target__, + ) + + +def heading(text, level=2): + display(HTML(f"{text}"), append=True) + + +def note(text): + display(HTML(f"

{text}

"), append=True) + + +import numpy as np +import matplotlib.pyplot as plt +import msprime From 9ae4d7e2202ff6adf807a1b34a1e6a76a038ed8f Mon Sep 17 00:00:00 2001 From: "Nicholas H.Tollervey" Date: Thu, 4 Jun 2026 17:46:46 +0100 Subject: [PATCH 2/2] Fix imports. --- examples/msprime/ancestry_basics/code.py | 5 +++++ examples/msprime/ancestry_basics/setup.py | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/msprime/ancestry_basics/code.py b/examples/msprime/ancestry_basics/code.py index 3310a7f..1d12b92 100644 --- a/examples/msprime/ancestry_basics/code.py +++ b/examples/msprime/ancestry_basics/code.py @@ -9,6 +9,11 @@ """ from IPython.core.display import display, HTML +import numpy as np +import matplotlib.pyplot as plt +import msprime + + heading("A small coalescent simulation") note( "We sample 6 diploid individuals (so 12 chromosomes) from a " diff --git a/examples/msprime/ancestry_basics/setup.py b/examples/msprime/ancestry_basics/setup.py index 3405b9f..84faac4 100644 --- a/examples/msprime/ancestry_basics/setup.py +++ b/examples/msprime/ancestry_basics/setup.py @@ -38,8 +38,3 @@ def heading(text, level=2): def note(text): display(HTML(f"

{text}

"), append=True) - - -import numpy as np -import matplotlib.pyplot as plt -import msprime