pyscript · ntoll · May 29, 2026 · Jun 4, 2026
diff --git a/examples/fastparquet/README.md b/examples/fastparquet/README.md
@@ -0,0 +1,18 @@
+# fastparquet Examples
+
+Each sub-directory contains a self-contained example. The order in
+which the examples are to appear is specified in `order.json` (an
+array of directory names in the expected order).
+
+In each example directory you'll find:
+
+* `config.toml` - must conform to the specification outlined at
+  <https://docs.pyscript.net/latest/user-guide/configuration/>. It is
+  parsed and ultimately turned into a JSON representation as part of
+  the package's API object.
+* `setup.py` - Python code for contextual and environmental setup.
+  It is NOT SEEN BY THE END USER, but is run before the `code.py` code
+  is evaluated. It allows us to create useful (IPython) shims, avoid
+  repeating boilerplate and whatnot.
+* `code.py` - the actual code added to the editor which forms the
+  practical example of using the package.
diff --git a/examples/fastparquet/column_selection_and_categories/code.py b/examples/fastparquet/column_selection_and_categories/code.py
@@ -0,0 +1,55 @@
+# ---------------------------------------------------------------------
+# Reading just the columns you need, and decoding low-cardinality
+# string columns as pandas Categoricals for memory savings.
+# ---------------------------------------------------------------------
+
+heading("Selective column reads")
+note(
+    "Parquet's columnar layout means you can read just the columns "
+    "you need without scanning the rest. We'll write a wider dataset "
+    "and then load only two of its columns."
+)
+
+# A wider table: orders from a fictional online bookshop.
+n_orders = 2000
+genres = ["Fiction", "Non-fiction", "Poetry", "Science", "History"]
+countries = ["UK", "US", "DE", "FR", "JP", "BR"]
+
+orders = pd.DataFrame({
+    "order_id": np.arange(n_orders),
+    "genre": pd.Categorical(rng.choice(genres, size=n_orders), categories=genres),
+    "country": rng.choice(countries, size=n_orders),
+    "price": rng.uniform(5, 35, size=n_orders).round(2),
+    "quantity": rng.integers(1, 6, size=n_orders),
+    "discount": rng.uniform(0, 0.4, size=n_orders).round(3),
+    "shipping": rng.uniform(0, 8, size=n_orders).round(2),
+})
+
+write("/tmp/orders.parq", orders, compression="SNAPPY")
+parquet_file = ParquetFile("/tmp/orders.parq")
+
+note(f"All columns in the file: <code>{parquet_file.columns}</code>")
+
+# Load only two columns, and treat 'genre' as a categorical.
+slim = parquet_file.to_pandas(
+    columns=["genre", "price"],
+    categories=["genre"],
+)
+
+note("Loaded only <code>genre</code> and <code>price</code>:")
+display(slim.head(), append=True)
+note(
+    f"<code>genre</code> dtype is "
+    f"<strong>{slim['genre'].dtype}</strong> "
+    f"with categories <code>{list(slim['genre'].cat.categories)}</code>."
+)
+
+# A quick aggregate and bar chart from the slim frame.
+avg_by_genre = slim.groupby("genre", observed=True)["price"].mean().sort_values()
+
+fig, ax = plt.subplots(figsize=(8, 4))
+avg_by_genre.plot(kind="barh", ax=ax, color="teal")
+ax.set_title("Average book price by genre")
+ax.set_xlabel("Average price ($)")
+fig.tight_layout()
+display(fig, append=True)
diff --git a/examples/fastparquet/column_selection_and_categories/config.toml b/examples/fastparquet/column_selection_and_categories/config.toml
@@ -0,0 +1 @@
+packages = ["fastparquet", "pandas", "numpy", "matplotlib"]
diff --git a/examples/fastparquet/column_selection_and_categories/setup.py b/examples/fastparquet/column_selection_and_categories/setup.py
@@ -0,0 +1,27 @@
+"""Setup for cell 2: same names as cell 1, without the IPython shim."""
+import js
+from pyscript import window, HTML, display as _display
+
+js.alert = window.alert
+
+
+def display(*args, **kwargs):
+    return _display(
+        *args, **kwargs, target=__pyscript_display_target__,
+    )
+
+
+def heading(text, level=2):
+    display(HTML(f"<h{level}>{text}</h{level}>"), append=True)
+
+
+def note(text):
+    display(HTML(f"<p>{text}</p>"), append=True)
+
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from fastparquet import ParquetFile, write
+
+rng = np.random.default_rng(7)
diff --git a/examples/fastparquet/order.json b/examples/fastparquet/order.json
@@ -0,0 +1,5 @@
+[
+    "write_and_read",
+    "column_selection_and_categories",
+    "row_groups_and_filters"
+]
diff --git a/examples/fastparquet/row_groups_and_filters/code.py b/examples/fastparquet/row_groups_and_filters/code.py
@@ -0,0 +1,71 @@
+# ---------------------------------------------------------------------
+# Splitting a file into row groups and using filters to skip groups
+# whose statistics rule them out. This is the foundation of fast
+# analytical reads on large Parquet datasets.
+# ---------------------------------------------------------------------
+
+heading("Row groups: chunking a file for selective reads")
+note(
+    "A row group is a horizontal slice of the table. Each group "
+    "stores per-column min/max statistics, so a reader can skip "
+    "entire groups that can't possibly match a filter."
+)
+
+# Fifty thousand sensor readings, sorted by timestamp so row groups
+# correspond to contiguous time windows.
+n_readings = 50_000
+sensors = pd.DataFrame({
+    "timestamp": pd.date_range("2026-01-01", periods=n_readings, freq="min"),
+    "sensor_id": rng.integers(0, 20, size=n_readings),
+    "value": rng.normal(100, 15, size=n_readings).round(3),
+}).sort_values("timestamp").reset_index(drop=True)
+
+# row_group_offsets carves the table into chunks at the given row
+# indices. Here: ten groups of 5,000 rows each.
+offsets = list(range(0, n_readings, 5_000))
+write(
+    "/tmp/sensors.parq",
+    sensors,
+    row_group_offsets=offsets,
+    compression="SNAPPY",
+)
+
+parquet_file = ParquetFile("/tmp/sensors.parq")
+note(
+    f"File has <strong>{len(parquet_file.row_groups)}</strong> row groups, "
+    f"<strong>{parquet_file.count()}</strong> rows total."
+)
+
+# Per-row-group time ranges from the file's statistics.
+stats = parquet_file.statistics
+group_ranges = pd.DataFrame({
+    "min_timestamp": pd.to_datetime(stats["min"]["timestamp"]),
+    "max_timestamp": pd.to_datetime(stats["max"]["timestamp"]),
+    "min_value": stats["min"]["value"],
+    "max_value": stats["max"]["value"],
+})
+note("First few row-group statistics:")
+display(group_ranges.head().round(3), append=True)
+
+# A filter is a list of (column, op, value) tuples. fastparquet uses
+# row-group statistics to skip groups that can't match.
+target_sensor = 7
+filters = [
+    ("sensor_id", "==", target_sensor),
+    ("value", ">", 120.0),
+]
+
+filtered = parquet_file.to_pandas(filters=filters)
+# Filters operate at row-group granularity, so apply them again
+# row-wise for an exact result.
+exact = filtered[
+    (filtered["sensor_id"] == target_sensor)
+    & (filtered["value"] > 120.0)
+]
+
+heading("Filtered result")
+note(
+    f"Rows where <code>sensor_id == {target_sensor}</code> and "
+    f"<code>value &gt; 120</code>: <strong>{len(exact)}</strong>."
+)
+display(exact.head(), append=True)
diff --git a/examples/fastparquet/row_groups_and_filters/config.toml b/examples/fastparquet/row_groups_and_filters/config.toml
@@ -0,0 +1 @@
+packages = ["fastparquet", "pandas", "numpy"]
diff --git a/examples/fastparquet/row_groups_and_filters/setup.py b/examples/fastparquet/row_groups_and_filters/setup.py
@@ -0,0 +1,26 @@
+"""Setup for cell 3: same names again, no IPython shim."""
+import js
+from pyscript import window, HTML, display as _display
+
+js.alert = window.alert
+
+
+def display(*args, **kwargs):
+    return _display(
+        *args, **kwargs, target=__pyscript_display_target__,
+    )
+
+
+def heading(text, level=2):
+    display(HTML(f"<h{level}>{text}</h{level}>"), append=True)
+
+
+def note(text):
+    display(HTML(f"<p>{text}</p>"), append=True)
+
+
+import numpy as np
+import pandas as pd
+from fastparquet import ParquetFile, write
+
+rng = np.random.default_rng(7)
diff --git a/examples/fastparquet/write_and_read/code.py b/examples/fastparquet/write_and_read/code.py
@@ -0,0 +1,57 @@
+"""
+A first taste of fastparquet: write a DataFrame to a Parquet file
+in the in-memory virtual filesystem, then read it back.
+
+Parquet is a columnar storage format that's compact, fast, and
+preserves dtypes. fastparquet is a pure-Python(+Cython) implementation
+that integrates closely with pandas.
+
+Docs: https://fastparquet.readthedocs.io
+"""
+from IPython.core.display import display, HTML
+import numpy as np
+import pandas as pd
+from fastparquet import ParquetFile, write
+
+rng = np.random.default_rng(7)
+
+
+# A small synthetic dataset: temperature readings from weather stations.
+n_rows = 500
+stations = ["Reykjavik", "Lisbon", "Cairo", "Singapore", "Wellington"]
+
+readings = pd.DataFrame({
+    "station": rng.choice(stations, size=n_rows),
+    "timestamp": pd.date_range("2026-01-01", periods=n_rows, freq="h"),
+    "temperature_c": rng.normal(15, 8, size=n_rows).round(2),
+    "humidity_pct": rng.uniform(30, 95, size=n_rows).round(1),
+})
+
+heading("1. Original DataFrame")
+note("Five hundred hourly readings from five weather stations.")
+display(readings.head(), append=True)
+
+# Write to a Parquet file. Pyodide gives us a writable in-memory
+# filesystem, so we can use a regular path.
+path = "/tmp/weather.parq"
+write(path, readings, compression="SNAPPY")
+note(f"Wrote {len(readings)} rows to <code>{path}</code> with Snappy compression.")
+
+# Read it back. ParquetFile gives us metadata first, without loading
+# the whole file into memory.
+parquet_file = ParquetFile(path)
+
+heading("2. Parquet file metadata")
+note(
+    f"Columns: <code>{parquet_file.columns}</code><br>"
+    f"Rows: <strong>{parquet_file.count()}</strong><br>"
+    f"Row groups: <strong>{len(parquet_file.row_groups)}</strong>"
+)
+
+heading("3. Round-tripped DataFrame")
+roundtripped = parquet_file.to_pandas()
+display(roundtripped.head(), append=True)
+note(
+    f"Dtypes preserved? "
+    f"<strong>{(readings.dtypes == roundtripped.dtypes).all()}</strong>"
+)
diff --git a/examples/fastparquet/write_and_read/config.toml b/examples/fastparquet/write_and_read/config.toml
@@ -0,0 +1 @@
+packages = ["fastparquet", "pandas", "numpy"]
diff --git a/examples/fastparquet/write_and_read/setup.py b/examples/fastparquet/write_and_read/setup.py
@@ -0,0 +1,41 @@
+"""
+Shim IPython's display API onto PyScript so example code written in a
+Jupyter/IPython idiom runs unmodified in the browser.
+"""
+
+import sys
+import types
+import js
+from pyscript import window, HTML, display as _display
+
+js.alert = window.alert
+
+
+def display(*args, **kwargs):
+    """Wrap pyscript.display so output lands in the example target."""
+    return _display(
+        *args, **kwargs, target=__pyscript_display_target__,
+    )
+
+
+ipython = types.ModuleType("IPython")
+core = types.ModuleType("IPython.core")
+core_display = types.ModuleType("IPython.core.display")
+core_display.display = display
+core_display.HTML = HTML
+ipython.core = core
+core.display = core_display
+ipython.get_ipython = lambda: None
+ipython.display = core_display
+sys.modules["IPython"] = ipython
+sys.modules["IPython.core"] = core
+sys.modules["IPython.core.display"] = core_display
+sys.modules["IPython.display"] = core_display
+
+
+def heading(text, level=2):
+    display(HTML(f"<h{level}>{text}</h{level}>"), append=True)
+
+
+def note(text):
+    display(HTML(f"<p>{text}</p>"), append=True)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		packages = ["fastparquet", "pandas", "numpy", "matplotlib"]