stac-geoparquet¶
stac-geoparquet is a relatively new specification that describes how to store STAC items in GeoParquet files. Use `stacrs` to read and write stac-geoparquet.
In [2]:
Copied!
import os.path
import stacrs
import humanize
# Ask the USGS STAC API for up to 1000 items from the `landsat-c2l2-sr`
# collection that intersect a single point. The leading "-" in `sortby`
# requests descending order, i.e. newest items first.
landsat_query = dict(
    collections="landsat-c2l2-sr",
    intersects={"type": "Point", "coordinates": [-105.119, 40.173]},
    sortby="-properties.datetime",
    max_items=1000,
)
items = stacrs.search("https://landsatlook.usgs.gov/stac-server", **landsat_query)

# Every output file, paired with the extra keyword arguments (if any) that
# `stacrs.write` needs to produce it.
outputs = {
    # GeoJSON feature collection
    "items.json": {},
    # newline-delimited JSON
    "items.ndjson": {},
    # stac-geoparquet
    "items.parquet": {},
    # compressed stac-geoparquet
    "items-compressed.parquet": {"format": "parquet[snappy]"},
}
for file_name, write_options in outputs.items():
    stacrs.write(file_name, items, **write_options)

# Report the on-disk size of each file in human-readable units.
for file_name in outputs:
    size_in_bytes = os.path.getsize(file_name)
    print(f"{file_name}: {humanize.naturalsize(size_in_bytes)}")

# Read the compressed file back and confirm it holds 1000 features.
items = stacrs.read("items-compressed.parquet")
feature_count = len(items["features"])
assert feature_count == 1000
import os.path
import stacrs
import humanize
# Ask the USGS STAC API for up to 1000 items from the `landsat-c2l2-sr`
# collection that intersect a single point. The leading "-" in `sortby`
# requests descending order, i.e. newest items first.
landsat_query = dict(
    collections="landsat-c2l2-sr",
    intersects={"type": "Point", "coordinates": [-105.119, 40.173]},
    sortby="-properties.datetime",
    max_items=1000,
)
items = stacrs.search("https://landsatlook.usgs.gov/stac-server", **landsat_query)

# Every output file, paired with the extra keyword arguments (if any) that
# `stacrs.write` needs to produce it.
outputs = {
    # GeoJSON feature collection
    "items.json": {},
    # newline-delimited JSON
    "items.ndjson": {},
    # stac-geoparquet
    "items.parquet": {},
    # compressed stac-geoparquet
    "items-compressed.parquet": {"format": "parquet[snappy]"},
}
for file_name, write_options in outputs.items():
    stacrs.write(file_name, items, **write_options)

# Report the on-disk size of each file in human-readable units.
for file_name in outputs:
    size_in_bytes = os.path.getsize(file_name)
    print(f"{file_name}: {humanize.naturalsize(size_in_bytes)}")

# Read the compressed file back and confirm it holds 1000 features.
items = stacrs.read("items-compressed.parquet")
feature_count = len(items["features"])
assert feature_count == 1000
items.json: 21.4 MB items.ndjson: 21.4 MB items.parquet: 488.2 kB items-compressed.parquet: 178.8 kB
In [3]:
Copied!
import contextily
import stacrs
from geopandas import GeoDataFrame
# `stacrs.search` is pointed at a stac-geoparquet file here instead of a STAC
# API endpoint, and is asked for the items that intersect one point.
url = "https://github.com/gadomski/2024-09-FOSS4G-NA-stac-rs/raw/refs/heads/main/benches/1000-sentinel-2-items-stac-rs.parquet"
point_of_interest = {"type": "Point", "coordinates": [-105.1019, 40.1672]}
items = stacrs.search(url, intersects=point_of_interest)

# Load the matching items into a GeoDataFrame, declaring the coordinates as
# longitude/latitude (EPSG:4326).
data_frame = GeoDataFrame.from_features(items, crs="EPSG:4326")

# Draw the item footprints as unfilled outlines, then put a basemap underneath
# using the same CRS as the data.
axis = data_frame.plot(facecolor="none")
basemap_crs = data_frame.crs.to_string()
contextily.add_basemap(axis, crs=basemap_crs)
import contextily
import stacrs
from geopandas import GeoDataFrame
# `stacrs.search` is pointed at a stac-geoparquet file here instead of a STAC
# API endpoint, and is asked for the items that intersect one point.
url = "https://github.com/gadomski/2024-09-FOSS4G-NA-stac-rs/raw/refs/heads/main/benches/1000-sentinel-2-items-stac-rs.parquet"
point_of_interest = {"type": "Point", "coordinates": [-105.1019, 40.1672]}
items = stacrs.search(url, intersects=point_of_interest)

# Load the matching items into a GeoDataFrame, declaring the coordinates as
# longitude/latitude (EPSG:4326).
data_frame = GeoDataFrame.from_features(items, crs="EPSG:4326")

# Draw the item footprints as unfilled outlines, then put a basemap underneath
# using the same CRS as the data.
axis = data_frame.plot(facecolor="none")
basemap_crs = data_frame.crs.to_string()
contextily.add_basemap(axis, crs=basemap_crs)
Search to¶
If you want to save the results of a STAC API search directly to a file, use `search_to`. This will generally be more performant than calling `search` and then `write`.
In [4]:
Copied!
# Run the search and write the results directly to a snappy-compressed
# stac-geoparquet file, without holding the items in a Python variable first.
output_path = "items-compressed.parquet"
api_href = "https://landsatlook.usgs.gov/stac-server"

# Kept as the cell's last expression so that its return value is displayed
# (presumably the number of items written — confirm against the stacrs docs).
stacrs.search_to(
    output_path,
    api_href,
    collections="landsat-c2l2-sr",
    intersects={"type": "Point", "coordinates": [-105.119, 40.173]},
    sortby="-properties.datetime",
    max_items=1000,
    format="parquet[snappy]",
)
# Run the search and write the results directly to a snappy-compressed
# stac-geoparquet file, without holding the items in a Python variable first.
output_path = "items-compressed.parquet"
api_href = "https://landsatlook.usgs.gov/stac-server"

# Kept as the cell's last expression so that its return value is displayed
# (presumably the number of items written — confirm against the stacrs docs).
stacrs.search_to(
    output_path,
    api_href,
    collections="landsat-c2l2-sr",
    intersects={"type": "Point", "coordinates": [-105.119, 40.173]},
    sortby="-properties.datetime",
    max_items=1000,
    format="parquet[snappy]",
)
Out[4]:
1000