stac-geoparquet¶
stac-geoparquet is a relatively new specification that describes how to store STAC items in GeoParquet files. Use stacrs to read and write stac-geoparquet.
In [2]:
Copied!
import os.path

import humanize
import stacrs

# NOTE(review): the "Search performance" cell awaits `stacrs.search`, while this
# cell calls it synchronously -- confirm which form the installed stacrs expects.

# Search the Landsat STAC API for items intersecting a single point, sorted by
# `-properties.datetime` and capped at 1000 items.
point = {"type": "Point", "coordinates": [-105.119, 40.173]}
items = stacrs.search(
    "https://landsatlook.usgs.gov/stac-server",
    collections="landsat-c2l2-sr",
    intersects=point,
    sortby="-properties.datetime",
    max_items=1000,
)

# Output path -> extra keyword arguments passed to `stacrs.write`.
write_options = {
    "items.json": {},  # GeoJSON feature collection
    "items.ndjson": {},  # newline-delimited JSON
    "items.parquet": {},  # stac-geoparquet
    "items-compressed.parquet": {"format": "parquet[snappy]"},  # compressed stac-geoparquet
}
for file_name, extra in write_options.items():
    stacrs.write(file_name, items, **extra)

# Report the on-disk size of each file that was just written.
for file_name in write_options:
    print(f"{file_name}: {humanize.naturalsize(os.path.getsize(file_name))}")

# Read the compressed file back and check that every item survived the round trip.
items = stacrs.read("items-compressed.parquet")
feature_count = len(items["features"])
assert feature_count == 1000
import os.path

import humanize
import stacrs

# NOTE(review): the "Search performance" cell awaits `stacrs.search`, while this
# cell calls it synchronously -- confirm which form the installed stacrs expects.

# Search the Landsat STAC API for items intersecting a single point, sorted by
# `-properties.datetime` and capped at 1000 items.
point = {"type": "Point", "coordinates": [-105.119, 40.173]}
items = stacrs.search(
    "https://landsatlook.usgs.gov/stac-server",
    collections="landsat-c2l2-sr",
    intersects=point,
    sortby="-properties.datetime",
    max_items=1000,
)

# Output path -> extra keyword arguments passed to `stacrs.write`.
write_options = {
    "items.json": {},  # GeoJSON feature collection
    "items.ndjson": {},  # newline-delimited JSON
    "items.parquet": {},  # stac-geoparquet
    "items-compressed.parquet": {"format": "parquet[snappy]"},  # compressed stac-geoparquet
}
for file_name, extra in write_options.items():
    stacrs.write(file_name, items, **extra)

# Report the on-disk size of each file that was just written.
for file_name in write_options:
    print(f"{file_name}: {humanize.naturalsize(os.path.getsize(file_name))}")

# Read the compressed file back and check that every item survived the round trip.
items = stacrs.read("items-compressed.parquet")
feature_count = len(items["features"])
assert feature_count == 1000
items.json: 21.4 MB items.ndjson: 21.4 MB items.parquet: 488.2 kB items-compressed.parquet: 178.8 kB
In [3]:
Copied!
import contextily
import stacrs
from geopandas import GeoDataFrame

# NOTE(review): the "Search performance" cell awaits `stacrs.search`, while this
# cell calls it synchronously -- confirm which form the installed stacrs expects.

# `stacrs.search` is pointed at a remote parquet file here instead of a STAC API.
url = "https://github.com/gadomski/2024-09-FOSS4G-NA-stac-rs/raw/refs/heads/main/benches/1000-sentinel-2-items-stac-rs.parquet"
point = {"type": "Point", "coordinates": [-105.1019, 40.1672]}
items = stacrs.search(url, intersects=point)

# Build a GeoDataFrame from the matches and draw their geometries unfilled
# on top of a basemap that uses the frame's own CRS.
data_frame = GeoDataFrame.from_features(items, crs="EPSG:4326")
axis = data_frame.plot(facecolor="none")
basemap_crs = data_frame.crs.to_string()
contextily.add_basemap(axis, crs=basemap_crs)
import contextily
import stacrs
from geopandas import GeoDataFrame

# NOTE(review): the "Search performance" cell awaits `stacrs.search`, while this
# cell calls it synchronously -- confirm which form the installed stacrs expects.

# `stacrs.search` is pointed at a remote parquet file here instead of a STAC API.
url = "https://github.com/gadomski/2024-09-FOSS4G-NA-stac-rs/raw/refs/heads/main/benches/1000-sentinel-2-items-stac-rs.parquet"
point = {"type": "Point", "coordinates": [-105.1019, 40.1672]}
items = stacrs.search(url, intersects=point)

# Build a GeoDataFrame from the matches and draw their geometries unfilled
# on top of a basemap that uses the frame's own CRS.
data_frame = GeoDataFrame.from_features(items, crs="EPSG:4326")
axis = data_frame.plot(facecolor="none")
basemap_crs = data_frame.crs.to_string()
contextily.add_basemap(axis, crs=basemap_crs)
Search to¶
If you want to save the results of a STAC API search directly to a file, use `search_to`. This will generally be more performant than using `search` then `write`.
In [4]:
Copied!
# NOTE(review): the "Search performance" cell awaits `stacrs.search`; confirm
# whether `stacrs.search_to` must be awaited under the installed stacrs version.

# Search parameters plus the output format, forwarded as keyword arguments.
search_parameters = {
    "collections": "landsat-c2l2-sr",
    "intersects": {"type": "Point", "coordinates": [-105.119, 40.173]},
    "sortby": "-properties.datetime",
    "max_items": 1000,
    "format": "parquet[snappy]",
}

# Kept as the final expression so the return value is shown as the cell output
# (presumably the number of items written -- verify against the stacrs docs).
stacrs.search_to(
    "items-compressed.parquet",
    "https://landsatlook.usgs.gov/stac-server",
    **search_parameters,
)
# NOTE(review): the "Search performance" cell awaits `stacrs.search`; confirm
# whether `stacrs.search_to` must be awaited under the installed stacrs version.

# Search parameters plus the output format, forwarded as keyword arguments.
search_parameters = {
    "collections": "landsat-c2l2-sr",
    "intersects": {"type": "Point", "coordinates": [-105.119, 40.173]},
    "sortby": "-properties.datetime",
    "max_items": 1000,
    "format": "parquet[snappy]",
}

# Kept as the final expression so the return value is shown as the cell output
# (presumably the number of items written -- verify against the stacrs docs).
stacrs.search_to(
    "items-compressed.parquet",
    "https://landsatlook.usgs.gov/stac-server",
    **search_parameters,
)
Out[4]:
1000
Search performance¶
Here's a performance comparison when searching with stacrs versus pystac-client.
In [3]:
Copied!
import time
from pystac_client import Client
import stacrs
client = Client.open("https://stac.eoapi.dev")
start = time.time()
item_search = client.search(collections=["WildFires-LosAngeles-Jan-2025"])
items = list(item_search.items_as_dicts())
end = time.time()
print(f"pystac-client: {end - start}s ({len(items)} items)")
start = time.time()
items = await stacrs.search(
"https://stac.eoapi.dev", collections=["WildFires-LosAngeles-Jan-2025"]
)
end = time.time()
print(f"stacrs: {end - start}s ({len(items)} items)")
import time
from pystac_client import Client
import stacrs
client = Client.open("https://stac.eoapi.dev")
start = time.time()
item_search = client.search(collections=["WildFires-LosAngeles-Jan-2025"])
items = list(item_search.items_as_dicts())
end = time.time()
print(f"pystac-client: {end - start}s ({len(items)} items)")
start = time.time()
items = await stacrs.search(
"https://stac.eoapi.dev", collections=["WildFires-LosAngeles-Jan-2025"]
)
end = time.time()
print(f"stacrs: {end - start}s ({len(items)} items)")
pystac-client: 7.5231709480285645s (467 items) stacrs: 7.2893760204315186s (467 items)