Source code for stac_check.lint

import importlib.metadata
import importlib.resources
import json
import os
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple, Union

import requests
import yaml
from dotenv import load_dotenv
from stac_validator.utilities import is_valid_url
from stac_validator.validate import StacValidate

load_dotenv()


[docs] @dataclass class Linter: """A class for linting STAC JSON files and generating validation messages. Args: item (Union[str, dict]): A URL, file name, or dictionary representing a STAC JSON file. config_file (Optional[str], optional): A path to a YAML configuration file. Defaults to None. assets (bool, optional): A boolean value indicating whether to validate assets. Defaults to False. links (bool, optional): A boolean value indicating whether to validate links. Defaults to False. recursive (bool, optional): A boolean value indicating whether to perform recursive validation. Defaults to False. max_depth (Optional[int], optional): An optional integer indicating the maximum depth to validate recursively. Defaults to None. assets_open_urls (bool): Whether to open assets URLs when validating assets. Defaults to True. headers (dict): HTTP headers to include in the requests. pydantic (bool, optional): A boolean value indicating whether to use pydantic validation. Defaults to False. verbose (bool, optional): A boolean value indicating whether to enable verbose output. Defaults to False. Attributes: data (dict): A dictionary representing the STAC JSON file. message (dict): A dictionary containing the validation message for the STAC JSON file. config (dict): A dictionary containing the configuration settings. asset_type (str): A string representing the asset type, if one is specified. version (str): A string representing the version of the STAC standard used in the STAC JSON file. validator_version (str): A string representing the version of the STAC validator used to validate the STAC JSON file. validate_all (dict): A dictionary containing the validation message for all STAC JSON files found recursively, if recursive validation was performed. valid_stac (bool): A boolean value indicating whether the STAC JSON file is valid. error_type (str): A string representing the type of error in the STAC JSON file, if one exists. error_msg (str): A string representing the error message in the STAC JSON file, if one exists. invalid_asset_format (List[str]): A list of URLs with invalid asset formats, if assets were validated. invalid_asset_request (List[str]): A list of URLs with invalid asset requests, if assets were validated. invalid_link_format (List[str]): A list of URLs with invalid link formats, if links were validated. invalid_link_request (List[str]): A list of URLs with invalid link requests, if links were validated. schema (List[str]): A list of the STAC JSON file's JSON schema files. object_id (str): A string representing the STAC JSON file's ID. file_name (str): A string representing the name of the file containing the STAC JSON data. best_practices_msg (str): A string representing best practices messages for the STAC JSON file. geometry_errors_msg (str): A string representing geometry-related error messages for the STAC JSON file. Methods: parse_config(config_file: Optional[str] = None) -> Dict: Parses a YAML configuration file and returns a dictionary with the configuration settings. def get_asset_name(self, file: Union[str, Dict] = None) -> str: Returns the name of a file. load_data(self, file: Union[str, Dict]) -> Dict: Loads a STAC JSON file from a URL or file path and returns a dictionary representation. validate_file(self, file: Union[str, dict]) -> Dict[str, Any]: Validates a STAC JSON file and returns a dictionary with the validation message. recursive_validation(self, file: Union[str, Dict[str, Any]]) -> str: Validates a STAC JSON file recursively and returns a dictionary with the validation message. set_update_message(self) -> str: Sets a message regarding the recommended version of the STAC JSON file standard. check_links_assets(self, num_links: int, url_type: str, format_type: str) -> List[str]: Checks whether the STAC JSON file has links or assets with invalid formats or requests. check_error_type(self) -> str: Checks whether the STAC JSON file has an error type. check_error_message(self) -> str: Checks whether the STAC JSON file has an error message. def check_summaries(self) -> bool: Checks whether the STAC JSON file has summaries. check_bloated_links(self, max_links: Optional[int] = 20) -> bool: Checks whether the STAC JSON file has bloated links. check_bloated_metadata(self, max_properties: Optional[int] = 20) -> bool: Checks whether the STAC JSON file has bloated metadata. check_datetime_null(self) -> bool: Checks whether the STAC JSON file has a null datetime. check_unlocated(self) -> bool: Checks whether the STAC JSON file has unlocated items. check_geometry_null(self) -> bool: Checks whether the STAC JSON file has a null geometry. check_searchable_identifiers(self) -> bool: Checks whether the STAC JSON file has searchable identifiers. check_bbox_antimeridian(self) -> bool: Checks if a bbox that crosses the antimeridian is correctly formatted. check_percent_encoded(self) -> bool: Checks whether the STAC JSON file has percent-encoded characters. check_thumbnail(self) -> bool: Checks whether the STAC JSON file has a thumbnail. check_links_title_field(self) -> bool: Checks whether the STAC JSON file has a title field in its links. check_links_self(self) -> bool: Checks whether the STAC JSON file has a self link. check_item_id_file_name(self) -> bool: Checks whether the filename of an Item conforms to the STAC specification. check_catalog_file_name(self) -> str: Checks whether the filename of a Catalog or Collection conforms to the STAC specification. create_best_practices_dict(self) -> Dict[str, Any]: Creates a dictionary with best practices recommendations for the STAC JSON file. create_best_practices_msg(self) -> List[str]: Creates a message with best practices recommendations for the STAC JSON file. create_geometry_errors_msg(self) -> List[str]: Creates a message with geometry-related error messages for the STAC JSON file. """ item: Union[str, Dict] config_file: Optional[str] = None assets: bool = False links: bool = False recursive: bool = False max_depth: Optional[int] = None assets_open_urls: bool = True headers: Dict = field(default_factory=dict) pydantic: bool = False verbose: bool = False def __post_init__(self): # Check if pydantic validation is requested but not installed if self.pydantic: try: importlib.import_module("stac_pydantic") except ImportError: import warnings warnings.warn( "stac-pydantic is not installed. Pydantic validation will be disabled. " "Install it with: pip install stac-check[pydantic]", UserWarning, stacklevel=2, ) self.pydantic = False self.data = self.load_data(self.item) self.message = self.validate_file(self.item) self.config = self.parse_config(self.config_file) from .utilities import determine_asset_type # Set message fields using the get_message_field method self.asset_type = self.get_message_field("asset_type") # If asset_type is not in message, determine it from the data if self.asset_type == "" and isinstance(self.data, dict): self.asset_type = determine_asset_type(self.data) self.version = self.get_message_field("version") self.valid_stac = self.get_message_field("valid_stac") self.validator_version = importlib.metadata.distribution( "stac-validator" ).version self.validate_all = self.recursive_validation(self.item) # Set error and info fields self.error_type = self.get_message_field("error_type") self.error_msg = self.get_message_field("error_message") self.failed_schema = self.get_message_field("failed_schema") self.recommendation = self.get_message_field("recommendation") self.verbose_error_msg = self.get_message_field("error_verbose") self.invalid_asset_format = ( self.check_links_assets(10, "assets", "format") if self.assets else None ) self.invalid_asset_request = ( self.check_links_assets(10, "assets", "request") if self.assets else None ) self.invalid_link_format = ( self.check_links_assets(10, "links", "format") if self.links else None ) self.invalid_link_request = ( self.check_links_assets(10, "links", "request") if self.links else None ) self.schema = self.message["schema"] if "schema" in self.message else [] self.object_id = self.data["id"] if "id" in self.data else "" self.file_name = self.get_asset_name(self.item) self.best_practices_msg = self.create_best_practices_msg() self.geometry_errors_msg = self.create_geometry_errors_msg()
[docs] @staticmethod def parse_config(config_file: Optional[str] = None) -> Dict: """Parse the configuration file for STAC checks. The method first looks for a file path specified in the `STAC_CHECK_CONFIG` environment variable. If the variable is defined, the method loads the YAML configuration file located at that path. Otherwise, it loads the default configuration file packaged with the `stac-check` module. If `config_file` is specified, the method also loads the YAML configuration file located at that path and merges its contents with the default or environment-based configuration. Args: config_file (str): The path to the YAML configuration file. Returns: A dictionary containing the parsed configuration values. Raises: IOError: If `config_file` is specified but cannot be read. yaml.YAMLError: If any YAML syntax errors occur while parsing the configuration file(s). """ default_config_file = os.getenv("STAC_CHECK_CONFIG") if default_config_file: with open(default_config_file) as f: default_config = yaml.load(f, Loader=yaml.FullLoader) else: config_file_path = importlib.resources.files("stac_check").joinpath( "stac-check.config.yml" ) with importlib.resources.as_file(config_file_path) as path: with open(path) as f: default_config = yaml.load(f, Loader=yaml.FullLoader) if config_file: with open(config_file) as f: config = yaml.load(f, Loader=yaml.FullLoader) default_config.update(config) return default_config
[docs] def get_asset_name(self, file: Union[str, Dict] = None) -> str: """Extracts the name of an asset from its file path or from a STAC item asset dictionary. Args: file (Union[str, dict], optional): A string representing the file path to the asset or a dictionary representing the asset as specified in a STAC item's `assets` property. Returns: A string containing the name of the asset. Raises: TypeError: If the input `file` is not a string or a dictionary. """ if isinstance(file, str): return os.path.basename(file).split(".")[0] else: return file["id"]
[docs] def load_data(self, file: Union[str, Dict]) -> Dict: """Loads JSON data from a file or URL. Args: file (Union[str, Dict]): A string representing the path to a JSON file or a dictionary containing the JSON data. Returns: A dictionary containing the loaded JSON data. Raises: TypeError: If the input `file` is not a string or dictionary. ValueError: If `file` is a string that doesn't represent a valid URL or file path. requests.exceptions.RequestException: If there is an error making a request to a URL. JSONDecodeError: If the JSON data cannot be decoded. FileNotFoundError: If the specified file cannot be found. """ if isinstance(file, str): if is_valid_url(file): resp = requests.get(file, headers=self.headers) data = resp.json() else: with open(file) as json_file: data = json.load(json_file) return data else: return file
[docs] def validate_file(self, file: Union[str, dict]) -> Dict[str, Any]: """Validates the given file path or STAC dictionary against the validation schema. Args: file (Union[str, dict]): A string representing the file path to the STAC file or a dictionary representing the STAC item. Returns: A dictionary containing the results of the validation, including the status of the validation and any errors encountered. Raises: ValueError: If `file` is not a valid file path or STAC dictionary. """ if isinstance(file, str): stac = StacValidate( file, links=self.links, assets=self.assets, assets_open_urls=self.assets_open_urls, headers=self.headers, pydantic=self.pydantic, verbose=self.verbose, ) stac.run() elif isinstance(file, dict): stac = StacValidate( assets_open_urls=self.assets_open_urls, headers=self.headers, pydantic=self.pydantic, ) stac.validate_dict(file) else: raise ValueError("Input must be a file path or STAC dictionary.") return stac.message[0]
[docs] def recursive_validation(self, file: Union[str, Dict[str, Any]]) -> str: """Recursively validate a STAC item or catalog file and its child items. Args: file (Union[str, Dict[str, Any]]): A string representing the file path to the STAC item or catalog, or a dictionary representing the STAC item or catalog. Returns: A string containing the validation message. Raises: TypeError: If the input `file` is not a string or a dictionary. """ if self.recursive: if isinstance(file, str): stac = StacValidate( file, recursive=True, max_depth=self.max_depth, assets_open_urls=self.assets_open_urls, headers=self.headers, pydantic=self.pydantic, ) stac.run() else: stac = StacValidate( recursive=True, max_depth=self.max_depth, assets_open_urls=self.assets_open_urls, headers=self.headers, pydantic=self.pydantic, ) stac.validate_dict(file) return stac.message else: return "Recursive validation is disabled."
[docs] def set_update_message(self) -> str: """Returns a message for users to update their STAC version. Returns: A string containing a message for users to update their STAC version. """ if not self.version: return "Please upgrade to STAC version 1.1.0!" elif self.version != "1.1.0": return f"Please upgrade from version {self.version} to version 1.1.0!" else: return "Thanks for using STAC version 1.1.0!"
[docs] def get_message_field(self, field_name: str) -> str: """Get a field from the validation message. Args: field_name: The name of the field to retrieve (e.g., 'error_type', 'error_message') Returns: The value of the field if it exists, otherwise an empty string. """ return self.message.get(field_name, "")
[docs] def check_summaries(self) -> bool: """Check if a Collection asset has a "summaries" property. Returns: A boolean indicating whether the Collection asset has a "summaries" property. """ if self.asset_type == "COLLECTION": return "summaries" in self.data else: return False
[docs] def check_bloated_metadata(self, max_properties: Optional[int] = 20) -> bool: """Checks whether a STAC item's metadata contains too many properties. Args: max_properties (int, optional): The maximum number of properties that the metadata can contain before it is considered too bloated. Defaults to 20. Returns: bool: True if the number of properties in the metadata exceeds the maximum number of properties specified by `max_properties`, False otherwise. """ if "properties" in self.data: return len(self.data["properties"].keys()) > max_properties return False
[docs] def check_datetime_null(self) -> bool: """Checks if the STAC item has a null datetime property. Returns: bool: A boolean indicating whether the datetime property is null (True) or not (False). """ if "properties" in self.data: if "datetime" in self.data["properties"]: if self.data["properties"]["datetime"] is None: return True else: return False return False
[docs] def check_unlocated(self) -> bool: """Checks if a STAC item is unlocated, i.e., has no geometry but has a bounding box. Returns: bool: True if the STAC item is unlocated, False otherwise. """ if "geometry" in self.data: return ( self.data.get("geometry") is None and self.data.get("bbox") is not None ) else: return False
[docs] def check_geometry_null(self) -> bool: """Checks if a STAC item has a null geometry property. Returns: bool: A boolean indicating whether the geometry property is null (True) or not (False). """ if "geometry" in self.data: return self.data.get("geometry") is None else: return False
[docs] def check_bbox_matches_geometry( self, ) -> Union[bool, Tuple[bool, List[float], List[float], List[float]]]: """Checks if the bbox of a STAC item matches its geometry. This function verifies that the bounding box (bbox) accurately represents the minimum bounding rectangle of the item's geometry. It only applies to items with non-null geometry of type Polygon or MultiPolygon. Returns: Union[bool, Tuple[bool, List[float], List[float], List[float]]]: - True if the bbox matches the geometry or if the check is not applicable (e.g., null geometry or non-polygon type). - When there's a mismatch: a tuple containing (False, calculated_bbox, actual_bbox, differences) """ # Skip check if geometry is null or bbox is not present if ( "geometry" not in self.data or self.data.get("geometry") is None or "bbox" not in self.data or self.data.get("bbox") is None ): return True geometry = self.data.get("geometry") bbox = self.data.get("bbox") # Only process Polygon and MultiPolygon geometries geom_type = geometry.get("type") if geom_type not in ["Polygon", "MultiPolygon"]: return True # Extract coordinates based on geometry type coordinates = [] if geom_type == "Polygon": # For Polygon, use the exterior ring (first element) if len(geometry.get("coordinates", [])) > 0: coordinates = geometry.get("coordinates")[0] elif geom_type == "MultiPolygon": # For MultiPolygon, collect all coordinates from all polygons for polygon in geometry.get("coordinates", []): if len(polygon) > 0: coordinates.extend(polygon[0]) # If no valid coordinates, skip check if not coordinates: return True # Calculate min/max from coordinates lons = [coord[0] for coord in coordinates] lats = [coord[1] for coord in coordinates] calc_bbox = [min(lons), min(lats), max(lons), max(lats)] # Allow for differences that would be invisible when rounded to 6 decimal places # 1e-6 would be exactly at the 6th decimal place, so use 5e-7 to be just under that threshold epsilon = 5e-7 differences = [abs(bbox[i] - calc_bbox[i]) for i in range(4)] if any(diff > epsilon for diff in differences): # Return False along with the calculated bbox, actual bbox, and the differences return (False, calc_bbox, bbox, differences) return True
[docs] def check_searchable_identifiers(self) -> bool: """Checks if the identifiers of a STAC item are searchable, i.e., they only contain lowercase letters, numbers, hyphens, and underscores. Returns: bool: True if the identifiers are searchable, False otherwise. """ if self.asset_type == "ITEM": for letter in self.object_id: if ( letter.islower() or letter.isnumeric() or letter == "-" or letter == "_" ): pass else: return False return True
[docs] def check_percent_encoded(self) -> bool: """Checks if the identifiers of a STAC item are percent-encoded, i.e., they only contain lowercase letters, numbers, hyphens, and underscores. Returns: bool: True if the identifiers are percent-encoded, False otherwise. """ return ( self.asset_type == "ITEM" and "/" in self.object_id or ":" in self.object_id )
[docs] def check_thumbnail(self) -> bool: """Checks if the thumbnail of a STAC item is valid, i.e., it has a valid format. Returns: bool: True if the thumbnail is valid, False otherwise. """ if "assets" in self.data: if "thumbnail" in self.data["assets"]: if "type" in self.data["assets"]["thumbnail"]: if ( "png" in self.data["assets"]["thumbnail"]["type"] or "jpeg" in self.data["assets"]["thumbnail"]["type"] or "jpg" in self.data["assets"]["thumbnail"]["type"] or "webp" in self.data["assets"]["thumbnail"]["type"] ): return True else: return False return True
[docs] def check_item_id_file_name(self) -> bool: if self.asset_type == "ITEM" and self.object_id != self.file_name: return False else: return True
[docs] def check_catalog_file_name(self) -> bool: """Checks whether the filename of a Catalog or Collection conforms to the STAC specification. Returns: bool: True if the filename is valid, False otherwise. """ if isinstance(self.item, str) and ".json" in self.item: if self.asset_type == "CATALOG" and "catalog.json" not in self.item: return False elif self.asset_type == "COLLECTION" and "collection.json" not in self.item: return False return True else: return True
[docs] def check_geometry_coordinates_definite_errors( self, ) -> Union[bool, Tuple[bool, List]]: """Checks if the coordinates in a geometry contain definite errors. This function checks for coordinates that definitely violate the GeoJSON specification: 1. Latitude values (second element) exceed ±90 degrees 2. Longitude values (first element) exceed ±180 degrees This check focuses on definite errors rather than potential/likely errors. For checking potential errors (likely reversed coordinates), use check_geometry_coordinates_order(). Returns: Union[bool, Tuple[bool, List]]: - If no errors: True - If errors found: (False, list_of_invalid_coordinates) """ if "geometry" not in self.data or self.data.get("geometry") is None: return True geometry = self.data.get("geometry") invalid_coords = [] # Function to check a single coordinate pair for definite errors def is_within_valid_ranges(coord): if len(coord) < 2: return True # Not enough elements to check lon, lat = coord[0], coord[1] # Check if latitude (second value) is outside the valid range if abs(lat) > 90: invalid_coords.append((lon, lat, "latitude > ±90°")) return False # Check if longitude (first value) is outside the valid range if abs(lon) > 180: invalid_coords.append((lon, lat, "longitude > ±180°")) return False return True # Function to recursively check all coordinates in a geometry def check_coordinates(coords): if isinstance(coords, list): if coords and isinstance(coords[0], (int, float)): # This is a single coordinate return is_within_valid_ranges(coords) else: # This is a list of coordinates or a list of lists of coordinates return all(check_coordinates(coord) for coord in coords) return True result = check_coordinates(geometry.get("coordinates", [])) if result: return True else: return (False, invalid_coords)
[docs] def check_geometry_coordinates_order(self) -> bool: """Checks if the coordinates in a geometry may be in the incorrect order. This function uses a heuristic to detect coordinates that are likely in the wrong order (latitude, longitude instead of longitude, latitude). It looks for cases where: - The first value (supposed to be longitude) is > 90 degrees - The second value (supposed to be latitude) is < 90 degrees - The first value is more than twice the second value For checking definite errors (values outside valid ranges), use check_geometry_coordinates_definite_errors(). Returns: bool: True if coordinates appear to be in the correct order, False if they may be reversed. """ if "geometry" not in self.data or self.data.get("geometry") is None: return True geometry = self.data.get("geometry") # Function to check if a single coordinate pair is likely in the correct order def is_likely_correct_order(coord): if len(coord) < 2: return True # Not enough elements to check lon, lat = coord[0], coord[1] # Heuristic: If the supposed longitude is > 90 and the supposed latitude is < 90, # and the longitude is more than twice the latitude, it's likely in the correct order if abs(lon) > 90 and abs(lat) < 90 and abs(lon) > abs(lat) * 2: return False return True # Function to recursively check all coordinates in a geometry def check_coordinates(coords): if isinstance(coords, list): if coords and isinstance(coords[0], (int, float)): # This is a single coordinate return is_likely_correct_order(coords) else: # This is a list of coordinates or a list of lists of coordinates return all(check_coordinates(coord) for coord in coords) return True return check_coordinates(geometry.get("coordinates", []))
[docs] def check_bbox_antimeridian(self) -> bool: """ Checks if a bbox that crosses the antimeridian is correctly formatted. According to the GeoJSON spec, when a bbox crosses the antimeridian (180°/-180° longitude), the minimum longitude (bbox[0]) should be greater than the maximum longitude (bbox[2]). This method checks if this convention is followed correctly. Returns: bool: True if the bbox is valid (either doesn't cross antimeridian or crosses it correctly), False if it incorrectly crosses the antimeridian. """ if "bbox" not in self.data: return True bbox = self.data.get("bbox") # Extract the 2D part of the bbox (ignoring elevation if present) if len(bbox) == 4: # 2D bbox [west, south, east, north] west, _, east, _ = bbox elif len(bbox) == 6: # 3D bbox [west, south, min_elev, east, north, max_elev] west, _, _, east, _, _ = bbox # Check if the bbox appears to cross the antimeridian # This is the case when west > east in a valid bbox that crosses the antimeridian # For example: [170, -10, -170, 10] crosses the antimeridian correctly # But [-170, -10, 170, 10] is incorrectly belting the globe # Invalid if bbox "belts the globe" (too wide) if west < east and (east - west) > 180: return False # Otherwise, valid (normal or valid antimeridian crossing) return True
[docs] def create_best_practices_dict(self) -> Dict: """Creates a dictionary of best practices violations for the current STAC object. The violations are determined by a set of configurable linting rules specified in the config file. Returns: A dictionary of best practices violations for the current STAC object. The keys in the dictionary correspond to the linting rules that were violated, and the values are lists of strings containing error messages and recommendations for how to fix the violations. """ best_practices_dict = {} linting_config = self.config["linting"] geometry_validation_config = self.config["geometry_validation"] max_links = self.config["settings"]["max_links"] max_properties = self.config["settings"]["max_properties"] # best practices - item ids should only contain searchable identifiers if ( self.check_searchable_identifiers() == False and linting_config["searchable_identifiers"] == True ): msg_1 = f"Item name '{self.object_id}' should only contain Searchable identifiers" msg_2 = "Identifiers should consist of only lowercase characters, numbers, '_', and '-'" best_practices_dict["searchable_identifiers"] = [msg_1, msg_2] # best practices - item ids should not contain ':' or '/' characters if self.check_percent_encoded() and linting_config["percent_encoded"] == True: msg_1 = f"Item name '{self.object_id}' should not contain ':' or '/'" msg_2 = "https://github.com/radiantearth/stac-spec/blob/master/best-practices.md#item-ids" best_practices_dict["percent_encoded"] = [msg_1, msg_2] # best practices - item ids should match file names if ( not self.check_item_id_file_name() and linting_config["item_id_file_name"] == True ): msg_1 = f"Item file names should match their ids: '{self.file_name}' not equal to '{self.object_id}" best_practices_dict["check_item_id"] = [msg_1] # best practices - collection and catalog file names should be collection.json and catalog.json if ( self.check_catalog_file_name() == False and linting_config["catalog_id_file_name"] == True ): msg_1 = f"Object should be called '{self.asset_type.lower()}.json' not '{self.file_name}.json'" best_practices_dict["check_catalog_id"] = [msg_1] # best practices - collections should contain summaries if ( self.asset_type == "COLLECTION" and self.check_summaries() == False and linting_config["check_summaries"] == True ): msg_1 = "A STAC collection should contain a summaries field" msg_2 = "It is recommended to store information like eo:bands in summaries" best_practices_dict["check_summaries"] = [msg_1, msg_2] # best practices - datetime fields should not be set to null if self.check_datetime_null() and linting_config["null_datetime"] == True: msg_1 = "Please avoid setting the datetime field to null, many clients search on this field" best_practices_dict["datetime_null"] = [msg_1] # best practices - check unlocated items to make sure bbox field is not set if self.check_unlocated() and linting_config["check_unlocated"] == True: msg_1 = "Unlocated item. Please avoid setting the bbox field when geometry is set to null" best_practices_dict["check_unlocated"] = [msg_1] # best practices - recommend items have a geometry if self.check_geometry_null() and linting_config["check_geometry"] == True: msg_1 = "All items should have a geometry field. STAC is not meant for non-spatial data" best_practices_dict["null_geometry"] = [msg_1] # best practices - check if bbox matches geometry bbox_check_result = self.check_bbox_matches_geometry() bbox_mismatch = False if isinstance(bbox_check_result, tuple): bbox_mismatch = not bbox_check_result[0] else: bbox_mismatch = not bbox_check_result if ( bbox_mismatch and geometry_validation_config.get("check_bbox_geometry_match", True) == True ): if isinstance(bbox_check_result, tuple): # Unpack the result _, calc_bbox, actual_bbox, differences = bbox_check_result # Format the bbox values for display calc_bbox_str = ", ".join([f"{v:.6f}" for v in calc_bbox]) actual_bbox_str = ", ".join([f"{v:.6f}" for v in actual_bbox]) # Create a more detailed message about which coordinates differ coordinate_labels = [ "min longitude", "min latitude", "max longitude", "max latitude", ] mismatch_details = [] # Use the same epsilon threshold as in check_bbox_matches_geometry epsilon = 5e-7 for i, (diff, label) in enumerate(zip(differences, coordinate_labels)): if diff > epsilon: mismatch_details.append( f"{label}: calculated={calc_bbox[i]:.6f}, actual={actual_bbox[i]:.6f}, diff={diff:.7f}" ) msg_1 = "The bbox field does not match the bounds of the geometry. The bbox should be the minimum bounding rectangle of the geometry." msg_2 = f"Calculated bbox from geometry: [{calc_bbox_str}]" msg_3 = f"Actual bbox in metadata: [{actual_bbox_str}]" messages = [msg_1, msg_2, msg_3] if mismatch_details: messages.append("Mismatched coordinates:") messages.extend(mismatch_details) else: # If we got here but there are no visible differences at 6 decimal places, # add a note explaining that the differences are too small to matter messages.append( "Note: The differences are too small to be visible at 6 decimal places and can be ignored." ) best_practices_dict["bbox_geometry_mismatch"] = messages else: msg_1 = "The bbox field does not match the bounds of the geometry. The bbox should be the minimum bounding rectangle of the geometry." best_practices_dict["bbox_geometry_mismatch"] = [msg_1] # check to see if there are too many links if ( self.check_bloated_links(max_links=max_links) and linting_config["bloated_links"] == True ): msg_1 = f"You have {len(self.data['links'])} links. Please consider using sub-collections or sub-catalogs" best_practices_dict["bloated_links"] = [msg_1] # best practices - check for bloated metadata in properties if ( self.check_bloated_metadata(max_properties=max_properties) and linting_config["bloated_metadata"] == True ): msg_1 = f"You have {len(self.data['properties'])} properties. Please consider using links to avoid bloated metadata" best_practices_dict["bloated_metadata"] = [msg_1] # best practices - ensure thumbnail is a small file size ["png", "jpeg", "jpg", "webp"] if ( not self.check_thumbnail() and self.asset_type == "ITEM" and linting_config["check_thumbnail"] == True ): msg_1 = "A thumbnail should have a small file size ie. png, jpeg, jpg, webp" best_practices_dict["check_thumbnail"] = [msg_1] # best practices - ensure that links in catalogs and collections include a title field if not self.check_links_title_field() and linting_config["links_title"] == True: msg_1 = ( "Links in catalogs and collections should always have a 'title' field" ) best_practices_dict["check_links_title"] = [msg_1] # best practices - ensure that links in catalogs and collections include self link if not self.check_links_self() and linting_config["links_self"] == True: msg_1 = "A link to 'self' in links is strongly recommended" best_practices_dict["check_links_self"] = [msg_1] # best practices - ensure that geometry coordinates are in the correct order if ( not self.check_geometry_coordinates_order() and geometry_validation_config["geometry_coordinates_order"] == True ): msg_1 = "Geometry coordinates may be in the wrong order (required order: longitude, latitude)" best_practices_dict["geometry_coordinates_order"] = [msg_1] # best practices - check if geometry coordinates contain definite errors definite_errors_result = self.check_geometry_coordinates_definite_errors() # Check if we have a separate config entry for definite errors, otherwise use the same as order check config_key = "geometry_coordinates_definite_errors" if config_key not in geometry_validation_config: config_key = "geometry_coordinates_order" if ( isinstance(definite_errors_result, tuple) and not definite_errors_result[0] and geometry_validation_config[config_key] ): # We have definite errors with invalid coordinates _, invalid_coords = definite_errors_result # Base message msg_1 = "Geometry coordinates contain invalid values that violate the GeoJSON specification (latitude must be between -90 and 90, longitude between -180 and 180)" # Add details about invalid coordinates (limit to first 5 to avoid excessive output) messages = [msg_1] for i, (lon, lat, reason) in enumerate(invalid_coords[:5]): messages.append(f"Invalid coordinate: [{lon}, {lat}] - {reason}") if len(invalid_coords) > 5: messages.append( f"...and {len(invalid_coords) - 5} more invalid coordinates" ) best_practices_dict["geometry_coordinates_definite_errors"] = messages elif definite_errors_result is False and geometry_validation_config[config_key]: # Simple case (backward compatibility) msg_1 = "Geometry coordinates contain invalid values that violate the GeoJSON specification (latitude must be between -90 and 90, longitude between -180 and 180)" best_practices_dict["geometry_coordinates_definite_errors"] = [msg_1] # Check if a bbox that crosses the antimeridian is correctly formatted if not self.check_bbox_antimeridian() and geometry_validation_config.get( "check_bbox_antimeridian", True ): # Get the bbox values to include in the error message bbox = self.data.get("bbox", []) if len(bbox) == 4: # 2D bbox [west, south, east, north] west, _, east, _ = bbox elif ( len(bbox) == 6 ): # 3D bbox [west, south, min_elev, east, north, max_elev] west, _, _, east, _, _ = bbox msg_1 = f"BBox crossing the antimeridian should have west longitude > east longitude (found west={west}, east={east})" msg_2 = f"Current bbox format appears to be belting the globe instead of properly crossing the antimeridian. Bbox: {bbox}" best_practices_dict["check_bbox_antimeridian"] = [msg_1, msg_2] return best_practices_dict
[docs] def create_best_practices_msg(self) -> List[str]: """ Generates a list of best practices messages based on the results of the 'create_best_practices_dict' method. Returns: A list of strings, where each string contains a best practice message. Each message starts with the 'STAC Best Practices:' base string and is followed by a specific recommendation. Each message is indented with four spaces, and there is an empty string between each message for readability. """ best_practices = list() base_string = "STAC Best Practices: " best_practices.append(base_string) best_practices_dict = self.create_best_practices_dict() # Filter out geometry-related errors as they will be displayed separately geometry_keys = [ "geometry_coordinates_order", "geometry_coordinates_definite_errors", "check_bbox_antimeridian", "check_bbox_geometry_match", "bbox_geometry_mismatch", ] filtered_dict = { k: v for k, v in best_practices_dict.items() if k not in geometry_keys } for _, v in filtered_dict.items(): for value in v: best_practices.extend([value]) best_practices.extend([""]) return best_practices
[docs] def create_geometry_errors_msg(self) -> List[str]: """ Generates a list of geometry-related error messages based on the results of the 'create_best_practices_dict' method. This separates geometry coordinate validation errors from other best practices for clearer presentation. Returns: A list of strings, where each string contains a geometry error message. Each message starts with the 'Geometry Validation Errors [BETA]:' base string and is followed by specific details. Each message is indented with four spaces, and there is an empty string between each message for readability. """ # Check if geometry validation is enabled geometry_config = self.config.get("geometry_validation", {}) if not geometry_config.get("enabled", True): return [] # Geometry validation is disabled geometry_errors = list() base_string = "Geometry Validation [BETA]: " geometry_errors.append(base_string) best_practices_dict = self.create_best_practices_dict() # Extract only geometry-related errors geometry_keys = [ "geometry_coordinates_order", "geometry_coordinates_definite_errors", "check_bbox_antimeridian", "check_bbox_geometry_match", "bbox_geometry_mismatch", ] geometry_dict = { k: v for k, v in best_practices_dict.items() if k in geometry_keys } if not geometry_dict: return [] # No geometry errors found for _, v in geometry_dict.items(): for value in v: geometry_errors.extend([value]) geometry_errors.extend([""]) return geometry_errors