RSO Inspection

This example demonstrates the configuration of a resident space object (RSO) inspection environment, in which a servicer spacecraft circumnavigates an RSO to image the illuminated facets.

[1]:
from importlib.metadata import version
from bsk_rl import sats, obs, act, ConstellationTasking, scene, data
from bsk_rl.obs.relative_observations import rso_imaged_regions
from bsk_rl.utils.orbital import fibonacci_sphere
from bsk_rl.sim import dyn, fsw
import numpy as np
from Basilisk.architecture import bskLogging
from functools import partial
from bsk_rl.utils.orbital import random_orbit, random_unit_vector, relative_to_chief
from Basilisk.utilities.orbitalMotion import elem2rv
from Basilisk.utilities.RigidBodyKinematics import C2MRP

# Set Basilisk's default log level to BSK_WARNING for everything run below.
bskLogging.setDefaultLogLevel(bskLogging.BSK_WARNING)

RLlib is actively developed and can change significantly from version to version. For this script, the following version is used:

[2]:
# Report the installed version of the "ray" distribution, the parent package of RLlib.
version("ray")
[2]:
'2.35.0'

Defining the Satellites

First, the RSO satellite is configured. A simple model is used that has no actuators modelled and just deterministically points nadir.

[3]:
class RSOSat(sats.Satellite):
    """Resident space object (RSO) that the inspector satellite images.

    The RSO exposes one constant placeholder observation and a single
    nadir-pointing action, so it never needs a meaningful decision.
    """

    # One observation element named "one" whose value is always 1.0.
    observation_spec = [
        obs.SatProperties(dict(prop="one", fn=lambda _: 1.0)),
    ]
    # The only available action: nadir pointing tasked for 1e9 seconds.
    action_spec = [act.NadirPoint(duration=1e9)]
    # Dynamics are composed from the conjunction and RSO dynamics models.
    dyn_type = (dyn.ConjunctionDynModel, dyn.RSODynModel)
    # The base flight software model is used as-is.
    fsw_type = fsw.FSWModel

Arguments for the satellite are configured for smooth pointing behavior.

[4]:
# Satellite arguments passed to the RSO; only the conjunction radius is overridden.
rso_sat_args = {"conjunction_radius": 2.0}

The inspector satellite has a more complex configuration. First, an observation function for the sun vector is defined.

[5]:
def sun_hat_chief(self, other):
    """Return the unit vector from this satellite to the Sun, rotated by the chief's ``HN``.

    Args:
        self: Satellite making the observation. Its simulator's SPICE planet
            state messages and its own ``dynamics.r_BN_N`` are read.
        other: Chief satellite; ``other.dynamics.HN`` is applied to the
            result (presumably the inertial-to-Hill rotation of the chief;
            confirm against the dynamics model).

    Returns:
        The normalized satellite-to-Sun vector after multiplication by ``HN``.
    """
    world = self.simulator.world
    sun_state_msg = world.gravFactory.spiceObject.planetStateOutMsgs[world.sun_index]
    sun_position = np.array(sun_state_msg.read().PositionVector)

    # Vector from the satellite to the Sun, built from the two positions.
    to_sun = sun_position - self.dynamics.r_BN_N
    to_sun_unit = to_sun / np.linalg.norm(to_sun)

    return other.dynamics.HN @ to_sun_unit

The inspector satellite is configured with observations relating to the relative state and the mission objectives. The satellite is given an action for impulsively thrusting and drifting. The dynamics and flight software models introduce a maximum range check, collision checking orbital maneuvers, and RSO inspection.

[6]:
class InspectorSat(sats.Satellite):
    """Servicer satellite that maneuvers around the RSO to inspect it.

    Observations cover the inspector's own orbital state, the resource reward
    weight, the state relative to the chief named "RSO", eclipse, and time.
    The single action is an impulsive thrust in the chief's Hill frame
    followed by a drift while the RSO inspection flight-software action runs.
    """

    observation_spec = [
        # Own-state properties, each paired with a ``norm`` value.
        obs.SatProperties(
            dict(prop="dv_available", norm=10),
            dict(prop="inclination", norm=np.pi),
            dict(prop="eccentricity", norm=0.1),
            dict(prop="semi_major_axis", norm=7000),
            dict(prop="ascending_node", norm=2 * np.pi),
            dict(prop="argument_of_periapsis", norm=2 * np.pi),
            dict(prop="true_anomaly", norm=2 * np.pi),
            dict(prop="beta_angle", norm=np.pi),
        ),
        obs.ResourceRewardWeight(),
        # Properties measured relative to the chief satellite named "RSO".
        obs.RelativeProperties(
            dict(prop="r_DC_Hc", norm=500),
            dict(prop="v_DC_Hc", norm=5),
            dict(
                prop="rso_imaged_regions",
                # Imaged-region observation evaluated at 15 region centers from
                # fibonacci_sphere, using the "chief_hill" frame.
                fn=partial(
                    rso_imaged_regions,
                    region_centers=fibonacci_sphere(15),
                    frame="chief_hill",
                ),
            ),
            # Custom Sun-direction observation defined by sun_hat_chief.
            dict(prop="sun_hat_Hc", fn=sun_hat_chief),
            chief_name="RSO",
        ),
        obs.Eclipse(norm=5700),
        obs.Time(),
    ]
    action_spec = [
        # Thrust is capped at max_dv; the drift may last up to 2 * 5700.0
        # (5700 presumably approximates one orbital period in seconds; confirm).
        act.ImpulsiveThrustHill(
            chief_name="RSO",
            max_dv=1.0,
            max_drift_duration=5700.0 * 2,
            fsw_action="action_inspect_rso",
        )
    ]
    # Dynamics are composed from max-range, conjunction, and RSO-inspector models.
    dyn_type = (dyn.MaxRangeDynModel, dyn.ConjunctionDynModel, dyn.RSOInspectorDynModel)
    # Flight software is composed from steering, magic orbital maneuver, and
    # RSO-inspector models.
    fsw_type = (
        fsw.SteeringFSWModel,
        fsw.MagicOrbitalManeuverFSWModel,
        fsw.RSOInspectorFSWModel,
    )

Generous configurations are used for the inspector, allowing for “sloppy” attitude control with a low simulation step rate.

[7]:
# Satellite arguments for the inspector. The imaging requirements are loose
# (no rate requirement at all) and the battery starts full at a large capacity.
inspector_sat_args = {
    # Imaging and data handling
    "imageAttErrorRequirement": 1.0,
    "imageRateErrorRequirement": None,
    "instrumentBaudRate": 1,
    "dataStorageCapacity": 1e6,
    # Power: initial charge equals the storage capacity
    "batteryStorageCapacity": 1e9,
    "storedCharge_Init": 1e9,
    # Proximity operations relative to the chief
    "conjunction_radius": 2.0,
    "dv_available_init": 10.0,
    "max_range_radius": 1000,
    "chief_name": "RSO",
    "u_max": 1.0,
}

Environment Generation

A satellite argument randomizer is defined to configure the initial state of the satellites. The RSO is put into a random orbit with apogee and perigee altitudes between 500 km and 1100 km. The inspector is placed in the region 250 to 750 meters from the RSO, with up to 1 m/s of relative velocity. Finally, the RSO’s attitude and body rate are set up to be in the nadir-pointing initial configuration.

[8]:
def sat_arg_randomizer(satellites):
    """Generate randomized initial satellite arguments for the RSO and inspectors.

    The RSO orbit is sampled so that both perigee and apogee altitudes lie
    between 500 and 1100 km. Every satellite whose name contains "Inspector"
    is placed relative to the RSO with a random offset of 250 to 750 in
    magnitude and a random relative velocity of up to 1.0 in magnitude.
    Finally, the RSO's initial attitude and body rate are set so that its
    body frame coincides with, and rotates with, its Hill frame.

    Args:
        satellites: Satellites in the environment. Exactly one must be named
            "RSO"; an IndexError is raised if none is.

    Returns:
        Dictionary mapping each satellite to its dictionary of arguments.
    """
    # Generate the RSO orbit
    R_E = 6371.0  # km
    a_km = R_E + np.random.uniform(500, 1100)
    # Largest eccentricity that keeps perigee above 500 km and apogee below
    # 1100 km altitude for the sampled semi-major axis.
    e_max = min(1 - (R_E + 500) / a_km, (R_E + 1100) / a_km - 1)
    e = np.random.uniform(0.0, e_max)
    chief_orbit = random_orbit(a=a_km, e=e)

    inspectors = [sat for sat in satellites if "Inspector" in sat.name]
    rso = [satellite for satellite in satellites if satellite.name == "RSO"][0]

    # Generate the inspector initial states.
    args = {}
    for inspector in inspectors:
        relative_randomizer = relative_to_chief(
            chief_name="RSO",
            chief_orbit=chief_orbit,
            deputy_relative_state={
                inspector.name: lambda: np.concatenate(
                    (
                        random_unit_vector() * np.random.uniform(250, 750),
                        random_unit_vector() * np.random.uniform(0, 1.0),
                    )
                ),
            },
        )
        args.update(relative_randomizer([rso, inspector]))

    # Align RSO body frame with its Hill frame for initial nadir pointing
    mu = rso.sat_args_generator["mu"]
    r_N, v_N = elem2rv(mu, args[rso]["oe"])

    # Specific angular momentum; its direction is the Hill-frame z axis.
    h_N = np.cross(r_N, v_N)

    x = r_N / np.linalg.norm(r_N)
    z = h_N / np.linalg.norm(h_N)
    y = np.cross(z, x)
    HN = np.array([x, y, z])  # rows are the Hill axes expressed in N
    BH = np.eye(3)
    BN = BH @ HN

    # Instantaneous Hill-frame angular velocity, h / |r|^2. The mean motion
    # 2*pi/T matches this only for circular orbits; the sampled orbit may be
    # eccentric, so the exact rate is used to start truly nadir-locked.
    omega_BN_N = h_N / np.dot(r_N, r_N)

    args[rso]["sigma_init"] = C2MRP(BN)
    args[rso]["omega_init"] = BN @ omega_BN_N

    return args

The scenario is configured to set the RSO geometry as a sphere with 100 points at a radius of 1 meter. Points must be imaged within 30 degrees of their normal, with illumination coming from no more than 60 degrees from normal. The inspector must be within 250 meters to inspect the RSO.

[9]:
# Spherical RSO geometry: 100 surface points at a radius of 1.0. The imaging
# angle limit is 30 degrees, the solar illumination angle limit is 60 degrees
# (both converted to radians), and the maximum inspection range is 250.
scenario = scene.SphericalRSO(
    n_points=100,
    radius=1.0,
    theta_max=np.radians(30),
    range_max=250,
    theta_solar_max=np.radians(60),
)

This scenario uses two rewarders. For the RSO inspection component of the task, a bonus of 1.0 is yielded once at least 90% of the illuminated points have been inspected. The ResourceReward is used to penalize fuel use, with some basic logic added to only apply the reward to the Inspector.

[10]:
# Two rewarders: one for inspection progress and one tied to remaining delta-V.
rewarders = (
    # Inspection reward with a completion bonus of 1.0 at a 0.90 threshold.
    data.RSOInspectionReward(
        completion_bonus=1.0,
        completion_threshold=0.90,
    ),
    data.ResourceReward(
        # The tracked resource is dv_available, but only for satellites whose
        # flight software is a MagicOrbitalManeuverFSWModel; others report 0.0.
        resource_fn=lambda sat: sat.fsw.dv_available
        if isinstance(sat.fsw, fsw.MagicOrbitalManeuverFSWModel)
        else 0.0,
        # NOTE(review): np.random.uniform is evaluated once, when this tuple is
        # built, so the weight is a single fixed value for the lifetime of this
        # rewarder. Confirm whether a per-episode random weight was intended.
        reward_weight=np.random.uniform(0.0, 0.5),
    ),
)

With all the components defined, the environment can be instantiated.

[11]:
# Assemble the multi-agent environment from the pieces defined above.
env = ConstellationTasking(
    satellites=[
        # The names "RSO" and "Inspector" are what sat_arg_randomizer and the
        # chief_name settings match against.
        RSOSat("RSO", sat_args=rso_sat_args),
        InspectorSat("Inspector", sat_args=inspector_sat_args, obs_type=dict),
    ],
    sat_arg_randomizer=sat_arg_randomizer,
    scenario=scenario,
    rewarder=rewarders,
    # time_limit and sim_rate are presumably in seconds; confirm against the
    # environment documentation.
    time_limit=60000,
    sim_rate=5.0,
    log_level="INFO",
)

Environment Interaction

The environment is reset and randomly stepped through.

Future Work: This example will be updated with an actual trained policy in the future.

[12]:
# Reset once, then take four steps with a randomly sampled inspector action.
# The values returned by env.step are discarded, so the loop always runs all
# four steps regardless of what a step reports.
env.reset()
for _ in range(4):
    # The RSO is always given action 0; the inspector's action is sampled.
    env.step({"RSO": 0, "Inspector": env.action_space("Inspector").sample()})
2026-01-05 18:32:21,339 gym                            INFO       Resetting environment with seed=2701757739
2026-01-05 18:32:21,485 gym                            INFO       <0.00> Environment reset
/opt/hostedtoolcache/Python/3.11.14/x64/lib/python3.11/site-packages/gymnasium/spaces/box.py:130: UserWarning: WARN: Box bound precision lowered by casting to float32
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
2026-01-05 18:32:21,486 gym                            INFO       <0.00> === STARTING STEP ===
2026-01-05 18:32:21,487 sats.satellite.RSO             INFO       <0.00> RSO: action_nadir_point tasked for 1000000000.0 seconds
2026-01-05 18:32:21,488 sats.satellite.RSO             INFO       <0.00> RSO: setting timed terminal event at 1000000000.0
2026-01-05 18:32:21,489 sats.satellite.Inspector       INFO       <0.00> Inspector: Thrusting with inertial dV [-0.3235192  -0.40439709 -0.01619265] with 1038.0245361328125 second drift.
2026-01-05 18:32:21,490 sats.satellite.Inspector       INFO       <0.00> Inspector: setting timed terminal event at 1038.0
2026-01-05 18:32:21,491 sats.satellite.Inspector       INFO       <0.00> Inspector: FSW action action_inspect_rso activated.
2026-01-05 18:32:21,538 sats.satellite.Inspector       INFO       <1010.00> Inspector: Exceeded maximum range of 1000 m from RSO
2026-01-05 18:32:21,544 data.rso_inspection            INFO       <1010.00> Inspected/Illuminated/Total: 0/55/100
2026-01-05 18:32:21,544 data.composition               INFO       <1010.00> ResourceReward reward: {'Inspector': np.float64(-0.17753960823487666)}
2026-01-05 18:32:21,545 data.base                      INFO       <1010.00> Total reward: {'Inspector': np.float64(-0.17753960823487666)}
2026-01-05 18:32:21,546 sats.satellite.Inspector       WARNING    <1010.00> Inspector: failed range_valid check
2026-01-05 18:32:21,558 gym                            INFO       <1010.00> Step reward: {'Inspector': np.float64(-1.1775396082348766)}
2026-01-05 18:32:21,559 gym                            INFO       <1010.00> Episode terminated: ['Inspector']
2026-01-05 18:32:21,559 gym                            INFO       <1010.00> === STARTING STEP ===
2026-01-05 18:32:21,561 sats.satellite.RSO             INFO       <1010.00> RSO: action_nadir_point tasked for 1000000000.0 seconds
2026-01-05 18:32:21,561 sats.satellite.RSO             INFO       <1010.00> RSO: setting timed terminal event at 1000001010.0
2026-01-05 18:32:21,563 sats.satellite.Inspector       INFO       <1010.00> Inspector: Thrusting with inertial dV [ 0.27270076 -0.37851603 -0.73733478] with 3856.35791015625 second drift.
2026-01-05 18:32:21,564 sats.satellite.Inspector       INFO       <1010.00> Inspector: setting timed terminal event at 4866.4
2026-01-05 18:32:21,564 sats.satellite.Inspector       INFO       <1010.00> Inspector: FSW action action_inspect_rso activated.
2026-01-05 18:32:21,722 sats.satellite.Inspector       INFO       <4870.00> Inspector: timed termination at 4866.4
2026-01-05 18:32:21,731 data.rso_inspection            INFO       <4870.00> Inspected/Illuminated/Total: 0/85/100
2026-01-05 18:32:21,732 data.composition               INFO       <4870.00> ResourceReward reward: {'Inspector': np.float64(-0.2989723329990817)}
2026-01-05 18:32:21,732 data.base                      INFO       <4870.00> Total reward: {'Inspector': np.float64(-0.2989723329990817)}
2026-01-05 18:32:21,733 sats.satellite.Inspector       INFO       <4870.00> Inspector: Satellite Inspector requires retasking
2026-01-05 18:32:21,735 gym                            INFO       <4870.00> Step reward: {}
2026-01-05 18:32:21,735 gym                            INFO       <4870.00> === STARTING STEP ===
2026-01-05 18:32:21,737 sats.satellite.RSO             INFO       <4870.00> RSO: action_nadir_point tasked for 1000000000.0 seconds
2026-01-05 18:32:21,737 sats.satellite.RSO             INFO       <4870.00> RSO: setting timed terminal event at 1000004870.0
2026-01-05 18:32:21,738 sats.satellite.Inspector       INFO       <4870.00> Inspector: Thrusting with inertial dV [-0.2533816  -0.48769724  0.42063288] with 3412.158203125 second drift.
2026-01-05 18:32:21,739 sats.satellite.Inspector       INFO       <4870.00> Inspector: setting timed terminal event at 8282.2
2026-01-05 18:32:21,740 sats.satellite.Inspector       INFO       <4870.00> Inspector: FSW action action_inspect_rso activated.
2026-01-05 18:32:21,902 sats.satellite.Inspector       INFO       <8285.00> Inspector: timed termination at 8282.2
2026-01-05 18:32:21,910 data.rso_inspection            INFO       <8285.00> Inspected/Illuminated/Total: 0/86/100
2026-01-05 18:32:21,911 data.composition               INFO       <8285.00> ResourceReward reward: {'Inspector': np.float64(-0.2371440907556596)}
2026-01-05 18:32:21,912 data.base                      INFO       <8285.00> Total reward: {'Inspector': np.float64(-0.2371440907556596)}
2026-01-05 18:32:21,912 sats.satellite.Inspector       INFO       <8285.00> Inspector: Satellite Inspector requires retasking
2026-01-05 18:32:21,913 gym                            INFO       <8285.00> Step reward: {}
2026-01-05 18:32:21,914 gym                            INFO       <8285.00> === STARTING STEP ===
2026-01-05 18:32:21,915 sats.satellite.RSO             INFO       <8285.00> RSO: action_nadir_point tasked for 1000000000.0 seconds
2026-01-05 18:32:21,915 sats.satellite.RSO             INFO       <8285.00> RSO: setting timed terminal event at 1000008285.0
2026-01-05 18:32:21,917 sats.satellite.Inspector       INFO       <8285.00> Inspector: Thrust clamped from 1.0303429529797417 m/s to 1.0 m/s.
2026-01-05 18:32:21,918 sats.satellite.Inspector       INFO       <8285.00> Inspector: Thrusting with inertial dV [-0.28809746  0.18429819  0.9396989 ] with 8032.0615234375 second drift.
2026-01-05 18:32:21,919 sats.satellite.Inspector       INFO       <8285.00> Inspector: setting timed terminal event at 16317.1
2026-01-05 18:32:21,919 sats.satellite.Inspector       INFO       <8285.00> Inspector: FSW action action_inspect_rso activated.
2026-01-05 18:32:22,274 sats.satellite.Inspector       INFO       <16320.00> Inspector: timed termination at 16317.1
2026-01-05 18:32:22,289 data.rso_inspection            INFO       <16320.00> Inspected/Illuminated/Total: 0/86/100
2026-01-05 18:32:22,290 data.composition               INFO       <16320.00> ResourceReward reward: {'Inspector': np.float64(-0.34265125220765735)}
2026-01-05 18:32:22,291 data.base                      INFO       <16320.00> Total reward: {'Inspector': np.float64(-0.34265125220765735)}
2026-01-05 18:32:22,291 sats.satellite.Inspector       INFO       <16320.00> Inspector: Satellite Inspector requires retasking
2026-01-05 18:32:22,293 gym                            INFO       <16320.00> Step reward: {}