RSO Inspection

This example demonstrates the configuration of a resident space object (RSO) inspection environment, in which a servicer spacecraft circumnavigates an RSO to image the illuminated facets.

[1]:
from importlib.metadata import version
from bsk_rl import sats, obs, act, ConstellationTasking, scene, data
from bsk_rl.obs.relative_observations import rso_imaged_regions
from bsk_rl.utils.orbital import fibonacci_sphere
from bsk_rl.sim import dyn, fsw
import numpy as np
from Basilisk.architecture import bskLogging
from functools import partial
from bsk_rl.utils.orbital import random_orbit, random_unit_vector, relative_to_chief
from Basilisk.utilities.orbitalMotion import elem2rv
from Basilisk.utilities.RigidBodyKinematics import C2MRP

# Set Basilisk's default log level to BSK_WARNING.
bskLogging.setDefaultLogLevel(bskLogging.BSK_WARNING)

RLlib is actively developed and can change significantly from version to version. For this script, the following version is used:

[2]:
# Look up the installed version of the "ray" distribution via importlib.metadata.
version("ray")  # Parent package of RLlib
[2]:
'2.35.0'

Defining the Satellites

First, the RSO satellite is configured. A simple model is used that has no actuators modelled and just deterministically points nadir.

[3]:
class RSOSat(sats.Satellite):
    """Passive RSO satellite with a constant observation and one nadir-pointing action."""

    # Placeholder observation: a property named "one" whose function always returns 1.0.
    observation_spec = [
        obs.SatProperties({"prop": "one", "fn": lambda _: 1.0}),
    ]
    # Single available action: nadir pointing with a duration of 1e9.
    action_spec = [act.NadirPoint(duration=1e9)]
    dyn_type = (dyn.ConjunctionDynModel, dyn.RSODynModel)
    fsw_type = fsw.FSWModel

Arguments for the satellite are configured for smooth pointing behavior.

[4]:
# Keyword arguments supplied to the RSO satellite through ``sat_args``.
rso_sat_args = {"conjunction_radius": 2.0}

The inspector satellite has a more complex configuration. First, an observation function for the sun vector is defined.

[5]:
def sun_hat_chief(self, other):
    """Return the unit vector from this satellite to the sun, rotated by ``other.dynamics.HN``.

    Args:
        self: Satellite evaluating the observation. Its simulator's sun state
            message and its own ``dynamics.r_BN_N`` position are read.
        other: Satellite whose ``dynamics.HN`` matrix is applied to the result.

    Returns:
        The product ``HN @ r_SB_N_hat``, where ``r_SB_N_hat`` is the normalized
        difference between the sun position and this satellite's position.
    """
    world = self.simulator.world
    sun_state = world.gravFactory.spiceObject.planetStateOutMsgs[world.sun_index].read()
    sat_position = self.dynamics.r_BN_N
    sun_position = np.array(sun_state.PositionVector)

    # Satellite-to-sun vector, normalized to unit length.
    to_sun = sun_position - sat_position
    to_sun_hat = to_sun / np.linalg.norm(to_sun)

    return other.dynamics.HN @ to_sun_hat

The inspector satellite is configured with observations relating to the relative state and the mission objectives. The satellite is given an action for impulsively thrusting and drifting. The dynamics and flight software models introduce a maximum range check, collision checking, orbital maneuvers, and RSO inspection.

[6]:
class InspectorSat(sats.Satellite):
    """Inspector satellite that thrusts and drifts relative to the chief named "RSO"."""

    observation_spec = [
        # The inspector's own delta-V budget and orbital properties.
        obs.SatProperties(
            {"prop": "dv_available", "norm": 10},
            {"prop": "inclination", "norm": np.pi},
            {"prop": "eccentricity", "norm": 0.1},
            {"prop": "semi_major_axis", "norm": 7000},
            {"prop": "ascending_node", "norm": 2 * np.pi},
            {"prop": "argument_of_periapsis", "norm": 2 * np.pi},
            {"prop": "true_anomaly", "norm": 2 * np.pi},
            {"prop": "beta_angle", "norm": np.pi},
        ),
        obs.ResourceRewardWeight(),
        # Quantities evaluated relative to the chief satellite "RSO".
        obs.RelativeProperties(
            {"prop": "r_DC_Hc", "norm": 500},
            {"prop": "v_DC_Hc", "norm": 5},
            {
                "prop": "rso_imaged_regions",
                # Imaged-region observation over 15 Fibonacci-sphere region centers.
                "fn": partial(
                    rso_imaged_regions,
                    region_centers=fibonacci_sphere(15),
                    frame="chief_hill",
                ),
            },
            {"prop": "sun_hat_Hc", "fn": sun_hat_chief},
            chief_name="RSO",
        ),
        obs.Eclipse(norm=5700),
        obs.Time(),
    ]

    # One action type: an impulsive thrust followed by a drift, with the
    # "action_inspect_rso" flight software action named as ``fsw_action``.
    action_spec = [
        act.ImpulsiveThrustHill(
            chief_name="RSO",
            max_dv=1.0,
            max_drift_duration=5700.0 * 2,
            fsw_action="action_inspect_rso",
        )
    ]

    dyn_type = (
        dyn.MaxRangeDynModel,
        dyn.ConjunctionDynModel,
        dyn.RSOInspectorDynModel,
    )
    fsw_type = (
        fsw.SteeringFSWModel,
        fsw.MagicOrbitalManeuverFSWModel,
        fsw.RSOInspectorFSWModel,
    )

Generous configurations are used for the inspector, allowing for “sloppy” attitude control with a low simulation step rate.

[7]:
# Keyword arguments supplied to the inspector satellite through ``sat_args``.
inspector_sat_args = {
    # Imaging requirements
    "imageAttErrorRequirement": 1.0,
    "imageRateErrorRequirement": None,
    # Data and power resources
    "instrumentBaudRate": 1,
    "dataStorageCapacity": 1e6,
    "batteryStorageCapacity": 1e9,
    "storedCharge_Init": 1e9,
    # Relative-motion limits and maneuver budget
    "conjunction_radius": 2.0,
    "dv_available_init": 10.0,
    "max_range_radius": 1000,
    "chief_name": "RSO",
    "u_max": 1.0,
}

Environment Generation

A satellite argument randomizer is defined to configure the initial state of the satellites. The RSO is put into a random orbit with apogee and perigee altitudes between 500 km and 1100 km. The inspector is placed in the region 250 to 750 meters from the RSO, with up to 1 m/s of relative velocity. Finally, the RSO’s attitude and body rate are set up to be in the nadir-pointing initial configuration.

[8]:
def sat_arg_randomizer(satellites):
    """Generate randomized initial arguments for the RSO and each inspector.

    A random chief orbit is drawn for the satellite named "RSO", every satellite
    whose name contains "Inspector" is given a random state relative to that
    chief, and the RSO's initial attitude and body rate are then set from the
    Hill frame of its orbit.

    Args:
        satellites: Satellites in the environment. One must be named "RSO".

    Returns:
        The argument dictionary assembled from the ``relative_to_chief``
        randomizers, with ``sigma_init`` and ``omega_init`` added to the RSO's
        entry.
    """
    # Chief orbit: the eccentricity cap keeps perigee and apogee within
    # 500 and 1100 above the Earth radius (km).
    earth_radius_km = 6371.0
    sma_km = earth_radius_km + np.random.uniform(500, 1100)
    ecc_cap = min(
        1 - (earth_radius_km + 500) / sma_km,
        (earth_radius_km + 1100) / sma_km - 1,
    )
    ecc = np.random.uniform(0.0, ecc_cap)
    chief_orbit = random_orbit(a=sma_km, e=ecc)

    inspectors = [sat for sat in satellites if "Inspector" in sat.name]
    rso = [sat for sat in satellites if sat.name == "RSO"][0]

    def random_relative_state():
        # Relative position of magnitude 250-750 followed by a relative
        # velocity of magnitude 0-1, each along its own random direction.
        position = random_unit_vector() * np.random.uniform(250, 750)
        velocity = random_unit_vector() * np.random.uniform(0, 1.0)
        return np.concatenate((position, velocity))

    # Each inspector is randomized against the same chief orbit.
    args = {}
    for inspector in inspectors:
        randomize_pair = relative_to_chief(
            chief_name="RSO",
            chief_orbit=chief_orbit,
            deputy_relative_state={inspector.name: random_relative_state},
        )
        args.update(randomize_pair([rso, inspector]))

    # Hill frame of the RSO orbit: radial, transverse, and orbit-normal axes.
    mu = rso.sat_args_generator["mu"]
    r_N, v_N = elem2rv(mu, args[rso]["oe"])

    radial = r_N / np.linalg.norm(r_N)
    velocity_dir = v_N / np.linalg.norm(v_N)
    normal = np.cross(radial, velocity_dir)
    normal = normal / np.linalg.norm(normal)
    transverse = np.cross(normal, radial)
    HN = np.array([radial, transverse, normal])

    # The body frame starts aligned with the Hill frame (BH is the identity).
    BN = np.eye(3) @ HN

    # Rotate about the orbit normal once per orbital period.
    period = np.sqrt(chief_orbit.a**3 / mu) * 2 * np.pi
    omega_BN_N = normal * 2 * np.pi / period

    rso_args = args[rso]
    rso_args["sigma_init"] = C2MRP(BN)
    rso_args["omega_init"] = BN @ omega_BN_N

    return args

The scenario is configured to set the RSO geometry as a sphere with 100 points at a radius of 1 meter. Points must be imaged within 30 degrees of their normal, with illumination coming from no more than 60 degrees from normal. The inspector must be within 250 meters to inspect the RSO.

[9]:
# Spherical RSO geometry and the imaging constraints for inspecting it.
scenario = scene.SphericalRSO(
    n_points=100,  # number of points on the sphere
    radius=1.0,  # sphere radius (1 meter)
    theta_max=np.radians(30),  # max imaging angle from a point's normal
    range_max=250,  # max inspector range for inspection (250 meters)
    theta_solar_max=np.radians(60),  # max illumination angle from a point's normal
)

This scenario uses two rewarders. For the RSO inspection component of the task, a bonus of 1.0 is yielded once at least 90% of the illuminated points have been inspected. The ResourceReward is used to penalize fuel use, with some basic logic added to only apply the reward to the Inspector.

[10]:
# Two rewarders: an inspection completion bonus and a resource (delta-V) term.
rewarders = (
    data.RSOInspectionReward(
        completion_bonus=1.0,
        completion_threshold=0.90,
    ),
    data.ResourceReward(
        # Only satellites whose FSW is a MagicOrbitalManeuverFSWModel report
        # dv_available; every other satellite reports 0.0.
        resource_fn=lambda sat: sat.fsw.dv_available
        if isinstance(sat.fsw, fsw.MagicOrbitalManeuverFSWModel)
        else 0.0,
        # np.random.uniform is evaluated once, when this tuple is built.
        # NOTE(review): if a new weight per episode is intended, confirm whether
        # ResourceReward accepts a callable for reward_weight.
        reward_weight=np.random.uniform(0.0, 0.5),
    ),
)

With all the components defined, the environment can be instantiated.

[11]:
# Assemble the multi-satellite environment from the pieces defined above.
env = ConstellationTasking(
    satellites=[
        RSOSat("RSO", sat_args=rso_sat_args),
        # The names "RSO" and "Inspector" are the ones sat_arg_randomizer matches on.
        InspectorSat("Inspector", sat_args=inspector_sat_args, obs_type=dict),
    ],
    sat_arg_randomizer=sat_arg_randomizer,
    scenario=scenario,
    rewarder=rewarders,
    time_limit=60000,  # NOTE(review): presumably seconds of simulated time — confirm
    sim_rate=5.0,  # NOTE(review): presumably the simulation step in seconds — confirm
    log_level="INFO",
)

Environment Interaction

The environment is reset and randomly stepped through.

Future Work: This example will be updated with an actual trained policy in the future.

[12]:
# Reset the environment, then take four steps. The RSO always receives action 0
# and the inspector receives an action sampled from its action space.
# NOTE(review): step()'s return value is not inspected, so the loop keeps stepping
# even after an agent is reported as terminated — confirm this is intended here.
env.reset()
for i in range(4):
    env.step(dict(RSO=0, Inspector=env.action_space("Inspector").sample()))
2026-05-19 20:30:20,032 gym                            INFO       Resetting environment with seed=3546264096
2026-05-19 20:30:20,114 utils.orbital                  WARNING    <0.00> Could not find eclipse transitions in next 12000.0 seconds
2026-05-19 20:30:20,115 gym                            INFO       <0.00> Environment reset
/opt/hostedtoolcache/Python/3.11.15/x64/lib/python3.11/site-packages/gymnasium/spaces/box.py:130: UserWarning: WARN: Box bound precision lowered by casting to float32
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
2026-05-19 20:30:20,117 gym                            INFO       <0.00> === STARTING STEP ===
2026-05-19 20:30:20,117 sats.satellite.RSO             INFO       <0.00> RSO: action_nadir_point tasked for 1000000000.0 seconds
2026-05-19 20:30:20,118 sats.satellite.RSO             INFO       <0.00> RSO: setting timed terminal event at 1000000000.0
2026-05-19 20:30:20,119 sats.satellite.Inspector       INFO       <0.00> Inspector: Thrust clamped from 1.0925320585124905 m/s to 1.0 m/s.
2026-05-19 20:30:20,120 sats.satellite.Inspector       INFO       <0.00> Inspector: Thrusting with inertial dV [-0.15522432  0.82498879  0.54341412] with 4277.1318359375 second drift.
2026-05-19 20:30:20,121 sats.satellite.Inspector       INFO       <0.00> Inspector: setting timed terminal event at 4277.1
2026-05-19 20:30:20,121 sats.satellite.Inspector       INFO       <0.00> Inspector: FSW action action_inspect_rso activated.
2026-05-19 20:30:20,162 sats.satellite.Inspector       INFO       <970.00> Inspector: Exceeded maximum range of 1000 m from RSO
2026-05-19 20:30:20,168 data.rso_inspection            INFO       <970.00> Inspected/Illuminated/Total: 0/40/100
2026-05-19 20:30:20,168 data.composition               INFO       <970.00> ResourceReward reward: {'Inspector': np.float64(-0.025964894061342725)}
2026-05-19 20:30:20,169 data.base                      INFO       <970.00> Total reward: {'Inspector': np.float64(-0.025964894061342725)}
2026-05-19 20:30:20,170 sats.satellite.Inspector       WARNING    <970.00> Inspector: failed range_valid check
2026-05-19 20:30:20,173 utils.orbital                  WARNING    <970.00> Could not find eclipse transitions in next 12000.0 seconds
2026-05-19 20:30:20,174 gym                            INFO       <970.00> Step reward: {'Inspector': np.float64(-1.0259648940613428)}
2026-05-19 20:30:20,175 gym                            INFO       <970.00> Episode terminated: ['Inspector']
2026-05-19 20:30:20,175 gym                            INFO       <970.00> === STARTING STEP ===
2026-05-19 20:30:20,176 sats.satellite.RSO             INFO       <970.00> RSO: action_nadir_point tasked for 1000000000.0 seconds
2026-05-19 20:30:20,177 sats.satellite.RSO             INFO       <970.00> RSO: setting timed terminal event at 1000000970.0
2026-05-19 20:30:20,177 sats.satellite.Inspector       INFO       <970.00> Inspector: Thrust clamped from 1.228998602611333 m/s to 1.0 m/s.
2026-05-19 20:30:20,178 sats.satellite.Inspector       INFO       <970.00> Inspector: Thrusting with inertial dV [-0.04106295 -0.16587596  0.98529133] with 9502.6845703125 second drift.
2026-05-19 20:30:20,179 sats.satellite.Inspector       INFO       <970.00> Inspector: setting timed terminal event at 10472.7
2026-05-19 20:30:20,180 sats.satellite.Inspector       INFO       <970.00> Inspector: FSW action action_inspect_rso activated.
2026-05-19 20:30:20,608 sats.satellite.Inspector       INFO       <10475.00> Inspector: timed termination at 10472.7
2026-05-19 20:30:20,627 data.rso_inspection            INFO       <10475.00> Inspected/Illuminated/Total: 0/45/100
2026-05-19 20:30:20,628 data.composition               INFO       <10475.00> ResourceReward reward: {'Inspector': np.float64(-0.025964894061342725)}
2026-05-19 20:30:20,628 data.base                      INFO       <10475.00> Total reward: {'Inspector': np.float64(-0.025964894061342725)}
2026-05-19 20:30:20,629 sats.satellite.Inspector       INFO       <10475.00> Inspector: Satellite Inspector requires retasking
2026-05-19 20:30:20,630 gym                            INFO       <10475.00> Step reward: {}
2026-05-19 20:30:20,631 gym                            INFO       <10475.00> === STARTING STEP ===
2026-05-19 20:30:20,632 sats.satellite.RSO             INFO       <10475.00> RSO: action_nadir_point tasked for 1000000000.0 seconds
2026-05-19 20:30:20,632 sats.satellite.RSO             INFO       <10475.00> RSO: setting timed terminal event at 1000010475.0
2026-05-19 20:30:20,633 sats.satellite.Inspector       INFO       <10475.00> Inspector: Thrusting with inertial dV [0.76378185 0.33515031 0.1256892 ] with 1379.2828369140625 second drift.
2026-05-19 20:30:20,634 sats.satellite.Inspector       INFO       <10475.00> Inspector: setting timed terminal event at 11854.3
2026-05-19 20:30:20,635 sats.satellite.Inspector       INFO       <10475.00> Inspector: FSW action action_inspect_rso activated.
2026-05-19 20:30:20,699 sats.satellite.Inspector       INFO       <11855.00> Inspector: timed termination at 11854.3
2026-05-19 20:30:20,704 data.rso_inspection            INFO       <11855.00> Inspected/Illuminated/Total: 0/45/100
2026-05-19 20:30:20,705 data.composition               INFO       <11855.00> ResourceReward reward: {'Inspector': np.float64(-0.021901295372930823)}
2026-05-19 20:30:20,706 data.base                      INFO       <11855.00> Total reward: {'Inspector': np.float64(-0.021901295372930823)}
2026-05-19 20:30:20,706 sats.satellite.Inspector       INFO       <11855.00> Inspector: Satellite Inspector requires retasking
2026-05-19 20:30:20,707 gym                            INFO       <11855.00> Step reward: {}
2026-05-19 20:30:20,708 gym                            INFO       <11855.00> === STARTING STEP ===
2026-05-19 20:30:20,709 sats.satellite.RSO             INFO       <11855.00> RSO: action_nadir_point tasked for 1000000000.0 seconds
2026-05-19 20:30:20,709 sats.satellite.RSO             INFO       <11855.00> RSO: setting timed terminal event at 1000011855.0
2026-05-19 20:30:20,711 sats.satellite.Inspector       INFO       <11855.00> Inspector: Thrust clamped from 1.3055786218414476 m/s to 1.0 m/s.
2026-05-19 20:30:20,711 sats.satellite.Inspector       INFO       <11855.00> Inspector: Thrusting with inertial dV [-0.33400403  0.65435883  0.67842158] with 4942.16796875 second drift.
2026-05-19 20:30:20,712 sats.satellite.Inspector       INFO       <11855.00> Inspector: setting timed terminal event at 16797.2
2026-05-19 20:30:20,713 sats.satellite.Inspector       INFO       <11855.00> Inspector: FSW action action_inspect_rso activated.
2026-05-19 20:30:20,936 sats.satellite.Inspector       INFO       <16800.00> Inspector: timed termination at 16797.2
2026-05-19 20:30:20,944 data.rso_inspection            INFO       <16800.00> Inspected/Illuminated/Total: 0/45/100
2026-05-19 20:30:20,944 data.composition               INFO       <16800.00> ResourceReward reward: {'Inspector': np.float64(-0.025964894061342725)}
2026-05-19 20:30:20,945 data.base                      INFO       <16800.00> Total reward: {'Inspector': np.float64(-0.025964894061342725)}
2026-05-19 20:30:20,945 sats.satellite.Inspector       INFO       <16800.00> Inspector: Satellite Inspector requires retasking
2026-05-19 20:30:20,947 gym                            INFO       <16800.00> Step reward: {}