In [1]:
import sys
sys.path.insert(0, '../')

import warnings

warnings.simplefilter("ignore")
# warnings.filterwarnings("error", message="overflow encountered in cast")
# warnings.filterwarnings("ignore", message="`alltrue` is deprecated as of NumPy")

import os
import numpy as np
import matplotlib.pyplot as plt
import tables

from astropy.table import Table, join
from astropy.coordinates import SkyCoord
from astropy import units as u

from pathlib import Path
from IPython.display import display, Markdown

from agasc_gaia.datasets import get_agasc_summary
import agasc_gaia.cross_match as xm
from agasc_gaia.gaia_model import get_gaia_model

from agasc_gaia.star_report import Report
from agasc_gaia import star_report
In [2]:
import seaborn as sns
In [3]:
SKA = Path(os.environ['SKA'])
AGASC_FILE = SKA / 'data' / 'agasc' / 'agasc1p7.h5'
In [4]:
generate_reports = True
In [5]:
# @utils.cached(name="comparison_table")
def get_comparison_table():
    print("loading tables")


    print("  - summary")
    agasc_summary = get_agasc_summary()


    print("  - indirect x-matches")
    agasc_indirect = xm.get_agasc_tycho_gsc_gaia_x_match()
    print("  - direct x-matches")
    agasc_direct = xm.get_agasc_gaia_x_match_difficult_fixed()
    agasc_direct = agasc_direct[agasc_direct["best_match"] & (agasc_direct['p_value'] > 0.001)]
    assert len(agasc_indirect), len(np.unique(agasc_indirect['agasc_id']))
    assert len(agasc_direct), len(np.unique(agasc_direct['agasc_id']))

    print("  - AGASC 1.7")
    with tables.open_file(SKA / 'data' / 'agasc' / 'agasc1p7.h5') as h5:
        agasc_1p7 = Table(h5.root.data[:])

    # this removes a star that is duplicated in the AGASC catalog
    agasc_1p7 = agasc_1p7[
        ~(
            (agasc_1p7["AGASC_ID"] == 154534513)
            & (agasc_1p7["MAG_CATID"] == 1)
        )
    ]
    agasc_ids = np.unique(np.concatenate([agasc_indirect['agasc_id'].data, agasc_direct['agasc_id'].data]))
    agasc_1p7 = agasc_1p7[np.in1d(agasc_1p7['AGASC_ID'], agasc_ids)]


    cols = [
        # 'pm_ra',
        # 'pm_dec',
        'mag_aca',
        'mag_aca_err',
        'mag_aca_obs',
        'mag_aca_err_obs',
        'guide',
        'acq',
        'tycho_id',
        'tycho_tdsc_id',
        'mag_catid',
        'mag_band',
    ]

    i = np.searchsorted(agasc_summary['agasc_id'], agasc_indirect['agasc_id'])
    for col in cols:
        agasc_indirect[col] = agasc_summary[col][i]
    agasc_indirect['mag_1p7'] = agasc_summary['mag'][i]

    i = np.searchsorted(agasc_summary['agasc_id'], agasc_direct['agasc_id'])
    for col in cols:
        agasc_direct[col] = agasc_summary[col][i]
    agasc_direct['mag_1p7'] = agasc_summary['mag'][i]

    gaia_model = get_gaia_model()
    cols = ['g_mag', 'rp_mag', 'bp_mag', 'mag_catid', 'mag_band', 'has_mag', "phot_variable_flag", "mad_mag_g_fov"]

    agasc_indirect['mag_aca_pred'] = np.ma.masked_all(len(agasc_indirect), dtype=np.float16)
    agasc_indirect['mag_aca_pred'][agasc_indirect['has_mag'] != 0] = gaia_model.predict(agasc_indirect[agasc_indirect['has_mag'] != 0][cols].to_pandas())

    agasc_direct['mag_aca_pred'] = np.ma.masked_all(len(agasc_direct), dtype=np.float16)
    agasc_direct['mag_aca_pred'][agasc_direct['has_mag'] != 0] = gaia_model.predict(agasc_direct[agasc_direct['has_mag'] != 0][cols].to_pandas())



    print("joining indirect table")
    data = join(
        agasc_1p7,
        agasc_indirect,
        keys_left='AGASC_ID',
        keys_right='agasc_id',
        table_names=['1', '2'],
        uniq_col_name='{col_name}_{table_name}',
        metadata_conflicts='silent',
        join_type='left',
    )

    assert not np.any([col[-2:] == '_1' for col in data.colnames])

    print("joining direct table")
    data = join(
        data,
        agasc_direct,
        keys_left='AGASC_ID',
        keys_right='agasc_id',
        table_names=['indirect', 'direct'],
        uniq_col_name='{col_name}_{table_name}',
        metadata_conflicts='silent',
        join_type='left',
    )

    direct = ~data['agasc_id_direct'].mask
    indirect = ~data['agasc_id_indirect'].mask


    print("removing redundant columns")
    cols = [
        'agasc_id',
        'acq',
        'guide',
        'mag_aca',
        'mag_aca_err',
        'mag_aca_err_obs',
        'mag_aca_obs',
        'tycho_id',
        'tycho_tdsc_id',
        ]

    for col in cols:
        if f'{col}_direct' in data.colnames:
            data.remove_column(f'{col}_direct')
        if f'{col}_indirect' in data.colnames:
            data.remove_column(f'{col}_indirect')


    cols = [
        'acq',
        'gsc2.3',
        'guide',
        'mag',
        'mag_aca',
        'mag_aca_err',
        'mag_aca_err_obs',
        'mag_aca_obs',
        'tycho_id',
        'tycho_tdsc_id',
    ]

    i = np.searchsorted(agasc_summary['agasc_id'], data['AGASC_ID'])
    for col in cols:
        data[col] = agasc_summary[col][i]
    # data['mag_1p7'] = agasc_summary['mag'][i]

    cols = ['agasc_id', 'ra', 'dec', 'epoch', 'pm_ra', 'pm_dec']
    for col in cols:
        data[col] = agasc_summary[col][i]

    print("setting positions")
    data['has_gsc'] = data["XREF_ID1"] != -9999
    data['has_tyc'] = data["XREF_ID3"] != -9999

    data['PM_RA'][data['PM_RA'] == -9999] = 0
    data['PM_DEC'][data['PM_DEC'] == -9999] = 0

    # these are "corrected" 1p7 positions,
    # same as 1p7 except Tycho2 positions were shifted to the actual epoch instead of 2000.
    # This was to avoid errors arising from the 1p7 proper motions
    data["coord"] = SkyCoord(
        ra=data["ra"],
        dec=data["dec"],
        unit="deg"
    )
    data["coord_direct_today"] = SkyCoord(
        ra=data["ra_direct"],
        dec=data["dec_direct"],
        # ra=(
        #     data["ra_direct"]
        #     + data["pm_ra_direct"]
        #     * (2023 - data["epoch_direct"])
        #     / 1000
        #     / 3600
        #     / np.cos(np.deg2rad(data["dec_direct"]))
        # ),
        # dec=(
        #     data["dec_direct"]
        #     + data["pm_dec_direct"]
        #     * (2023 - data["epoch_direct"])
        #     / 1000
        #     / 3600
        # ),
        unit="deg"
    )
    data["coord_indirect_today"] = SkyCoord(
        ra=data["ra_indirect"],
        dec=data["dec_indirect"],
        # ra=(
        #     data["ra_indirect"]
        #     + data["pm_ra_indirect"]
        #     * (2023 - data["epoch_indirect"])
        #     / 1000
        #     / 3600
        #     / np.cos(np.deg2rad(data["dec_indirect"]))
        # ),
        # dec=(
        #     data["dec_indirect"]
        #     + data["pm_dec_indirect"]
        #     * (2023 - data["epoch_indirect"])
        #     / 1000
        #     / 3600
        # ),
        unit="deg"
    )
    data["coord_direct"] = SkyCoord(
        ra=(
            data["ra_direct"]
            + data["pm_ra_direct"]
            * (data["epoch"] - data["epoch_direct"])
            / 1000
            / 3600
            / np.cos(np.deg2rad(data["dec_direct"]))
        ),
        dec=(
            data["dec_direct"]
            + data["pm_dec_direct"]
            * (data["epoch"] - data["epoch_direct"])
            / 1000
            / 3600
        ),
        unit="deg"
    )
    data["coord_indirect"] = SkyCoord(
        ra=(
            data["ra_indirect"]
            + data["pm_ra_indirect"]
            * (data["epoch"] - data["epoch_indirect"])
            / 1000
            / 3600
            / np.cos(np.deg2rad(data["dec_indirect"]))
        ),
        dec=(
            data["dec_indirect"]
            + data["pm_dec_indirect"]
            * (data["epoch"] - data["epoch_indirect"])
            / 1000
            / 3600
        ),
        unit="deg"
    )
    # this is to compare the 1p7 positions to what they would be with the 1p8 proper motion
    # our reference are the "corrected" positions above, and these are the 1p7 positions
    # shifted to their original epoch using the 1p8 proper motion
    data["coord_1p7"] = SkyCoord(
        ra=(
            data["RA"]
            + data["pm_ra_direct"]
            * (data["epoch"] - data["EPOCH"])
            / 1000
            / 3600
            / np.cos(np.deg2rad(data["DEC"]))
        ),
        dec=(
            data["DEC"]
            + data["pm_dec_direct"]
            * (data["epoch"] - data["EPOCH"])
            / 1000
            / 3600
        ),
        unit="deg"
    )

    # the difference between the two methods
    data["d2d"] = data["coord_direct"].separation(data["coord_indirect"]).to(u.arcsec)
    data["d2d_direct"] = data["coord"].separation(data["coord_direct"]).to(u.arcsec)
    data["d2d_indirect"] = data["coord"].separation(data["coord_indirect"]).to(u.arcsec)
    # differences introduced by removing 1p7 proper motion
    data["d2d_1p7"] = data["coord"].separation(data["coord_1p7"]).to(u.arcsec)
    # the difference between the two methods propagated to today
    data["d2d_today"] = data["coord_direct_today"].separation(data["coord_indirect_today"]).to(u.arcsec)

    return data
In [6]:
data = get_comparison_table()
loading tables
  - summary
  - indirect x-matches
  - direct x-matches
  - AGASC 1.7
joining indirect table
joining direct table
removing redundant columns
setting positions
In [7]:
direct = ~data['gaia_id_direct'].mask
indirect = ~data['gaia_id_indirect'].mask

data_intersection = data[indirect & direct]
observed = ~data_intersection['mag_aca_obs'].mask

agree = (data_intersection['gaia_id_direct'] == data_intersection['gaia_id_indirect'])
In [8]:
disagree = data_intersection[~agree]

High Level Plots¶

This first plot is the difference in magnitude and radial offset between 1p7 and Gaia. The lines correspond to the match probability used.

In [9]:
fig, axes = plt.subplot_mosaic([["mag", "d2d"]], figsize=(9, 4))

plt.sca(axes["mag"])
plt.title("$\Delta$ mag")
plt.hist(
    data_intersection['d_mag_direct'],
    bins=np.linspace(-10, 10, 100),
    histtype='step',
    density=True,
    label="direct"
    )
plt.hist(
    data_intersection['d_mag_indirect'],
    bins=np.linspace(-10, 10, 100),
    histtype='step',
    density=True,
    label="indirect"
    )
plt.yscale('log')
plt.xlabel("mag$_{pred}$ - mag$_{Gaia}$")

N = len(data_intersection)
x = np.linspace(-3, 3, 100)
plt.plot(x, xm.d_mag_probability(x))


plt.sca(axes["d2d"])
plt.title("Radial offset")
plt.hist(
    data_intersection['d2d_direct'],
    # bins=np.logspace(-5, 1, 100),
    bins=np.linspace(0, 10, 100),
    histtype='step',
    density=True,
    label="direct",
    )
plt.hist(
    data_intersection['d2d_indirect'],
    # bins=np.logspace(-5, 1, 100),
    bins=np.linspace(0, 10, 100),
    histtype='step',
    density=True,
    label="indirect",
    )
plt.yscale('log')
plt.xlabel("d2d (arcsec)")

N = len(data_intersection['d2d_direct'])
# x = np.logspace(-3, 1, 101)
x = np.linspace(0, 10, 101)
dx = np.diff(x)
x = x[:-1] + dx / 2
# p_d2d = (
#     1*d2d_probability(x, sigma_d2d=.2)
#     + 0.05*d2d_probability(x, sigma_d2d=.5)
#     # + 0.5*d2d_probability(x, sigma_d2d=1.5)
# )
p_d2d = xm.d2d_probability(x)
p_d2d /= np.sum(dx * p_d2d)
plt.plot(x, p_d2d)
p_d2d = xm.gaussian_d2d_probability_(x, sigma_d2d=1.5)
p_d2d /= np.sum(dx * p_d2d)
plt.plot(x, p_d2d)
# p_d2d = d2d_probability(x, sigma_d2d=0.5)
# p_d2d /= np.sum(dx * p_d2d)
# plt.plot(x, p_d2d)
# p = 0.1
# p_d2d = (
#     (1-p)*d2d_probability(x, sigma_d2d=0.25) + 
#     p*d2d_probability(x, sigma_d2d=.5)
# )
# p_d2d /= np.sum(dx * p_d2d)
# plt.plot(x, p_d2d)
# p = 0.1
# p2 = 0.008
# p_d2d = (
#     (1-p)*xm.gaussian_d2d_probability_(x, sigma_d2d=0.25) + 
#     p*xm.gaussian_d2d_probability_(x, sigma_d2d=.5) +
#     p2*xm.gaussian_d2d_probability_(x, sigma_d2d=.8)
# )
# p_d2d /= np.sum(dx * p_d2d)
# plt.plot(x, p_d2d)
# p, p1, p2, p3 = 0.89020772, 0.09891197, 0.00791296, 0.00296736
# p_d2d = (
#     p*xm.gaussian_d2d_probability_(x, sigma_d2d=0.25) + 
#     p1*xm.gaussian_d2d_probability_(x, sigma_d2d=.5) +
#     p2*xm.gaussian_d2d_probability_(x, sigma_d2d=.8) +
#     p3*xm.gaussian_d2d_probability_(x, sigma_d2d=1.2)
# )
# p_d2d /= np.sum(dx * p_d2d)
# plt.plot(x, p_d2d)
plt.ylim(ymin=1e-7, ymax=4)
# plt.ylim(ymin=1e-1, ymax=1e6)
plt.xlim((0, 5))
plt.legend()

plt.tight_layout()
No description has been provided for this image
In [10]:
fig, axes = plt.subplot_mosaic([["mag", "d2d"]], figsize=(9, 4))

plt.sca(axes["mag"])
plt.hist(
    data_intersection['d_mag_direct'],
    bins=np.linspace(-10, 10, 100),
    histtype='step',
    label="direct",
)
plt.hist(
    data_intersection['d_mag_indirect'],
    bins=np.linspace(-10, 10, 100),
    histtype='step',
    label="indirect",
)

plt.xlabel("mag_1p8 - mag_1p7")
plt.legend()
plt.title("Magnitude")
plt.yscale('log')


plt.sca(axes["d2d"])
bins = np.linspace(0, 4, 100)
plt.hist(
    data_intersection["d2d_direct"][~agree],
    bins=bins,
    histtype='step',
    label='direct',
)
plt.hist(
    data_intersection["d2d_indirect"][~agree],
    bins=bins,
    histtype='step',
    label='indirect',
)
plt.title("Radial offset")
plt.legend()
plt.xlabel("d2d$_{1p7}$ (arcsec)")

plt.tight_layout()
No description has been provided for this image

Some numbers¶

In [11]:
n_both, n_indirect_no_direct, n_direct_no_indirect = np.count_nonzero(indirect & direct), np.count_nonzero(indirect & ~direct), np.count_nonzero(direct & ~indirect)


n_both_candidates = np.count_nonzero(indirect & direct & (data['guide'] | data['acq']))
n_indirect_no_direct_candidates = np.count_nonzero(indirect & ~direct & (data['guide'] | data['acq']))
n_direct_no_indirect_candidates = np.count_nonzero(direct & ~indirect & (data['guide'] | data['acq']))

n_both_observed = np.count_nonzero(indirect & direct & ~data['mag_aca_obs'].mask)
n_indirect_no_direct_observed = np.count_nonzero(indirect & ~direct & ~data['mag_aca_obs'].mask)
n_direct_no_indirect_observed = np.count_nonzero(direct & ~indirect & ~data['mag_aca_obs'].mask)

mag_out_indirect_2_5 = np.abs(data["mag_pred_indirect"] - data["MAG"]) > 2.5
mag_out_direct_2_5 = np.abs(data["mag_pred_direct"] - data["MAG"]) > 2.5
mag_out_indirect_5 = np.abs(data["mag_pred_indirect"] - data["MAG"]) > 5
mag_out_direct_5 = np.abs(data["mag_pred_direct"] - data["MAG"]) > 5
radial_outlier_direct_3 = data["d2d_direct"] > 3
radial_outlier_indirect_3 = data["d2d_indirect"] > 3
radial_outlier_direct_5 = data["d2d_direct"] > 5
radial_outlier_indirect_5 = data["d2d_indirect"] > 5

n_both_mag_outliers_2_5 = np.count_nonzero(indirect & direct & (mag_out_indirect_2_5 | mag_out_direct_2_5))
n_indirect_mag_outliers_2_5 = np.count_nonzero(indirect & ~direct & mag_out_indirect_2_5)
n_direct_mag_outliers_2_5 = np.count_nonzero(~indirect & direct & mag_out_direct_2_5)

n_both_mag_outliers_5 = np.count_nonzero(indirect & direct & (mag_out_indirect_5 | mag_out_direct_5))
n_indirect_mag_outliers_5 = np.count_nonzero(indirect & ~direct & mag_out_indirect_5)
n_direct_mag_outliers_5 = np.count_nonzero(~indirect & direct & mag_out_direct_5)

n_direct_radial_outliers_3 = np.count_nonzero(~indirect & direct & radial_outlier_direct_3)
n_indirect_radial_outliers_3 = np.count_nonzero(indirect & ~direct & radial_outlier_indirect_3)
n_both_radial_outliers_3 = np.count_nonzero(indirect & direct & radial_outlier_direct_3)

n_direct_radial_outliers_5 = np.count_nonzero(~indirect & direct & radial_outlier_direct_5)
n_indirect_radial_outliers_5 = np.count_nonzero(indirect & ~direct & radial_outlier_indirect_5)
n_both_radial_outliers_5 = np.count_nonzero(indirect & direct & radial_outlier_direct_5)

# "agree" is defined only in the intersection, which implies (indirect & direct)
n_disagree, f_agree = np.count_nonzero(~agree), np.count_nonzero(agree)/n_both
n_disagree_observed = np.count_nonzero(~agree & observed)
n_disagree_candidates = np.count_nonzero(~agree & (data_intersection['guide'] | data_intersection['acq']))

disagree_mag_out_indirect_2_5 = np.abs(data_intersection["mag_pred_indirect"] - data_intersection["MAG"]) > 2.5
disagree_mag_out_direct_2_5 = np.abs(data_intersection["mag_pred_direct"] - data_intersection["MAG"]) > 2.5
disagree_mag_out_indirect_5 = np.abs(data_intersection["mag_pred_indirect"] - data_intersection["MAG"]) > 5
disagree_mag_out_direct_5 = np.abs(data_intersection["mag_pred_direct"] - data_intersection["MAG"]) > 5
disagree_radial_outlier_direct_3 = data_intersection["d2d_direct"] > 3
disagree_radial_outlier_indirect_3 = data_intersection["d2d_indirect"] > 3
disagree_radial_outlier_direct_5 = data_intersection["d2d_direct"] > 5
disagree_radial_outlier_indirect_5 = data_intersection["d2d_indirect"] > 5

n_disagree_both_mag_outliers_2_5 = np.count_nonzero(agree & disagree_mag_out_indirect_2_5)
n_disagree_indirect_mag_outliers_2_5 = np.count_nonzero(~agree & disagree_mag_out_indirect_2_5)
n_disagree_direct_mag_outliers_2_5 = np.count_nonzero(~agree & disagree_mag_out_direct_2_5)

n_disagree_both_mag_outliers_5 = np.count_nonzero(agree & disagree_mag_out_indirect_5)
n_disagree_indirect_mag_outliers_5 = np.count_nonzero(~agree & disagree_mag_out_indirect_5)
n_disagree_direct_mag_outliers_5 = np.count_nonzero(~agree & disagree_mag_out_direct_5)

n_disagree_direct_radial_outliers_3 = np.count_nonzero(~agree & disagree_radial_outlier_direct_3)
n_disagree_indirect_radial_outliers_3 = np.count_nonzero(~agree & disagree_radial_outlier_indirect_3)
n_disagree_both_radial_outliers_3 = np.count_nonzero(agree & disagree_radial_outlier_direct_3)

n_disagree_direct_radial_outliers_5 = np.count_nonzero(~agree & disagree_radial_outlier_direct_5)
n_disagree_indirect_radial_outliers_5 = np.count_nonzero(~agree & disagree_radial_outlier_indirect_5)
n_disagree_both_radial_outliers_5 = np.count_nonzero(agree & disagree_radial_outlier_direct_5)

n_large_d2d_today = np.count_nonzero(data_intersection["d2d_today"] > 5)
n_large_d2d_direct = np.count_nonzero(data_intersection["d2d_direct"] > 3)
n_large_d2d_indirect = np.count_nonzero(data_intersection["d2d_indirect"] > 3)
n_disagree_indirect_mag_outliers_2_5 = np.count_nonzero(abs(data_intersection["mag_pred_indirect"] - data_intersection["MAG"]) > 5 )
n_disagree_direct_mag_outliers_2_5 = np.count_nonzero(abs(data_intersection["mag_pred_direct"] - data_intersection["MAG"]) > 5 )
n_disagree_indirect_p_relative = np.count_nonzero(data_intersection["p_relative_indirect"] < 0.5)
n_disagree_direct_p_relative = np.count_nonzero(data_intersection["p_relative_direct"] < 0.5)

txt = f"""

Disagreements in terms of which stars are matched to a Gaia counterpart:

| condition | both | direct | indirect |
| --- | --- | --- | --- |
| any | {n_both} | {n_direct_no_indirect} ({n_direct_no_indirect/n_both:.1%}) | {n_indirect_no_direct} ({n_indirect_no_direct/n_both:.1%})|
| guide or acq | {n_both_candidates} | {n_direct_no_indirect_candidates} ({n_direct_no_indirect_candidates/n_both_candidates:.1%}) | {n_indirect_no_direct_candidates} ({n_indirect_no_direct_candidates/n_both_candidates:.1%}) |
| observed | {n_both_observed} | {n_direct_no_indirect_observed} ({n_direct_no_indirect_observed/n_both_observed:.1%}) | {n_indirect_no_direct_observed} ({n_indirect_no_direct_observed/n_both_observed:.1%}) |
| d2d > 3 | {n_both_radial_outliers_3} | {n_direct_radial_outliers_3} | {n_indirect_radial_outliers_3} |
| d2d > 5 | {n_both_radial_outliers_5} | {n_direct_radial_outliers_5} | {n_indirect_radial_outliers_5} |
| abs(mag_pred - MAG) > 2.5 | {n_both_mag_outliers_2_5} | {n_direct_mag_outliers_2_5} | {n_indirect_mag_outliers_2_5} |
| abs(mag_pred - MAG) > 5 | {n_both_mag_outliers_5} | {n_direct_mag_outliers_5} | {n_indirect_mag_outliers_5} |

Disagreements among the stars that are matched to a Gaia counterpart in both methods: 

| | in both | disagree |
| --- | --- | --- |
| any | {n_both} | {n_disagree} ({n_disagree/n_both:.3%}) |
| candidates | {n_both_candidates} | {n_disagree_candidates} ({n_disagree_candidates/n_both_candidates:.3%})  |
| observed | {n_both_observed} | {n_disagree_observed} ({n_disagree_observed/n_both_observed:.3%})  |
| d2d_direct > 3 arcsec | | {n_large_d2d_direct} |
| d2d_indirect > 3 arcsec | | {n_large_d2d_indirect} |
| d2d_today > 5 | | {n_large_d2d_today} |
| abs(mag_pred - MAG) > 5 (indirect)| | {n_disagree_indirect_mag_outliers_2_5} |
| abs(mag_pred - MAG) > 5 (direct)| | {n_disagree_direct_mag_outliers_2_5} |
| p_relative_direct < 0.5 | | {n_disagree_direct_p_relative} |
| p_relative_indirect < 0.5 | | {n_disagree_indirect_p_relative} |
"""
display(Markdown(txt))

Disagreements in terms of which stars are matched to a Gaia counterpart:

condition both direct indirect
any 8467466 489879 (5.8%) 13169 (0.2%)
guide or acq 436738 6991 (1.6%) 593 (0.1%)
observed 92432 2194 (2.4%) 1 (0.0%)
d2d > 3 7 418 1174
d2d > 5 2 187 7
abs(mag_pred - MAG) > 2.5 4528 643 1351
abs(mag_pred - MAG) > 5 642 111 503

Disagreements among the stars that are matched to a Gaia counterpart in both methods:

in both disagree
any 8467466 4896 (0.058%)
candidates 436738 379 (0.087%)
observed 92432 43 (0.047%)
d2d_direct > 3 arcsec 7
d2d_indirect > 3 arcsec 259
d2d_today > 5 52
abs(mag_pred - MAG) > 5 (indirect) 632
abs(mag_pred - MAG) > 5 (direct) 74
p_relative_direct < 0.5 3
p_relative_indirect < 0.5 102

Proper Motion¶

In [12]:
fig, axes = plt.subplot_mosaic([["ra", "dec"]], figsize=(9, 4), sharex=True, sharey=True)
plt.suptitle("$\Delta PM$")
plt.sca(axes["ra"])
plt.plot(disagree['pm_ra_direct'] - disagree['PM_RA'], disagree['pm_ra_indirect'] - disagree['PM_RA'], '.')
plt.xlabel("$\Delta$PM$_{RA}$ direct")
plt.ylabel("$\Delta$PM$_{RA}$ indirect")
plt.title("RA")

plt.sca(axes["dec"])
plt.plot(disagree['pm_dec_direct'] - disagree['PM_DEC'], disagree['pm_dec_indirect'] - disagree['PM_DEC'], '.')
plt.xlabel("$\Delta$PM$_{DEC}$ direct")
plt.ylabel("$\Delta$PM$_{DEC}$ indirect")
plt.ylim((-1000, 1000))
plt.xlim((-1000, 1000))
plt.title("DEC")

fig, axes = plt.subplot_mosaic([["ra", "dec"]], figsize=(9, 4), sharex=True, sharey=True)
plt.suptitle("$\Delta PM$ (zoomed in)")
                               
plt.sca(axes["ra"])
plt.plot(disagree['pm_ra_direct'] - disagree['PM_RA'], disagree['pm_ra_indirect'] - disagree['PM_RA'], '.')
plt.xlabel("$\Delta$PM$_{RA}$ direct")
plt.ylabel("$\Delta$PM$_{RA}$ indirect")
plt.title("RA")

plt.sca(axes["dec"])
plt.plot(disagree['pm_dec_direct'] - disagree['PM_DEC'], disagree['pm_dec_indirect'] - disagree['PM_DEC'], '.')
plt.xlabel("$\Delta$PM$_{DEC}$ direct")
plt.ylabel("$\Delta$PM$_{DEC}$ indirect")
plt.ylim((-100, 100))
plt.xlim((-100, 100))
plt.title("DEC")
Out[12]:
Text(0.5, 1.0, 'DEC')
No description has been provided for this image
No description has been provided for this image
In [13]:
fig, axes = plt.subplot_mosaic([["ra", "dec"]], figsize=(9, 4))
                               
plt.suptitle("1p7-Direct")
plt.sca(axes["ra"])
plt.plot(disagree['PM_RA'], disagree['pm_ra_indirect'], '.')
plt.xlabel("PM_RA 1p7")
plt.ylabel("PM_RA indirect")
plt.title("RA")

plt.sca(axes["dec"])
plt.plot(disagree['PM_DEC'], disagree['pm_dec_indirect'], '.')
plt.xlabel("PM_DEC 1p7")
plt.ylabel("PM_DEC indirect")
plt.title("DEC")


fig, axes = plt.subplot_mosaic([["ra", "dec"]], figsize=(9, 4))

plt.suptitle("1p7-Direct")

plt.sca(axes["ra"])
plt.plot(disagree['PM_RA'], disagree['pm_ra_direct'], '.')
plt.xlabel("PM_RA 1p7")
plt.ylabel("PM_RA direct")
plt.title("RA")

plt.sca(axes["dec"])
plt.plot(disagree['PM_DEC'], disagree['pm_dec_direct'], '.')
plt.xlabel("PM_DEC 1p7")
plt.ylabel("PM_DEC direct")
plt.title("DEC")
Out[13]:
Text(0.5, 1.0, 'DEC')
No description has been provided for this image
No description has been provided for this image

Radial Offset¶

This section includes plots looking at radial separations. Not much came of it except to realize that the differences are cases where two Gaia stars are near a single AGASC star.

This can be seen in the plot below, which shows the histogram of radial offsets between 1p7 and 1p8 with each method. In log-scale, there is a clear bimodal distribution.

Because the direct method uses magnitudes in the matching probability, I expected a trade-off, where the radial offsets are larger. That is not the case. The radial offsets are smaller with the direct method. That could be because proper motions are better handled?

In [14]:
bins = np.logspace(-3, 2, 100)
plt.hist(
    data_intersection["d2d_direct"][~agree],
    bins=bins,
    histtype='step',
    label='direct-1p7',
)
plt.hist(
    data_intersection["d2d_indirect"][~agree],
    bins=bins,
    histtype='step',
    label='indirect-1p7',
)
# plt.hist(
#     data_intersection["d2d"][~agree],
#     bins=bins,
#     histtype='step',
#     label='indirect-direct',
# )
plt.xscale('log')
plt.legend()
plt.xlabel("d2d (arcsec)")
plt.tight_layout()
No description has been provided for this image

this bimodal distribution in log-scale actually corresponds to a long-tailed distribution

In [15]:
bins = np.linspace(0, 4, 100)
plt.hist(
    data_intersection["d2d_direct"][~agree],
    bins=bins,
    histtype='step',
    label='direct-1p7',
)
plt.hist(
    data_intersection["d2d_indirect"][~agree],
    bins=bins,
    histtype='step',
    label='indirect-1p7',
)
# plt.hist(
#     data_intersection["d2d"][~agree],
#     bins=bins,
#     histtype='step',
#     label='indirect-direct',
# )
plt.legend()
plt.xlabel("d2d (arcsec)")
plt.tight_layout()
No description has been provided for this image

d2d_today is the angular separation between the Gaia matches from both methods as it would be observed today.

In [16]:
plt.hist(
    data_intersection["d2d_today"][~agree],
    bins=np.logspace(-3, 2, 100),
    histtype="step",
)
plt.xscale('log')
plt.yscale('log')
No description has been provided for this image
In [17]:
data_intersection[data_intersection['d2d_today'] > 10][['AGASC_ID', 'd2d_today']]
Out[17]:
Table length=11
AGASC_IDd2d_today
arcsec
int32float64
1377088837.63962906801418
4482892817.38145521107779
5662964824.798016439675187
6410047225.144329520220083
29518225646.28792664226764
32899272828.51402100385447
40082649626.660939304221365
58681160839.33190385339832
68591029614.346379809274778
108240092844.33634978852832
109222454412.218642797037425
In [18]:
plt.hist(
    disagree["d2d_direct"] - disagree["d2d_indirect"],
    bins=np.linspace(-10, 10, 100),
    histtype='step',
    );
plt.yscale('log')
No description has been provided for this image
In [19]:
fig, axes = plt.subplot_mosaic(
    [
        ["direct-indirect", "1p7-indirect"],
        ["1p7-direct", "."]
    ],
    figsize=(9, 8)
)

plt.sca(axes["direct-indirect"])
bins = np.linspace(0, 5, 50)
sns.histplot(
    x=disagree["d2d_direct"],
    y=disagree["d2d_indirect"],  
    bins=[bins, bins],
)
plt.sca(axes["1p7-indirect"])
bins = np.linspace(0, 5, 50)
sns.histplot(
    x=disagree["d2d"],
    y=disagree["d2d_indirect"],  
    bins=[bins, bins],
)
plt.sca(axes["1p7-direct"])
bins = np.linspace(0, 5, 50)
sns.histplot(
    x=disagree["d2d"],
    y=disagree["d2d_direct"],  
    bins=[bins, bins],
)
Out[19]:
<Axes: label='1p7-direct', xlabel='d2d', ylabel='d2d_direct'>
No description has been provided for this image

Magnitude Changes¶

In [20]:
bins = np.linspace(-10, 10, 100)
plt.hist(
    (data_intersection["mag_pred_direct"] - data_intersection["MAG"])[~agree],
    bins=bins,
    histtype='step',
    label='direct',
)
plt.hist(
    (data_intersection["mag_pred_indirect"] - data_intersection["MAG"])[~agree],
    bins=bins,
    histtype='step',
    label='Tycho2-GSC2.3',
)
plt.xlabel('mag_1p8 - mag_1p7')
plt.yscale('log')
plt.legend()
plt.tight_layout()
No description has been provided for this image

Reports¶

In [21]:
agasc_id = 966533624
# agasc_id = 680536288
report = star_report.Report(agasc_id)
report = star_report.Report(agasc_id, alternates={'direct': 5974018828337490944, "indirect": 5974018828340131456})
# report.show_in_notebook()
# report.save_html("bla")
fig = report.plotly_fig()
fig.show()

Checking p_value¶

In [22]:
disagree[disagree['p_value_indirect'] > 0.99][['agasc_id', 'p_value_direct', 'p_value_indirect', 'gaia_id_direct', 'gaia_id_indirect']]
Out[22]:
Table length=0
agasc_idp_value_directp_value_indirectgaia_id_directgaia_id_indirect
int32float32float32int64int64
In [23]:
agasc_id = 530408
report = star_report.Report(agasc_id, alternates=dict( disagree[disagree['agasc_id'] == agasc_id][['gaia_id_direct', 'gaia_id_indirect']][0]))
# report.show_in_notebook()
# report.save_html("bla")
fig = report.plotly_fig()
fig.show()
In [24]:
plt.hist(
    disagree['p_value_direct'],
    bins=np.linspace(0, 1, 100),
    histtype='step',
    );
plt.hist(
    disagree['p_value_indirect'],
    bins=np.linspace(0, 1, 100),
    histtype='step',
    );
plt.yscale('log')
No description has been provided for this image
In [25]:
plt.hist(
    data['p_value_direct'],
    bins=np.linspace(0, 1, 100),
    histtype='step',
    label="direct",
    )
plt.hist(
    data['p_value_indirect'],
    bins=np.linspace(0, 1, 100),
    histtype='step',
    label="indirect",
    )
plt.yscale('log')
plt.xlabel("p-value")
plt.legend()
plt.tight_layout()
No description has been provided for this image

Observed¶

In [26]:
for col in ['d2d_direct', 'd2d_indirect', 'd_mag_direct', 'd_mag_indirect']:
    data_intersection[col].format = "{:.2f}"
for col in ['p_value_direct', 'p_value_indirect']:
    data_intersection[col].format = "{:.4f}"
In [27]:
data_intersection[~agree & observed][[
    'agasc_id',
    'gaia_id_direct',
    'gaia_id_indirect',
    'd_mag_direct',
    'd2d_direct',
    'd_mag_indirect',
    'd2d_indirect',
    'p_value_direct',
    'p_value_indirect',
]].pprint(max_width=-1, max_lines=-1)
 agasc_id     gaia_id_direct     gaia_id_indirect  d_mag_direct d2d_direct d_mag_indirect d2d_indirect p_value_direct p_value_indirect
                                                                  arcsec                     arcsec                                   
---------- ------------------- ------------------- ------------ ---------- -------------- ------------ -------------- ----------------
   6687480    4760816628892160    4760820923834112         0.19       1.25           0.23         0.69         0.3632           0.3498
   9446616 3273969976794445056 3273969972497767552         0.14       0.21           0.16         0.17         0.5646           0.5625
  32243760 3831419417137291904 3831419412842627456         0.18       0.71           0.18         0.88         0.5498           0.0414
  36444624 3897649565189277952 3897649565189298688         0.02       0.27           8.11         0.67         0.6729           0.0001
  52053176 4392691618897267200 4392691618894632064         0.05       0.24           0.17         1.27         0.7053           0.6406
  77604992 2756819802270040064 2756819802269026688         0.27       1.33           0.27         0.40         0.3410           0.2008
  81535712 2568639108730134016 2568639108730133888         0.69       0.46           5.03         3.08         0.0424           0.0000
  87428640 3299310421279505152 3299310421279819776        -0.06       0.71           0.03         0.59         0.9111           0.1986
 170537688 3397818657309068288 3397818657308098048         0.10       0.15           0.14         0.27         0.7330           0.4884
 176951584 3360388906382410752 3360388910675923584        -0.00       0.04            nan         0.67         0.9928              nan
 183638376  607916787837169152  607916783542120448         0.17       0.44           0.25         0.36         0.5880           0.3473
 210767352 4516726426141479808 4516726426144371456         0.10       0.17           7.27         1.33         0.7069           0.0000
 265030112 1282430921954322048 1282430921955436032         0.05       0.07            nan         0.32         0.9218              nan
 295583712 2852925360578759424 2852925364875262080         0.00       0.07           0.18         0.74         0.9758           0.0907
 296757336 2863668486831283840 2863668486830442368         0.18       0.11           0.19         0.19         0.5751           0.4824
 382992664  207915763322753408  207915763325485696        -0.00       0.04            nan         0.35         0.9933              nan
 417858744 1954085707369157888 1954085703068556800         0.14       0.18           0.16         0.35         0.6058           0.5821
 420499768 1957445058987177856 1957445058984715648         0.06       0.04            nan         1.40         0.9176              nan
 496248896  985405277415464448  985405277413204480         0.11       1.15           0.16         0.82         0.6878           0.5409
 541984456 1039008251679233792 1039008255973683840         0.14       0.10            nan         0.30         0.6885              nan
 576595136 1690468547538461184 1690468551833654144         0.04       0.03            nan         0.87         0.9661              nan
 593245120 1115458948720469632 1115458948720469888         0.07       0.04            nan         0.87         0.8892              nan
 623249168 3214697297806159616 3214697293514063616         0.13       0.15           0.31         1.08         0.6591           0.2472
 643965560 3806051759739888256 3806051755444147584         0.42       0.19           0.44         0.24         0.2080           0.1721
 731121880 6333200204490021888 6333200208784059648         0.07       0.07           3.21         1.21         0.8724           0.0001
 766651552 2362388594423841792 2362388594422948480         0.08       0.58           0.15         0.22         0.8048           0.5443
 908462360 6801811888448490624 6801811892745605632         0.08       0.11           0.46         1.00         0.8360           0.0689
 939393312 5440213797128328064 5440213801426139008         0.13       0.20           0.31         0.64         0.6198           0.2590
 966525880 5975153215090484224 5975153210796430336         0.05       0.49           0.16         0.27         0.7706           0.5602
 966533624 5974018828337490944 5974018828340131456         0.06       0.23          11.73         0.83         0.6970           0.0000
 977410600 6793999961413449216 6793999965707654656         0.01       0.19           0.19         0.70         0.8201           0.0780
 997071584 4799566167336113536 4799566171630256000         0.14       0.05           0.62         0.33         0.7106           0.0822
1038767952 6714057120955575424 6714057120952340864         0.12       0.04           6.62         0.96         0.7689           0.0000
1054738896 4913415652886181504 4913415652884481792         0.13       0.10           6.40         0.60         0.7053           0.0003
1057103368 4737095837951376128 4737095837950650624         0.23       1.15           0.23         0.95         0.4613           0.4151
1089212088 5985256936705385344 5985256936695918720         0.14       0.06          12.23         1.08         0.7094           0.0000
1089217664 5984852694390760960 5984852694382630656        -0.01       0.06           2.65         0.93         0.9816           0.0005
1091193912 5942780839701934208 5942780844009984512        -0.16       0.13           1.32         0.56         0.6246           0.0084
1106379424 6561257337207141120 6561257337207686912         0.07       0.06            nan         1.05         0.8844              nan
1141794848 5932241509752072576 5932241509774006656         0.07       0.05          12.80         0.90         0.9015           0.0000
1142837608 5832158120199627392 5832158120187577344         0.05       0.03            nan         0.82         0.9498              nan
1186733048 5911787191757998208 5911787191762479872         0.00       0.05            nan         1.06         0.9865              nan
1201407648 4652097129542392448 4652097125197823232         0.20       0.57           0.23         0.29         0.5154           0.3239
In [28]:
observed_disagree = data_intersection[~agree & observed][[
    'agasc_id',
    'gaia_id_direct',
    'gaia_id_indirect',
    'p_value_direct',
    'p_value_indirect',
    'd_mag_direct',
    'd2d_direct',
    'd_mag_indirect',
    'd2d_indirect'
]]

np.random.seed(1186733048)
# generate_reports = True
if generate_reports:
    print(f"Making report with {len(observed_disagree)} stars")

    description = """
    <p>This list includes stars that have been observed, and the direct and indirect methods disagree. </p>

    <p>This is the comparison that led us to ditch the indirect method. It seems to have two issues</p>
    <ol>
      <li> It does not take magnitude into account. </li>
      <li> Some stars with proper motion seem to be wrongly matched. </li>
    </ol>

    <p>
    The purpose of this report is to check whether outliers can be caused by misidentification.
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>

    <h2> Notable examples:</h2>
    <ul>
      <li>
        <a href="report_36444624.html"> 36444624 </a>. The indirect method does not use magnitude as a criterion, and matches a star with the wrong magnitude. The correct match is actually closer if one considers proper motion.
      </li>
      <li>
        <a href="report_52053176.html"> 52053176 </a>. Maybe the indirect method did not consider proper motion?
      </li>
      <li>
        <a href="report_170537688.html"> 170537688 </a>. This one should be a blend of two stars. The direct method chooses one, and the indirect method the other. Still, the indirect method does not choose the closest one.
      </li>
      <li>
        <a href="report_176951584.html"> 176951584 </a>. The indirect method gives the star with the wrong magnitude even though the right one is closer.
      </li>

    </ul>

    <h2> Stars </h2>
    """

    reports_dir = "/Users/javierg/SAO/Notebooks/agasc/gaia-magnitudes-2023/reports/direct-indirect/observed-disagree"
    star_report.make_report_list(
        data=observed_disagree,
        path=reports_dir,
        title='Observed Stars with Different direct/indirect matches',
        description=description,
        alternates=['direct', 'indirect'],
        overwrite=True
    )

Candidates¶

In [29]:
candidates_disagree = data_intersection[~agree & (data_intersection['guide'] | data_intersection['acq'])]
candidates_disagree.sort('mag_aca')
# candidates.pprint(max_width=-1, max_lines=-1)
In [30]:
candidates_disagree = candidates_disagree[[
    'agasc_id',
    'gaia_id_direct',
    'gaia_id_indirect',
    'p_value_direct',
    'p_value_indirect',
    'd_mag_direct',
    'd2d_direct',
    'd_mag_indirect',
    'd2d_indirect'
]][:100]

np.random.seed(1186733048)
# generate_reports = True
if generate_reports:
    print(f"Making report with {len(candidates_disagree)} stars")

    description = """
    <p>This list includes stars that are candidates, and the direct and indirect methods disagree. </p>

    <p>This is the comparison that led us to ditch the indirect method. It seems to have two issues</p>
    <ol>
      <li> It does not take magnitude into account. </li>
      <li> Some stars with proper motion seem to be wrongly matched. </li>
    </ol>

    <p>
    The purpose of this report is to check whether outliers can be caused by misidentification.
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>

    <h2> Notable examples:</h2>
    <ul>

    </ul>

    <h2> Stars </h2>
    """

    reports_dir = "/Users/javierg/SAO/Notebooks/agasc/gaia-magnitudes-2023/reports/direct-indirect/candidates-disagree"
    star_report.make_report_list(
        data=candidates_disagree,
        path=reports_dir,
        title='Candidate Guide/Acq Stars with Different direct/indirect matches',
        description=description,
        alternates=['direct', 'indirect'],
        overwrite=True
    )

stars with d2d > 3 arcsec¶

In [31]:
disagree = data_intersection[~agree]
disagree[disagree['d2d'] > 3][['AGASC_ID', 'gaia_id_direct', 'gaia_id_indirect', 'd2d_direct', 'd2d_indirect']]
Out[31]:
Table length=541
AGASC_IDgaia_id_directgaia_id_indirectd2d_directd2d_indirect
arcsecarcsec
int32int64int64float64float64
1185880255558244251017420825555824382154366720.952.87
10881736323090612642318233632309061264231825920.563.38
13770888323588529918985638432358854022678959361.193.21
15990856323656589259050265632365658925957408001.301.76
16255456332047107231220288033204710723122031362.381.10
20201336313138417074236211231313841707360253440.193.66
21893408311520613274361382431152061327387604480.822.66
26616568309462612692816960030946261269281702401.451.83
32637200385662190727355737638566219072735575041.222.28
...............
1181491160584959190784738944058495919078476419840.464.05
1182531872587572118321810816058757211832181086721.312.17
1183204944583253470291104704058325347029110476801.063.69
1183975024582435375226603123258243537522660314881.381.99
1183980544582470253797519744058247025379751971840.863.65
1207468040523826382071267814452382640225692997120.672.98
1209151256523504750868293414452350474743231959041.173.21
1210454568584259365371440998458425936537144103681.141.97
1214128920579901633142276428857990163314227632640.173.15
1214648312579549235799637004857954923622818995200.223.38
In [32]:
agasc_ids = np.unique(np.concatenate(
    [
    [
        1185232,
        1185880,
        2754344,
        2892808,
        7609200,
        8657120,
        10881736,
        11274712,
        11802440,
        1213475776,
        1213613240,
        1214128920,
        1214384968,
        1214519736,
        1214648312,
        1214664424,
        1248595144,
    ],
    disagree[disagree['d2d'] > 3]['AGASC_ID'][:40]
    ]
))
large_d2d = disagree[np.in1d(disagree['agasc_id'], agasc_ids)][[
    'agasc_id',
    'gaia_id_direct',
    'gaia_id_indirect',
    'd_mag_direct',
    'd2d_direct',
    'd_mag_indirect',
    'd2d_indirect',
    'p_value_direct',
    'p_value_indirect',
]]
In [33]:
np.random.seed(1248595144)
# generate_reports = True
if generate_reports:
    print(f"Making report with {len(large_d2d)} stars")

    description = """
    <p>This list includes stars that have been observed, and the direct and indirect methods disagree. </p>

    <p>This is the comparison that led us to ditch the indirect method. It seems to have two issues</p>
    <ol>
      <li> It does not take magnitude into account. </li>
      <li> Some stars with proper motion seem to be wrongly matched. </li>
    </ol>

    <p>Anyway, these are all faint stars.</p>

    <p>
    The purpose of this report is to check whether outliers can be caused by misidentification.
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>

    <h2> Notable examples:</h2>
    <ul>

      <li>
        <a href="report_1185880.html"> 1185880 </a>.
        direct method gives a star that is closer in magnitude and angular separation
        no idea why the indirect method gives another
      </li>

      <li>
        <a href="report_10881736.html"> 10881736 </a>.
        two stars close in magnitude with the AGASC star right in between (AGASC was not resolved)
        the direct method gives a star that is MUCH closer in magnitude and angular separation
      </li>

      <li>
        <a href="report_1214128920.html"> 1214128920 </a>.
        the direct method gives a star that is MUCH closer in magnitude and angular separation
      </li>

      <li>
        <a href="report_1214648312.html"> 1214648312 </a>.
        the direct method gives a star that is MUCH closer in magnitude and angular separation
        still, this is a faint star, so irrelevant for us
      </li>
    </ul>

    </ul>

    <h2> Stars </h2>
    """

    reports_dir = "/Users/javierg/SAO/Notebooks/agasc/gaia-magnitudes-2023/reports/direct-indirect/large_d2d"
    star_report.make_report_list(
        data=large_d2d,
        path=reports_dir,
        title='Distance between direct and indirect matches > 3 arcsec',
        description=description,
        alternates=['direct', 'indirect'],
        overwrite=True
    )