In [170]:
import os
import pickle
import subprocess
import sys
from pathlib import Path

# presumably makes the local agasc_gaia package importable; keep before the agasc_gaia imports
sys.path.insert(0, '../')

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from astropy.table import join

from agasc_gaia import cross_match as xm
from agasc_gaia import cross_match as xm, gaia_model as gm, config, datasets
from agasc_gaia import star_report
In [171]:
# to silence warnings since 2024.1
import warnings
warnings.simplefilter("ignore")
In [172]:
# Root directory under which every report generated by this notebook is written.
# Set the AGASC_GAIA_REPORTS_DIR environment variable to redirect the output; when it is
# not set, the original hard-coded location is used, so existing behavior is unchanged.
REPORTS_DIR = Path(
    os.environ.get(
        "AGASC_GAIA_REPORTS_DIR",
        "/Users/javierg/SAO/Notebooks/agasc/gaia-magnitudes-2023/reports",
    )
)
In [173]:
# Load the tables used throughout the notebook.
# agasc_difficult: the "difficult" cross-match cases; a later cell notes that this table
#   lists all _possible_ matches for a star, not just the best one.
# agasc_gaia_matches: the AGASC-Gaia cross-match table that the rest of the notebook
#   analyses (presumably one selected match per AGASC star -- TODO confirm).
agasc_difficult = xm.get_agasc_gaia_x_match_difficult()
agasc_gaia_matches = xm.get_agasc_gaia_x_match()
# agasc_gaia_matches_all = xm.get_agasc_gaia_x_match_all()
# agasc_summary: per-star AGASC columns (magnitudes, proper motion, position, flags) that
#   are copied onto agasc_gaia_matches further down.
agasc_summary = datasets.get_agasc_summary()
In [174]:
# Attach to every row the number of distinct AGASC IDs in its 'group'.
agasc_difficult = agasc_difficult.group_by(['group'])
arr = np.array(agasc_difficult['agasc_id'])
group_bounds = agasc_difficult.groups.indices
per_group_sizes = []
for start, stop in zip(group_bounds[:-1], group_bounds[1:]):
    # rows start:stop belong to one group; count its unique AGASC IDs
    n_unique_ids = np.unique(arr[start:stop]).shape[0]
    per_group_sizes.append(np.repeat(n_unique_ids, stop - start))
agasc_difficult['group_size'] = np.concatenate(per_group_sizes)
In [175]:
# Sanity check on NaN values.
# Some difficult stars have a best match whose p_value is NaN. This is NOT a problem:
# agasc_difficult lists all _possible_ matches, not just the best ones, and these
# entries get removed downstream.
n_nan_mag_pred = np.count_nonzero(np.isnan(agasc_gaia_matches['mag_pred']))  # should be zero
nan_p_value = np.isnan(agasc_difficult['p_value'])
n_nan_best_match = np.count_nonzero(nan_p_value & agasc_difficult['best_match'])
(n_nan_mag_pred, n_nan_best_match)
Out[175]:
(0, 29)
In [176]:
# agasc_difficult = agasc_difficult[agasc_difficult['best_match']]
In [177]:
# Copy AGASC summary columns onto the cross-match table.
#
# np.searchsorted is used as a lookup of each matched agasc_id in agasc_summary. That is
# only valid if agasc_summary is sorted by agasc_id and contains every matched id;
# otherwise the values of a *different* star would be copied silently. Both conditions
# are therefore checked before any column is copied.
summary_ids = np.asarray(agasc_summary["agasc_id"])
matched_ids = np.asarray(agasc_gaia_matches["agasc_id"])
i = np.searchsorted(summary_ids, matched_ids)
found = i < len(summary_ids)
found[found] = summary_ids[i[found]] == matched_ids[found]
if not np.all(found):
    raise ValueError(
        f"{np.count_nonzero(~found)} AGASC IDs in agasc_gaia_matches could not be located "
        "in agasc_summary (is agasc_summary sorted by agasc_id?)"
    )

# Columns copied under their own name.
cols = [
    "mag_aca",
    "mag_aca_err",
    "mag_aca_obs",
    "mag_aca_err_obs",
    "mag_catid",
    "mag_band",
    "random_index",
    "guide",
    "acq"
]
for col in cols:
    agasc_gaia_matches[col] = agasc_summary[col][i]

# Columns present in both tables: the AGASC value is stored as "<col>_agasc" and the
# column already in agasc_gaia_matches is renamed to "<col>_gaia".
cols = [
    "pm_ra",
    "pm_dec",
    "epoch",
    "ra",
    "dec",
]
for col in cols:
    agasc_gaia_matches[f"{col}_agasc"] = agasc_summary[col][i]

new_cols = [f"{col}_gaia" for col in cols]
agasc_gaia_matches.rename_columns(cols, new_cols)

# fixing some numbers in AGASC (assuming that entries with -9999 have no proper motion)
for pm_col in ("pm_dec_agasc", "pm_ra_agasc"):
    agasc_gaia_matches[pm_col][agasc_gaia_matches[pm_col] == -9999] = 0
In [178]:
# Load the magnitude model from the pickle file named in the package configuration.
# NOTE(review): pickle.load can execute arbitrary code; only load a trusted model file.
# NOTE(review): a later cell obtains the model with gm.get_gaia_model() instead -- confirm
# both give the same model.
with open(config.FILES["gaia_model"], "rb") as fh:
    gaia_model = pickle.load(fh)

# Predicted ACA magnitude for every matched star, requested without the instrument bias.
agasc_gaia_matches["mag_aca_pred"] = gaia_model.predict(
    agasc_gaia_matches.to_pandas(),
    with_instrument_bias=False
)

# Uncertainty of the prediction, requested without the instrument term.
# The table is converted again here, so this DataFrame already includes "mag_aca_pred".
agasc_gaia_matches["mag_aca_pred_err"] = gaia_model.uncertainty(
    agasc_gaia_matches.to_pandas(),
    with_instrument=False
)

Proper Motion¶

Proper motion is a nice way to see whether the cross-match is generally fine, because proper motion is not used directly in the cross-match. It is used to shift positions to a common epoch, but it is not considered in the matching probability.

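The vertical line at zero in the plots below is made up of the stars that have no proper motion in AGASC (the -9999 entries were set to zero above) but do have a proper motion in Gaia.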

In [179]:
# 2-d histograms of AGASC vs. Gaia proper motion, one panel per component.
fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharex=True, sharey=True)
pm_panels = (("PM RA", "pm_ra"), ("PM DEC", "pm_dec"))
for panel_ax, (panel_title, pm_name) in zip(axes, pm_panels):
    plt.sca(panel_ax)

    plt.title(panel_title)
    x = np.array(agasc_gaia_matches[f'{pm_name}_agasc'], dtype=float)
    y = np.array(agasc_gaia_matches[f'{pm_name}_gaia'], dtype=float)
    bins = np.linspace(-200, 200, 200)
    sns.histplot(x=x, y=y, bins=[bins, bins])
    plt.xlabel("AGASC 1p7")
    plt.ylabel("AGASC 1p8")

plt.tight_layout()
No description has been provided for this image
In [10]:
# Distribution of the Gaia proper motion for stars whose AGASC proper motion is zero
# (this includes the -9999 entries that were set to zero earlier).
# The panels are DEC then RA, the opposite of the neighbouring figures, so each panel is
# titled and labelled to make clear which component it shows.
fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharex=True, sharey=True)
pm_bins = np.linspace(-800, 800, 100)
for panel_ax, panel_title, pm_name in zip(axes, ("PM DEC", "PM RA"), ("pm_dec", "pm_ra")):
    plt.sca(panel_ax)
    sel = np.abs(agasc_gaia_matches[f'{pm_name}_agasc']) == 0
    plt.hist(agasc_gaia_matches[f'{pm_name}_gaia'][sel], bins=pm_bins)
    plt.title(f"{panel_title} (AGASC PM = 0)")
    plt.xlabel(f"{pm_name}_gaia")
    # the y axis is shared, so the log scale applies to both panels; set explicitly anyway
    plt.yscale('log')

axes[0].set_ylabel("Number of stars")
plt.suptitle('Proper motion Updates from Gaia');
Out[10]:
Text(0.5, 0.98, 'Proper motion Updates from Gaia')
No description has been provided for this image
In [180]:
# Same comparison as before, restricted to stars with non-zero AGASC proper motion.
# Dashed guide lines: y = +/-10 and y = x +/- 20, the limits used in the outlier selection.
fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharex=True, sharey=True)
pm_panels = (("PM RA", "pm_ra"), ("PM DEC", "pm_dec"))
for panel_ax, (panel_title, pm_name) in zip(axes, pm_panels):
    plt.sca(panel_ax)
    plt.title(panel_title)
    sel = np.abs(agasc_gaia_matches[f'{pm_name}_agasc']) > 0
    x = np.array(agasc_gaia_matches[f'{pm_name}_agasc'][sel], dtype=float)
    y = np.array(agasc_gaia_matches[f'{pm_name}_gaia'][sel], dtype=float)
    bins = np.linspace(-200, 200, 200)
    sns.histplot(x=x, y=y, bins=[bins, bins])
    for level in (10, -10):
        plt.axhline(level, color='k', linestyle='--')
    x = np.linspace(-200, 200, 100)
    for offset in (20, -20):
        plt.plot(x, x + offset, color='k', linestyle='--')
    plt.xlabel("AGASC 1p7")
    plt.ylabel("AGASC 1p8")
plt.tight_layout()
No description has been provided for this image
In [12]:
# Select proper-motion outliers. For either component (DEC or RA) a star qualifies when:
#   - its AGASC proper motion is non-zero,
#   - the absolute Gaia proper motion is below 10, and
#   - Gaia and AGASC differ by more than 20.
# NOTE(review): the Gaia PM columns appear to contain masked entries ("--" in the tables
# shown in this notebook); how those rows evaluate in this mask has not been verified.
sel = (
    (
        (np.abs(agasc_gaia_matches['pm_dec_agasc']) > 0)
        & (np.abs(agasc_gaia_matches['pm_dec_gaia']) < 10)
        & (np.abs(agasc_gaia_matches['pm_dec_gaia'] - agasc_gaia_matches['pm_dec_agasc']) > 20)
    )
    | (
        (np.abs(agasc_gaia_matches['pm_ra_agasc']) > 0)
        & (np.abs(agasc_gaia_matches['pm_ra_gaia']) < 10)
        & (np.abs(agasc_gaia_matches['pm_ra_gaia'] - agasc_gaia_matches['pm_ra_agasc']) > 20)
    )
)
pm_outliers = agasc_gaia_matches[sel]
# d_pm: Euclidean norm of the (RA, DEC) proper-motion difference between Gaia and AGASC
pm_outliers['d_pm'] = np.sqrt(
    (pm_outliers['pm_ra_gaia'] - pm_outliers['pm_ra_agasc'])**2
    + (pm_outliers['pm_dec_gaia'] - pm_outliers['pm_dec_agasc'])**2
)
# display formats only; the stored values are not rounded
pm_outliers['d_pm'].format = "{:.1f}"
pm_outliers['p_value'].format = "{:.3f}"
pm_outliers['d2d'].format = "{:.2f}"
pm_outliers['d_mag'].format = "{:.2f}"
# largest proper-motion difference first; later cells rely on this ordering
pm_outliers.sort('d_pm', reverse=True)
len(pm_outliers)
Out[12]:
3265
In [13]:
# Joint distribution of proper-motion difference (d_pm) and p_value for the PM outliers
# (panel B), with the marginal distributions above (A) and to the right (C).
fig, axes = plt.subplot_mosaic(
    [
        ['A', '.'],
        ['B', 'C']
    ],
    # sharex=True, sharey=True,
    figsize=(8, 6),
)
plt.sca(axes['A'])
sns.histplot(x=pm_outliers['d_pm'])
plt.xlabel('')
plt.sca(axes['B'])
sns.histplot(x=pm_outliers['d_pm'], y=pm_outliers['p_value'])
plt.xlabel(r'$\Delta PM$')
plt.sca(axes['C'])
sns.histplot(y=pm_outliers['p_value'])
plt.ylabel('')
# raw string: "\D" is an invalid escape sequence in a regular string literal
plt.suptitle(r'p-value $\Delta PM$ distribution')
plt.tight_layout()
No description has been provided for this image
In [14]:
# Joint distribution of d2d and d_mag for the PM outliers (panel B), with the marginal
# distributions above (A, log counts) and to the right (C, log counts).
fig, axes = plt.subplot_mosaic(
    [
        ['A', '.'],
        ['B', 'C']
    ],
    # sharex=True, sharey=True,
    figsize=(8, 6),
)
plt.sca(axes['A'])
sns.histplot(x=pm_outliers['d2d'])
plt.xlabel('')
plt.yscale('log')
plt.sca(axes['B'])
sns.histplot(x=pm_outliers['d2d'], y=pm_outliers['d_mag'])
plt.xlabel('d2d')
plt.sca(axes['C'])
sns.histplot(y=pm_outliers['d_mag'])
plt.ylabel('')
plt.xscale('log')

# raw string: "\D" is an invalid escape sequence in a regular string literal
plt.suptitle(r'PM outliers d2d-$\Delta mag$ distribution')
plt.tight_layout()
No description has been provided for this image
In [15]:
# Show the report for the PM outlier with the largest d2d
# (the first one, if several share the maximum).
largest_d2d = np.max(pm_outliers['d2d'])
has_largest_d2d = pm_outliers['d2d'] == largest_d2d
agasc_id = pm_outliers['agasc_id'][has_largest_d2d][0]
report = star_report.Report(agasc_id)
report.show_in_notebook()
AGASC 930611512 epoch in AGASC: 2000.0, epoch in summary: 1991.59

AGASC entry

Table length=1
agasc_idradecmagmag_acamag_aca_obspm_rapm_decepochtycho_idgsc2.3
int32float64float64float32float32float32int16int16float32str12str10
930611512101.28444394-37.3990146412.3411.95--31-99992000.07100-39-1S3DP000378

Matches

Table length=1
agasc_idgaia_idmag_1p7mag_predd_magd2dp_matchp_valuep_relativepm_rapm_decg_magbaseline
int64int64float32float16float16float16float32float32float32float16float16float16bool
930611512557727895530294284812.3412.32-0.021.900.00120.00611.000-1.9121.83511.97True

Match Candidates

Table length=2
agasc_idgaia_idmag_1p7mag_predd_magd2dp_matchp_valuep_relativepm_rapm_decg_magbaseline
int64int64float32float16float16float16float32float32float32float16float16float16bool
930611512557727895530294284812.3412.32-0.021.900.00120.00611.000-1.9121.83511.97True
930611512557727895100778342412.3419.056.7013.850.00000.00000.000-1.3823.80718.98False

AGASC Neighborhood (within 20 arcsec)

Table length=1
agasc_idradecd2dmagmag_acamag_aca_obspm_rapm_decpos_catidpm_catidmag_catid
arcsec
int32float64float64float64float32float32float32int16int16uint8uint8uint8
930611512101.28444394-37.399014640.0512.3411.95--31-9999555
In [16]:
# the largest pm outlier
# (pm_outliers was sorted by decreasing d_pm when it was built, so row 0 has the largest
# proper-motion difference)
i = 0
agasc_id = pm_outliers['agasc_id'][i]
report = star_report.Report(agasc_id)
report.show_in_notebook()
AGASC 349707128 epoch in AGASC: 2000.0, epoch in summary: 1991.54

AGASC entry

Table length=1
agasc_idradecmagmag_acamag_aca_obspm_rapm_decepochtycho_idgsc2.3
int32float64float64float32float32float32int16int16float32str12str10
349707128294.9660994836.0158320211.5611.49--278-4032000.02668-879-1N2GJ000523

Matches

Table length=1
agasc_idgaia_idmag_1p7mag_predd_magd2dp_matchp_valuep_relativepm_rapm_decg_magbaseline
int64int64float32float16float16float16float32float32float32float16float16float16bool
349707128204822290365525491211.5611.44-0.120.161.91900.69871.0002.449-0.27011.31True

Match Candidates

Table length=12
agasc_idgaia_idmag_1p7mag_predd_magd2dp_matchp_valuep_relativepm_rapm_decg_magbaseline
int64int64float32float16float16float16float32float32float32float16float16float16bool
349707128204822290365525491211.5611.44-0.120.161.91900.69871.0002.449-0.27011.31True
349707128204822290365888537611.5617.616.053.240.00000.00000.000-3.582-6.79319.72False
349707128204822269748576678411.5620.959.3917.230.00000.00000.000----20.95False
349707128204822290365562227211.5619.808.246.120.00000.00000.000-2.441-4.73020.28False
349707128204822290365888499211.5619.067.506.430.00000.00000.000-5.891-6.78919.88False
349707128204822290364336883211.5619.257.708.310.00000.00000.000-1.080-0.11020.48False
349707128204822290365525952011.5614.943.389.770.00000.00000.000-3.904-0.02814.49False
349707128204822290365888524811.5613.732.1810.520.00000.00000.000-4.348-6.23813.71False
349707128204822290365526080011.5615.373.8110.900.00000.00000.000-0.690-3.92415.375False
349707128204822290364213849611.5619.057.5010.640.00000.00000.000-3.686-4.80519.3False
349707128204822290364213043211.5619.287.7311.950.00000.00000.000-3.701-5.31219.3False
349707128204822290364214976011.5619.708.1513.660.00000.00000.000-1.597-5.02019.75False

AGASC Neighborhood (within 20 arcsec)

Table length=1
agasc_idradecd2dmagmag_acamag_aca_obspm_rapm_decpos_catidpm_catidmag_catid
arcsec
int32float64float64float64float32float32float32int16int16uint8uint8uint8
349707128294.9660994836.015832020.0311.5611.49--278-403555
In [17]:
# Switch for all the (slow) report-generation cells in this notebook.
generate_reports = True
# Seed of the global NumPy RNG. Nothing in this cell draws random numbers; the visible
# consumer is the np.random.choice call in the random-sample report cell further down.
np.random.seed(349707128)
if generate_reports:
    N = 100
    # pm_outliers is sorted by decreasing d_pm, so the first N rows are the N largest
    # proper-motion differences.
    sel = np.zeros(len(pm_outliers), dtype=bool)
    sel[:N] = True
    print(f"Making report with {np.count_nonzero(sel)} stars")

    # The count in the text is taken from N so the two cannot drift apart.
    description = f"""
    <p>This list includes the {N} PM outliers with the largest proper motion differences </p>

    <p> Note that the stars with the largest proper motion difference are not the ones with the worst p-value. </p>

    <p>
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>
    """

    reports_dir = REPORTS_DIR / "pm-outliers" / "large-pm-diff"
    star_report.make_report_list(
        data=pm_outliers[['agasc_id', 'p_value', 'd2d', 'd_mag', 'd_pm']][sel],
        path=reports_dir,
        title='Proper Motion Outliers',
        description=description,
        overwrite=True
    )
Making report with 100 stars
100it [04:04,  2.45s/it]
In [18]:
# Report on the N PM outliers with the smallest ("worst") p_value.
if generate_reports:
    N = 100
    # argsort is ascending, so the first N indices are the N smallest p-values
    sel = np.zeros(len(pm_outliers), dtype=bool)
    sel[np.argsort(pm_outliers['p_value'])[:N]] = True
    print(f"Making report with {np.count_nonzero(sel)} stars")

    # The count in the text is taken from N so the two cannot drift apart.
    description = f"""
    <p>This list includes the {N} PM outliers with the worst p-value </p>

    <p> Note that the stars with the largest proper motion difference are not the ones with the worst p-value. </p>

    <p>
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>
    """

    reports_dir = REPORTS_DIR / "pm-outliers" / "worst-p-value"
    star_report.make_report_list(
        data=pm_outliers[['agasc_id', 'p_value', 'd2d', 'd_mag', 'd_pm']][sel],
        path=reports_dir,
        title='Proper Motion Outliers',
        description=description,
        overwrite=True
    )
Making report with 100 stars
100it [03:58,  2.38s/it]
In [19]:
# Report on a random sample of the PM outliers.
# NOTE(review): the draw uses the global NumPy RNG, which is seeded in an earlier cell;
# the sample is reproducible only if nothing else consumes random numbers in between.
if generate_reports:
    sample_fraction = 40  # one star out of every `sample_fraction` outliers
    idx = np.arange(len(pm_outliers))
    sel = np.zeros(len(pm_outliers), dtype=bool)
    # every row is a candidate (the original filtered idx with an all-False mask, a no-op)
    sel[np.random.choice(idx, replace=False, size=len(pm_outliers)//sample_fraction)] = True
    print(f"Making report with {np.count_nonzero(sel)} stars")

    # The fraction in the text is taken from sample_fraction so the two cannot drift apart.
    description = f"""
    <p>This list includes 1/{sample_fraction}th of the proper motion outliers randomly selected. </p>

    <p> Note that the stars with the largest proper motion difference are not the ones with the worst p-value. </p>

    <p>
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>
    """

    reports_dir = REPORTS_DIR / "pm-outliers" / "random"
    star_report.make_report_list(
        data=pm_outliers[['agasc_id', 'p_value', 'd2d', 'd_mag', 'd_pm']][sel],
        path=reports_dir,
        title='Proper Motion Outliers',
        description=description,
        overwrite=True
    )
Making report with 81 stars
81it [02:59,  2.22s/it]

Catalog Magnitude Outliers¶

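These are stars with a large difference between their AGASC 1.7 catalog ACA magnitude (`mag_aca`) and their AGASC 1.8 ACA magnitude, i.e. the one predicted from Gaia (`mag_aca_pred`).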

In [20]:
# Replace the mag_aca column with a float64 copy of itself.
# The astype is needed; without it the author gets an endianness error
# (presumably in the plotting calls that follow -- TODO confirm where it is raised).
agasc_gaia_matches["mag_aca"] = agasc_gaia_matches["mag_aca"].astype(np.float64)
In [21]:
# 2-d histogram of (predicted - catalog) ACA magnitude as a function of catalog magnitude.
sns.histplot(
    x=agasc_gaia_matches["mag_aca"],
    y=agasc_gaia_matches["mag_aca_pred"] - agasc_gaia_matches["mag_aca"],
    bins=(np.linspace(5, 15, 101), np.linspace(-2, 2, 101)),
    cbar=True,  # cbar_kws is ignored unless the colorbar is requested
    cbar_kws=dict(label='Number of stars'),
)
# the automatic y label is just "mag_aca_pred", which is wrong for a difference
plt.ylabel('mag_aca_pred - mag_aca');
Out[21]:
<Axes: xlabel='mag_aca', ylabel='mag_aca_pred'>
No description has been provided for this image
In [22]:
# Histograms of (predicted - catalog) ACA magnitude: all stars on top, and two
# catalog-magnitude ranges (bright, faint) below. Counts are on a log scale.
fig, axes = plt.subplot_mosaic(
    [
        ['.', 'all', 'all', '.'],
        ['bright', 'bright', 'faint', 'faint'],
    ],
    # sharex=True, sharey=True,
    figsize=(12, 8),
)
plt.sca(axes['all'])
plt.hist(
    agasc_gaia_matches["mag_aca_pred"] - agasc_gaia_matches["mag_aca"],
    bins=np.linspace(-6, 6, 100),
    histtype="step",
)
plt.yscale('log')
plt.xlabel('Predicted - Catalog ACA Magnitude')
plt.title("all")

plt.sca(axes['bright'])
sel = (agasc_gaia_matches["mag_aca"] < 8.5)
plt.hist(
    agasc_gaia_matches["mag_aca_pred"][sel] - agasc_gaia_matches["mag_aca"][sel],
    bins=np.linspace(-6, 6, 100),
    histtype="step",
)
plt.yscale('log')
plt.xlabel('Predicted - Catalog ACA Magnitude')
plt.title('mag_aca < 8.5')

plt.sca(axes['faint'])
sel = (agasc_gaia_matches["mag_aca"] > 9.5) & (agasc_gaia_matches["mag_aca"] < 10.3)
plt.hist(
    agasc_gaia_matches["mag_aca_pred"][sel] - agasc_gaia_matches["mag_aca"][sel],
    bins=np.linspace(-6, 6, 100),
    histtype="step",
)
plt.yscale('log')
# the title states the full range actually selected (the original only said "> 9.5")
plt.title('9.5 < mag_aca < 10.3')
plt.xlabel('Predicted - Catalog ACA Magnitude')

plt.suptitle('Predicted - Catalog ACA Magnitude')

plt.tight_layout()
No description has been provided for this image
In [23]:
# Catalog-magnitude outliers: stars with catalog mag_aca below 10.3 whose predicted ACA
# magnitude differs from the catalog value by more than 2.8.
sel = (
    (agasc_gaia_matches["mag_aca"] < 10.3)
    & (np.abs(agasc_gaia_matches["mag_aca_pred"] - agasc_gaia_matches["mag_aca"]) > 2.8)
)
mag_outliers = agasc_gaia_matches[sel]

# signed difference, predicted minus catalog
mag_outliers['d_mag_aca'] = mag_outliers["mag_aca_pred"] - mag_outliers["mag_aca"]

# display formats only; the stored values are not rounded
display_formats = {
    'd_mag_aca': "{:.2f}",
    'd_mag': "{:.2f}",
    'mag_aca': "{:.2f}",
    'mag_aca_pred': "{:.2f}",
    'mag_aca_obs': "{:.2f}",
    'd2d': "{:.3f}",
    'p_value': "{:.4f}",
}
for column_name, column_format in display_formats.items():
    mag_outliers[column_name].format = column_format

mag_outliers['abs_d_mag_aca'] = np.abs(mag_outliers['d_mag_aca'])

# brightest catalog magnitude first
mag_outliers.sort('mag_aca')
# mag_outliers.sort('abs_d_mag_aca', reverse=True)
In [24]:
# Build the HTML report list for the catalog-magnitude outliers.
np.random.seed(1174545944)
if generate_reports:
    n_mag_outliers = len(mag_outliers)
    print(f"Making report with {n_mag_outliers} stars")

    # HTML shown at the top of the report index page
    description = """
    <p>This list includes the stars with abs(mag_aca_1p8 - mag_aca_1p7) > 2.8 </p>

    <p>
    Note that there are several stars with observed mag_aca. In all cases, the observed mag_aca
    is closer to the 1p8 one.
    </p>

    <p>
    The purpose of this report is to check whether outliers can be caused by misidentification.
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>

    <h2> Notable examples:</h2>
    <ul>
      <li>
        <a href="report_879886528.html"> 879886528 </a>. An example of duplicate
        record in AGASC that just happens to be in this dataset.
      </li>
      <li>
        <a href="report_796801904.html"> 796801904 </a>. An example of duplicate
        record in AGASC that just happens to be in this dataset.
      </li>
      <li>
        <a href="report_412493456.html"> 412493456 </a>. A tycho2 pair of stars
        with the same position in Tycho2, but resolved in Gaia. This is the brighter of the two.
      </li>
      <li>
        <a href="report_298718560.html"> 298718560 </a>. One star in AGASC that is
        actually two stars 2 arcsec apart in Gaia. The AGASC star is cross-matched with the brighter
        one, but strictly speaking it should be a blend of the two or a new record is needed.
      </li>
      <li>
        <a href="report_549978408.html"> 549978408 </a>. An example of duplicate
        record in AGASC that just happens to be in this dataset.
      </li>

    </ul>

    <h2> Stars </h2>
    """

    # columns listed for every star in the report index
    report_columns = [
        'agasc_id', 'p_value', 'd2d', 'd_mag', 'd_mag_aca', 'mag_aca', 'mag_aca_pred', 'mag_aca_obs'
    ]
    reports_dir = REPORTS_DIR / "mag_aca-outliers"
    star_report.make_report_list(
        data=mag_outliers[report_columns],
        path=reports_dir,
        title='Catalog Magnitude Outliers',
        description=description,
        overwrite=True
    )
Making report with 125 stars
125it [05:06,  2.45s/it]
In [25]:
# Rows whose predicted ACA magnitude exceeds the catalog value by more than 5.
pred_minus_catalog = agasc_gaia_matches["mag_aca_pred"] - agasc_gaia_matches["mag_aca"]
agasc_gaia_matches[pred_minus_catalog > 5]
Out[25]:
Table length=1611
agasc_idra_agascdec_agascra_2016dec_2016pm_ra_agascpm_dec_agascepoch_agascmagmag_catidmag_bandtycho_idgaia_idra_gaiadec_gaiara_errordec_errorpm_ra_gaiapm_dec_gaiaepoch_gaiag_magbp_magrp_magg_mag_errorbp_mag_errorrp_mag_errorrange_mag_g_fovstd_dev_mag_g_fovmad_mag_g_fovphot_variable_flagphot_proc_modenon_single_starhas_magave_gaia_magd2dd2d_2016mag_predd_magp_match_gaia_pmp_match_agasc_pmuse_agasc_pmp_matchlog_p_matchbest_matchp_valuelog_p_valuep_relativelog_p_relativeidxmag_acamag_aca_errmag_aca_obsmag_aca_err_obsrandom_indexguideacqmag_aca_predmag_aca_pred_err
int64float64float64float64float64int16int16float32float32uint8int16bytes12int64float64float64float64float64float16float16float16float16float16float16float16float16float16float32float32float16boolint16int16int16float16float16float16float16float16float32float32boolfloat32float16boolfloat32float16float32float16int64float64int16float32float32int64boolboolfloat64float64
1333040.890482.137660.890482.13766001982.015.4710--27390488425074528000.89049374336060262.137638028874491.0331773757934570.9319427013397217----2016.019.3917.5516.050.0079350.0093460.004116------False00717.660.093260.0932618.052.5720.00253242020.0025324202False0.00253242022.596True0.0097854072.011.0-0.038314.31600093841552754----8923154FalseFalse19.4368042248039250.025585260300380372
2694884.581661.768174.581661.76817001982.015.0110--25479553708675505924.5816742962768181.76816658605757242.02217602729797361.3425837755203247----2016.019.8117.5616.080.009270.0154340.003736------False00717.810.05290.052918.13.0860.00155351320.0015535132False0.00155351322.809True0.0071062452.1481.0-0.0247913.85600090026855552----10772157FalseFalse19.8617058260581060.025585260300380372
2701284.142132.325684.142132.32568001983.014.4511--25482391823062868484.1420671437733592.3256608232094431.27230107784271240.8178788423538208----2016.019.4417.0315.630.0076520.015840.003914------False00717.380.23650.236517.332.8850.00126348540.0012634854False0.00126348542.898True0.00624180722.2051.0-0.0256113.90499973297119150----9624791FalseFalse19.5036986383994170.025585260300380372
3983045.318111.979255.318111.97925001983.014.4211--25473117098883436805.3181268872487831.97924204906509750.83303773403167720.5575965642929077----2016.019.3117.015.60.0068440.0149460.003115------False00717.30.067140.0671417.272.8540.00192838510.0019283851False0.00192838512.715True0.0081142622.091.0-0.0362013.87551----8220216FalseFalse19.3786986383994170.025585260300380372
5262482.218253.193582.218253.19358001983.014.4211--27407320230094245122.21819851528134933.1935780595672731.49103188514709471.236511468887329----2016.019.617.1415.7660.0088040.016370.00336------False00717.50.18520.185217.453.0350.00128272480.0012827248False0.00128272482.893True0.00625537852.2031.0-0.0445713.87553----1939182FalseFalse19.664684802752360.025585260300380372
7864807.482463.354127.482463.35412001983.014.3811--25538481003566894087.4824757282967153.35415143566137931.95358538627624511.158104658126831----2016.019.5217.1715.690.012090.014640.00635------False00717.450.12650.126517.443.0640.00143249530.0014324953False0.00143249532.844True0.0067203572.1721.0-0.0766813.83500003814697353----2780191FalseFalse19.5648308260581060.025585260300380372
7944966.328012.605686.328012.60568001982.014.810--25477426840877921286.32802711455770252.60570472865068541.56396281719207760.805314302444458----2016.019.3617.2715.8050.0073050.011480.003582------False00717.470.10820.108217.83.0020.00157302150.0015730215False0.00157302152.803True0.0071399392.1461.0-0.0881113.64600086212158254----9012161FalseFalse19.4131570858199820.025585260300380372
7962165.158354.556065.158354.55606001983.014.9911--27411602140710442245.1583582919930444.5560562827292213.63314199447631841.7934240102767944----2016.019.8617.3315.8750.00790.015870.00418------False00717.690.032620.0326217.662.6640.00242217750.0024221775False0.00242217752.615True0.0095726032.021.0-0.0907814.44499969482421946----13445884FalseFalse19.9146921921649420.025585260300380372
7965446.534063.92486.534063.9248001983.015.211--25487362990017328646.5340349090546153.92484072447366656.7350215911865233.3833651542663574----2016.020.1717.5516.120.012120.018160.005554------False00717.940.17210.172117.892.6930.00188426780.0018842678False0.00188426782.725True0.0080525632.0941.0-0.0908714.65499973297119151----18126124FalseFalse20.23338108915250.025585260300380372
7971446.15784.531756.15784.53175001983.015.411--25549729694714026246.157764377727954.53173184345946643.32120823860168462.421332359313965----2016.020.518.2216.620.013980.04250.04462------False00718.450.14360.143618.453.0550.00139641620.0013964162False0.00139641622.855True0.0065839492.1821.0-0.0910814.85499954223632850----6844910FalseFalse20.528426618961110.025585260300380372
..............................................................................................................................................................................
895354584265.46998-24.70229265.46998-24.70229001987.012.6116--4068200586367730944265.4699955669825-24.7023342814181750.123906299471855160.09086712449789047-1.082-4.5472016.017.5215.4914.390.0045050.02330.0099------False00715.8050.08680.167415.653.040.00156667540.0013402438False0.00156667542.805True0.0071062452.1480.99998247.6e-062704165112.02999973297119128----13006272FalseFalse17.6450374223199730.025585260300380372
898642872274.42264-29.13874274.42264-29.13874001987.012.7116--4049788611227556864274.4226541309113-29.1388646142114030.52079951763153080.4581608772277832----2016.017.0513.413.150.013150.011080.007477------False00714.530.4510.45114.051.3390.00401449340.0040144934False0.00401449342.396True0.0135159461.8690.999609770.00016952908560912.13000011444091840----7684488FalseFalse17.396432532598580.025585260300380372
899554776281.17974-25.35941281.17974-25.35941001987.013.3416--4073399588285893888281.1797922846329-25.359419632977920.12662805616855620.11990871280431747-3.402-7.7662016.017.4815.2915.030.0104750.016080.005238------False00715.940.32930.173615.5162.1780.00194284180.0034959507False0.00194284182.71True0.0081558352.0880.99932850.00029182953349412.76000022888183619----6870744FalseFalse17.8316511883733640.025585260300380372
1093934200261.95326-46.92814261.95326-46.92814001987.012.8416--5951803883085737216261.953247423593-46.928199103149820.77683234214782710.5084171891212463----2016.017.0213.5513.770.013850.012890.006016------False00714.780.2150.21514.1641.3240.0130122450.013012245False0.0130122451.886True0.0279146471.5540.99724030.00123834709912.26000022888183641----11417706FalseFalse17.5135079853412670.025585260300380372
1124871688135.30385-52.96399135.30385-52.96399001977.013.7610--5323472192280364672135.30324978626697-52.963785852419540.11817181110382080.1138806864619255-2.1722.3632016.018.114.4514.9140.002930.13850.774------False10715.821.3761.49513.90.13650.00434082140.0027803017False0.00434082142.363True0.0141389671.850.88905040.05113893072012.60600090026855554----15931639FalseFalse18.671194306206980.025585260300380372
1143735664256.56151-52.50011256.56151-52.50011001987.012.5616--5924318978646258304256.56150138684734-52.500161184986380.179035797715187070.15503396093845367-5.71-6.2932016.016.9414.1214.130.0086060.014090.005505------False00715.0550.14690.185214.551.9860.00487449860.004423809False0.00487449862.312True0.0154542331.8110.999948862.223e-054224278211.98000049591064540----9712869FalseFalse17.36750655239120.025585260300380372
116523044880.59997-66.0842780.59997-66.08427001975.013.4510--466054456997532467280.60002386779149-66.08441241143110.333610296249389650.54623258113861081.982-0.36042016.017.0614.9315.030.0033040.003950.0007353------False10715.670.49780.518614.641.18750.0041519730.0036087306False0.0041519732.38True0.01379284451.860.914037350.039034265038812.29600048065185554----6766439FalseFalse17.5197334810730220.025585260300380372
1173755360152.55394-67.00502152.55394-67.00502001987.012.3416--5244972387821935104152.55396855330667-67.005018830104081.7270860671997072.1844708919525146----2016.018.2815.3213.6950.020320.022930.00853------False00715.7660.040370.0403715.693.3460.00123631170.0012363117False0.00123631172.908True0.00610759062.2151.00.04293854411.76000022888183647----2700905FalseFalse18.3039141433311130.025585260300380372
1183201912238.04446-62.51303238.04446-62.51303001988.012.3616--5832448803513250048238.04445937082093-62.513114787937880.192618325352668760.17968599498271942-5.9-4.032016.017.1914.33614.5860.005780.025180.0217------False00715.370.25270.305214.772.4140.0020047540.0016109607False0.0020047542.697True0.0083681632.0780.95645280.019334562846611.77999973297119140----1921302FalseFalse17.693142856149280.025585260300380372
1211240168197.19732-67.64373197.19732-67.64373001987.010.5316--5857187746464509696197.19711332414371-67.643761905727050.82549393177032470.7541971802711487----2016.017.2211.8811.870.020860.024840.01767------False00713.660.30540.305413.02.470.00150380830.0015038083False0.00150380832.822True0.00690910732.161.00.0464774089.94999980926513743----7019467TrueTrue17.639001215718310.025585260300380372
In [26]:
# Print the magnitude-related columns of one star and show its report.
agasc_id = 549462880
magnitude_columns = (
    'mag_aca_pred', 'mag_aca', 'mag_aca_pred_err', 'mag_aca_err', 'mag_aca_obs', 'mag_aca_err_obs', 'g_mag', 'bp_mag', 'rp_mag'
)
star_rows = agasc_gaia_matches[agasc_gaia_matches["agasc_id"] == agasc_id]
star_rows[magnitude_columns].pprint(max_width=-1)
report = star_report.Report(agasc_id)
report.show_in_notebook()
   mag_aca_pred        mag_aca         mag_aca_pred_err   mag_aca_err mag_aca_obs mag_aca_err_obs g_mag bp_mag rp_mag
----------------- ------------------ -------------------- ----------- ----------- --------------- ----- ------ ------
20.13661647019506 14.846000671386719 0.025585260300380372          63          --              -- 20.06  17.08  15.72

AGASC entry

Table length=1
agasc_idradecmagmag_acamag_aca_obspm_rapm_decepochtycho_idgsc2.3
int32float64float64float32float32float32int16int16float32str12str10
549462880242.7929263.9548516.0014.85---9999-99991983.0--N4M8000079

Matches

Table length=1
agasc_idgaia_idmag_1p7mag_predd_magd2dp_matchp_valuep_relativepm_rapm_decg_magbaseline
int64int64float32float16float16float16float32float32float32float16float16float16bool
549462880162962769438772313616.0017.701.700.060.00880.02251.000----20.06True

Match Candidates

Table length=1
agasc_idgaia_idmag_1p7mag_predd_magd2dp_matchp_valuep_relativepm_rapm_decg_magbaseline
int64int64float32float16float16float16float32float32float32float16float16float16bool
549462880162962769438772313616.0017.701.700.060.00880.02251.000----20.06True

AGASC Neighborhood (within 20 arcsec)

Table length=1
agasc_idradecd2dmagmag_acamag_aca_obspm_rapm_decpos_catidpm_catidmag_catid
arcsec
int32float64float64float64float32float32float32int16int16uint8uint8uint8
549462880242.7929263.954850.0016.0014.85---9999-9999601

Observed Magnitude Outliers¶

In [158]:
# Keep only the stars that have an observed ACA magnitude (mag_aca_obs not masked).
observed = agasc_gaia_matches[~agasc_gaia_matches["mag_aca_obs"].mask]

# NOTE(review): this rebinds the global gaia_model, which was earlier loaded with pickle;
# confirm both give the same model.
gaia_model = gm.get_gaia_model()
# Prediction with predict()'s default arguments; the earlier catalog-wide prediction passed
# with_instrument_bias=False, so this column may differ from agasc_gaia_matches["mag_aca_pred"].
observed["mag_aca_pred"] = gaia_model.predict(observed.to_pandas())
# Here d_mag_aca is observed minus catalog magnitude; for mag_outliers the column of the
# same name is predicted minus catalog.
observed["d_mag_aca"] = observed["mag_aca_obs"] - observed["mag_aca"]
observed.sort("d_mag_aca", reverse=True)

# Outliers: observed and predicted magnitudes differ by more than 1.
observed_outliers = observed[np.abs(observed["mag_aca_obs"] - observed["mag_aca_pred"]) > 1.]
#observed_outliers[["agasc_id", "mag_aca_obs", "mag_aca", "d_mag_aca"]].pprint(max_lines=-1)
In [160]:
# Histogram of (observed - predicted) ACA magnitude for stars with an observed
# magnitude. The log y-axis keeps the sparse tails visible.
plt.hist(
    observed["mag_aca_obs"] - observed["mag_aca_pred"],
    histtype="step",
    bins=np.linspace(-5, 5, 301),
)
# Label the figure so it can be read on its own when the notebook is skimmed.
plt.xlabel("mag_aca_obs - mag_aca_pred")
plt.ylabel("number of stars")
plt.title("Observed minus predicted ACA magnitude")
plt.yscale("log")
# plt.xlim(-2, 2)
No description has been provided for this image
In [161]:
# Write one AGASC ID per line; this file is passed to the command-line tool below.
with open("mag_aca_obs_outliers.txt", "w") as fh:
    for agasc_id in observed_outliers["agasc_id"]:
        fh.write(f"{agasc_id}\n")

# Run agasc-update-magnitudes in report mode for the outlier stars.
# stdout/stderr are captured (as bytes) in `proc` so they do not flood the notebook.
proc = subprocess.run(
    [
        "agasc-update-magnitudes",
        "--report",
        "--agasc-id-file=mag_aca_obs_outliers.txt",
        f"--output-dir={REPORTS_DIR / 'mag_aca_obs-outliers' / 'supplement_files'}",
        f"--reports-dir={REPORTS_DIR / 'mag_aca_obs-outliers'}",
        "--log-level=info"
    ],
    capture_output=True
)

# Because the output is captured, a failure would otherwise go unnoticed:
# show the captured stderr and raise CalledProcessError on a non-zero exit code.
if proc.returncode != 0:
    print(proc.stderr.decode(errors="replace"))
    proc.check_returncode()
In [162]:
# Give the directory behind "latest" the fixed name "supplement_reports"
# (next to wherever it currently lives) and drop the "latest" link.
reports_dir = REPORTS_DIR / 'mag_aca_obs-outliers'
latest = reports_dir / "latest"
if latest.exists():
    # resolve() follows the link when "latest" is a symlink; otherwise it is
    # just the absolute path of "latest" itself.
    name = latest.resolve()
    name.rename(name.parent / "supplement_reports")
    # Only a symlink is left behind after the rename. If "latest" was a real
    # directory it has already been moved, and unlink() would raise.
    if latest.is_symlink():
        latest.unlink()
In [164]:
# Generate the HTML report list for the observed-magnitude outliers.
# The global NumPy RNG is seeded before the reports are generated.
np.random.seed(549462880)
# Set the flag locally, as the other report cells in this notebook do, so this
# cell does not depend on `generate_reports` leaking from another cell.
generate_reports = True
if generate_reports:
    print(f"Making report with {len(observed_outliers)} stars")

    # HTML shown at the top of the report index page.
    description = """
    <p>This list includes the stars with abs(mag_aca_1p8 - mag_aca_obs) > 1 </p>

    <p>
    The purpose of this report is to check whether outliers can be caused by misidentification.
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>

    <h2> Notable examples:</h2>
    <ul>
      <li></li>

    </ul>

    <h2> Stars </h2>
    """

    reports_dir = REPORTS_DIR / "mag_aca_obs-outliers" / "agasc_gaia_reports"
    star_report.make_report_list(
        data=observed_outliers[['agasc_id', 'p_value', 'd2d', 'd_mag', 'd_mag_aca', 'mag_aca', 'mag_aca_pred', 'mag_aca_obs']],
        path=reports_dir,
        title='Catalog Magnitude Outliers',
        description=description,
        overwrite=True
    )
Making report with 76 stars
76it [00:16,  4.69it/s]

"Difficult" Stars¶

There is a very important point to make

Each "difficult" star is grouped with all the stars it could be confused with. The matches in each group are recomputed to guarantee that no AGASC or Gaia ID is repeated. The process starts by selecting the match with the highest probability, removing the corresponding AGASC and Gaia IDs from the candidate matches, and repeating until there are no candidate matches left.

For each of these groups, we defined "latest_pos_cat" as the POS_CATID value with the highest precedence from the AGASC entries in the group. The precedence is (in decreasing order) [5, 6, 4, 3, 2, 1]. The catalog precedence IS NOT considered when recomputing the matches.

The entries with POS_CATID different than latest_pos_cat can be considered duplicates, but this is not guaranteed to be the case.

For example, AGASC 102499593 and 102499594 are two stars in Tycho2 that form a binary system (?). AGASC 102499592 is a star in GSC2.3 that lies right in between, and has a magnitude consistent with it being a blend of the other two. Based on catalog precedence, 102499592 appears to be a duplicate. Gaia IDs 3151414218873077760 and 3151414218874713600 are two resolved stars in Gaia that are matched to AGASC 102499593 and 102499594 respectively. AGASC 102499592 is matched to the nearest remaining star, which is 13 arcsec away, and this match is discarded as background based on p-value.

One can ask the question of whether there are any possible duplicates that ended up being the best match. The answer is no:

In [30]:
# Rows whose position catalog differs from the group's "latest_pos_cat"
# (possible duplicates) and that are nevertheless flagged as the best match.
is_possible_duplicate = agasc_difficult['pos_catid'] != agasc_difficult['latest_pos_cat']
agasc_difficult[is_possible_duplicate & agasc_difficult['best_match']]
Out[30]:
Table length=0
agasc_idgaia_idbest_matchd2dd_magp_matchp_valuep_relativeidxmagmag_bandmag_catidpos_catidbest_match_0latest_pos_catbest_match_1best_match_2groupgroup_size
int64int64boolfloat16float16float32float32float32int64float32int16uint8uint8boolint64boolboolint64int64
In [31]:
# AGASC_GAIA_X_MATCH_ALL = star_report.AGASC_GAIA_X_MATCH_ALL
# SUMMARY = star_report.SUMMARY
# import importlib
# importlib.reload(star_report)
# star_report.AGASC_GAIA_X_MATCH_ALL = AGASC_GAIA_X_MATCH_ALL
# star_report.SUMMARY = SUMMARY
In [168]:
# Joint report for six AGASC IDs: display it inline and save the HTML
# under the "difficult" reports directory.
group_agasc_ids = [
    181023536,
    181023537,
    181023560,
    181023552,
    181023544,
    181023568,
]
report = star_report.Report(group_agasc_ids)
report.show_in_notebook()
report.save_html(REPORTS_DIR / "difficult")
AGASC 181023536 epoch in AGASC: 2000.0, epoch in summary: 1991.74
AGASC 181023537 epoch in AGASC: 2000.0, epoch in summary: 1991.74
AGASC 181023560 epoch in AGASC: 2000.0, epoch in summary: 1991.87

AGASC entry

Table length=6
agasc_idradecmagmag_acamag_aca_obspm_rapm_decepochtycho_idgsc2.3
int32float64float64float32float32float32int16int16float32str12str10
181023544123.0535517.6485.594.89--74-1192000.0--N908000475
181023537123.0531904917.647749426.246.23--80-1292000.01381-1638-20.0
181023536123.0531904917.647749425.305.36--80-1292000.01381-1638-1N908000476
181023552123.0531217.647725.194.48---9999-99992000.0--N908000477
181023560123.0549392317.648235675.855.87--86-912000.01381-1641-1N908000478
181023568123.0548317.647426.305.59---9999-99992000.0--N908000479

Matches

Table length=3
agasc_idgaia_idmag_1p7mag_predd_magd2dp_matchp_valuep_relativepm_rapm_decg_magbaseline
int64int64float32float16float16float16float32float32float32float16float16float16bool
1810235366572445215935093765.305.560.263.740.23410.17960.949----5.867True
1810235606572445215935089925.856.330.481.530.00030.00261.00037.000-150.8755.934True
1810235376572445860154854406.245.51-0.731.380.00020.00180.175118.312-148.3755.52True

Match Candidates

Table length=18
agasc_idgaia_idmag_1p7mag_predd_magd2dp_matchp_valuep_relativepm_rapm_decg_magbaseline
int64int64float32float16float16float16float32float32float32float16float16float16bool
1810235526572445860154854405.195.330.140.490.33380.22491.000118.312-148.3755.52False
1810235366572445215935093765.305.560.263.740.23410.17960.949----5.867True
1810235366572445860154854405.305.510.210.980.01260.02750.051118.312-148.3755.52False
1810235446572445215935093765.595.38-0.212.620.01000.02410.984----5.867False
1810235376572445215935093766.245.56-0.692.840.00100.00510.825----5.867False
1810235606572445215935089925.856.330.481.530.00030.00261.00037.000-150.8755.934True
1810235376572445860154854406.245.51-0.731.380.00020.00180.175118.312-148.3755.52True
1810235526572445215935093765.195.380.192.430.00020.00150.000----5.867False
1810235446572445860154854405.595.33-0.262.280.00020.00150.016118.312-148.3755.52False
1810235686572445215935089926.306.13-0.173.900.00000.00010.98137.000-150.8755.934False
1810235686572445215935093766.305.38-0.924.050.00000.00000.019----5.867False
1810235606572445215935093765.855.56-0.295.570.00000.00000.000----5.867False
1810235446572445215935089925.596.130.545.270.00000.00000.00037.000-150.8755.934False
1810235376572445215935089926.246.330.087.120.00000.00000.00037.000-150.8755.934False
1810235686572445860154854406.305.33-0.976.320.00000.00000.000118.312-148.3755.52False
1810235606572445860154854405.855.51-0.347.110.00000.00000.000118.312-148.3755.52False
1810235526572445215935089925.196.130.947.020.00000.00000.00037.000-150.8755.934False
1810235366572445215935089925.306.331.037.140.00000.00000.00037.000-150.8755.934False

AGASC Neighborhood (within 20 arcsec)

Table length=6
agasc_idradecd2dmagmag_acamag_aca_obspm_rapm_decpos_catidpm_catidmag_catid
arcsec
int32float64float64float64float32float32float32int16int16uint8uint8uint8
181023544123.0535517.6481.175.594.89--74-119221
181023537123.0531904917.647749422.146.246.23--80-129555
181023536123.0531904917.647749422.145.305.36--80-129555
181023552123.0531217.647722.185.194.48---9999-9999601
181023560123.0549392317.648235674.115.855.87--86-91555
181023568123.0548317.647424.226.305.59---9999-9999601
In [33]:
# Joint report for five AGASC IDs, displayed inline only.
group_agasc_ids = [120468440, 120468441, 120468448, 120468456, 120468464]
report = star_report.Report(group_agasc_ids)
report.show_in_notebook()
AGASC 120468440 epoch in AGASC: 2000.0, epoch in summary: 1991.59
AGASC 120468441 epoch in AGASC: 2000.0, epoch in summary: 1991.55

AGASC entry

Table length=5
agasc_idradecmagmag_acamag_aca_obspm_rapm_decepochtycho_idgsc2.3
int32float64float64float32float32float32int16int16float32str12str10
120468456226.88689.22537.406.67---20572000.0--N5RX000015
120468441226.886704369.225051697.547.48---189252000.0919-1659-20.0
120468448226.886469.225426.806.09---9999-99992000.0--N5RX000014
120468464226.886589.224947.506.79---9999-99992000.0--N5RX000016
120468440226.887287039.226049917.357.32---190292000.0919-1659-1N5RX000013

Matches

Table length=2
agasc_idgaia_idmag_1p7mag_predd_magd2dp_matchp_valuep_relativepm_rapm_decg_magbaseline
int64int64float32float16float16float16float32float32float32float16float16float16bool
12046844011677180491946192647.357.550.200.031.40490.56711.000-194.87522.9227.188True
12046844111677180491946191367.547.740.200.021.35480.55341.000-190.50030.3917.38True

Match Candidates

Table length=10
agasc_idgaia_idmag_1p7mag_predd_magd2dp_matchp_valuep_relativepm_rapm_decg_magbaseline
int64int64float32float16float16float16float32float32float32float16float16float16bool
12046844011677180491946192647.357.550.200.031.40490.56711.000-194.87522.9227.188True
12046844111677180491946191367.547.740.200.021.35480.55341.000-190.50030.3917.38True
12046846411677180491946191367.507.520.020.580.29700.20971.000-190.50030.3917.38False
12046845611677180491946191367.407.520.120.940.02500.04330.997-190.50030.3917.38False
12046844811677180491946191366.807.520.721.500.00010.00130.989-190.50030.3917.38False
12046845611677180491946192647.407.33-0.073.130.00010.00080.003-194.87522.9227.188False
12046844111677180491946192647.547.550.014.140.00000.00010.000-194.87522.9227.188False
12046844811677180491946192646.807.330.533.640.00000.00000.011-194.87522.9227.188False
12046844011677180491946191367.357.740.394.120.00000.00000.000-190.50030.3917.38False
12046846411677180491946192647.507.33-0.174.640.00000.00000.000-194.87522.9227.188False

AGASC Neighborhood (within 20 arcsec)

Table length=5
agasc_idradecd2dmagmag_acamag_aca_obspm_rapm_decpos_catidpm_catidmag_catid
arcsec
int32float64float64float64float32float32float32int16int16uint8uint8uint8
120468456226.88689.22530.227.406.67---2057221
120468441226.886704369.225051691.097.547.48---18925555
120468448226.886469.225421.746.806.09---9999-9999601
120468464226.886589.224941.897.506.79---9999-9999601
120468440226.887287039.226049913.137.357.32---19029555
In [34]:
# Joint report for the three AGASC IDs used as the example in the text above.
group_agasc_ids = [102499592, 102499593, 102499594]
report = star_report.Report(group_agasc_ids)
report.show_in_notebook()
AGASC 102499593 epoch in AGASC: 2000.0, epoch in summary: 1991.90
AGASC 102499594 epoch in AGASC: 2000.0, epoch in summary: 1991.90

AGASC entry

Table length=3
agasc_idradecmagmag_acamag_aca_obspm_rapm_decepochtycho_idgsc2.3
int32float64float64float32float32float32int16int16float32str12str10
102499592116.6822688511.0978971810.5710.72---9999-99991991.25--N88Z000037
102499593116.6823091711.0978773311.0811.22---9999-99992000.0782-161-20.0
102499594116.6823091711.0978773311.0911.41---9999-99992000.0782-161-30.0

Matches

Table length=3
agasc_idgaia_idmag_1p7mag_predd_magd2dp_matchp_valuep_relativepm_rapm_decg_magbaseline
int64int64float32float16float16float16float32float32float32float16float16float16bool
102499593315141421887307776011.0811.06-0.020.093.00410.95321.000-0.8560.58611.09True
102499594315141421887471360011.0910.87-0.220.041.29620.53880.999-0.7810.51210.94True
102499592315141421457857728010.5717.817.2513.020.00000.00000.0003.0313.06417.69True

Match Candidates

Table length=10
agasc_idgaia_idmag_1p7mag_predd_magd2dp_matchp_valuep_relativepm_rapm_decg_magbaseline
int64int64float32float16float16float16float32float32float32float16float16float16bool
102499593315141421887307776011.0811.06-0.020.093.00410.95321.000-0.8560.58611.09True
102499594315141421887471360011.0910.87-0.220.041.29620.53880.999-0.7810.51210.94True
102499592315141421887471360010.5710.870.300.950.00840.02210.743-0.7810.51210.94False
102499592315141421887307776010.5711.060.490.950.00290.01070.257-0.8560.58611.09False
102499594315141421887307776011.0911.06-0.031.920.00120.00580.001-0.8560.58611.09False
102499593315141421887471360011.0810.87-0.211.970.00040.00300.000-0.7810.51210.94False
102499594315141421457857728011.0917.816.7312.280.00000.00000.0003.0313.06417.69False
102499592315141421457857728010.5717.817.2513.020.00000.00000.0003.0313.06417.69True
102499593315141421457857728011.0817.816.7413.850.00000.00000.0003.0313.06417.69False
102499593315141428330378764811.0818.086.9914.050.00000.00000.000-1.754-7.25817.94False

AGASC Neighborhood (within 20 arcsec)

Table length=3
agasc_idradecd2dmagmag_acamag_aca_obspm_rapm_decpos_catidpm_catidmag_catid
arcsec
int32float64float64float64float32float32float32int16int16uint8uint8uint8
102499592116.6822688511.097897180.0210.5710.72---9999-9999505
102499593116.6823091711.097877330.1811.0811.22---9999-9999505
102499594116.6823091711.097877330.1811.0911.41---9999-9999505
In [35]:
# Seed NumPy's global RNG.
np.random.seed(120468440)

# Left join: keep every row of agasc_difficult and attach the columns of
# agasc_gaia_matches where (agasc_id, gaia_id) coincide. Columns present in
# both tables get the suffixes "_difficult" and "_base".
diff_1 = join(
    agasc_difficult,
    agasc_gaia_matches,
    keys=['agasc_id', 'gaia_id'],
    join_type='left',
    table_names=['difficult', 'base'],
)

# Strip the "_base" suffix so those columns carry their plain names.
base_cols = [col for col in diff_1.colnames if col.endswith('_base')]
diff_1.rename_columns(base_cols, [col[:-len('_base')] for col in base_cols])

# Rows without a counterpart in agasc_gaia_matches have a masked best_match;
# set those entries to False.
no_base_match = diff_1['best_match'].mask
diff_1['best_match'][no_base_match] = False

# Display formats used when the table is printed.
column_formats = {
    'd_mag': "{:.2f}",
    'mag_aca': "{:.2f}",
    'mag_aca_pred': "{:.2f}",
    'mag_aca_obs': "{:.2f}",
    'd2d': "{:.3f}",
    'p_value': "{:.4f}",
}
for colname, fmt in column_formats.items():
    diff_1[colname].format = fmt

diff_1.sort(['group', 'mag_aca'])
diff_1 = diff_1.group_by('group')
In [36]:
# Choose which "difficult" groups go into the report:
#   - 50 groups drawn at random (with replacement, so possibly fewer distinct ones),
#   - the first 50 group labels whose group_size is at least 3,
#   - the groups containing a few hand-picked AGASC IDs.
# np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
hand_picked_ids = np.asarray(diff_1['group'][np.isin(diff_1['agasc_id'], [11931168, 980817608, 5253584, 77996162, 23877032, 102499593, 102499592])])
group_idx = np.unique(np.concatenate([
    np.random.choice(np.unique(diff_1['group']), size=50),
    np.asarray(np.unique(diff_1['group'][diff_1['group_size'] >= 3])[:50]),
    hand_picked_ids
]))

# `group_idx` holds values of the 'group' column, whereas `diff_1.groups[...]`
# indexes groups by position. Translate labels to positions explicitly; this is
# a no-op when the labels happen to be 0..N-1 and correct otherwise.
# Assumes diff_1 is grouped by 'group', so groups follow sorted label order.
group_labels = np.unique(np.asarray(diff_1['group']))
group_pos = np.searchsorted(group_labels, np.asarray(group_idx))
diff = diff_1.groups[group_pos]
In [169]:
# Generate the HTML report list for the selected groups of "difficult" stars.
generate_reports = True
if generate_reports:
    print(f"Making report with {len(diff.groups)} groups of stars")

    # HTML shown at the top of the report index page. The note about
    # [102499592, 102499593, 102499594] lives inside the <li> of its report:
    # text placed directly inside <ul> is invalid HTML.
    description = """
    <p>This list includes "difficult" stars. </p>

    <p>
    Difficult stars are the ones that had matching collisions. In other words, the same Gaia star
    was matched to more than one AGASC star. Difficult stars are divided in equivalence classes,
    based on which stars it collided. Often these stars are duplicates in AGASC, but not always.
    </p>

    <p>
    The purpose of this report is to check whether outliers can be caused by misidentification.
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>

    <h2> Notable examples:</h2>
    <ul>
      <li>
        <a href="report_181023536_181023537_181023560_181023552_181023544_181023568.html"> 181023536 et al </a>.
        The most difficult one. Six AGASC stars and three Gaia stars. Some AGASC stars might be duplicates.
        Not all Gaia stars have proper motion.
      </li>
      <li>
        <a href="report_11931168.html"> 11931168 </a>. Two stars in AGASC matched to two in Gaia, but
        they are difficult. The table values are masked. NEED TO CHECK.
      </li>
      <li>
        <a href="report_980817608.html"> 980817608 </a>. Three stars in AGASC, but only one in Gaia.
        Two of the AGASC stars are from GSC2.3 and are separated by 6 arcsec. The other star is a
        Tycho2 star right in between. The Gaia star matches the Tycho2 star. Sounds like a spurious
        binary system identification in GSC2.3, which should correspond to the single Tycho star.
      </li>
      <li>
        <a href="report_5253584.html"> 5253584 </a>. Three stars in AGASC matched to one in Gaia.
        Two of the three are Tycho2 stars at the exact same location. The third is an older catalog.
      </li>
      <li>
        <a href="report_77996162.html"> 77996162 </a>. Four stars in AGASC matched to two in Gaia.
      </li>
      <li>
        <a href="report_23877032.html"> 23877032 </a>. Three stars in AGASC matched to one in Gaia.
        Two are duplicates.
      </li>
      <li>
        <a href="report_102499593.html"> 102499593 </a>. Two stars in AGASC matched to three in Gaia.
        [102499592, 102499593, 102499594]. One star is a duplicate. Fortunately, this is the one that
        is matched to a third Gaia star, and is removed based on p-value. It could have happened that
        the duplicate gets a better p-value than one of the other two, so one of the other two is not
        updated, and the duplicate is updated instead. Duplicates must be removed from the match candidates.
      </li>
    </ul>

    <h2> Stars </h2>
    """

    reports_dir = REPORTS_DIR / "difficult"
    star_report.make_report_list_by_group(
        data=diff.groups[:][['agasc_id', 'best_match', 'group_size', 'group']],
        path=reports_dir,
        title='Difficult Stars',
        description=description,
        overwrite=True
    )
Making report with 102 groups of stars
102it [00:24,  4.09it/s]

Candidates with low p-value¶

In [50]:
# Density histograms of the match p-value: every match versus only the stars
# flagged as guide or acq candidates. Both use the same 100 bins on [0, 1].
bins = np.linspace(0, 1, 101)
is_candidate = agasc_gaia_matches['guide'] | agasc_gaia_matches['acq']
p_value_samples = [
    (agasc_gaia_matches['p_value'], "all"),
    (agasc_gaia_matches['p_value'][is_candidate], "candidates"),
]
for p_values, label in p_value_samples:
    sns.histplot(
        p_values,
        stat="density",
        bins=bins,
        label=label,
    )
plt.legend()
plt.xlabel('p-value')
plt.title("p-value distribution (candidates)")
plt.yscale('log')
No description has been provided for this image
In [ ]:
# Count guide/acq candidates, and how many of them have p_value below 0.02.
is_candidate = agasc_gaia_matches['guide'] | agasc_gaia_matches['acq']
is_marginal = agasc_gaia_matches['p_value'] < 0.02
n_cand = np.count_nonzero(is_candidate)
n_cand_marginal = np.count_nonzero(is_marginal & is_candidate)

print(f"{n_cand_marginal} marginal candidates out of {n_cand}")
Out[ ]:
443208
In [61]:
# Guide/acq candidates with p_value below 0.022; keep the 100 lowest p-values.
candidates = agasc_gaia_matches[
    (agasc_gaia_matches['p_value'] < 0.022) & (agasc_gaia_matches['guide'] | agasc_gaia_matches['acq'])
]
candidates.sort('p_value')
candidates = candidates[:100]

np.random.seed(120468464)
generate_reports = True
if generate_reports:
    print(f"Making report with {len(candidates)} stars")

    # HTML shown at the top of the report index page.
    description = """
    <p>This list includes stars that are guide or acq candidates with low p-value. </p>

    <p>
    The purpose of this report is to check whether outliers can be caused by misidentification.
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>

    <h2> Notable examples:</h2>
    <ul>
      <li>
        <a href="report_303961568.html"> 303961568 </a>. Gaia star is missing from plot. NEED TO CHECK.
      </li>
    </ul>

    <h2> Stars </h2>
    """

    # Build the output location from REPORTS_DIR, like the other report cells,
    # instead of repeating the absolute path (same directory as before).
    reports_dir = REPORTS_DIR / "candidates-p-value-0.01-0.022"
    star_report.make_report_list(
        data=candidates[['agasc_id', 'p_value', 'd2d', 'd_mag', 'mag_aca', 'mag_aca_pred', 'mag_aca_obs']],
        path=reports_dir,
        title='Candidates with low p-value',
        description=description,
        overwrite=True
    )
Making report with 100 stars
100it [03:48,  2.28s/it]
In [65]:
# Load the cross-match table returned by get_agasc_gaia_x_match_difficult_fixed
# and keep only the rows flagged as best match.
x_match_fixed = xm.get_agasc_gaia_x_match_difficult_fixed()
agasc_gaia_matches_all = x_match_fixed[x_match_fixed['best_match']]
In [66]:
def add_agasc_summary_columns(matches, summary):
    """Copy per-star columns from the AGASC summary into a cross-match table.

    Rows are paired by ``agasc_id`` using ``np.searchsorted``, which is only
    valid when ``summary["agasc_id"]`` is sorted and contains every ID present
    in ``matches``; both conditions are checked before any column is copied.

    Parameters
    ----------
    matches : table with an ``agasc_id`` column; modified in place.
    summary : table with an ``agasc_id`` column and the columns copied below.

    Returns
    -------
    numpy.ndarray
        For each row of ``matches``, the index of the corresponding row in ``summary``.

    Raises
    ------
    ValueError
        If some ``agasc_id`` in ``matches`` is not found at the looked-up position.
    """
    summary_ids = np.asarray(summary["agasc_id"])
    match_ids = np.asarray(matches["agasc_id"])
    idx = np.searchsorted(summary_ids, match_ids)

    # searchsorted returns an insertion point, not a guaranteed hit: verify
    # that each looked-up row really carries the requested ID.
    found = idx < len(summary_ids)
    found[found] = summary_ids[idx[found]] == match_ids[found]
    if not np.all(found):
        raise ValueError(
            f"{np.count_nonzero(~found)} agasc_id values could not be located in the summary "
            "(is the summary sorted by agasc_id and complete?)"
        )

    # Columns copied under the same name.
    same_name_cols = [
        "mag_aca",
        "mag_aca_err",
        "mag_aca_obs",
        "mag_aca_err_obs",
        "mag_catid",
        "mag_band",
        "random_index",
        "guide",
        "acq"
    ]
    for col in same_name_cols:
        matches[col] = summary[col][idx]

    # Columns copied with an "_agasc" suffix.
    suffixed_cols = [
        "pm_ra",
        "pm_dec",
        "epoch",
        "ra",
        "dec",
    ]
    for col in suffixed_cols:
        matches[f"{col}_agasc"] = summary[col][idx]

    return idx


i = add_agasc_summary_columns(agasc_gaia_matches_all, agasc_summary)
In [69]:
# Same pair of density histograms as before, now for agasc_gaia_matches_all:
# every match versus only the guide/acq candidates.
bins = np.linspace(0, 1, 101)
is_candidate_all = agasc_gaia_matches_all['guide'] | agasc_gaia_matches_all['acq']
p_value_samples_all = [
    (agasc_gaia_matches_all['p_value'], "all"),
    (agasc_gaia_matches_all['p_value'][is_candidate_all], "candidates"),
]
for p_values, label in p_value_samples_all:
    sns.histplot(
        p_values,
        stat="density",
        bins=bins,
        label=label,
    )
plt.legend()
plt.xlabel('p-value')
plt.title("p-value distribution (candidates)")
plt.yscale('log')
No description has been provided for this image
In [ ]:
# Count guide/acq candidates in agasc_gaia_matches_all, and how many of them
# have p_value below 0.02.
is_candidate_all = agasc_gaia_matches_all['guide'] | agasc_gaia_matches_all['acq']
is_marginal_all = agasc_gaia_matches_all['p_value'] < 0.02
n_cand = np.count_nonzero(is_candidate_all)
n_cand_marginal = np.count_nonzero(is_marginal_all & is_candidate_all)

print(f"{n_cand_marginal} marginal candidates out of {n_cand}")
1633 marginal candidates out of 444377
In [70]:
# Number of guide/acq candidates with 0.003 < p_value < 0.005.
is_candidate_all = agasc_gaia_matches_all['guide'] | agasc_gaia_matches_all['acq']
p_value_all = agasc_gaia_matches_all["p_value"]
np.count_nonzero(is_candidate_all & (p_value_all < 0.005) & (p_value_all > 0.003))
Out[70]:
157
In [71]:
# Guide/acq candidates whose p_value lies strictly between 0.003 and 0.005.
bad_candidates = agasc_gaia_matches_all[
    (agasc_gaia_matches_all['guide'] | agasc_gaia_matches_all['acq'])
    & (agasc_gaia_matches_all["p_value"] < 0.005)
    & (agasc_gaia_matches_all["p_value"] > 0.003)
]

np.random.seed(120468464)
generate_reports = True
if generate_reports:
    print(f"Making report with {len(bad_candidates)} stars")

    # HTML shown at the top of the report index page.
    description = """
    <p>This list includes stars that are guide or acq candidates with p-value between 0.003 and 0.005. </p>

    <p>
    The purpose of this report is to check whether outliers can be caused by misidentification.
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>

    <h2> Notable examples:</h2>
    <ul>
      <li>
        <a href="report_303961568.html"> 303961568 </a>. Gaia star is missing from plot. NEED TO CHECK.
      </li>
    </ul>

    <h2> Stars </h2>
    """

    # Build the output location from REPORTS_DIR, like the other report cells,
    # instead of repeating the absolute path (same directory as before).
    reports_dir = REPORTS_DIR / "candidates-p-value-0.003-0.005"
    star_report.make_report_list(
        data=bad_candidates[['agasc_id', 'p_value', 'd2d', 'd_mag', 'mag_aca', 'mag_aca_obs']],
        path=reports_dir,
        title='Candidates with low p-value',
        description=description,
        overwrite=True
    )
Making report with 157 stars
157it [06:47,  2.60s/it]