import sys
sys.path.insert(0, '../')

import warnings

warnings.simplefilter("ignore")
# warnings.filterwarnings("error", message="overflow encountered in cast")
# warnings.filterwarnings("ignore", message="`alltrue` is deprecated as of NumPy")

import os
import numpy as np
import matplotlib.pyplot as plt
import tables

from astropy.table import Table, join
from astropy.coordinates import SkyCoord
from astropy import units as u

from pathlib import Path
from IPython.display import display, Markdown

from agasc_gaia.datasets import get_agasc_summary
import agasc_gaia.cross_match as xm
from agasc_gaia.gaia_model import get_gaia_model

from agasc_gaia.star_report import Report
from agasc_gaia import star_report

import seaborn as sns

SKA = Path(os.environ['SKA'])
AGASC_FILE = SKA / 'data' / 'agasc' / 'agasc1p7.h5'

generate_reports = True

# @utils.cached(name="comparison_table")
def get_comparison_table():
    print("loading tables")


    print("  - summary")
    agasc_summary = get_agasc_summary()


    print("  - indirect x-matches")
    agasc_indirect = xm.get_agasc_tycho_gsc_gaia_x_match()
    print("  - direct x-matches")
    agasc_direct = xm.get_agasc_gaia_x_match_difficult_fixed()
    agasc_direct = agasc_direct[agasc_direct["best_match"] & (agasc_direct['p_value'] > 0.001)]
    assert len(agasc_indirect), len(np.unique(agasc_indirect['agasc_id']))
    assert len(agasc_direct), len(np.unique(agasc_direct['agasc_id']))

    print("  - AGASC 1.7")
    with tables.open_file(SKA / 'data' / 'agasc' / 'agasc1p7.h5') as h5:
        agasc_1p7 = Table(h5.root.data[:])

    # this removes a star that is duplicated in the AGASC catalog
    agasc_1p7 = agasc_1p7[
        ~(
            (agasc_1p7["AGASC_ID"] == 154534513)
            & (agasc_1p7["MAG_CATID"] == 1)
        )
    ]
    agasc_ids = np.unique(np.concatenate([agasc_indirect['agasc_id'].data, agasc_direct['agasc_id'].data]))
    agasc_1p7 = agasc_1p7[np.in1d(agasc_1p7['AGASC_ID'], agasc_ids)]


    cols = [
        # 'pm_ra',
        # 'pm_dec',
        'mag_aca',
        'mag_aca_err',
        'mag_aca_obs',
        'mag_aca_err_obs',
        'guide',
        'acq',
        'tycho_id',
        'tycho_tdsc_id',
        'mag_catid',
        'mag_band',
    ]

    i = np.searchsorted(agasc_summary['agasc_id'], agasc_indirect['agasc_id'])
    for col in cols:
        agasc_indirect[col] = agasc_summary[col][i]
    agasc_indirect['mag_1p7'] = agasc_summary['mag'][i]

    i = np.searchsorted(agasc_summary['agasc_id'], agasc_direct['agasc_id'])
    for col in cols:
        agasc_direct[col] = agasc_summary[col][i]
    agasc_direct['mag_1p7'] = agasc_summary['mag'][i]

    gaia_model = get_gaia_model()
    cols = ['g_mag', 'rp_mag', 'bp_mag', 'mag_catid', 'mag_band', 'has_mag', "phot_variable_flag", "mad_mag_g_fov"]

    agasc_indirect['mag_aca_pred'] = np.ma.masked_all(len(agasc_indirect), dtype=np.float16)
    agasc_indirect['mag_aca_pred'][agasc_indirect['has_mag'] != 0] = gaia_model.predict(agasc_indirect[agasc_indirect['has_mag'] != 0][cols].to_pandas())

    agasc_direct['mag_aca_pred'] = np.ma.masked_all(len(agasc_direct), dtype=np.float16)
    agasc_direct['mag_aca_pred'][agasc_direct['has_mag'] != 0] = gaia_model.predict(agasc_direct[agasc_direct['has_mag'] != 0][cols].to_pandas())



    print("joining indirect table")
    data = join(
        agasc_1p7,
        agasc_indirect,
        keys_left='AGASC_ID',
        keys_right='agasc_id',
        table_names=['1', '2'],
        uniq_col_name='{col_name}_{table_name}',
        metadata_conflicts='silent',
        join_type='left',
    )

    assert not np.any([col[-2:] == '_1' for col in data.colnames])

    print("joining direct table")
    data = join(
        data,
        agasc_direct,
        keys_left='AGASC_ID',
        keys_right='agasc_id',
        table_names=['indirect', 'direct'],
        uniq_col_name='{col_name}_{table_name}',
        metadata_conflicts='silent',
        join_type='left',
    )

    direct = ~data['agasc_id_direct'].mask
    indirect = ~data['agasc_id_indirect'].mask


    print("removing redundant columns")
    cols = [
        'agasc_id',
        'acq',
        'guide',
        'mag_aca',
        'mag_aca_err',
        'mag_aca_err_obs',
        'mag_aca_obs',
        'tycho_id',
        'tycho_tdsc_id',
        ]

    for col in cols:
        if f'{col}_direct' in data.colnames:
            data.remove_column(f'{col}_direct')
        if f'{col}_indirect' in data.colnames:
            data.remove_column(f'{col}_indirect')


    cols = [
        'acq',
        'gsc2.3',
        'guide',
        'mag',
        'mag_aca',
        'mag_aca_err',
        'mag_aca_err_obs',
        'mag_aca_obs',
        'tycho_id',
        'tycho_tdsc_id',
    ]

    i = np.searchsorted(agasc_summary['agasc_id'], data['AGASC_ID'])
    for col in cols:
        data[col] = agasc_summary[col][i]
    # data['mag_1p7'] = agasc_summary['mag'][i]

    cols = ['agasc_id', 'ra', 'dec', 'epoch', 'pm_ra', 'pm_dec']
    for col in cols:
        data[col] = agasc_summary[col][i]

    print("setting positions")
    data['has_gsc'] = data["XREF_ID1"] != -9999
    data['has_tyc'] = data["XREF_ID3"] != -9999

    data['PM_RA'][data['PM_RA'] == -9999] = 0
    data['PM_DEC'][data['PM_DEC'] == -9999] = 0

    # these are "corrected" 1p7 positions,
    # same as 1p7 except Tycho2 positions were shifted to the actual epoch instead of 2000.
    # This was to avoid errors arising from the 1p7 proper motions
    data["coord"] = SkyCoord(
        ra=data["ra"],
        dec=data["dec"],
        unit="deg"
    )
    data["coord_direct_today"] = SkyCoord(
        ra=data["ra_direct"],
        dec=data["dec_direct"],
        # ra=(
        #     data["ra_direct"]
        #     + data["pm_ra_direct"]
        #     * (2023 - data["epoch_direct"])
        #     / 1000
        #     / 3600
        #     / np.cos(np.deg2rad(data["dec_direct"]))
        # ),
        # dec=(
        #     data["dec_direct"]
        #     + data["pm_dec_direct"]
        #     * (2023 - data["epoch_direct"])
        #     / 1000
        #     / 3600
        # ),
        unit="deg"
    )
    data["coord_indirect_today"] = SkyCoord(
        ra=data["ra_indirect"],
        dec=data["dec_indirect"],
        # ra=(
        #     data["ra_indirect"]
        #     + data["pm_ra_indirect"]
        #     * (2023 - data["epoch_indirect"])
        #     / 1000
        #     / 3600
        #     / np.cos(np.deg2rad(data["dec_indirect"]))
        # ),
        # dec=(
        #     data["dec_indirect"]
        #     + data["pm_dec_indirect"]
        #     * (2023 - data["epoch_indirect"])
        #     / 1000
        #     / 3600
        # ),
        unit="deg"
    )
    data["coord_direct"] = SkyCoord(
        ra=(
            data["ra_direct"]
            + data["pm_ra_direct"]
            * (data["epoch"] - data["epoch_direct"])
            / 1000
            / 3600
            / np.cos(np.deg2rad(data["dec_direct"]))
        ),
        dec=(
            data["dec_direct"]
            + data["pm_dec_direct"]
            * (data["epoch"] - data["epoch_direct"])
            / 1000
            / 3600
        ),
        unit="deg"
    )
    data["coord_indirect"] = SkyCoord(
        ra=(
            data["ra_indirect"]
            + data["pm_ra_indirect"]
            * (data["epoch"] - data["epoch_indirect"])
            / 1000
            / 3600
            / np.cos(np.deg2rad(data["dec_indirect"]))
        ),
        dec=(
            data["dec_indirect"]
            + data["pm_dec_indirect"]
            * (data["epoch"] - data["epoch_indirect"])
            / 1000
            / 3600
        ),
        unit="deg"
    )
    # this is to compare the 1p7 positions to what they would be with the 1p8 proper motion
    # our reference are the "corrected" positions above, and these are the 1p7 positions
    # shifted to their original epoch using the 1p8 proper motion
    data["coord_1p7"] = SkyCoord(
        ra=(
            data["RA"]
            + data["pm_ra_direct"]
            * (data["epoch"] - data["EPOCH"])
            / 1000
            / 3600
            / np.cos(np.deg2rad(data["DEC"]))
        ),
        dec=(
            data["DEC"]
            + data["pm_dec_direct"]
            * (data["epoch"] - data["EPOCH"])
            / 1000
            / 3600
        ),
        unit="deg"
    )

    # the difference between the two methods
    data["d2d"] = data["coord_direct"].separation(data["coord_indirect"]).to(u.arcsec)
    data["d2d_direct"] = data["coord"].separation(data["coord_direct"]).to(u.arcsec)
    data["d2d_indirect"] = data["coord"].separation(data["coord_indirect"]).to(u.arcsec)
    # differences introduced by removing 1p7 proper motion
    data["d2d_1p7"] = data["coord"].separation(data["coord_1p7"]).to(u.arcsec)
    # the difference between the two methods propagated to today
    data["d2d_today"] = data["coord_direct_today"].separation(data["coord_indirect_today"]).to(u.arcsec)

    return data

data = get_comparison_table()

loading tables
  - summary
  - indirect x-matches
  - direct x-matches
  - AGASC 1.7
joining indirect table
joining direct table
removing redundant columns
setting positions

direct = ~data['gaia_id_direct'].mask
indirect = ~data['gaia_id_indirect'].mask

data_intersection = data[indirect & direct]
observed = ~data_intersection['mag_aca_obs'].mask

agree = (data_intersection['gaia_id_direct'] == data_intersection['gaia_id_indirect'])

disagree = data_intersection[~agree]

fig, axes = plt.subplot_mosaic([["mag", "d2d"]], figsize=(9, 4))

plt.sca(axes["mag"])
plt.title("$\Delta$ mag")
plt.hist(
    data_intersection['d_mag_direct'],
    bins=np.linspace(-10, 10, 100),
    histtype='step',
    density=True,
    label="direct"
    )
plt.hist(
    data_intersection['d_mag_indirect'],
    bins=np.linspace(-10, 10, 100),
    histtype='step',
    density=True,
    label="indirect"
    )
plt.yscale('log')
plt.xlabel("mag$_{pred}$ - mag$_{Gaia}$")

N = len(data_intersection)
x = np.linspace(-3, 3, 100)
plt.plot(x, xm.d_mag_probability(x))


plt.sca(axes["d2d"])
plt.title("Radial offset")
plt.hist(
    data_intersection['d2d_direct'],
    # bins=np.logspace(-5, 1, 100),
    bins=np.linspace(0, 10, 100),
    histtype='step',
    density=True,
    label="direct",
    )
plt.hist(
    data_intersection['d2d_indirect'],
    # bins=np.logspace(-5, 1, 100),
    bins=np.linspace(0, 10, 100),
    histtype='step',
    density=True,
    label="indirect",
    )
plt.yscale('log')
plt.xlabel("d2d (arcsec)")

N = len(data_intersection['d2d_direct'])
# x = np.logspace(-3, 1, 101)
x = np.linspace(0, 10, 101)
dx = np.diff(x)
x = x[:-1] + dx / 2
# p_d2d = (
#     1*d2d_probability(x, sigma_d2d=.2)
#     + 0.05*d2d_probability(x, sigma_d2d=.5)
#     # + 0.5*d2d_probability(x, sigma_d2d=1.5)
# )
p_d2d = xm.d2d_probability(x)
p_d2d /= np.sum(dx * p_d2d)
plt.plot(x, p_d2d)
p_d2d = xm.gaussian_d2d_probability_(x, sigma_d2d=1.5)
p_d2d /= np.sum(dx * p_d2d)
plt.plot(x, p_d2d)
# p_d2d = d2d_probability(x, sigma_d2d=0.5)
# p_d2d /= np.sum(dx * p_d2d)
# plt.plot(x, p_d2d)
# p = 0.1
# p_d2d = (
#     (1-p)*d2d_probability(x, sigma_d2d=0.25) + 
#     p*d2d_probability(x, sigma_d2d=.5)
# )
# p_d2d /= np.sum(dx * p_d2d)
# plt.plot(x, p_d2d)
# p = 0.1
# p2 = 0.008
# p_d2d = (
#     (1-p)*xm.gaussian_d2d_probability_(x, sigma_d2d=0.25) + 
#     p*xm.gaussian_d2d_probability_(x, sigma_d2d=.5) +
#     p2*xm.gaussian_d2d_probability_(x, sigma_d2d=.8)
# )
# p_d2d /= np.sum(dx * p_d2d)
# plt.plot(x, p_d2d)
# p, p1, p2, p3 = 0.89020772, 0.09891197, 0.00791296, 0.00296736
# p_d2d = (
#     p*xm.gaussian_d2d_probability_(x, sigma_d2d=0.25) + 
#     p1*xm.gaussian_d2d_probability_(x, sigma_d2d=.5) +
#     p2*xm.gaussian_d2d_probability_(x, sigma_d2d=.8) +
#     p3*xm.gaussian_d2d_probability_(x, sigma_d2d=1.2)
# )
# p_d2d /= np.sum(dx * p_d2d)
# plt.plot(x, p_d2d)
plt.ylim(ymin=1e-7, ymax=4)
# plt.ylim(ymin=1e-1, ymax=1e6)
plt.xlim((0, 5))
plt.legend()

plt.tight_layout()

fig, axes = plt.subplot_mosaic([["mag", "d2d"]], figsize=(9, 4))

plt.sca(axes["mag"])
plt.hist(
    data_intersection['d_mag_direct'],
    bins=np.linspace(-10, 10, 100),
    histtype='step',
    label="direct",
)
plt.hist(
    data_intersection['d_mag_indirect'],
    bins=np.linspace(-10, 10, 100),
    histtype='step',
    label="indirect",
)

plt.xlabel("mag_1p8 - mag_1p7")
plt.legend()
plt.title("Magnitude")
plt.yscale('log')


plt.sca(axes["d2d"])
bins = np.linspace(0, 4, 100)
plt.hist(
    data_intersection["d2d_direct"][~agree],
    bins=bins,
    histtype='step',
    label='direct',
)
plt.hist(
    data_intersection["d2d_indirect"][~agree],
    bins=bins,
    histtype='step',
    label='indirect',
)
plt.title("Radial offset")
plt.legend()
plt.xlabel("d2d$_{1p7}$ (arcsec)")

plt.tight_layout()

n_both, n_indirect_no_direct, n_direct_no_indirect = np.count_nonzero(indirect & direct), np.count_nonzero(indirect & ~direct), np.count_nonzero(direct & ~indirect)


n_both_candidates = np.count_nonzero(indirect & direct & (data['guide'] | data['acq']))
n_indirect_no_direct_candidates = np.count_nonzero(indirect & ~direct & (data['guide'] | data['acq']))
n_direct_no_indirect_candidates = np.count_nonzero(direct & ~indirect & (data['guide'] | data['acq']))

n_both_observed = np.count_nonzero(indirect & direct & ~data['mag_aca_obs'].mask)
n_indirect_no_direct_observed = np.count_nonzero(indirect & ~direct & ~data['mag_aca_obs'].mask)
n_direct_no_indirect_observed = np.count_nonzero(direct & ~indirect & ~data['mag_aca_obs'].mask)

mag_out_indirect_2_5 = np.abs(data["mag_pred_indirect"] - data["MAG"]) > 2.5
mag_out_direct_2_5 = np.abs(data["mag_pred_direct"] - data["MAG"]) > 2.5
mag_out_indirect_5 = np.abs(data["mag_pred_indirect"] - data["MAG"]) > 5
mag_out_direct_5 = np.abs(data["mag_pred_direct"] - data["MAG"]) > 5
radial_outlier_direct_3 = data["d2d_direct"] > 3
radial_outlier_indirect_3 = data["d2d_indirect"] > 3
radial_outlier_direct_5 = data["d2d_direct"] > 5
radial_outlier_indirect_5 = data["d2d_indirect"] > 5

n_both_mag_outliers_2_5 = np.count_nonzero(indirect & direct & (mag_out_indirect_2_5 | mag_out_direct_2_5))
n_indirect_mag_outliers_2_5 = np.count_nonzero(indirect & ~direct & mag_out_indirect_2_5)
n_direct_mag_outliers_2_5 = np.count_nonzero(~indirect & direct & mag_out_direct_2_5)

n_both_mag_outliers_5 = np.count_nonzero(indirect & direct & (mag_out_indirect_5 | mag_out_direct_5))
n_indirect_mag_outliers_5 = np.count_nonzero(indirect & ~direct & mag_out_indirect_5)
n_direct_mag_outliers_5 = np.count_nonzero(~indirect & direct & mag_out_direct_5)

n_direct_radial_outliers_3 = np.count_nonzero(~indirect & direct & radial_outlier_direct_3)
n_indirect_radial_outliers_3 = np.count_nonzero(indirect & ~direct & radial_outlier_indirect_3)
n_both_radial_outliers_3 = np.count_nonzero(indirect & direct & radial_outlier_direct_3)

n_direct_radial_outliers_5 = np.count_nonzero(~indirect & direct & radial_outlier_direct_5)
n_indirect_radial_outliers_5 = np.count_nonzero(indirect & ~direct & radial_outlier_indirect_5)
n_both_radial_outliers_5 = np.count_nonzero(indirect & direct & radial_outlier_direct_5)

# "agree" is defined only in the intersection, which implies (indirect & direct)
n_disagree, f_agree = np.count_nonzero(~agree), np.count_nonzero(agree)/n_both
n_disagree_observed = np.count_nonzero(~agree & observed)
n_disagree_candidates = np.count_nonzero(~agree & (data_intersection['guide'] | data_intersection['acq']))

disagree_mag_out_indirect_2_5 = np.abs(data_intersection["mag_pred_indirect"] - data_intersection["MAG"]) > 2.5
disagree_mag_out_direct_2_5 = np.abs(data_intersection["mag_pred_direct"] - data_intersection["MAG"]) > 2.5
disagree_mag_out_indirect_5 = np.abs(data_intersection["mag_pred_indirect"] - data_intersection["MAG"]) > 5
disagree_mag_out_direct_5 = np.abs(data_intersection["mag_pred_direct"] - data_intersection["MAG"]) > 5
disagree_radial_outlier_direct_3 = data_intersection["d2d_direct"] > 3
disagree_radial_outlier_indirect_3 = data_intersection["d2d_indirect"] > 3
disagree_radial_outlier_direct_5 = data_intersection["d2d_direct"] > 5
disagree_radial_outlier_indirect_5 = data_intersection["d2d_indirect"] > 5

n_disagree_both_mag_outliers_2_5 = np.count_nonzero(agree & disagree_mag_out_indirect_2_5)
n_disagree_indirect_mag_outliers_2_5 = np.count_nonzero(~agree & disagree_mag_out_indirect_2_5)
n_disagree_direct_mag_outliers_2_5 = np.count_nonzero(~agree & disagree_mag_out_direct_2_5)

n_disagree_both_mag_outliers_5 = np.count_nonzero(agree & disagree_mag_out_indirect_5)
n_disagree_indirect_mag_outliers_5 = np.count_nonzero(~agree & disagree_mag_out_indirect_5)
n_disagree_direct_mag_outliers_5 = np.count_nonzero(~agree & disagree_mag_out_direct_5)

n_disagree_direct_radial_outliers_3 = np.count_nonzero(~agree & disagree_radial_outlier_direct_3)
n_disagree_indirect_radial_outliers_3 = np.count_nonzero(~agree & disagree_radial_outlier_indirect_3)
n_disagree_both_radial_outliers_3 = np.count_nonzero(agree & disagree_radial_outlier_direct_3)

n_disagree_direct_radial_outliers_5 = np.count_nonzero(~agree & disagree_radial_outlier_direct_5)
n_disagree_indirect_radial_outliers_5 = np.count_nonzero(~agree & disagree_radial_outlier_indirect_5)
n_disagree_both_radial_outliers_5 = np.count_nonzero(agree & disagree_radial_outlier_direct_5)

n_large_d2d_today = np.count_nonzero(data_intersection["d2d_today"] > 5)
n_large_d2d_direct = np.count_nonzero(data_intersection["d2d_direct"] > 3)
n_large_d2d_indirect = np.count_nonzero(data_intersection["d2d_indirect"] > 3)
n_disagree_indirect_mag_outliers_2_5 = np.count_nonzero(abs(data_intersection["mag_pred_indirect"] - data_intersection["MAG"]) > 5 )
n_disagree_direct_mag_outliers_2_5 = np.count_nonzero(abs(data_intersection["mag_pred_direct"] - data_intersection["MAG"]) > 5 )
n_disagree_indirect_p_relative = np.count_nonzero(data_intersection["p_relative_indirect"] < 0.5)
n_disagree_direct_p_relative = np.count_nonzero(data_intersection["p_relative_direct"] < 0.5)

txt = f"""

Disagreements in terms of which stars are matched to a Gaia counterpart:

| condition | both | direct | indirect |
| --- | --- | --- | --- |
| any | {n_both} | {n_direct_no_indirect} ({n_direct_no_indirect/n_both:.1%}) | {n_indirect_no_direct} ({n_indirect_no_direct/n_both:.1%})|
| guide or acq | {n_both_candidates} | {n_direct_no_indirect_candidates} ({n_direct_no_indirect_candidates/n_both_candidates:.1%}) | {n_indirect_no_direct_candidates} ({n_indirect_no_direct_candidates/n_both_candidates:.1%}) |
| observed | {n_both_observed} | {n_direct_no_indirect_observed} ({n_direct_no_indirect_observed/n_both_observed:.1%}) | {n_indirect_no_direct_observed} ({n_indirect_no_direct_observed/n_both_observed:.1%}) |
| d2d > 3 | {n_both_radial_outliers_3} | {n_direct_radial_outliers_3} | {n_indirect_radial_outliers_3} |
| d2d > 5 | {n_both_radial_outliers_5} | {n_direct_radial_outliers_5} | {n_indirect_radial_outliers_5} |
| abs(mag_pred - MAG) > 2.5 | {n_both_mag_outliers_2_5} | {n_direct_mag_outliers_2_5} | {n_indirect_mag_outliers_2_5} |
| abs(mag_pred - MAG) > 5 | {n_both_mag_outliers_5} | {n_direct_mag_outliers_5} | {n_indirect_mag_outliers_5} |

Disagreements among the stars that are matched to a Gaia counterpart in both methods: 

| | in both | disagree |
| --- | --- | --- |
| any | {n_both} | {n_disagree} ({n_disagree/n_both:.3%}) |
| candidates | {n_both_candidates} | {n_disagree_candidates} ({n_disagree_candidates/n_both_candidates:.3%})  |
| observed | {n_both_observed} | {n_disagree_observed} ({n_disagree_observed/n_both_observed:.3%})  |
| d2d_direct > 3 arcsec | | {n_large_d2d_direct} |
| d2d_indirect > 3 arcsec | | {n_large_d2d_indirect} |
| d2d_today > 5 | | {n_large_d2d_today} |
| abs(mag_pred - MAG) > 5 (indirect)| | {n_disagree_indirect_mag_outliers_2_5} |
| abs(mag_pred - MAG) > 5 (direct)| | {n_disagree_direct_mag_outliers_2_5} |
| p_relative_direct < 0.5 | | {n_disagree_direct_p_relative} |
| p_relative_indirect < 0.5 | | {n_disagree_indirect_p_relative} |
"""
display(Markdown(txt))

fig, axes = plt.subplot_mosaic([["ra", "dec"]], figsize=(9, 4), sharex=True, sharey=True)
plt.suptitle("$\Delta PM$")
plt.sca(axes["ra"])
plt.plot(disagree['pm_ra_direct'] - disagree['PM_RA'], disagree['pm_ra_indirect'] - disagree['PM_RA'], '.')
plt.xlabel("$\Delta$PM$_{RA}$ direct")
plt.ylabel("$\Delta$PM$_{RA}$ indirect")
plt.title("RA")

plt.sca(axes["dec"])
plt.plot(disagree['pm_dec_direct'] - disagree['PM_DEC'], disagree['pm_dec_indirect'] - disagree['PM_DEC'], '.')
plt.xlabel("$\Delta$PM$_{DEC}$ direct")
plt.ylabel("$\Delta$PM$_{DEC}$ indirect")
plt.ylim((-1000, 1000))
plt.xlim((-1000, 1000))
plt.title("DEC")

fig, axes = plt.subplot_mosaic([["ra", "dec"]], figsize=(9, 4), sharex=True, sharey=True)
plt.suptitle("$\Delta PM$ (zoomed in)")
                               
plt.sca(axes["ra"])
plt.plot(disagree['pm_ra_direct'] - disagree['PM_RA'], disagree['pm_ra_indirect'] - disagree['PM_RA'], '.')
plt.xlabel("$\Delta$PM$_{RA}$ direct")
plt.ylabel("$\Delta$PM$_{RA}$ indirect")
plt.title("RA")

plt.sca(axes["dec"])
plt.plot(disagree['pm_dec_direct'] - disagree['PM_DEC'], disagree['pm_dec_indirect'] - disagree['PM_DEC'], '.')
plt.xlabel("$\Delta$PM$_{DEC}$ direct")
plt.ylabel("$\Delta$PM$_{DEC}$ indirect")
plt.ylim((-100, 100))
plt.xlim((-100, 100))
plt.title("DEC")

Text(0.5, 1.0, 'DEC')

fig, axes = plt.subplot_mosaic([["ra", "dec"]], figsize=(9, 4))
                               
plt.suptitle("1p7-Direct")
plt.sca(axes["ra"])
plt.plot(disagree['PM_RA'], disagree['pm_ra_indirect'], '.')
plt.xlabel("PM_RA 1p7")
plt.ylabel("PM_RA indirect")
plt.title("RA")

plt.sca(axes["dec"])
plt.plot(disagree['PM_DEC'], disagree['pm_dec_indirect'], '.')
plt.xlabel("PM_DEC 1p7")
plt.ylabel("PM_DEC indirect")
plt.title("DEC")


fig, axes = plt.subplot_mosaic([["ra", "dec"]], figsize=(9, 4))

plt.suptitle("1p7-Direct")

plt.sca(axes["ra"])
plt.plot(disagree['PM_RA'], disagree['pm_ra_direct'], '.')
plt.xlabel("PM_RA 1p7")
plt.ylabel("PM_RA direct")
plt.title("RA")

plt.sca(axes["dec"])
plt.plot(disagree['PM_DEC'], disagree['pm_dec_direct'], '.')
plt.xlabel("PM_DEC 1p7")
plt.ylabel("PM_DEC direct")
plt.title("DEC")

Text(0.5, 1.0, 'DEC')

bins = np.logspace(-3, 2, 100)
plt.hist(
    data_intersection["d2d_direct"][~agree],
    bins=bins,
    histtype='step',
    label='direct-1p7',
)
plt.hist(
    data_intersection["d2d_indirect"][~agree],
    bins=bins,
    histtype='step',
    label='indirect-1p7',
)
# plt.hist(
#     data_intersection["d2d"][~agree],
#     bins=bins,
#     histtype='step',
#     label='indirect-direct',
# )
plt.xscale('log')
plt.legend()
plt.xlabel("d2d (arcsec)")
plt.tight_layout()

bins = np.linspace(0, 4, 100)
plt.hist(
    data_intersection["d2d_direct"][~agree],
    bins=bins,
    histtype='step',
    label='direct-1p7',
)
plt.hist(
    data_intersection["d2d_indirect"][~agree],
    bins=bins,
    histtype='step',
    label='indirect-1p7',
)
# plt.hist(
#     data_intersection["d2d"][~agree],
#     bins=bins,
#     histtype='step',
#     label='indirect-direct',
# )
plt.legend()
plt.xlabel("d2d (arcsec)")
plt.tight_layout()

plt.hist(
    data_intersection["d2d_today"][~agree],
    bins=np.logspace(-3, 2, 100),
    histtype="step",
)
plt.xscale('log')
plt.yscale('log')

data_intersection[data_intersection['d2d_today'] > 10][['AGASC_ID', 'd2d_today']]

plt.hist(
    disagree["d2d_direct"] - disagree["d2d_indirect"],
    bins=np.linspace(-10, 10, 100),
    histtype='step',
    );
plt.yscale('log')

fig, axes = plt.subplot_mosaic(
    [
        ["direct-indirect", "1p7-indirect"],
        ["1p7-direct", "."]
    ],
    figsize=(9, 8)
)

plt.sca(axes["direct-indirect"])
bins = np.linspace(0, 5, 50)
sns.histplot(
    x=disagree["d2d_direct"],
    y=disagree["d2d_indirect"],  
    bins=[bins, bins],
)
plt.sca(axes["1p7-indirect"])
bins = np.linspace(0, 5, 50)
sns.histplot(
    x=disagree["d2d"],
    y=disagree["d2d_indirect"],  
    bins=[bins, bins],
)
plt.sca(axes["1p7-direct"])
bins = np.linspace(0, 5, 50)
sns.histplot(
    x=disagree["d2d"],
    y=disagree["d2d_direct"],  
    bins=[bins, bins],
)

<Axes: label='1p7-direct', xlabel='d2d', ylabel='d2d_direct'>

bins = np.linspace(-10, 10, 100)
plt.hist(
    (data_intersection["mag_pred_direct"] - data_intersection["MAG"])[~agree],
    bins=bins,
    histtype='step',
    label='direct',
)
plt.hist(
    (data_intersection["mag_pred_indirect"] - data_intersection["MAG"])[~agree],
    bins=bins,
    histtype='step',
    label='Tycho2-GSC2.3',
)
plt.xlabel('mag_1p8 - mag_1p7')
plt.yscale('log')
plt.legend()
plt.tight_layout()

agasc_id = 966533624
# agasc_id = 680536288
report = star_report.Report(agasc_id)
report = star_report.Report(agasc_id, alternates={'direct': 5974018828337490944, "indirect": 5974018828340131456})
# report.show_in_notebook()
# report.save_html("bla")
fig = report.plotly_fig()
fig.show()

disagree[disagree['p_value_indirect'] > 0.99][['agasc_id', 'p_value_direct', 'p_value_indirect', 'gaia_id_direct', 'gaia_id_indirect']]

agasc_id = 530408
report = star_report.Report(agasc_id, alternates=dict( disagree[disagree['agasc_id'] == agasc_id][['gaia_id_direct', 'gaia_id_indirect']][0]))
# report.show_in_notebook()
# report.save_html("bla")
fig = report.plotly_fig()
fig.show()

plt.hist(
    disagree['p_value_direct'],
    bins=np.linspace(0, 1, 100),
    histtype='step',
    );
plt.hist(
    disagree['p_value_indirect'],
    bins=np.linspace(0, 1, 100),
    histtype='step',
    );
plt.yscale('log')

plt.hist(
    data['p_value_direct'],
    bins=np.linspace(0, 1, 100),
    histtype='step',
    label="direct",
    )
plt.hist(
    data['p_value_indirect'],
    bins=np.linspace(0, 1, 100),
    histtype='step',
    label="indirect",
    )
plt.yscale('log')
plt.xlabel("p-value")
plt.legend()
plt.tight_layout()

for col in ['d2d_direct', 'd2d_indirect', 'd_mag_direct', 'd_mag_indirect']:
    data_intersection[col].format = "{:.2f}"
for col in ['p_value_direct', 'p_value_indirect']:
    data_intersection[col].format = "{:.4f}"

data_intersection[~agree & observed][[
    'agasc_id',
    'gaia_id_direct',
    'gaia_id_indirect',
    'd_mag_direct',
    'd2d_direct',
    'd_mag_indirect',
    'd2d_indirect',
    'p_value_direct',
    'p_value_indirect',
]].pprint(max_width=-1, max_lines=-1)

 agasc_id     gaia_id_direct     gaia_id_indirect  d_mag_direct d2d_direct d_mag_indirect d2d_indirect p_value_direct p_value_indirect
                                                                  arcsec                     arcsec                                   
---------- ------------------- ------------------- ------------ ---------- -------------- ------------ -------------- ----------------
   6687480    4760816628892160    4760820923834112         0.19       1.25           0.23         0.69         0.3632           0.3498
   9446616 3273969976794445056 3273969972497767552         0.14       0.21           0.16         0.17         0.5646           0.5625
  32243760 3831419417137291904 3831419412842627456         0.18       0.71           0.18         0.88         0.5498           0.0414
  36444624 3897649565189277952 3897649565189298688         0.02       0.27           8.11         0.67         0.6729           0.0001
  52053176 4392691618897267200 4392691618894632064         0.05       0.24           0.17         1.27         0.7053           0.6406
  77604992 2756819802270040064 2756819802269026688         0.27       1.33           0.27         0.40         0.3410           0.2008
  81535712 2568639108730134016 2568639108730133888         0.69       0.46           5.03         3.08         0.0424           0.0000
  87428640 3299310421279505152 3299310421279819776        -0.06       0.71           0.03         0.59         0.9111           0.1986
 170537688 3397818657309068288 3397818657308098048         0.10       0.15           0.14         0.27         0.7330           0.4884
 176951584 3360388906382410752 3360388910675923584        -0.00       0.04            nan         0.67         0.9928              nan
 183638376  607916787837169152  607916783542120448         0.17       0.44           0.25         0.36         0.5880           0.3473
 210767352 4516726426141479808 4516726426144371456         0.10       0.17           7.27         1.33         0.7069           0.0000
 265030112 1282430921954322048 1282430921955436032         0.05       0.07            nan         0.32         0.9218              nan
 295583712 2852925360578759424 2852925364875262080         0.00       0.07           0.18         0.74         0.9758           0.0907
 296757336 2863668486831283840 2863668486830442368         0.18       0.11           0.19         0.19         0.5751           0.4824
 382992664  207915763322753408  207915763325485696        -0.00       0.04            nan         0.35         0.9933              nan
 417858744 1954085707369157888 1954085703068556800         0.14       0.18           0.16         0.35         0.6058           0.5821
 420499768 1957445058987177856 1957445058984715648         0.06       0.04            nan         1.40         0.9176              nan
 496248896  985405277415464448  985405277413204480         0.11       1.15           0.16         0.82         0.6878           0.5409
 541984456 1039008251679233792 1039008255973683840         0.14       0.10            nan         0.30         0.6885              nan
 576595136 1690468547538461184 1690468551833654144         0.04       0.03            nan         0.87         0.9661              nan
 593245120 1115458948720469632 1115458948720469888         0.07       0.04            nan         0.87         0.8892              nan
 623249168 3214697297806159616 3214697293514063616         0.13       0.15           0.31         1.08         0.6591           0.2472
 643965560 3806051759739888256 3806051755444147584         0.42       0.19           0.44         0.24         0.2080           0.1721
 731121880 6333200204490021888 6333200208784059648         0.07       0.07           3.21         1.21         0.8724           0.0001
 766651552 2362388594423841792 2362388594422948480         0.08       0.58           0.15         0.22         0.8048           0.5443
 908462360 6801811888448490624 6801811892745605632         0.08       0.11           0.46         1.00         0.8360           0.0689
 939393312 5440213797128328064 5440213801426139008         0.13       0.20           0.31         0.64         0.6198           0.2590
 966525880 5975153215090484224 5975153210796430336         0.05       0.49           0.16         0.27         0.7706           0.5602
 966533624 5974018828337490944 5974018828340131456         0.06       0.23          11.73         0.83         0.6970           0.0000
 977410600 6793999961413449216 6793999965707654656         0.01       0.19           0.19         0.70         0.8201           0.0780
 997071584 4799566167336113536 4799566171630256000         0.14       0.05           0.62         0.33         0.7106           0.0822
1038767952 6714057120955575424 6714057120952340864         0.12       0.04           6.62         0.96         0.7689           0.0000
1054738896 4913415652886181504 4913415652884481792         0.13       0.10           6.40         0.60         0.7053           0.0003
1057103368 4737095837951376128 4737095837950650624         0.23       1.15           0.23         0.95         0.4613           0.4151
1089212088 5985256936705385344 5985256936695918720         0.14       0.06          12.23         1.08         0.7094           0.0000
1089217664 5984852694390760960 5984852694382630656        -0.01       0.06           2.65         0.93         0.9816           0.0005
1091193912 5942780839701934208 5942780844009984512        -0.16       0.13           1.32         0.56         0.6246           0.0084
1106379424 6561257337207141120 6561257337207686912         0.07       0.06            nan         1.05         0.8844              nan
1141794848 5932241509752072576 5932241509774006656         0.07       0.05          12.80         0.90         0.9015           0.0000
1142837608 5832158120199627392 5832158120187577344         0.05       0.03            nan         0.82         0.9498              nan
1186733048 5911787191757998208 5911787191762479872         0.00       0.05            nan         1.06         0.9865              nan
1201407648 4652097129542392448 4652097125197823232         0.20       0.57           0.23         0.29         0.5154           0.3239

observed_disagree = data_intersection[~agree & observed][[
    'agasc_id',
    'gaia_id_direct',
    'gaia_id_indirect',
    'p_value_direct',
    'p_value_indirect',
    'd_mag_direct',
    'd2d_direct',
    'd_mag_indirect',
    'd2d_indirect'
]]

np.random.seed(1186733048)
# generate_reports = True
if generate_reports:
    print(f"Making report with {len(observed_disagree)} stars")

    description = """
    <p>This list includes stars that have been observed, and the direct and indirect methods disagree. </p>

    <p>This is the comparison that led us to ditch the indirect method. It seems to have two issues</p>
    <ol>
      <li> It does not take magnitude into account. </li>
      <li> Some stars with proper motion seem to be wrongly matched. </li>
    </ol>

    <p>
    The purpose of this report is to check whether outliers can be caused by misidentification.
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>

    <h2> Notable examples:</h2>
    <ul>
      <li>
        <a href="report_36444624.html"> 36444624 </a>. The indirect method does not use magnitude as a criterion, and matches a star with the wrong magnitude. The correct match is actually closer if one considers proper motion.
      </li>
      <li>
        <a href="report_52053176.html"> 52053176 </a>. Maybe the indirect method did not consider proper motion?
      </li>
      <li>
        <a href="report_170537688.html"> 170537688 </a>. This one should be a blend of two stars. The direct method chooses one, and the indirect method the other. Still, the indirect method does not choose the closest one.
      </li>
      <li>
        <a href="report_176951584.html"> 176951584 </a>. The indirect method gives the star with the wrong magnitude even though the right one is closer.
      </li>

    </ul>

    <h2> Stars </h2>
    """

    reports_dir = "/Users/javierg/SAO/Notebooks/agasc/gaia-magnitudes-2023/reports/direct-indirect/observed-disagree"
    star_report.make_report_list(
        data=observed_disagree,
        path=reports_dir,
        title='Observed Stars with Different direct/indirect matches',
        description=description,
        alternates=['direct', 'indirect'],
        overwrite=True
    )

candidates_disagree = data_intersection[~agree & (data_intersection['guide'] | data_intersection['acq'])]
candidates_disagree.sort('mag_aca')
# candidates.pprint(max_width=-1, max_lines=-1)

candidates_disagree = candidates_disagree[[
    'agasc_id',
    'gaia_id_direct',
    'gaia_id_indirect',
    'p_value_direct',
    'p_value_indirect',
    'd_mag_direct',
    'd2d_direct',
    'd_mag_indirect',
    'd2d_indirect'
]][:100]

np.random.seed(1186733048)
# generate_reports = True
if generate_reports:
    print(f"Making report with {len(candidates_disagree)} stars")

    description = """
    <p>This list includes stars that are candidates, and the direct and indirect methods disagree. </p>

    <p>This is the comparison that led us to ditch the indirect method. It seems to have two issues</p>
    <ol>
      <li> It does not take magnitude into account. </li>
      <li> Some stars with proper motion seem to be wrongly matched. </li>
    </ol>

    <p>
    The purpose of this report is to check whether outliers can be caused by misidentification.
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>

    <h2> Notable examples:</h2>
    <ul>

    </ul>

    <h2> Stars </h2>
    """

    reports_dir = "/Users/javierg/SAO/Notebooks/agasc/gaia-magnitudes-2023/reports/direct-indirect/candidates-disagree"
    star_report.make_report_list(
        data=candidates_disagree,
        path=reports_dir,
        title='Candidate Guide/Acq Stars with Different direct/indirect matches',
        description=description,
        alternates=['direct', 'indirect'],
        overwrite=True
    )

disagree = data_intersection[~agree]
disagree[disagree['d2d'] > 3][['AGASC_ID', 'gaia_id_direct', 'gaia_id_indirect', 'd2d_direct', 'd2d_indirect']]

agasc_ids = np.unique(np.concatenate(
    [
    [
        1185232,
        1185880,
        2754344,
        2892808,
        7609200,
        8657120,
        10881736,
        11274712,
        11802440,
        1213475776,
        1213613240,
        1214128920,
        1214384968,
        1214519736,
        1214648312,
        1214664424,
        1248595144,
    ],
    disagree[disagree['d2d'] > 3]['AGASC_ID'][:40]
    ]
))
large_d2d = disagree[np.in1d(disagree['agasc_id'], agasc_ids)][[
    'agasc_id',
    'gaia_id_direct',
    'gaia_id_indirect',
    'd_mag_direct',
    'd2d_direct',
    'd_mag_indirect',
    'd2d_indirect',
    'p_value_direct',
    'p_value_indirect',
]]

np.random.seed(1248595144)
# generate_reports = True
if generate_reports:
    print(f"Making report with {len(large_d2d)} stars")

    description = """
    <p>This list includes stars that have been observed, and the direct and indirect methods disagree. </p>

    <p>This is the comparison that led us to ditch the indirect method. It seems to have two issues</p>
    <ol>
      <li> It does not take magnitude into account. </li>
      <li> Some stars with proper motion seem to be wrongly matched. </li>
    </ol>

    <p>Anyway, these are all faint stars.</p>

    <p>
    The purpose of this report is to check whether outliers can be caused by misidentification.
    When looking at the report for a given outlier, consider that if the true match is a star with no
    proper motion in Gaia, then there should be an AGASC star matched to this Gaia star. Is there one?
    </p>

    <p>If you double-click on a report's figure, it will zoom out and show you all AGASC stars around.</p>

    <h2> Notable examples:</h2>
    <ul>

      <li>
        <a href="report_1185880.html"> 1185880 </a>.
        direct method gives a star that is closer in magnitude and angular separation
        no idea why the indirect method gives another
      </li>

      <li>
        <a href="report_10881736.html"> 10881736 </a>.
        two stars close in magnitude with the AGASC star right in between (AGASC was not resolved)
        the direct method gives a star that is MUCH closer in magnitude and angular separation
      </li>

      <li>
        <a href="report_1214128920.html"> 1214128920 </a>.
        the direct method gives a star that is MUCH closer in magnitude and angular separation
      </li>

      <li>
        <a href="report_1214648312.html"> 1214648312 </a>.
        the direct method gives a star that is MUCH closer in magnitude and angular separation
        still, this is a faint star, so irrelevant for us
      </li>
    </ul>

    </ul>

    <h2> Stars </h2>
    """

    reports_dir = "/Users/javierg/SAO/Notebooks/agasc/gaia-magnitudes-2023/reports/direct-indirect/large_d2d"
    star_report.make_report_list(
        data=large_d2d,
        path=reports_dir,
        title='Distance between direct and indirect matches > 3 arcsec',
        description=description,
        alternates=['direct', 'indirect'],
        overwrite=True
    )

AGASC_ID	d2d_today
	arcsec
int32	float64
13770888	37.63962906801418
44828928	17.38145521107779
56629648	24.798016439675187
64100472	25.144329520220083
295182256	46.28792664226764
328992728	28.51402100385447
400826496	26.660939304221365
586811608	39.33190385339832
685910296	14.346379809274778
1082400928	44.33634978852832
1092224544	12.218642797037425

AGASC_ID	gaia_id_direct	gaia_id_indirect	d2d_direct	d2d_indirect
			arcsec	arcsec
int32	int64	int64	float64	float64
1185880	2555582442510174208	2555582438215436672	0.95	2.87
10881736	3230906126423182336	3230906126423182592	0.56	3.38
13770888	3235885299189856384	3235885402267895936	1.19	3.21
15990856	3236565892590502656	3236565892595740800	1.30	1.76
16255456	3320471072312202880	3320471072312203136	2.38	1.10
20201336	3131384170742362112	3131384170736025344	0.19	3.66
21893408	3115206132743613824	3115206132738760448	0.82	2.66
26616568	3094626126928169600	3094626126928170240	1.45	1.83
32637200	3856621907273557376	3856621907273557504	1.22	2.28
...	...	...	...	...
1181491160	5849591907847389440	5849591907847641984	0.46	4.05
1182531872	5875721183218108160	5875721183218108672	1.31	2.17
1183204944	5832534702911047040	5832534702911047680	1.06	3.69
1183975024	5824353752266031232	5824353752266031488	1.38	1.99
1183980544	5824702537975197440	5824702537975197184	0.86	3.65
1207468040	5238263820712678144	5238264022569299712	0.67	2.98
1209151256	5235047508682934144	5235047474323195904	1.17	3.21
1210454568	5842593653714409984	5842593653714410368	1.14	1.97
1214128920	5799016331422764288	5799016331422763264	0.17	3.15
1214648312	5795492357996370048	5795492362281899520	0.22	3.38

High Level Plots¶

Some numbers¶

Proper Motion¶

Radial Offset¶

Magnitude Changes¶

Reports¶

Checking p_value¶

Observed¶

Candidates¶

stars with d2d > 3 arcsec¶

condition	both	direct	indirect
any	8467466	489879 (5.8%)	13169 (0.2%)
guide or acq	436738	6991 (1.6%)	593 (0.1%)
observed	92432	2194 (2.4%)	1 (0.0%)
d2d > 3	7	418	1174
d2d > 5	2	187	7
abs(mag_pred - MAG) > 2.5	4528	643	1351
abs(mag_pred - MAG) > 5	642	111	503

	in both	disagree
any	8467466	4896 (0.058%)
candidates	436738	379 (0.087%)
observed	92432	43 (0.047%)
d2d_direct > 3 arcsec		7
d2d_indirect > 3 arcsec		259
d2d_today > 5		52
abs(mag_pred - MAG) > 5 (indirect)		632
abs(mag_pred - MAG) > 5 (direct)		74
p_relative_direct < 0.5		3
p_relative_indirect < 0.5		102