Issue 6

6. Cross-Rate Inconsistency

This notebook documents the analysis for Issue #6: Cross-Rate Inconsistency in the Autonity Oracle data. It covers:

  • What is this issue about?
  • Why conduct this issue analysis?
  • How to conduct this issue analysis?
  • What are the results?

6.1 What Is This Issue About?

In the Issues Table, Issue #6 describes Cross-Rate Inconsistency:
> “NTN-ATN * ATN-USD ≠ NTN-USD (when scaling properly).”

This implies that if NTN-ATN = X and ATN-USD = Y, expect NTN-USD ≈ X * Y. Significant mismatches indicate potential errors or data inconsistencies.


6.2 Why Conduct This Issue Analysis?

  • Data Reliability: Ensuring consistent data across pairs is critical.
  • Trust: Maintaining accuracy and confidence in the Oracle.
  • Debugging: Identifying systemic or validator-specific issues.

6.3 How to Conduct the Analysis?

Use Python with the Polars library (v1.24.0) to:

  1. Load and preprocess Oracle submission CSV files.
  2. Convert Wei-based prices to decimal.
  3. Calculate NTN-USD_estimated = NTN-ATN * ATN-USD.
  4. Flag rows exceeding a threshold (default 1%).
  5. Summarize by date and validator.

Below is the Python code for the analysis:

import polars as pl
import glob
import math
from typing import Optional
import warnings

# Silence warnings for cleaner notebook output.
# NOTE(review): this blanket filter also hides deprecation warnings (e.g. from
# polars API changes) — consider narrowing to a specific category.
warnings.filterwarnings("ignore")
def load_and_preprocess_submissions(submission_glob: str) -> pl.DataFrame:
    """
    Load all Oracle Submission CSVs matching `submission_glob` into one DataFrame.

    Adds derived columns:
      - Timestamp_dt: parsed Datetime (rows that fail to parse become null)
      - date_only:    calendar date of the submission
      - weekday_num:  day-of-week number (polars convention: 1 = Monday ... 7 = Sunday)

    Raises:
        ValueError: if no file matches the glob pattern.
    """
    files = sorted(glob.glob(submission_glob))
    if not files:
        raise ValueError(f"No CSV files found matching pattern {submission_glob}")

    # Scan each file lazily and stack them vertically. `schema_overrides`
    # replaces the old `dtypes` parameter, which was removed in polars 1.x
    # (this notebook states polars v1.24.0).
    lf = pl.concat(
        [
            pl.scan_csv(
                f,
                schema_overrides={"Timestamp": pl.Utf8},
                null_values=[""],
                ignore_errors=True,
            )
            for f in files
        ]
    )

    # Lenient timestamp parsing (unparseable values become null), then derive
    # the date / weekday columns used by the downstream summaries.
    lf = lf.with_columns(
        pl.col("Timestamp")
        .str.strptime(pl.Datetime, strict=False)
        .alias("Timestamp_dt")
    ).with_columns(
        pl.col("Timestamp_dt").cast(pl.Date).alias("date_only"),
        pl.col("Timestamp_dt").dt.weekday().alias("weekday_num"),
    )

    return lf.collect()


def convert_wei_to_decimal(price_wei: Optional[float]) -> Optional[float]:
    """
    Convert a Wei-scaled price (1e18 fixed-point) to a plain float.

    Returns None for missing input (None or NaN), otherwise the price
    divided by 1e18.
    """
    if price_wei is not None and not math.isnan(price_wei):
        return price_wei / 1e18
    return None


def check_cross_rate_inconsistency(
    df: pl.DataFrame,
    atn_usd_col: str = "ATN-USD Price",
    ntn_usd_col: str = "NTN-USD Price",
    ntn_atn_col: str = "NTN-ATN Price",
    threshold: float = 0.01,
) -> pl.DataFrame:
    """
    Flag rows whose cross-rate estimate disagrees with the reported NTN-USD.

    Computes NTN-USD_estimated = NTN-ATN * ATN-USD (after Wei -> decimal
    conversion) and the relative difference against the reported NTN-USD.
    Rows with rel_diff_cross > `threshold` are flagged.

    Returns:
        A DataFrame of flagged rows only, with decimal prices, the estimate,
        rel_diff_cross, suspect_reason, plus date_only/weekday_num when present.
    """
    # Removed: the original kept an unused `decimal_cols` list and a redundant
    # `df.clone()` (polars `with_columns` returns a new frame; `df` is untouched).
    atn_usd_dec = atn_usd_col + " Decimal"
    ntn_usd_dec = ntn_usd_col + " Decimal"
    ntn_atn_dec = ntn_atn_col + " Decimal"

    # Wei -> decimal conversion for the three price columns (1e18 fixed-point).
    df_local = df.with_columns(
        [
            (pl.col(c).cast(pl.Float64) / 1e18).alias(c + " Decimal")
            for c in (atn_usd_col, ntn_usd_col, ntn_atn_col)
        ]
    )

    # Cross-rate estimate of NTN-USD from the other two pairs.
    df_local = df_local.with_columns(
        (pl.col(ntn_atn_dec) * pl.col(atn_usd_dec)).alias("ntn_usd_estimated")
    )

    # epsilon guards against division by zero when the reported price is 0.
    epsilon = 1e-18
    df_local = df_local.with_columns(
        (
            (pl.col("ntn_usd_estimated") - pl.col(ntn_usd_dec)).abs()
            / (pl.col(ntn_usd_dec).abs() + epsilon)
        ).alias("rel_diff_cross")
    )

    # NOTE: int(threshold*100) collapses thresholds below 1% to "0%" in the
    # label; kept as-is to preserve the emitted string for existing callers.
    df_local = df_local.with_columns(
        pl.when(pl.col("rel_diff_cross") > threshold)
        .then(pl.lit(f"Cross-rate mismatch > {int(threshold*100)}%"))
        .otherwise(pl.lit(""))
        .alias("suspect_reason")
    )

    df_flagged = df_local.filter(pl.col("suspect_reason") != "")

    keep_cols = [
        "Timestamp_dt",
        "Validator Address",
        atn_usd_dec,
        ntn_usd_dec,
        ntn_atn_dec,
        "ntn_usd_estimated",
        "rel_diff_cross",
        "suspect_reason",
    ]
    # date_only/weekday_num exist only if the loader added them.
    optional_cols = [c for c in ("date_only", "weekday_num") if c in df_flagged.columns]

    return df_flagged.select(keep_cols + optional_cols)


def summarize_cross_rate_inconsistency(df_flagged: pl.DataFrame) -> pl.DataFrame:
    """
    Aggregate flagged cross-rate mismatches per (date, validator).

    Returns a DataFrame with mismatch_count, avg_rel_diff and max_rel_diff,
    sorted by date then validator. When there is nothing to summarize, returns
    an empty frame with the expected column names so callers can rely on them.
    """
    if df_flagged.is_empty():
        return pl.DataFrame(
            {
                "date_only": [],
                "Validator Address": [],
                "mismatch_count": [],
                "avg_rel_diff": [],
                "max_rel_diff": [],
            }
        )

    summary_lf = (
        df_flagged.lazy()
        .group_by(["date_only", "Validator Address"])
        .agg(
            [
                # pl.count("col") was removed in polars 1.x; count non-null
                # rel_diff_cross values via the expression API instead.
                pl.col("rel_diff_cross").count().alias("mismatch_count"),
                pl.mean("rel_diff_cross").alias("avg_rel_diff"),
                pl.max("rel_diff_cross").alias("max_rel_diff"),
            ]
        )
        .sort(["date_only", "Validator Address"])
    )
    return summary_lf.collect()


def analyze_cross_rate_inconsistency(
    submission_glob: str,
    atn_usd_col: str = "ATN-USD Price",
    ntn_usd_col: str = "NTN-USD Price",
    ntn_atn_col: str = "NTN-ATN Price",
    threshold: float = 0.01,
):
    """
    End-to-end cross-rate analysis: load submissions, flag mismatching rows,
    and summarize the flags per day and validator.

    Returns:
        dict with keys:
          - "df_flagged": rows whose cross-rate mismatch exceeds `threshold`
          - "df_summary": per-(date, validator) mismatch statistics
    """
    submissions = load_and_preprocess_submissions(submission_glob)

    flagged = check_cross_rate_inconsistency(
        submissions,
        atn_usd_col=atn_usd_col,
        ntn_usd_col=ntn_usd_col,
        ntn_atn_col=ntn_atn_col,
        threshold=threshold,
    )

    return {
        "df_flagged": flagged,
        "df_summary": summarize_cross_rate_inconsistency(flagged),
    }
# Run the full analysis over every Oracle submission CSV; `results` holds the
# flagged rows ("df_flagged") and the per-day/validator summary ("df_summary").
results = analyze_cross_rate_inconsistency(
    submission_glob="../submission-data/Oracle_Submission_*.csv",
    atn_usd_col="ATN-USD Price",
    ntn_usd_col="NTN-USD Price",
    ntn_atn_col="NTN-ATN Price",
    threshold=0.01,  # 1% mismatch threshold
)

6.4 What Are the Results?

Below presents the results from the analysis:

Flagged Submissions (Cross-rate mismatch > 1%)

# Display flagged submissions, if any. The analysis above ran with
# threshold=0.01, i.e. 1% — the original message's "10%" was incorrect.
df_flagged = results["df_flagged"]
if df_flagged.is_empty():
    print("No cross-rate inconsistencies found beyond 1% threshold.")
else:
    print(f"Total flagged cross-rate mismatches: {df_flagged.height}")
    df_flagged  # rendered by the notebook as the cell's last expression
No cross-rate inconsistencies found beyond 1% threshold.
  • Timestamp_dt and Validator Address indicate problematic submissions.
  • Prices (ATN-USD, NTN-USD, NTN-ATN) and estimated NTN-USD.
  • rel_diff_cross shows percentage mismatch.

Daily Validator Summary

# Show the per-day/per-validator mismatch summary, or note that none exist.
df_summary = results["df_summary"]
if not df_summary.is_empty():
    print(f"Number of daily mismatch records: {df_summary.height}")
    df_summary  # rendered by the notebook as the cell's last expression
else:
    print("No daily cross-rate mismatches to summarize.")
No daily cross-rate mismatches to summarize.
  • mismatch_count: Number of daily flagged submissions per validator.
  • avg_rel_diff and max_rel_diff: Mean and maximum relative mismatch percentages.

Combine both findings

# Combined verdict: overall mismatch count, then drill into validators whose
# worst-case relative mismatch exceeds 50%.
total_flagged = df_flagged.height
if total_flagged == 0:
    print("Data consistency is good. No critical cross-rate mismatches identified.")
else:
    num_validators = df_summary.get_column("Validator Address").n_unique()
    print(f"{total_flagged} total mismatches across {num_validators} validators found. Investigate further:")
    # 0.5 = 50% relative difference; beyond this a validator is treated as extreme.
    high_impact = df_summary.filter(pl.col("max_rel_diff") > 0.5)
    if high_impact.is_empty():
        print("No extreme mismatches (>50%) identified. Likely minor synchronization issues or rounding errors.")
    else:
        print(f"Validators with extreme mismatches (>50%): {high_impact.height}")
        high_impact  # rendered by the notebook as the cell's last expression
Data consistency is good. No critical cross-rate mismatches identified.
  • If no mismatches, system appears healthy.
  • Small mismatches (<50%) often reflect minor timing or rounding discrepancies.
  • Extreme mismatches (>50%) require thorough validator-specific investigation:
    • Verify decimal conversions.
    • Confirm synchronized data updates.
    • Review validator configurations or potential malicious behavior.

List of all Validators and their Consistency Indices

# Build a per-validator consistency ranking: join total submission counts with
# flagged-mismatch statistics and derive a 0-to-1 consistency index.
df_all = load_and_preprocess_submissions("../submission-data/Oracle_Submission_*.csv")

# Total submissions per validator.
# pl.count() was removed in polars 1.x; pl.len() is the row-count expression.
total_stats = (
    df_all.lazy()
    .group_by("Validator Address")
    .agg(pl.len().alias("total_submissions"))
    .collect()
)

# Mismatch statistics per validator, computed over the flagged rows only.
flagged_stats = (
    df_flagged.lazy()
    .group_by("Validator Address")
    .agg(
        pl.len().alias("flagged_count"),
        pl.mean("rel_diff_cross").alias("mean_rel_diff"),
        pl.max("rel_diff_cross").alias("max_rel_diff"),
    )
    .collect()
)

# Left join keeps validators with zero flags; fill_null(0) zeroes their stats.
validator_stats = (
    total_stats
    .join(flagged_stats, on="Validator Address", how="left")
    .fill_null(0)
)

# mismatch_penalty_rate weights flag frequency by mismatch severity;
# consistency_index maps it into (0, 1], where 1 means no mismatches at all.
validator_stats = (
    validator_stats
    .with_columns(
        (
            (pl.col("flagged_count") / pl.col("total_submissions")) *
            (pl.col("mean_rel_diff") + pl.col("max_rel_diff"))
        ).alias("mismatch_penalty_rate")
    )
    .with_columns(
        (1 / (1 + pl.col("mismatch_penalty_rate")))
        .alias("consistency_index")
    )
    # Ascending sort surfaces the least consistent validators first for triage.
    .sort("consistency_index", descending=False)
)

print("Validator Consistency Ranking (higher index = better)\n")
for row in validator_stats.to_dicts():
    print(
        f"Validator {row['Validator Address']}: "
        f"subs={row['total_submissions']:5d}, "
        f"flags={row['flagged_count']:3d}, "
        f"index={row['consistency_index']:.4f}"
    )
Validator Consistency Ranking (higher index = better)

Validator 0x383A3c437d3F12f60E5fC990119468D3561EfBfc: subs= 2880, flags=  0, index=1.0000
Validator 0xDCA5DFF3D42f2db3C18dBE823380A0A81db49A7E: subs= 2880, flags=  0, index=1.0000
Validator 0xcf716b3930d7cf6f2ADAD90A27c39fDc9D643BBd: subs= 2880, flags=  0, index=1.0000
Validator 0xbfDcAF35f52F9ef423ac8F2621F9eef8be6dEd17: subs= 2833, flags=  0, index=1.0000
Validator 0x1Be7f70BCf8393a7e4A5BcC66F6f15d6e35cfBBC: subs= 2880, flags=  0, index=1.0000
Validator 0x64F83c2538A646A550Ad9bEEb63427a377359DEE: subs= 2880, flags=  0, index=1.0000
Validator 0xB5d8be2AB4b6d7E6be7Ea28E91b370223a06289f: subs= 2880, flags=  0, index=1.0000
Validator 0x5603caFE3313D0cf56Fd4bE4A2f606dD6E43F8Eb: subs= 2880, flags=  0, index=1.0000
Validator 0xdF239e0D5b4E6e820B0cFEF6972A90893c2073AB: subs= 2880, flags=  0, index=1.0000
Validator 0xBE287C82A786218E008FF97320b08244BE4A282c: subs= 2879, flags=  0, index=1.0000
Validator 0x5E17e837DcBa2728C94f95c38fA8a47CB9C8818F: subs= 2876, flags=  0, index=1.0000
Validator 0x551f3300FCFE0e392178b3542c009948008B2a9F: subs= 2880, flags=  0, index=1.0000
Validator 0xE4686A4C6E63A8ab51B458c52EB779AEcf0B74f7: subs= 2880, flags=  0, index=1.0000
Validator 0x100E38f7BCEc53937BDd79ADE46F34362470577B: subs= 2876, flags=  0, index=1.0000
Validator 0xF9B38D02959379d43C764064dE201324d5e12931: subs= 2880, flags=  0, index=1.0000
Validator 0x3fe573552E14a0FC11Da25E43Fef11e16a785068: subs= 2880, flags=  0, index=1.0000
Validator 0x718361fc3637199F24a2437331677D6B89a40519: subs= 2880, flags=  0, index=1.0000
Validator 0x358488a4EdCA493FCD87610dcd50c62c8A3Dd658: subs= 2880, flags=  0, index=1.0000
Validator 0x527192F3D2408C84087607b7feE1d0f907821E17: subs= 2880, flags=  0, index=1.0000
Validator 0x9C7dAABb5101623340C925CFD6fF74088ff5672e: subs= 2880, flags=  0, index=1.0000
Validator 0x6a395dE946c0493157404E2b1947493c633f569E: subs= 2874, flags=  0, index=1.0000
Validator 0x2928FE5b911BCAf837cAd93eB9626E86a189f1dd: subs= 2829, flags=  0, index=1.0000
Validator 0x9d28e40E9Ec4789f9A0D17e421F76D8D0868EA44: subs= 2880, flags=  0, index=1.0000
Validator 0x01F788E4371a70D579C178Ea7F48E04e8B2CD743: subs= 2837, flags=  0, index=1.0000
Validator 0xDF2D0052ea56A860443039619f6DAe4434bc0Ac4: subs= 2879, flags=  0, index=1.0000
Validator 0x8f91e0ADF8065C3fFF92297267E02DF32C2978FF: subs= 2879, flags=  0, index=1.0000
Validator 0x3597d2D42f8Fbbc82E8b1046048773aD6DDB717E: subs= 2880, flags=  0, index=1.0000
Validator 0x7232e75a8bFd8c9ab002BB3A00eAa885BC72A6dd: subs= 2877, flags=  0, index=1.0000
Validator 0x8584A78A9b94f332A34BBf24D2AF83367Da31894: subs= 2879, flags=  0, index=1.0000
Validator 0xc5B9d978715F081E226cb28bADB7Ba4cde5f9775: subs= 2879, flags=  0, index=1.0000
Validator 0xd625d50B0d087861c286d726eC51Cf4Bd9c54357: subs= 2856, flags=  0, index=1.0000
Validator 0x00a96aaED75015Bb44cED878D927dcb15ec1FF54: subs= 2866, flags=  0, index=1.0000
Validator 0x197B2c44b887c4aC01243BDE7E4bBa8bd95BC3a8: subs= 2880, flags=  0, index=1.0000
Validator 0x24915749B793375a8C93090AF19928aFF1CAEcb6: subs= 2880, flags=  0, index=1.0000
Validator 0xcdEed21b471b0Dc54faF74480A0E700fCc42a7b6: subs= 2880, flags=  0, index=1.0000
Validator 0x19E356ebC20283fc74AF0BA4C179502A1F62fA7B: subs= 2833, flags=  0, index=1.0000
Validator 0x831B837C3DA1B6c2AB68a690206bDfF368877E19: subs= 2863, flags=  0, index=1.0000
Validator 0x36142A4f36974e2935192A1111C39330aA296D3C: subs= 2880, flags=  0, index=1.0000
Validator 0xfD97FB8835d25740A2Da27c69762D74F6A931858: subs= 2880, flags=  0, index=1.0000
Validator 0x94d28f08Ff81A80f4716C0a8EfC6CAC2Ec74d09E: subs= 2880, flags=  0, index=1.0000
Validator 0xD9fDab408dF7Ae751691BeC2efE3b713ba3f9C36: subs= 2880, flags=  0, index=1.0000
Validator 0xE9FFF86CAdC3136b3D94948B8Fd23631EDaa2dE3: subs= 2880, flags=  0, index=1.0000
Validator 0xBBf36374eb23968F25aecAEbb97BF3118f3c2fEC: subs= 2880, flags=  0, index=1.0000
Validator 0xf34CD6c09a59d7D3d1a6C3dC231a7834E5615D6A: subs= 2834, flags=  0, index=1.0000
Validator 0x3AaF7817618728ffEF81898E11A3171C33faAE41: subs= 2874, flags=  0, index=1.0000
Validator 0x94470A842Ea4f44e668EB9C2AB81367b6Ce01772: subs= 2880, flags=  0, index=1.0000
Validator 0x4cD134001EEF0843B9c69Ba9569d11fDcF4bd495: subs= 2823, flags=  0, index=1.0000
Validator 0x984A46Ec685Bb41A7BBb2bc39f80C78410ff4057: subs= 2877, flags=  0, index=1.0000
Validator 0x22A76e194A49c9e5508Cd4A3E1cD555D088ECB08: subs= 2880, flags=  0, index=1.0000
Validator 0x26E2724dBD14Fbd52be430B97043AA4c83F05852: subs= 2880, flags=  0, index=1.0000
Validator 0x791A7F840ac11841cCB0FaA968B2e3a0Db930fCe: subs= 2880, flags=  0, index=1.0000
Validator 0x6747c02DE7eb2099265e55715Ba2E03e8563D051: subs= 2840, flags=  0, index=1.0000
Validator 0xEf0Ba5e345C2C3937df5667A870Aae5105CAa3a5: subs= 2879, flags=  0, index=1.0000
Validator 0xC1F9acAF1824F6C906b35A0D2584D6E25077C7f5: subs= 2880, flags=  0, index=1.0000
Validator 0x1476A65D7B5739dE1805d5130441A94022Ee49fe: subs= 2462, flags=  0, index=1.0000
Validator 0xf10f56Bf0A28E0737c7e6bB0aF92f3DDad34aE6a: subs= 2880, flags=  0, index=1.0000
Validator 0x99E2B4B27BDe92b42D04B6CF302cF564D2C13b74: subs= 2880, flags=  0, index=1.0000
Validator 0xd61a48b0e11B0Dc6b7Bd713B1012563c52591BAA: subs= 2873, flags=  0, index=1.0000
Validator 0x59031767f20EA8F4a3d90d33aB0DAA2ca469Fd9a: subs= 2880, flags=  0, index=1.0000
Validator 0x23b4Be9536F93b8D550214912fD0e38417Ff7209: subs= 2880, flags=  0, index=1.0000

Please note, total_submissions represents the total number of submissions for this validator. flagged_count is how many of those were caught by the 1% cross-rate check. consistency_index is a 0-to-1 reliability score (1 = no mismatches; lower values indicate more frequent or larger inconsistencies).