import { DuplicateRecordHash, Field, RecordDataSet } from "fuse-importer";
import { useBatchProcessing } from "./useBatchProcessing";
import { rowIsEmpty } from "./utils";

/**
 * Collects every entry of a hash-sorted list that shares its hash with an
 * adjacent entry, i.e. all members of each run of equal hashes.
 *
 * @param sortedHashes - entries already ordered by `hash`, so that equal
 *   hashes sit next to each other.
 * @returns the entries that have at least one neighbour with the same hash,
 *   in their original order. Empty and single-element inputs yield `[]`.
 */
const findDuplicateHashes = (sortedHashes: DuplicateRecordHash[]) => {
  const results: DuplicateRecordHash[] = [];
  // Visit every index, including the last one: the final entry of a run of
  // duplicates only matches its predecessor and must not be skipped.
  for (let i = 0; i < sortedHashes.length; i++) {
    const current = sortedHashes[i];
    const previous = i > 0 ? sortedHashes[i - 1] : undefined;
    const next = i + 1 < sortedHashes.length ? sortedHashes[i + 1] : undefined;

    // Only compare against neighbours that actually exist, so a missing
    // neighbour can never "match" an entry whose hash is undefined.
    const matchesNext = next !== undefined && next.hash === current.hash;
    const matchesPrevious =
      previous !== undefined && previous.hash === current.hash;

    if (matchesNext || matchesPrevious) {
      results.push(current);
    }
  }
  return results;
};

/**
 * Sort comparator that orders two entries by their `hash` property, ascending.
 * Yields -1 when `a` sorts first, 1 when `b` sorts first, and 0 otherwise.
 */
function compareHashes(a, b) {
  const left = a.hash;
  const right = b.hash;
  if (left < right) {
    return -1;
  }
  return left > right ? 1 : 0;
}

/**
 * Builds a calculator that generates hashes for comparing whole records, for
 * the sake of tracking duplicates: if the hash for one record equals the hash
 * for another, the two records are treated as duplicates.
 *
 * @returns an object exposing `computeHashesForRecords`.
 */
export const RecordHashCalculator = () => {
  /**
   * Produces one `{ id, hash }` entry for each record in the batch that
   * `rowIsEmpty` does not report as empty. The hash is the record's values
   * for the given fields joined with "|".
   *
   * NOTE(review): because the values are joined with a plain "|", records
   * whose values themselves contain "|" can produce the same hash without
   * being equal. The format is left unchanged here in case other code relies
   * on it.
   */
  const computeHashesForBatch = (dataSet: RecordDataSet, fields) => {
    const hashes = [];

    for (const recordKey in dataSet) {
      const record = dataSet[recordKey];
      const key = fields.map((field) => record[field.name]).join("|");

      if (!rowIsEmpty(record)) {
        hashes.push({ id: record._meta.id, hash: key });
      }
    }

    return hashes;
  };

  // Guard flag: while a computation is running, further calls are ignored.
  let calculatingHashes = false;

  /**
   * Hashes every non-empty record of `dataSet`, sorts the hashes, and passes
   * the entries whose hash occurs more than once to `_onHashesComputed`.
   *
   * Does nothing (and does not invoke the callback) when `dataSet` is falsy
   * or when a previous computation on this calculator is still in progress.
   *
   * @param dataSet - the records to hash.
   * @param fields - the fields whose values make up each record's hash.
   * @param _onHashesComputed - receives the duplicate `{ id, hash }` entries.
   */
  const computeHashesForRecords = async (
    dataSet: RecordDataSet,
    fields: Field[],
    _onHashesComputed: (x: any) => void
  ) => {
    if (!dataSet || calculatingHashes) {
      return;
    }
    const { processDataSetInBatches } = useBatchProcessing();

    calculatingHashes = true;
    try {
      let results = [];
      // NOTE(review): the batch helper is called without `await`; if it is
      // asynchronous, `results` may be incomplete below — confirm against
      // useBatchProcessing.
      processDataSetInBatches(dataSet, (batch) => {
        results = results.concat(computeHashesForBatch(batch, fields));
      });
      // Sorting makes equal hashes adjacent, which findDuplicateHashes needs.
      results.sort(compareHashes);
      const duplicateHashes = findDuplicateHashes(results);

      _onHashesComputed(duplicateHashes);
    } finally {
      // Always release the guard, even if hashing or the callback throws;
      // otherwise every later call would silently return early.
      calculatingHashes = false;
    }
  };

  return {
    computeHashesForRecords,
  };
};

/**
 * Builds a calculator that generates hashes for comparing the values of a
 * specific field, for the sake of tracking uniqueness.
 *
 * @returns an object exposing `computeDuplicateValueHashes`.
 */
export const RecordValueHashCalculator = () => {
  // Names of the fields whose hashes are currently being computed; used to
  // ignore overlapping requests for the same field.
  const columnsBeingCalculated = new Set<string>();

  /**
   * Produces one `{ id, hash }` entry per record in the batch whose value for
   * `field` is truthy. When the field carries a "unique_case_insensitive"
   * validation, string values are lower-cased before being used as the hash.
   *
   * NOTE(review): the truthiness check also skips values such as `0` and
   * `false`; confirm that is intended for non-string columns.
   */
  const computeHashesForBatch = (dataSet: RecordDataSet, field) => {
    const hashes = [];

    // Tolerate a field without a `validations` list.
    const isCaseInsensitive =
      field.validations?.some((v) => v.name === "unique_case_insensitive") ??
      false;

    for (const recordKey in dataSet) {
      const record = dataSet[recordKey];

      let key = record[field.name];
      // Only strings can be lower-cased; empty (undefined/null) or non-string
      // values would otherwise throw here.
      if (isCaseInsensitive && typeof key === "string") {
        key = key.toLowerCase();
      }

      if (key) {
        hashes.push({ id: record._meta.id, hash: key });
      }
    }

    return hashes;
  };

  /**
   * Hashes the values of `_field` across `dataSet`, sorts the resulting
   * `{ id, hash }` entries by hash, and passes the full sorted list to
   * `_onDuplicateHashesComputed` (no duplicate filtering happens here).
   *
   * Does nothing (and does not invoke the callback) when `dataSet` is falsy
   * or when a computation for the same field name is still in progress.
   *
   * @param dataSet - the records to hash.
   * @param _field - the field whose values are hashed.
   * @param _onDuplicateHashesComputed - receives the sorted hash entries.
   */
  const computeDuplicateValueHashes = async (
    dataSet: RecordDataSet,
    _field: Field,
    _onDuplicateHashesComputed: (x: any[]) => void
  ) => {
    if (!dataSet || columnsBeingCalculated.has(_field.name)) {
      return;
    }
    columnsBeingCalculated.add(_field.name);

    try {
      const { processDataSetInBatches } = useBatchProcessing();

      let results = [];
      // NOTE(review): the batch helper is called without `await`; if it is
      // asynchronous, `results` may be incomplete below — confirm against
      // useBatchProcessing.
      processDataSetInBatches(dataSet, (batch) => {
        results = results.concat(computeHashesForBatch(batch, _field));
      });
      results.sort(compareHashes);

      _onDuplicateHashesComputed(results);
    } finally {
      // Always release the per-field guard, even on failure, so the field
      // can be recalculated later.
      columnsBeingCalculated.delete(_field.name);
    }
  };

  return {
    computeDuplicateValueHashes,
  };
};
