/**
 * Upper-cases the first UTF-16 code unit of the string and lower-cases everything after it.
 * An empty string is returned unchanged.
 * @param str the text to re-case
 * @returns the re-cased text
 */
export function capitalizeFirstLetter(str: string): string {
  // charAt(0) yields "" for an empty string, so no separate empty-input guard is needed
  const head = str.charAt(0);
  const tail = str.slice(1);
  return head.toUpperCase() + tail.toLowerCase();
}

/**
 * Strips diacritics by decomposing the string (NFD) and dropping the combining marks
 * in the U+0300–U+036F range, e.g. "é" becomes "e".
 * Characters that do not decompose into a base letter plus combining marks are left untouched.
 * @param str the text to simplify
 * @returns the text without combining diacritical marks
 */
export function toBasicLatin(str: string): string {
  // https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript
  // if this isn't sufficient, lodash deburr might also be a good option
  const decomposed = str.normalize("NFD");
  return decomposed.replace(/[\u0300-\u036f]/g, "");
}

/**
 * Trims the string, collapsing "nothing left" into undefined.
 * @param str the optional text to trim
 * @returns the trimmed text, or undefined when the input is undefined, empty or whitespace-only
 */
export function emptyToUndefined(str?: string): string | undefined {
  const trimmed = str?.trim();
  // both undefined and "" are falsy, so a single check covers every "empty" case
  return trimmed ? trimmed : undefined;
}

/**
 * Shortens a string to at most `maxLen` UTF-16 code units, ending it with "...".
 * The input is trimmed first; if it already fits, the trimmed text is returned as is.
 * Otherwise the text is cut at the last space that still leaves room for the ellipsis.
 * When there is no such space (e.g. one very long word) the text is cut mid-word instead
 * of being dropped entirely.
 * When `maxLen` is smaller than 3 the result is just "..." and therefore longer than `maxLen`.
 * @param str the text to shorten
 * @param maxLen maximum length of the result, including the ellipsis
 * @returns the trimmed original or a shortened version ending in "..."
 */
export function ellipsizeSentence(str: string, maxLen: number): string {
  const ellipsis = "...";
  const trimmed = str.trim();
  if (trimmed.length <= maxLen) {
    return trimmed;
  }

  // One code unit more than the text budget: a space sitting exactly at the budget boundary
  // is still a valid cut point because the space itself is discarded.
  const candidate = trimmed.substring(0, maxLen - 2);
  const lastSpace = candidate.lastIndexOf(" ");
  if (lastSpace !== -1) {
    return candidate.substring(0, lastSpace).trim() + ellipsis;
  }

  // No word boundary available: hard-cut so that text plus ellipsis is exactly maxLen.
  let cut = Math.max(0, maxLen - ellipsis.length);
  // Do not leave half of a surrogate pair at the end of the cut.
  const lastUnit = trimmed.charCodeAt(cut - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    cut -= 1;
  }
  return trimmed.substring(0, cut) + ellipsis;
}

/**
 * Splits a string into sentences and reports where each sentence sits in the original string.
 * A sentence boundary is a period followed by one or two spaces and an upper-case A-Z letter
 * (/[.][ ]{1,2}[A-Z]/), so text that is not formatted "normally" will not be split as expected.
 * If no boundary is found the whole string is returned as a single sentence.
 * An empty string yields an empty array.
 * @param str the text to split
 * @returns one entry per sentence; `indices` is the inclusive [start, end] range of the sentence
 * in `str`. The spaces between sentences belong to no sentence.
 */
export function splitOnSentences(str: string): Array<{ sentence: string; indices: [number, number] }> {
  // allow for 1 or 2 spaces after the period
  const boundary = /[.][ ]{1,2}[A-Z]/g;
  const indices: Array<[number, number]> = [];
  let start = 0;
  let match: RegExpExecArray | null;
  while ((match = boundary.exec(str)) !== null) {
    // match.index is the period, which is the last character of the current sentence
    indices.push([start, match.index]);
    // the match ends on the capital letter that opens the next sentence
    start = match.index + (match[0]?.length ?? 1) - 1;
  }

  // Push whatever is left, including a final one-character sentence.
  // This also covers the case where no boundary was found at all.
  if (start < str.length) {
    indices.push([start, str.length - 1]);
  }

  return indices.map(range => ({ sentence: str.substring(range[0], range[1] + 1), indices: range }));
}

/**
 * Counts the Unicode code points in a string, so a character stored as a surrogate pair counts once.
 * "😂".length is 2, charLength("😂") is 1.
 * @param str the text to measure
 * @returns the number of code points in `str`
 */
export function charLength(str: string): number {
  // Array.from walks the string iterator, which yields whole code points
  const codePoints = Array.from(str);
  return codePoints.length;
}

/**
 * Splits the given string into consecutive chunks of at most `chunkSizeBytes` bytes each,
 * measured with `utf8ByteSize`. Characters (code points) are never split across chunks.
 * A single character that is larger than `chunkSizeBytes` is emitted as a chunk of its own,
 * which is the only case where a chunk exceeds the limit. No empty chunks are produced.
 * @param str the text to split
 * @param chunkSizeBytes maximum UTF-8 size of a chunk in bytes
 * @returns the chunks in order; concatenating them yields `str`
 */
export function chunkString(str: string, chunkSizeBytes: number): string[] {
  const chunks: string[] = [];
  let currentChunk = "";
  let currentByteSize = 0;

  for (const char of str) {
    const charByteSize = utf8ByteSize(char);
    const overflows = currentByteSize + charByteSize > chunkSizeBytes;
    // Only flush when there is something to flush: an oversized first character
    // must not produce a leading empty chunk.
    if (overflows && currentChunk.length > 0) {
      chunks.push(currentChunk);
      currentChunk = "";
      currentByteSize = 0;
    }
    currentChunk += char;
    currentByteSize += charByteSize;
  }

  if (currentChunk) {
    chunks.push(currentChunk);
  }

  return chunks;
}

/**
 * Calculates how many bytes the string occupies when encoded as UTF-8, by walking its UTF-16 code units:
 * - Single-byte characters (standard ASCII): 1 byte.
 * - Two-byte characters (up to U+07FF): 2 bytes.
 * - Valid surrogate pairs (like many emojis): 4 bytes for the pair.
 * - Other characters: 3 bytes. This includes unpaired surrogates, which UTF-8 encoders
 *   replace with U+FFFD (3 bytes).
 * This method provides a direct way to calculate the byte size of a string in environments without TextEncoder (i.e. React Native).
 * @param str the text to measure
 * @returns the UTF-8 encoded size of `str` in bytes
 */
export function utf8ByteSize(str: string): number {
  let size = 0;
  for (let i = 0; i < str.length; i++) {
    const code = str.charCodeAt(i);
    if (code <= 0x7f) {
      size += 1;
    } else if (code <= 0x7ff) {
      size += 2;
    } else if (code >= 0xd800 && code <= 0xdbff) {
      // High surrogate: only a pair if a low surrogate follows.
      // charCodeAt past the end returns NaN, which fails both comparisons.
      const next = str.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        size += 4;
        i++; // the low surrogate has been accounted for
      } else {
        // Unpaired high surrogate: counted on its own, the next unit is processed normally.
        size += 3;
      }
    } else {
      size += 3;
    }
  }
  return size;
}

/**
 * Formats a number with locale-specific grouping separators using `Intl.NumberFormat`
 * (e.g. 1234567 becomes "1,234,567" for "en-US").
 * @param number the value to format
 * @param locale the locale to format with; only "en-US" is accepted
 * @returns the formatted number
 */
export function formatNumberWithCommas(number: number, locale: "en-US" = "en-US") {
  const formatter = new Intl.NumberFormat(locale);
  return formatter.format(number);
}

/**
 * Script letters that Unicode encodes in the Letterlike Symbols block instead of the
 * Mathematical Script run (their slots in that run are reserved), paired with the ASCII letter they stand for.
 */
const SCRIPT_LETTERLIKE_FORMS: ReadonlyArray<readonly [number, string]> = [
  [0x212c, "B"],
  [0x2130, "E"],
  [0x2131, "F"],
  [0x210b, "H"],
  [0x2110, "I"],
  [0x2112, "L"],
  [0x2133, "M"],
  [0x211b, "R"],
  [0x212f, "e"],
  [0x210a, "g"],
  [0x2134, "o"],
];

/** Builds the styled-character → ASCII lookup used by `replaceUnicodeCharacters`. */
function buildStyledCharacterMap(): Map<string, string> {
  const table = new Map<string, string>();

  // Maps `count` consecutive styled code points onto consecutive ASCII characters,
  // leaving out any ASCII character listed in `skip`.
  const addRun = (firstStyled: number, firstAscii: string, count: number, skip?: ReadonlySet<string>): void => {
    const asciiBase = firstAscii.codePointAt(0) ?? 0;
    for (let offset = 0; offset < count; offset++) {
      const ascii = String.fromCodePoint(asciiBase + offset);
      if (skip?.has(ascii)) {
        continue;
      }
      table.set(String.fromCodePoint(firstStyled + offset), ascii);
    }
  };

  // Bold letters (Mathematical Bold)
  addRun(0x1d400, "A", 26);
  addRun(0x1d41a, "a", 26);

  // Italic letters (Mathematical Sans-Serif Italic)
  addRun(0x1d608, "A", 26);
  addRun(0x1d622, "a", 26);

  // Digits (Mathematical Monospace)
  addRun(0x1d7f6, "0", 10);

  // Script letters (Mathematical Script). The reserved slots in this run are skipped;
  // the real characters for those letters are added from the Letterlike Symbols block below.
  const letterlikeLetters = new Set(SCRIPT_LETTERLIKE_FORMS.map(([, ascii]) => ascii));
  addRun(0x1d49c, "A", 26, letterlikeLetters);
  addRun(0x1d4b6, "a", 26, letterlikeLetters);
  for (const [codePoint, ascii] of SCRIPT_LETTERLIKE_FORMS) {
    table.set(String.fromCodePoint(codePoint), ascii);
  }

  return table;
}

/** Lookup from styled character to its ASCII equivalent, built once at module load. */
const STYLED_TO_ASCII: ReadonlyMap<string, string> = buildStyledCharacterMap();

/**
 * Replace the non-standard character sets some people use on social for captions. AI doesn't recognize these correctly.
 * Handles bold letters, sans-serif italic letters, monospace digits and script letters;
 * every other character is passed through unchanged.
 * @param input the caption text
 * @returns the text with the styled characters replaced by plain ASCII letters and digits
 */
export function replaceUnicodeCharacters(input: string): string {
  let output = "";
  // iterate by code point: the styled characters are outside the BMP (surrogate pairs)
  for (const char of input) {
    output += STYLED_TO_ASCII.get(char) ?? char;
  }
  return output;
}
