// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// locdistance.h
// created: 2019may08 Markus W. Scherer

#ifndef __LOCDISTANCE_H__
#define __LOCDISTANCE_H__

#include "unicode/utypes.h"
#include "unicode/bytestrie.h"
#include "unicode/localematcher.h"
#include "unicode/locid.h"
#include "unicode/uobject.h"
#include "lsr.h"

U_NAMESPACE_BEGIN

struct LocaleDistanceData;

/**
 * Offline-built data for LocaleMatcher, together with the code that uses it
 * to find the supported locale (in LSR form) with the smallest distance
 * from a desired one.
 *
 * The data consists of a trie of subtag distances, a mapping from regions
 * to partitions, a list of paradigm LSRs, and several default distances.
 *
 * Several functions work with a combined "indexAndDistance" int32_t:
 * an index in the bits from INDEX_SHIFT upwards, and a shifted distance
 * (an integer distance with DISTANCE_SHIFT additional fraction bits)
 * in the low DISTANCE_MASK bits.
 */
class LocaleDistance final : public UMemory {
public:
    /**
     * Returns the singleton instance.
     * Presumably reports initialization failures via errorCode;
     * confirm against the implementation.
     */
    static const LocaleDistance *getSingleton(UErrorCode &errorCode);

    /** Converts an integer distance into its shifted form (fraction bits are zero). */
    static int32_t shiftDistance(int32_t distance) {
        return distance << DISTANCE_SHIFT;
    }

    /** Extracts the shifted distance (integer and fraction bits) from the low bits. */
    static int32_t getShiftedDistance(int32_t indexAndDistance) {
        return indexAndDistance & DISTANCE_MASK;
    }

    /** Returns the distance from the low bits as a double, including its fraction. */
    static double getDistanceDouble(int32_t indexAndDistance) {
        double shiftedDistance = getShiftedDistance(indexAndDistance);
        return shiftedDistance / (1 << DISTANCE_SHIFT);
    }

    /** Returns the integer part of the distance from the low bits (fraction bits dropped). */
    static int32_t getDistanceFloor(int32_t indexAndDistance) {
        return getShiftedDistance(indexAndDistance) >> DISTANCE_SHIFT;
    }

    /** Extracts the index from the high bits. */
    static int32_t getIndex(int32_t indexAndDistance) {
        // assert indexAndDistance >= 0;
        return indexAndDistance >> INDEX_SHIFT;
    }

    /**
     * Finds the supported LSR with the smallest distance from the desired one.
     * Equivalent LSR subtags must be normalized into a canonical form.
     *
     * <p>Returns the index of the lowest-distance supported LSR in the high bits
     * (negative if none has a distance below the threshold),
     * and its distance (0..ABOVE_THRESHOLD) in the low bits.
     */
    int32_t getBestIndexAndDistance(const LSR &desired,
                                    const LSR **supportedLSRs, int32_t supportedLSRsLength,
                                    int32_t shiftedThreshold,
                                    ULocMatchFavorSubtag favorSubtag,
                                    ULocMatchDirection direction) const;

    UBool isParadigmLSR(const LSR &lsr) const;

    int32_t getDefaultScriptDistance() const {
        return defaultScriptDistance;
    }

    int32_t getDefaultDemotionPerDesiredLocale() const {
        return defaultDemotionPerDesiredLocale;
    }

private:
    // The distance is shifted left to gain some fraction bits.
    static constexpr int32_t DISTANCE_SHIFT = 3;
    // Mask for the fraction bits of a shifted distance: 7
    static constexpr int32_t DISTANCE_FRACTION_MASK = (1 << DISTANCE_SHIFT) - 1;
    // 7 bits for 0..100
    static constexpr int32_t DISTANCE_INT_SHIFT = 7;
    static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT;
    // Mask for all of the distance bits (integer + fraction) below the index: 0x3ff
    static constexpr int32_t DISTANCE_MASK = (1 << INDEX_SHIFT) - 1;
    // static constexpr int32_t MAX_INDEX = 0x1fffff;  // avoids sign bit
    // Index -1 in the index bits with all distance bits zero.
    // Same bit pattern as 0xfffffc00, but written as a negative value
    // because that unsigned literal does not fit into an int32_t.
    static constexpr int32_t INDEX_NEG_1 = -(1 << INDEX_SHIFT);

    LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely);
    LocaleDistance(const LocaleDistance &other) = delete;
    LocaleDistance &operator=(const LocaleDistance &other) = delete;

    static void initLocaleDistance(UErrorCode &errorCode);

    /**
     * Returns true if getBestIndexAndDistance() yields a non-negative result
     * when called with `supported` as the only supported LSR
     * and with ULOCMATCH_DIRECTION_WITH_ONE_WAY,
     * that is, if the distance is below the threshold.
     */
    UBool isMatch(const LSR &desired, const LSR &supported,
                  int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
        const LSR *pSupp = &supported;
        return getBestIndexAndDistance(
            desired, &pSupp, 1,
            shiftedThreshold, favorSubtag, ULOCMATCH_DIRECTION_WITH_ONE_WAY) >= 0;
    }

    // Helpers that operate on a BytesTrie iterator.

    static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState,
                                            const char *desired, const char *supported);

    static int32_t getRegionPartitionsDistance(
        BytesTrie &iter, uint64_t startState,
        const char *desiredPartitions, const char *supportedPartitions,
        int32_t threshold);

    static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState);

    static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue);

    /**
     * Returns the partitions string for the LSR's region:
     * lsr.regionIndex selects an entry in regionToPartitionsIndex,
     * which in turn selects the string in partitionArrays.
     */
    const char *partitionsForRegion(const LSR &lsr) const {
        // ill-formed region -> one non-matching string
        int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex];
        return partitionArrays[pIndex];
    }

    int32_t getDefaultRegionDistance() const {
        return defaultRegionDistance;
    }

    const XLikelySubtags &likelySubtags;

    // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
    // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.
    // There is also a trie value for each subsequence of whole subtags.
    // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"".
    BytesTrie trie;

    /**
     * Maps each region to zero or more single-character partitions.
     */
    const uint8_t *regionToPartitionsIndex;
    const char **partitionArrays;

    /**
     * Used to get the paradigm region for a cluster, if there is one.
     */
    const LSR *paradigmLSRs;
    int32_t paradigmLSRsLength;

    int32_t defaultLanguageDistance;
    int32_t defaultScriptDistance;
    int32_t defaultRegionDistance;
    int32_t minRegionDistance;
    int32_t defaultDemotionPerDesiredLocale;
};

U_NAMESPACE_END

#endif  // __LOCDISTANCE_H__