#####
##### Helper Functions
#####

# Pick the trait that decides how a transformed value `y` gets hashed. A transformer that
# carries a `result_method` dictates the trait; when its second type parameter is `Nothing`
# the trait is looked up from the value itself, which by default is its `StructType`.
function hash_trait(x::Transformer, y)
    return x.result_method
end
function hash_trait(::Transformer{<:Any,Nothing}, y)
    return hash_trait(y)
end
function hash_trait(x)
    return StructType(x)
end

# Hash both the type and the value of `x`; used whenever the type hash has not been hoisted
# out of an enclosing loop. When the transformer allows hoisting, the type of the *input*
# is hashed (before transforming); otherwise the type of the *transformed* value is hashed.
function hash_type_and_value(x, hash_state, context)
    transform = transformer(typeof(x), context)::Transformer
    if transform.hoist_type
        hash_state = hash_type!(hash_state, context, typeof(x))
        tx = transform(x)
    else
        tx = transform(x)
        hash_state = hash_type!(hash_state, context, typeof(tx))
    end
    trait = hash_trait(transform, tx)
    return stable_hash_helper(tx, hash_state, context, trait)
end

# Hash only the value of `x`; used when the caller has already accounted for the type hash
# (i.e. it was hoisted out of a loop). `transform` is the transformer the caller looked up.
function hash_value(x, hash_state, context, transform::Transformer)
    transformed = transform(x)
    trait = hash_trait(transform, transformed)
    return stable_hash_helper(transformed, hash_state, context, trait)
end

# There are two cases where we want to hash types:
#
#   1. when we are hashing the type of an object we're hashing (`TypeHashContext`)
#   2. when a value we're hashing is itself a type (`TypeAsValueContext`)
#
# These are handled as separate contexts because the kind of value we want to generate from
# the type may differ. By default, only the structure of a type matters when hashing an
# object's type: e.g. when we hash a StructTypes.DataType we hash that it is a data type,
# the field names, and each individual element type (as per its rules), but we do not hash
# the name of the type. When a type is hashed as a value, its actual name also matters.

#####
##### Type Hashes
#####

"""
    hash_type!(hash_state, context, T)

Hash type `T` in the given context, updating `hash_state`.
"""
function hash_type!(hash_state, context, ::Type{T}) where {T}
    type_context = TypeHashContext(context)
    transform = transformer(typeof(T), type_context)
    transformed = transform(T)

    # the type is hashed into its own, separate state; only the resulting digest (viewed
    # as bytes) is folded into the caller's state
    scratch = similar_hash_state(hash_state)
    trait = hash_trait(transform, transformed)
    scratch = stable_hash_helper(transformed, scratch, type_context, trait)
    digest = compute_hash!(scratch)

    return update_hash!(hash_state, reinterpret(UInt8, asarray(digest)))
end
# Normalize a value to an array: non-array values are wrapped in a one-element vector,
# arrays are passed through untouched (no copy).
function asarray(x)
    return [x]
end
function asarray(x::AbstractArray)
    return x
end

# Context wrapper that marks "we are currently hashing the type of an object"; the wrapped
# (parent) context is available through `parent_context`.
struct TypeHashContext{T}
    parent::T
end

# wrapping an existing `TypeHashContext` is a no-op, so contexts never nest
function TypeHashContext(x::TypeHashContext)
    return x
end

function parent_context(x::TypeHashContext)
    return x.parent
end

# while inside a `TypeHashContext`, requests to hash a type leave the state untouched
function hash_type!(hash_state, ::TypeHashContext, key::Type)
    return hash_state
end

# pair_structure: When the internal structure of a type is `nothing`, avoid additional
# tuple-nesting in the returned value to hash. This ensures that if we want two types to
# transform to the same string, the hashed value doesn't depend on how many transformations
# deep we go to "find" this identical string (unless there is distinct structure that *must*
# be hashed given its `StructType`).
function pair_structure(x, ::Nothing)
    return x
end
function pair_structure(x, y)
    return (x, y)
end
# Transformer applied to a type while hashing it as the type of some object: combines the
# user-facing `transform_type` result (evaluated in the parent context) with the mandatory
# internal structure of the type.
function transformer(::Type{T}, context::TypeHashContext) where {T<:Type}
    function type_to_hashable(type)
        label = transform_type(type, parent_context(context))
        structure = internal_type_structure_(type, StructType_(type))
        return pair_structure(label, structure)
    end
    return Transformer(type_to_hashable)
end
# `StructType` lookup with a special case for the bottom type `Union{}`, which is mapped
# to `StructTypes.NoStructType()` instead of being forwarded to `StructType`.
@inline function StructType_(T)
    return StructType(T)
end
function StructType_(::Type{Union{}})
    return StructTypes.NoStructType()
end
# Thin wrapper around `internal_type_structure` used when hashing the type of an object.
function internal_type_structure_(T, trait)
    return internal_type_structure(T, trait)
end

# `DataType` itself (the type of most types) has no internal structure to hash; any other
# `UnorderedStruct` type defers to `internal_type_structure`.
function internal_type_structure_(T, c::StructTypes.UnorderedStruct)
    T === DataType && return nothing
    return internal_type_structure(T, c)
end

# NOTE: `internal_type_structure` implements mandatory elements of a type's structure that
# are always included in the hash; this ensures that the invariants required by type
# hoisting hold. This fallback reports that a type has no such structure.
function internal_type_structure(T, trait)
    return nothing
end

#####
##### Hashing Types as Values
#####

# Trait marking a value that is itself a type; any `Type` passed to `hash_trait` gets it.
struct TypeAsValue <: StructTypes.StructType
end
function hash_trait(::Type)
    return TypeAsValue()
end

# Context wrapper that marks "we are hashing a type that appears as a value"; the wrapped
# (parent) context is available through `parent_context`.
struct TypeAsValueContext{T}
    parent::T
end

function parent_context(x::TypeAsValueContext)
    return x.parent
end

# The type of a type (anything `<: Type`) always hashes as the fixed label "Base.Type",
# regardless of context.
hash_type!(hash_state, ::Any, ::Type{<:Type}) = update_hash!(hash_state, "Base.Type")

# these methods are required to avoid method ambiguities
hash_type!(hash_state, ::TypeHashContext, ::Type{<:Type}) =
    update_hash!(hash_state, "Base.Type")
hash_type!(hash_state, ::TypeAsValueContext, ::Type{<:Type}) =
    update_hash!(hash_state, "Base.Type")

# Transformer applied to a type that is being hashed as a value: combines the
# `transform_type_value` result with the type's internal structure.
function transformer(::Type{<:Type}, context::TypeAsValueContext)
    function type_value_to_hashable(type)
        label = transform_type_value(type, context)
        structure = internal_type_structure(type, StructType_(type))
        return pair_structure(label, structure)
    end
    return Transformer(type_value_to_hashable)
end

# while inside a `TypeAsValueContext`, requests to hash a type leave the state untouched
function hash_type!(hash_state, ::TypeAsValueContext, ::Type)
    return hash_state
end

# Hash a type that appears as a value: switch to a `TypeAsValueContext`, transform the
# type with that context's transformer and hash whatever the transformation produced.
function stable_hash_helper(::Type{T}, hash_state, context, ::TypeAsValue) where {T}
    value_context = TypeAsValueContext(context)
    transform = transformer(typeof(T), value_context)::Transformer
    transformed = transform(T)
    trait = hash_trait(transform, transformed)
    return stable_hash_helper(transformed, hash_state, value_context, trait)
end

#####
##### Function Hashes
#####

# Remember: functions can have fields (e.g. the `2` captured by `==(2)`). StructTypes does
# not assume these are serialized, but here we want that to happen by default, so that
# `==(2)` hashes both the name of `==` and the `2`.
function hash_trait(::Function)
    return StructTypes.UnorderedStruct()
end

# the type of a function is identified by its name
function transform_type(::Type{T}) where {T<:Function}
    return nameof_string(T)
end

#####
##### DataType
#####

# types with a `StructTypes.DataType` trait are identified by their name
function transform_type_by_trait(::Type{T}, ::StructTypes.DataType) where {T}
    return nameof_string(T)
end

# Field names of a type, sorted by their string representation, returned as a tuple.
#
# Two entry points:
#   - called with a type: the names are sorted on each call
#   - called with a value: a generated function; inside its body `T` is bound to the
#     *type* of the argument, and the sorted tuple is returned as the generated method body
sorted_field_names(T::Type) = TupleTools.sort(fieldnames(T); by=string)
@generated function sorted_field_names(T)
    return TupleTools.sort(fieldnames(T); by=string)
end

# Mandatory structure of a struct-like type: its field names and the matching field types.
# Field order is the declaration order for `OrderedStruct` and string-sorted otherwise.
# Non-concrete types contribute no structure (`nothing`).
function internal_type_structure(::Type{T}, trait::StructTypes.DataType) where {T}
    isconcretetype(T) || return nothing
    names = if trait isa StructTypes.OrderedStruct
        fieldnames(T)
    else
        sorted_field_names(T)
    end
    types = map(name -> fieldtype(T, name), names)
    return names, types
end

# Hash a struct-like value: its field values are hashed inside a nested hash. Fields are
# visited in string-sorted order for `UnorderedStruct` and in declaration order otherwise.
function stable_hash_helper(x, hash_state, context, st::StructTypes.DataType)
    inner = start_nested_hash!(hash_state)
    names = if st isa StructTypes.UnorderedStruct
        sorted_field_names(x)
    else
        fieldnames(typeof(x))
    end
    inner = hash_fields(x, names, inner, context)
    return end_nested_hash!(hash_state, inner)
end

# Hash the listed `fields` of `x` into `hash_state`, which is returned. The results of the
# per-field hashing calls are not rebound, so this relies on those calls updating
# `hash_state` in place.
function hash_fields(x, fields, hash_state, context)
    container_type = typeof(x)
    for name in fields
        value = getfield(x, name)
        transform = transformer(typeof(value), context)
        # The field's type hash can be skipped only when the declared field type is
        # concrete (it was then hashed as part of the container's type) and the
        # transformer permits hoisting.
        if !(isconcretetype(fieldtype(container_type, name)) && transform.hoist_type)
            hash_type_and_value(value, hash_state, context)
        else
            hash_value(value, hash_state, context, transform)
        end
    end
    return hash_state
end

#####
##### ArrayType
#####

"""
    is_ordered(x)

Indicates whether the order of the elements of object `x` is important to its hashed value.
If false, `x`'s elements will first be `collect`ed and `sort`ed before hashing them. When
calling `sort`, [`hash_sort_by`](@ref) is passed as the `by` keyword argument.
If `x` is a `DictType`, the elements are sorted by their keys rather than their elements.
"""
function is_ordered(x)
    return true
end
function is_ordered(::AbstractSet)
    return false
end

"""
    hash_sort_by(x)

Defines how the elements of a hashed container `x` are `sort`ed if [`is_ordered`](@ref) of
`x` returns `false`. The return value of this function is passed to `sort` as the `by`
keyword.
"""
# symbols and characters are sorted by their string form; everything else by itself
function hash_sort_by(x::Symbol)
    return String(x)
end
function hash_sort_by(x::Char)
    return string(x)
end
function hash_sort_by(x)
    return x
end

# the mandatory structure of an array-like type is its element type
internal_type_structure(::Type{T}, ::StructTypes.ArrayType) where {T} = eltype(T)

# Include the number of dimensions in the type hash when it is known (`ndims_` yields
# `nothing` otherwise).
function transform_type(::Type{T}) where {T<:AbstractArray}
    label = transform_type_by_trait(T, StructType(T))
    return (label, ndims_(T))
end
function transform_type_value(::Type{T}) where {T<:AbstractArray}
    label = nameof_string(T)
    return (label, ndims_(T))
end
# Number of dimensions of an array type, or `nothing` when the type does not fix it.
function ndims_(::Type{<:AbstractArray{<:Any,N}}) where {N}
    return N
end
function ndims_(::Type{<:AbstractArray})
    return nothing
end

# Arrays are hashed as their size together with their (possibly union-split) contents.
function transformer(::Type{<:AbstractArray}, ::HashVersion{4})
    array_parts(x) = (size(x), split_union(x))
    return Transformer(array_parts; hoist_type=true)
end

# `split_union` prepares an array's contents for hashing. Arrays whose element type is not
# a `Union` are wrapped in `TransformIdentity` as-is; arrays with a `Union` element type
# are split into a Boolean mask plus one homogeneous array per side of the union.
split_union(array) = TransformIdentity(array)
# NOTE: this method actually properly handles union splitting for as many splits as julia
# will allow to match to this method, not just two; in the case where the eltype is
# Union{Int, UInt, Char} for instance, M will match to Union{UInt, Char} and the `else`
# branch will properly split out the first type. The returned M_array will then be split
# again, when the `transformer` method above is applied to it.
function split_union(array::AbstractArray{Union{N,M}}) where {N,M}
    # NOTE: when an abstract array is e.g. AbstractArray{Int}, N becomes
    # Int and M is left as undefined; we just need to hash this array
    !@isdefined(M) && return TransformIdentity(array)
    # special case null and singleton-types, since we don't need to hash their content at
    # all: only the mask and the elements of the *other* type are returned
    if StructType(N) isa StructTypes.NullType ||
       StructType(N) isa StructTypes.SingletonType
        isM_array = isa.(array, M)
        return isM_array, convert(AbstractArray{M}, array[isM_array])
    elseif StructType(M) isa StructTypes.NullType ||
           StructType(M) isa StructTypes.SingletonType
        # I'm not actually sure if it's possible to hit this `elseif` branch since "smaller"
        # types seem to occur first in the `Union`, but it's here since I don't know that
        # this pattern is documented behavior or an implementation detail of the current
        # version of julia, nor do I know if all singleton-types count as smaller than
        # non-singleton types
        isN_array = isa.(array, N)
        return isN_array, convert(AbstractArray{N}, array[isN_array])
    else
        # general case: a mask of which elements are `N`, the `N` elements, and the rest
        isN_array = isa.(array, N)
        N_array = convert(AbstractArray{N}, array[isN_array])
        M_array = convert(AbstractArray{M}, array[.!isN_array])
        return isN_array, N_array, M_array
    end
end

# Hash an array-like value: its elements are hashed inside a nested hash. Containers that
# are not `is_ordered` are first collected and sorted with `hash_sort_by`.
function stable_hash_helper(xs, hash_state, context, ::StructTypes.ArrayType)
    inner = start_nested_hash!(hash_state)
    items = is_ordered(xs) ? xs : sort!(collect(xs); by=hash_sort_by)
    transform = transformer(eltype(items), context)::Transformer
    inner = hash_elements(items, inner, context, transform)
    return end_nested_hash!(hash_state, inner)
end

# Hash every element of `items` into `hash_state`, which is returned. The results of the
# per-element hashing calls are not rebound, so this relies on those calls updating
# `hash_state` in place.
function hash_elements(items, hash_state, context, transform)
    # The per-element type hash can be skipped only when the element type is concrete (it
    # was then hashed as part of the container's type structure) and the transformer
    # permits hoisting.
    if !(isconcretetype(eltype(items)) && transform.hoist_type)
        for item in items
            hash_type_and_value(item, hash_state, context)
        end
        return hash_state
    end
    for item in items
        hash_value(item, hash_state, context, transform)
    end
    return hash_state
end

#####
##### AbstractRange
#####

# every range type shares a single type label
function transform_type(::Type{<:AbstractRange})
    return "Base.AbstractRange"
end

# a range is hashed by its first element, step and last element
function transformer(::Type{<:AbstractRange}, ::HashVersion{4})
    range_parts(x) = (first(x), step(x), last(x))
    return Transformer(range_parts; hoist_type=true)
end

#####
##### Tuples
#####

# Mandatory structure of a tuple type: its field names (indices) and the matching field
# types. Non-concrete tuple types contribute no structure (`nothing`).
#
# NOTE(review): the condition below compares the tuple type itself against the
# `OrderedStruct` trait type, which looks like it never holds for a tuple, so the
# string-sorted branch appears to always be taken. It is kept verbatim because changing
# the field order could alter existing hash values -- confirm before touching.
function internal_type_structure(::Type{T}, ::StructTypes.ArrayType) where {T<:Tuple}
    isconcretetype(T) || return nothing
    names = T <: StructTypes.OrderedStruct ? fieldnames(T) : sorted_field_names(T)
    return names, map(name -> fieldtype(T, name), names)
end

# homogeneous tuples only need their element type
internal_type_structure(::Type{T}, ::StructTypes.ArrayType) where {T<:NTuple} = eltype(T)

# Hash a tuple like a struct: each position is treated as a field, in positional order,
# inside a nested hash.
function stable_hash_helper(x::Tuple, hash_state, context, ::StructTypes.ArrayType)
    inner = start_nested_hash!(hash_state)
    positions = fieldnames(typeof(x))
    inner = hash_fields(x, positions, inner, context)
    return end_nested_hash!(hash_state, inner)
end

#####
##### DictType
#####

# dictionaries are treated as unordered
function is_ordered(x::AbstractDict)
    return false
end

# the mandatory structure of a dictionary-like type is its key type and value type
function internal_type_structure(::Type{T}, ::StructTypes.DictType) where {T}
    return (keytype(T), valtype(T))
end

# `Pair` does not implement `keytype` or `valtype`, so read them off the type parameters
function internal_type_structure(::Type{<:Pair{K,V}}, ::StructTypes.DictType) where {K,V}
    return (K, V)
end

# a `Pair` is hashed as an ordered struct
function hash_trait(::Pair)
    return StructTypes.OrderedStruct()
end

# Hash a dictionary-like value: its key-value pairs are hashed inside a nested hash. When
# `x` is not `is_ordered`, the pairs are first collected and sorted by
# `hash_sort_by` applied to their keys.
function stable_hash_helper(x, hash_state, context, ::StructTypes.DictType)
    nested_hash_state = start_nested_hash!(hash_state)

    # compute the pairs exactly once, under a name that does not shadow `Base.pairs`
    kv_pairs = if is_ordered(x)
        StructTypes.keyvaluepairs(x)
    else
        sort!(collect(StructTypes.keyvaluepairs(x)); by=hash_sort_by ∘ first)
    end
    transform = transformer(eltype(x), context)::Transformer
    # rebind the nested state to the returned state, as the `ArrayType` method does
    nested_hash_state = hash_elements(kv_pairs, nested_hash_state, context, transform)

    return end_nested_hash!(hash_state, nested_hash_state)
end

#####
##### CustomStruct
#####

# A `CustomStruct` is hashed through its lowered representation. Since `lower` could
# return anything, the type of the lowered value has to be hashed for every instance.
function stable_hash_helper(x, hash_state, context, ::StructTypes.CustomStruct)
    lowered = StructTypes.lower(x)
    return hash_type_and_value(lowered, hash_state, context)
end

#####
##### Basic data types
#####

# symbols get a fixed type label and are hashed by their `String` conversion
function transform_type(::Type{Symbol})
    return "Base.Symbol"
end
transformer(::Type{<:Symbol}, ::HashVersion{4}) = Transformer(String; hoist_type=true)

# Hash a string-like value inside a nested hash; values that are not already an
# `AbstractString` are converted with `string` first.
function stable_hash_helper(str, hash_state, context, ::StructTypes.StringType)
    inner = start_nested_hash!(hash_state)
    text = str isa AbstractString ? str : string(str)
    update_hash!(inner, text)
    return end_nested_hash!(hash_state, inner)
end

# Hash a number after converting it to the type `StructTypes.numbertype` reports for it.
function stable_hash_helper(number::T, hash_state, context,
                            ::StructTypes.NumberType) where {T}
    converted = StructTypes.numbertype(T)(number)
    return update_hash!(hash_state, converted)
end

# Hash a boolean-like value after converting it to `Bool`.
function stable_hash_helper(bool, hash_state, context, ::StructTypes.BoolType)
    as_bool = Bool(bool)
    return update_hash!(hash_state, as_bool)
end

# Null types are encoded purely by their type hash: the value itself adds nothing to the
# hash state.
function transform_type(::Type{Missing})
    return "Base.Missing"
end
function transform_type(::Type{Nothing})
    return "Base.Nothing"
end
function transform_type_by_trait(::Type{T}, ::StructTypes.NullType) where {T}
    return nameof_string(T)
end
function stable_hash_helper(_, hash_state, context, ::StructTypes.NullType)
    return hash_state
end

# Singleton types are likewise encoded purely by their type hash.
function transform_type_by_trait(::Type{T}, ::StructTypes.SingletonType) where {T}
    return nameof_string(T)
end
function stable_hash_helper(_, hash_state, context, ::StructTypes.SingletonType)
    return hash_state
end

#####
##### Regex
#####

# NOTE: we don't have great options for keeping the next few functions from depending on
# some internals of Base julia
#
# The underlying problem is that there is no public API for inspecting regex flags or the
# regex pattern of a regex.
#
# We can:
#
# 1. Use the string representation of regex: this can change in non-breaking Julia releases
# 2. Directly read private fields of Regex and use flag defaults to compute what relevant
#    regex flags have been marked (e.g. `r"a"i` has the `i` flag marked).
#
# An added complication is that the default options for PCRE change across Julia versions,
# so we can't just use all the bytes of `compile_options`; this will break compatibility
# across julia versions.
#
# It seems more likely that the string representation will change than that the fields and
# private bit masks will change; so for now, the second approach is taken.

# the pattern string of a regex, read from its (private) `pattern` field
function pattern_(x::Regex)::String
    return x.pattern
end

function compile_options_(x::Regex)::UInt32
    # NOTE: this mask kept the code from breaking on Julia 1.6, and it cannot be changed
    # now, since we don't want the hash to change. Furthermore, the default flags could
    # conceivably change in a future julia version. In our tests, we verify that this mask
    # properly captures the state of all documented regex flags.
    relevant_bits = ~Base.DEFAULT_COMPILER_OPTS | Base.PCRE.UCP
    return relevant_bits & x.compile_options
end

# NOTE: we can safely hoist here because
# 1. the input type is concrete
# 2. all output types are primitive, concrete types
function transformer(::Type{Regex}, ::HashVersion{4})
    # Only the pattern and the masked compile options are hashed; this skips the compiled
    # regex, which is stored as a Ptr{Nothing}.
    regex_parts(x) = (pattern_(x), compile_options_(x))
    return Transformer(regex_parts; hoist_type=true)
end
