cat-bookmarker/deps/jason/lib/decoder.ex

705 lines
24 KiB
Elixir
Raw Normal View History

2024-03-10 18:52:04 +00:00
defmodule Jason.DecodeError do
  @moduledoc """
  Exception describing where and why decoding a JSON document failed.

  Fields:

    * `:position` - byte offset into `:data` at which the problem was detected
    * `:token` - the offending byte sequence, or `nil` when the error concerns
      a single byte or the end of the input
    * `:data` - the complete input that was being decoded
  """

  @type t :: %__MODULE__{position: integer, token: String.t | nil, data: String.t}

  defexception [:position, :token, :data]

  # A token is reported verbatim; it takes precedence over the other forms.
  def message(%{position: position, token: token}) when is_binary(token) do
    "unexpected sequence at position #{position}: #{inspect token}"
  end

  # `>=` rather than `==`: a position beyond the last byte is also "end of
  # input". With `==` such a struct fell through to the clause below, where
  # `:binary.at/2` raised while the message was being built.
  def message(%{position: position, data: data})
      when is_integer(position) and position >= byte_size(data) do
    "unexpected end of input at position #{position}"
  end

  # Single unexpected byte: always shown in hex, plus its printable form if any.
  def message(%{position: position, data: data}) do
    byte = :binary.at(data, position)
    str = <<byte>>

    if String.printable?(str) do
      "unexpected byte at position #{position}: " <>
        "#{inspect byte, base: :hex} (#{inspect str})"
    else
      "unexpected byte at position #{position}: " <>
        "#{inspect byte, base: :hex}"
    end
  end
end
defmodule Jason.Decoder do
@moduledoc false
import Bitwise
alias Jason.{DecodeError, Codegen}
import Codegen, only: [bytecase: 2, bytecase: 3]
import Record
@dialyzer :no_improper_lists
# @compile :native
# We use integers instead of atoms to take advantage of the jump table
# optimization
# Each tag sits on top of the parser stack and tells `continue/6` which
# function receives the value that has just been decoded.
# Top-level value: handed to `terminate/6`, which only accepts trailing
# whitespace.
@terminate 0
# Array element: handed to `array/6`.
@array 1
# Object key string: handed to `key/6`, which then expects a `:`.
@key 2
# Object member value: handed to `object/6`.
@object 3
# Private record bundling the four decoding callbacks that `parse/2` selects
# from its options.
defrecordp :decode, [keys: nil, strings: nil, objects: nil, floats: nil]
# Decodes the binary `data` as one JSON document.
#
# `opts` must be a map carrying the `:keys`, `:strings`, `:floats` and
# `:objects` settings understood by the `*_decode_function/1` helpers.
#
# Returns `{:ok, term}` on success or `{:error, %DecodeError{}}` when the
# parser aborts by throwing a position or token tuple.
def parse(data, opts) when is_binary(data) do
  keys_fun = key_decode_function(opts)
  strings_fun = string_decode_function(opts)
  floats_fun = float_decode_function(opts)
  objects_fun = object_decode_function(opts)

  callbacks =
    decode(keys: keys_fun, strings: strings_fun, objects: objects_fun, floats: floats_fun)

  try do
    value(data, data, 0, [@terminate], callbacks)
  catch
    {:token, token, position} ->
      {:error, %DecodeError{token: token, position: position, data: data}}

    {:position, position} ->
      {:error, %DecodeError{position: position, data: data}}
  else
    decoded ->
      {:ok, decoded}
  end
end
# Turns the `:keys` option into the one-argument function applied to every
# object key: identity, atom conversion, or a caller-supplied function.
defp key_decode_function(%{keys: :strings}), do: fn key -> key end
defp key_decode_function(%{keys: :atoms}), do: &String.to_atom/1
defp key_decode_function(%{keys: :atoms!}), do: &String.to_existing_atom/1
defp key_decode_function(%{keys: custom}) when is_function(custom, 1), do: custom
# Turns the `:strings` option into the function applied to every unescaped
# string slice: keep the slice as is, or copy it with `:binary.copy/1`.
defp string_decode_function(%{strings: :reference}), do: fn slice -> slice end
defp string_decode_function(%{strings: :copy}), do: &:binary.copy/1
# Turns the `:objects` option into the function that builds the final object
# from the accumulated `{key, value}` pairs (collected in reverse input order).
defp object_decode_function(%{objects: :maps}) do
  fn pairs -> :maps.from_list(pairs) end
end

defp object_decode_function(%{objects: :ordered_objects}) do
  fn pairs -> Jason.OrderedObject.new(:lists.reverse(pairs)) end
end
# Turns the `:floats` option into a three-argument function
# `(text, token, position)`: `text` is what gets converted, while `token` and
# `position` are only used to report a token error if the conversion fails.
defp float_decode_function(%{floats: :native}) do
  fn text, token, position ->
    try do
      :erlang.binary_to_float(text)
    catch
      :error, :badarg -> token_error(token, position)
    end
  end
end

defp float_decode_function(%{floats: :decimals}) do
  fn text, token, position ->
    # silence xref warning
    decimal_module = Decimal

    try do
      decimal_module.new(text)
    rescue
      Decimal.Error -> token_error(token, position)
    end
  end
end
# Decodes one JSON value at the head of `data`.
#
# `original` is the full input, `skip` the byte offset of `data` inside it,
# `stack` the pending continuation tags and `decode` the callback record.
# Leading whitespace is skipped; the first significant byte selects the
# specialised parser.
defp value(data, original, skip, stack, decode) do
  bytecase data do
    _ in '\s\n\t\r', tail ->
      value(tail, original, skip + 1, stack, decode)

    _ in '0', tail ->
      number_zero(tail, original, skip, stack, decode, 1)

    _ in '123456789', tail ->
      number(tail, original, skip, stack, decode, 1)

    _ in '-', tail ->
      number_minus(tail, original, skip, stack, decode)

    _ in '"', tail ->
      string(tail, original, skip + 1, stack, decode, 0)

    _ in '[', tail ->
      array(tail, original, skip + 1, stack, decode)

    _ in '{', tail ->
      object(tail, original, skip + 1, stack, decode)

    _ in ']', tail ->
      empty_array(tail, original, skip + 1, stack, decode)

    _ in 't', tail ->
      case tail do
        <<"rue", after_literal::bits>> ->
          continue(after_literal, original, skip + 4, stack, decode, true)

        <<_::bits>> ->
          error(original, skip)
      end

    _ in 'f', tail ->
      case tail do
        <<"alse", after_literal::bits>> ->
          continue(after_literal, original, skip + 5, stack, decode, false)

        <<_::bits>> ->
          error(original, skip)
      end

    _ in 'n', tail ->
      case tail do
        <<"ull", after_literal::bits>> ->
          continue(after_literal, original, skip + 4, stack, decode, nil)

        <<_::bits>> ->
          error(original, skip)
      end

    _, tail ->
      error(tail, original, skip + 1, stack, decode)

    <<_::bits>> ->
      error(original, skip)
  end
end
# Called right after a leading `-`; `skip` still points at the minus sign, so
# the token length passed on starts at 2 (sign plus first digit).
defp number_minus(<<?0, tail::bits>>, original, skip, stack, decode) do
  number_zero(tail, original, skip, stack, decode, 2)
end

defp number_minus(<<digit, tail::bits>>, original, skip, stack, decode)
     when digit in ?1..?9 do
  number(tail, original, skip, stack, decode, 2)
end

# Anything else after the sign is an error at the byte following it.
defp number_minus(<<_tail::bits>>, original, skip, _stack, _decode) do
  error(original, skip + 1)
end
# Maximum byte length (leading `-` included) of an integer token that
# `number/6` will convert; longer tokens are rejected with a token error.
# Configurable through `config :jason, decoding_integer_digit_limit: n`.
#
# `Application.compile_env/3` is a macro, so its availability must be probed
# with `macro_exported?/3`. `function_exported?/3` does not see macros, which
# made the check always false and silently selected the `get_env` fallback.
if macro_exported?(Application, :compile_env, 3) do
  @integer_digit_limit Application.compile_env(:jason, :decoding_integer_digit_limit, 1024)
else
  # use apply to avoid warnings in newer Elixir versions
  @integer_digit_limit apply(Application, :get_env, [:jason, :decoding_integer_digit_limit, 1024])
end
# Scans the integer part of a number. `skip` is the offset of the token start
# and `len` the number of bytes consumed so far.
defp number(<<digit, tail::bits>>, original, skip, stack, decode, len)
     when digit in ?0..?9 do
  number(tail, original, skip, stack, decode, len + 1)
end

defp number(<<?., tail::bits>>, original, skip, stack, decode, len) do
  number_frac(tail, original, skip, stack, decode, len + 1)
end

# Exponent without a fraction: keep the mantissa so ".0" can be inserted later.
defp number(<<marker, tail::bits>>, original, skip, stack, decode, len)
     when marker == ?e or marker == ?E do
  mantissa = binary_part(original, skip, len)
  number_exp_copy(tail, original, skip + len + 1, stack, decode, mantissa)
end

# End of the digits: a plain integer, subject to the configured length limit.
defp number(<<tail::bits>>, original, skip, stack, decode, len) do
  digits = binary_part(original, skip, len)

  if byte_size(digits) > @integer_digit_limit do
    token_error(digits, skip)
  end

  continue(tail, original, skip + len, stack, decode, String.to_integer(digits))
end
# First byte after the decimal point: at least one digit is mandatory.
defp number_frac(<<digit, tail::bits>>, original, skip, stack, decode, len)
     when digit in ?0..?9 do
  number_frac_cont(tail, original, skip, stack, decode, len + 1)
end

defp number_frac(<<_tail::bits>>, original, skip, _stack, _decode, len) do
  error(original, skip + len)
end

# Remaining fraction digits, optionally followed by an exponent.
defp number_frac_cont(<<digit, tail::bits>>, original, skip, stack, decode, len)
     when digit in ?0..?9 do
  number_frac_cont(tail, original, skip, stack, decode, len + 1)
end

defp number_frac_cont(<<marker, tail::bits>>, original, skip, stack, decode, len)
     when marker == ?e or marker == ?E do
  number_exp(tail, original, skip, stack, decode, len + 1)
end

# End of the number: the raw slice is both the text to convert and the token
# reported on failure.
defp number_frac_cont(<<tail::bits>>, original, skip, stack, decode, len) do
  literal = binary_part(original, skip, len)
  decode(floats: to_float) = decode
  continue(tail, original, skip + len, stack, decode, to_float.(literal, literal, skip))
end
# Exponent of a number that already has a fraction part. `skip` stays at the
# token start while `len` keeps growing, so the token can be sliced in one go.
defp number_exp(<<digit, tail::bits>>, original, skip, stack, decode, len)
     when digit in ?0..?9 do
  number_exp_cont(tail, original, skip, stack, decode, len + 1)
end

defp number_exp(<<sign, tail::bits>>, original, skip, stack, decode, len)
     when sign == ?+ or sign == ?- do
  number_exp_sign(tail, original, skip, stack, decode, len + 1)
end

defp number_exp(<<_tail::bits>>, original, skip, _stack, _decode, len) do
  error(original, skip + len)
end

# A sign must be followed by at least one digit.
defp number_exp_sign(<<digit, tail::bits>>, original, skip, stack, decode, len)
     when digit in ?0..?9 do
  number_exp_cont(tail, original, skip, stack, decode, len + 1)
end

defp number_exp_sign(<<_tail::bits>>, original, skip, _stack, _decode, len) do
  error(original, skip + len)
end

# Remaining exponent digits, then conversion through the floats callback.
defp number_exp_cont(<<digit, tail::bits>>, original, skip, stack, decode, len)
     when digit in ?0..?9 do
  number_exp_cont(tail, original, skip, stack, decode, len + 1)
end

defp number_exp_cont(<<tail::bits>>, original, skip, stack, decode, len) do
  literal = binary_part(original, skip, len)
  decode(floats: to_float) = decode
  continue(tail, original, skip + len, stack, decode, to_float.(literal, literal, skip))
end
# Exponent of a number written without a fraction part (e.g. `1e5`).
# `prefix` is the mantissa text and `skip` points just past the `e`/`E`;
# `len` (in the /7 functions) counts the exponent bytes consumed so far.
defp number_exp_copy(<<digit, tail::bits>>, original, skip, stack, decode, prefix)
     when digit in ?0..?9 do
  number_exp_cont(tail, original, skip, stack, decode, prefix, 1)
end

defp number_exp_copy(<<sign, tail::bits>>, original, skip, stack, decode, prefix)
     when sign == ?+ or sign == ?- do
  number_exp_sign(tail, original, skip, stack, decode, prefix, 1)
end

defp number_exp_copy(<<_tail::bits>>, original, skip, _stack, _decode, _prefix) do
  error(original, skip)
end

# A sign must be followed by at least one digit.
defp number_exp_sign(<<digit, tail::bits>>, original, skip, stack, decode, prefix, len)
     when digit in ?0..?9 do
  number_exp_cont(tail, original, skip, stack, decode, prefix, len + 1)
end

defp number_exp_sign(<<_tail::bits>>, original, skip, _stack, _decode, _prefix, len) do
  error(original, skip + len)
end

defp number_exp_cont(<<digit, tail::bits>>, original, skip, stack, decode, prefix, len)
     when digit in ?0..?9 do
  number_exp_cont(tail, original, skip, stack, decode, prefix, len + 1)
end

# End of the exponent. The text handed to the floats callback gets ".0"
# inserted after the mantissa, while the token reported on failure is the
# untouched input slice starting at the mantissa.
defp number_exp_cont(<<tail::bits>>, original, skip, stack, decode, prefix, len) do
  exponent = binary_part(original, skip, len)
  mantissa_size = byte_size(prefix)
  # one extra byte for the `e`/`E` between mantissa and exponent
  token_start = skip - mantissa_size - 1
  raw_token = binary_part(original, token_start, mantissa_size + len + 1)
  decode(floats: to_float) = decode
  float = to_float.(prefix <> ".0e" <> exponent, raw_token, token_start)
  continue(tail, original, skip + len, stack, decode, float)
end
# Handles a number whose integer part is exactly `0` or `-0`.
#
# `skip` is the offset of the token start and `len` the bytes consumed so far
# (1 for `0`, 2 for `-0`). A leading zero may only be followed by a fraction,
# an exponent, or the end of the number.
defp number_zero(<<?., rest::bits>>, original, skip, stack, decode, len) do
  number_frac(rest, original, skip, stack, decode, len + 1)
end

defp number_zero(<<e, rest::bits>>, original, skip, stack, decode, len) when e in 'eE' do
  # Slice the mantissa from the input instead of hard-coding "0": for `-0e1`
  # this keeps the minus sign (so the result is -0.0, consistent with `-0.0`)
  # and lets the exponent parser compute the correct token start for errors.
  prefix = binary_part(original, skip, len)
  number_exp_copy(rest, original, skip + len + 1, stack, decode, prefix)
end

# A bare zero is always the integer 0, with or without a sign.
defp number_zero(<<rest::bits>>, original, skip, stack, decode, len) do
  continue(rest, original, skip + len, stack, decode, 0)
end
@compile {:inline, array: 5}
# Opens an array: pushes the array tag plus an empty element accumulator and
# parses the first element (or the closing bracket) as a value.
defp array(rest, original, skip, stack, decode) do
  array_frame = [@array, [] | stack]
  value(rest, original, skip, array_frame, decode)
end
# Reached when `]` appears where a value was expected (`skip` is already past
# it). Only valid directly after `[`, i.e. with a still-empty accumulator on
# top of the stack; the result is then the empty list.
defp empty_array(<<rest::bits>>, original, skip, [@array, [] | outer], decode) do
  continue(rest, original, skip, outer, decode, [])
end

defp empty_array(<<_rest::bits>>, original, skip, _stack, _decode) do
  error(original, skip - 1)
end
# Runs after an array element `value` has been decoded; the element
# accumulator (in reverse order) is on top of `stack`.
defp array(data, original, skip, stack, decode, value) do
  bytecase data do
    _ in '\s\n\t\r', tail ->
      array(tail, original, skip + 1, stack, decode, value)

    _ in ']', tail ->
      [elements | outer] = stack
      # reverse the accumulator and append the last element in one pass
      finished = :lists.reverse(elements, [value])
      continue(tail, original, skip + 1, outer, decode, finished)

    _ in ',', tail ->
      [elements | outer] = stack
      value(tail, original, skip + 1, [@array, [value | elements] | outer], decode)

    _, _tail ->
      error(original, skip)

    <<_::bits>> ->
      empty_error(original, skip)
  end
end
@compile {:inline, object: 5}
# Opens an object: pushes an empty pair accumulator and expects a key next.
defp object(rest, original, skip, stack, decode) do
  object_frame = [[] | stack]
  key(rest, original, skip, object_frame, decode)
end
# Runs after an object member `value` has been decoded; the pending key and
# the pair accumulator are the two topmost stack entries.
defp object(data, original, skip, stack, decode, value) do
  bytecase data do
    _ in '\s\n\t\r', tail ->
      object(tail, original, skip + 1, stack, decode, value)

    _ in '}', tail ->
      [raw_key, pairs | outer] = stack
      decode(keys: to_key, objects: build_object) = decode
      members = [{to_key.(raw_key), value} | pairs]
      continue(tail, original, skip + 1, outer, decode, build_object.(members))

    _ in ',', tail ->
      [raw_key, pairs | outer] = stack
      decode(keys: to_key) = decode
      members = [{to_key.(raw_key), value} | pairs]
      key(tail, original, skip + 1, [members | outer], decode)

    _, _tail ->
      error(original, skip)

    <<_::bits>> ->
      empty_error(original, skip)
  end
end
# Expects an object key (a string) or, for an object without members so far,
# the closing brace.
defp key(data, original, skip, stack, decode) do
  bytecase data do
    _ in '\s\n\t\r', tail ->
      key(tail, original, skip + 1, stack, decode)

    _ in '}', tail ->
      case stack do
        # `}` is only accepted while the pair accumulator is still empty
        [[] | outer] ->
          decode(objects: build_object) = decode
          continue(tail, original, skip + 1, outer, decode, build_object.([]))

        _ ->
          error(original, skip)
      end

    _ in '"', tail ->
      string(tail, original, skip + 1, [@key | stack], decode, 0)

    _, _tail ->
      error(original, skip)

    <<_::bits>> ->
      empty_error(original, skip)
  end
end
# Runs after a key string (`value`) has been decoded: skips whitespace,
# requires a `:` and then parses the member value with the key kept on the
# stack under the object tag.
defp key(data, original, skip, stack, decode, value) do
  bytecase data do
    _ in '\s\n\t\r', tail ->
      key(tail, original, skip + 1, stack, decode, value)

    _ in ':', tail ->
      member_frame = [@object, value | stack]
      value(tail, original, skip + 1, member_frame, decode)

    _, _tail ->
      error(original, skip)

    <<_::bits>> ->
      empty_error(original, skip)
  end
end
# TODO: check if this approach would be faster:
# https://git.ninenines.eu/cowlib.git/tree/src/cow_ws.erl#n469
# http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
# Scans a string body in which no escape sequence has been seen yet.
# `skip` is the offset of the first content byte and `len` the number of
# content bytes accepted so far, so the result can be sliced straight out of
# `original` without building an accumulator.
# NOTE(review): the `128` argument presumably restricts the generated byte
# clauses to values below 128 - confirm in Jason.Codegen.bytecase/3.
defp string(data, original, skip, stack, decode, len) do
bytecase data, 128 do
# Closing quote: slice the content, run it through the `:strings` callback.
_ in '"', rest ->
decode(strings: string_decode) = decode
string = string_decode.(binary_part(original, skip, len))
continue(rest, original, skip + len + 1, stack, decode, string)
# First escape: the content so far becomes the accumulator for string/7.
_ in '\\', rest ->
part = binary_part(original, skip, len)
escape(rest, original, skip + len, stack, decode, part)
# Bytes 0x00..0x1F are rejected inside strings.
_ in unquote(0x00..0x1F), _rest ->
error(original, skip + len)
_, rest ->
string(rest, original, skip, stack, decode, len + 1)
# Multi-byte UTF-8 code points: advance by their encoded size (2, 3 or 4
# bytes). The clause order matters because the guards are cumulative.
<<char::utf8, rest::bits>> when char <= 0x7FF ->
string(rest, original, skip, stack, decode, len + 2)
<<char::utf8, rest::bits>> when char <= 0xFFFF ->
string(rest, original, skip, stack, decode, len + 3)
<<_char::utf8, rest::bits>> ->
string(rest, original, skip, stack, decode, len + 4)
# Whatever is left (end of input or bytes that are not valid UTF-8).
<<_::bits>> ->
empty_error(original, skip + len)
end
end
# Scans a string body after at least one escape sequence has been decoded.
# `acc` is the iodata built so far; `skip`/`len` delimit the raw chunk of
# `original` scanned since the last escape.
defp string(data, original, skip, stack, decode, acc, len) do
  bytecase data, 128 do
    _ in '"', tail ->
      final_chunk = binary_part(original, skip, len)
      decoded = IO.iodata_to_binary([acc | final_chunk])
      continue(tail, original, skip + len + 1, stack, decode, decoded)

    _ in '\\', tail ->
      chunk = binary_part(original, skip, len)
      escape(tail, original, skip + len, stack, decode, [acc | chunk])

    _ in unquote(0x00..0x1F), _tail ->
      error(original, skip + len)

    _, tail ->
      string(tail, original, skip, stack, decode, acc, len + 1)

    <<codepoint::utf8, tail::bits>> when codepoint <= 0x7FF ->
      string(tail, original, skip, stack, decode, acc, len + 2)

    <<codepoint::utf8, tail::bits>> when codepoint <= 0xFFFF ->
      string(tail, original, skip, stack, decode, acc, len + 3)

    <<_codepoint::utf8, tail::bits>> ->
      string(tail, original, skip, stack, decode, acc, len + 4)

    <<_::bits>> ->
      empty_error(original, skip + len)
  end
end
# Decodes the character following a backslash. `skip` points at the backslash
# itself, so a two-byte escape resumes scanning at `skip + 2`; `\u` escapes
# are delegated to `escapeu/6`.
defp escape(data, original, skip, stack, decode, acc) do
  bytecase data do
    _ in 'b', tail ->
      string(tail, original, skip + 2, stack, decode, [acc, ?\b], 0)

    _ in 't', tail ->
      string(tail, original, skip + 2, stack, decode, [acc, ?\t], 0)

    _ in 'n', tail ->
      string(tail, original, skip + 2, stack, decode, [acc, ?\n], 0)

    _ in 'f', tail ->
      string(tail, original, skip + 2, stack, decode, [acc, ?\f], 0)

    _ in 'r', tail ->
      string(tail, original, skip + 2, stack, decode, [acc, ?\r], 0)

    _ in '"', tail ->
      string(tail, original, skip + 2, stack, decode, [acc, ?\"], 0)

    _ in '/', tail ->
      string(tail, original, skip + 2, stack, decode, [acc, ?/], 0)

    _ in '\\', tail ->
      string(tail, original, skip + 2, stack, decode, [acc, ?\\], 0)

    _ in 'u', tail ->
      escapeu(tail, original, skip, stack, decode, acc)

    _, _tail ->
      error(original, skip + 1)

    <<_::bits>> ->
      empty_error(original, skip)
  end
end
# Compile-time helpers that generate the `case` clauses used to decode
# `\uXXXX` escapes. Two hex digits are read as one 16-bit integer
# (`<<int::16>>`), so every clause matches a pair of ASCII hex digits at once.
defmodule Unescape do
@moduledoc false
import Bitwise
# All ASCII characters that are valid hex digits, upper and lower case.
@digits Enum.concat([?0..?9, ?A..?F, ?a..?f])
# Returns `{int, byte}` for every combination of `chars1` x `chars2`: `int` is
# the two ASCII characters packed into 16 bits, `byte` the value they spell in
# hexadecimal.
def unicode_escapes(chars1 \\ @digits, chars2 \\ @digits) do
for char1 <- chars1, char2 <- chars2 do
{(char1 <<< 8) + char2, integer8(char1, char2)}
end
end
# Combines two hex digit characters into one byte value.
defp integer8(char1, char2) do
(integer4(char1) <<< 4) + integer4(char2)
end
# Value (0..15) of a single hex digit character.
defp integer4(char) when char in ?0..?9, do: char - ?0
defp integer4(char) when char in ?A..?F, do: char - ?A + 10
defp integer4(char) when char in ?a..?f, do: char - ?a + 10
# Catch-all clause appended to every generated `case`: reports `len` bytes of
# the input starting at `skip` as an invalid token.
defp token_error_clause(original, skip, len) do
quote do
_ ->
token_error(unquote_splicing([original, skip, len]))
end
end
# Expands to a `case` over the first two hex digits (`int`) of an escape whose
# last two digits have already been decoded into `last`.
defmacro escapeu_first(int, last, rest, original, skip, stack, decode, acc) do
clauses = escapeu_first_clauses(last, rest, original, skip, stack, decode, acc)
quote location: :keep do
case unquote(int) do
unquote(clauses ++ token_error_clause(original, skip, 6))
end
end
end
# One clause per high byte, except 0xDC..0xDF: an escape in the UTF-16 low
# surrogate range may not appear on its own and falls through to the token
# error clause.
defp escapeu_first_clauses(last, rest, original, skip, stack, decode, acc) do
for {int, first} <- unicode_escapes(),
not (first in 0xDC..0xDF) do
escapeu_first_clause(int, first, last, rest, original, skip, stack, decode, acc)
end
end
# High byte 0xD8..0xDB (UTF-16 high surrogate range): compute the upper part
# of the code point and require a second `\u` escape via `escape_surrogate/7`.
defp escapeu_first_clause(int, first, last, rest, original, skip, stack, decode, acc)
when first in 0xD8..0xDB do
hi =
quote bind_quoted: [first: first, last: last] do
0x10000 + ((((first &&& 0x03) <<< 8) + last) <<< 10)
end
args = [rest, original, skip, stack, decode, acc, hi]
[clause] =
quote location: :keep do
unquote(int) -> escape_surrogate(unquote_splicing(args))
end
clause
end
# High byte 0x00: code points up to 0xFF, encoded as one or two UTF-8 bytes
# depending on `last`, which is only known at run time. `skip + 6` steps over
# the six bytes of `\uXXXX`.
defp escapeu_first_clause(int, first, last, rest, original, skip, stack, decode, acc)
when first <= 0x00 do
skip = quote do: (unquote(skip) + 6)
acc =
quote bind_quoted: [acc: acc, first: first, last: last] do
if last <= 0x7F do
# 0???????
[acc, last]
else
# 110xxx?? 10??????
byte1 = ((0b110 <<< 5) + (first <<< 2)) + (last >>> 6)
byte2 = (0b10 <<< 6) + (last &&& 0b111111)
[acc, byte1, byte2]
end
end
args = [rest, original, skip, stack, decode, acc, 0]
[clause] =
quote location: :keep do
unquote(int) -> string(unquote_splicing(args))
end
clause
end
# High byte 0x01..0x07: code points up to 0x7FF, always two UTF-8 bytes.
defp escapeu_first_clause(int, first, last, rest, original, skip, stack, decode, acc)
when first <= 0x07 do
skip = quote do: (unquote(skip) + 6)
acc =
quote bind_quoted: [acc: acc, first: first, last: last] do
# 110xxx?? 10??????
byte1 = ((0b110 <<< 5) + (first <<< 2)) + (last >>> 6)
byte2 = (0b10 <<< 6) + (last &&& 0b111111)
[acc, byte1, byte2]
end
args = [rest, original, skip, stack, decode, acc, 0]
[clause] =
quote location: :keep do
unquote(int) -> string(unquote_splicing(args))
end
clause
end
# Remaining high bytes (the surrogate ranges were handled or filtered out
# before this clause is reached): three UTF-8 bytes.
defp escapeu_first_clause(int, first, last, rest, original, skip, stack, decode, acc)
when first <= 0xFF do
skip = quote do: (unquote(skip) + 6)
acc =
quote bind_quoted: [acc: acc, first: first, last: last] do
# 1110xxxx 10xxxx?? 10??????
byte1 = (0b1110 <<< 4) + (first >>> 4)
byte2 = ((0b10 <<< 6) + ((first &&& 0b1111) <<< 2)) + (last >>> 6)
byte3 = (0b10 <<< 6) + (last &&& 0b111111)
[acc, byte1, byte2, byte3]
end
args = [rest, original, skip, stack, decode, acc, 0]
[clause] =
quote location: :keep do
unquote(int) -> string(unquote_splicing(args))
end
clause
end
# Expands to a `case` mapping the last two hex digits (`int`) to their byte
# value, or a token error for anything that is not a pair of hex digits.
defmacro escapeu_last(int, original, skip) do
clauses = escapeu_last_clauses()
quote location: :keep do
case unquote(int) do
unquote(clauses ++ token_error_clause(original, skip, 6))
end
end
end
defp escapeu_last_clauses() do
for {int, last} <- unicode_escapes() do
[clause] =
quote do
unquote(int) -> unquote(last)
end
clause
end
end
# Expands to a `case` over the first two hex digits of the second escape of a
# surrogate pair. On failure the whole 12-byte pair is reported as the token.
defmacro escapeu_surrogate(int, last, rest, original, skip, stack, decode, acc,
hi) do
clauses = escapeu_surrogate_clauses(last, rest, original, skip, stack, decode, acc, hi)
quote location: :keep do
case unquote(int) do
unquote(clauses ++ token_error_clause(original, skip, 12))
end
end
end
# Only high bytes 0xDC..0xDF (digit pairs `D`/`d` followed by `C`..`F`) are
# accepted as the second half of a pair.
defp escapeu_surrogate_clauses(last, rest, original, skip, stack, decode, acc, hi) do
digits1 = 'Dd'
digits2 = Stream.concat([?C..?F, ?c..?f])
for {int, first} <- unicode_escapes(digits1, digits2) do
escapeu_surrogate_clause(int, first, last, rest, original, skip, stack, decode, acc, hi)
end
end
# Adds the low 10 bits to `hi`, appends the code point UTF-8 encoded to the
# accumulator and resumes string scanning after both escapes (`skip + 12`).
defp escapeu_surrogate_clause(int, first, last, rest, original, skip, stack, decode, acc, hi) do
skip = quote do: unquote(skip) + 12
acc =
quote bind_quoted: [acc: acc, first: first, last: last, hi: hi] do
lo = ((first &&& 0x03) <<< 8) + last
[acc | <<(hi + lo)::utf8>>]
end
args = [rest, original, skip, stack, decode, acc, 0]
[clause] =
quote do
unquote(int) ->
string(unquote_splicing(args))
end
clause
end
end
# Decodes the four hex digits after `\u`, read as two 16-bit integers (two
# ASCII digits each). `skip` still points at the backslash. The low byte is
# resolved first, then the generated `case` on the high digits continues.
defp escapeu(<<high_digits::16, low_digits::16, tail::bits>>, original, skip, stack, decode, acc) do
  require Unescape
  low_byte = escapeu_last(low_digits, original, skip)
  Unescape.escapeu_first(high_digits, low_byte, tail, original, skip, stack, decode, acc)
end

# Fewer than four bytes remain after `\u`.
defp escapeu(<<_tail::bits>>, original, skip, _stack, _decode, _acc) do
  empty_error(original, skip)
end
# @compile {:inline, escapeu_last: 3}
# Converts the last two hex digits of a `\u` escape (packed in `int` as a
# 16-bit integer) into their byte value. Wrapping the macro in a function
# means the generated `case` is emitted once here rather than at every call.
# Anything that is not a pair of hex digits ends in the macro's catch-all
# clause, which reports 6 bytes of `original` starting at `skip` as a token
# error.
defp escapeu_last(int, original, skip) do
require Unescape
Unescape.escapeu_last(int, original, skip)
end
# Called after a `\u` escape in the high-surrogate range: the input must
# continue with a second `\uXXXX` escape. `skip` still points at the first
# backslash and `hi` is the partial code point computed from the first escape.
defp escape_surrogate(
       <<?\\, ?u, high_digits::16, low_digits::16, tail::bits>>,
       original,
       skip,
       stack,
       decode,
       acc,
       hi
     ) do
  require Unescape
  low_byte = escapeu_last(low_digits, original, skip + 6)
  Unescape.escapeu_surrogate(high_digits, low_byte, tail, original, skip, stack, decode, acc, hi)
end

# No second escape follows: error at the byte right after the first escape.
defp escape_surrogate(<<_tail::bits>>, original, skip, _stack, _decode, _acc, _hi) do
  error(original, skip + 6)
end
# Aborts decoding; `skip` has already been advanced past the offending byte,
# so the reported position is one byte back.
defp error(<<_tail::bits>>, _original, skip, _stack, _decode) do
  position = skip - 1
  throw({:position, position})
end
# Aborts decoding at byte offset `skip`; used by the catch-all clauses that
# run when no further byte could be matched.
defp empty_error(_original, skip), do: throw({:position, skip})
@compile {:inline, error: 2, token_error: 2, token_error: 3}
# Aborts decoding because of the single byte at offset `skip`.
defp error(_original, skip), do: throw({:position, skip})

# Aborts decoding because of the byte sequence `token` found at `position`.
defp token_error(token, position), do: throw({:token, token, position})

# Same, but slices the offending `len` bytes out of the binary passed as
# `token` (the full input at the call sites) starting at `position`.
defp token_error(token, position, len) do
  offending = binary_part(token, position, len)
  throw({:token, offending, position})
end
@compile {:inline, continue: 6}
# Dispatches a freshly decoded `value` to whichever parser state is recorded
# by the tag on top of the stack, popping that tag.
defp continue(rest, original, skip, stack, decode, value) do
  case stack do
    [@terminate | below] -> terminate(rest, original, skip, below, decode, value)
    [@array | below] -> array(rest, original, skip, below, decode, value)
    [@key | below] -> key(rest, original, skip, below, decode, value)
    [@object | below] -> object(rest, original, skip, below, decode, value)
  end
end
# Runs after the top-level value: only whitespace may remain. Returns `value`
# once the input is exhausted and errors on any other byte.
defp terminate(<<>>, _original, _skip, _stack, _decode, value), do: value

defp terminate(<<space, tail::bits>>, original, skip, stack, decode, value)
     when space in [?\s, ?\n, ?\r, ?\t] do
  terminate(tail, original, skip + 1, stack, decode, value)
end

defp terminate(<<_tail::bits>>, original, skip, _stack, _decode, _value) do
  error(original, skip)
end
end