diff --git a/Binary-strings-preserve-UTF-8-and-UTF-16-errors.patch b/Binary-strings-preserve-UTF-8-and-UTF-16-errors.patch
deleted file mode 100644
index 6863778adaa63ee495a2830f6446b82b2b3b9b31..0000000000000000000000000000000000000000
--- a/Binary-strings-preserve-UTF-8-and-UTF-16-errors.patch
+++ /dev/null
@@ -1,616 +0,0 @@
-From b2384ea878f484c48419fc0ec30380d0a5ffe3ce Mon Sep 17 00:00:00 2001
-From: Max Zerzouri <maxdamantus@gmail.com>
-Date: Sat, 15 May 2021 08:32:27 +0000
-Subject: [PATCH] Binary strings: preserve UTF-8 and UTF-16 errors
-
-The internal string representation is changed from UTF-8 with replacement
-characters to a modified form of "WTF-8" that is able to distinctly encode
-UTF-8 errors and UTF-16 errors.
-
-This handles UTF-8 errors in raw string inputs and handles UTF-8 and UTF-16
-errors in JSON input. UTF-16 errors (using "\uXXXX") and UTF-8 errors (using
-the original raw bytes) are maintained when emitting JSON. When emitting raw
-strings, UTF-8 errors are maintained and UTF-16 errors are converted into
-replacement characters.
----
- scripts/gen_utf8_tables.py |  3 +-
- src/jv.c                   | 28 ++++++------
- src/jv.h                   |  1 +
- src/jv_parse.c             | 77 ++++++++++++++++++++++-----------
- src/jv_print.c             | 26 +++++++++++-
- src/jv_unicode.c           | 87 ++++++++++++++++++++++++++++++++++----
- src/jv_unicode.h           | 11 +++++
- src/jv_utf8_tables.h       |  4 +-
- src/main.c                 | 29 ++++++++++++-
- tests/jq.test              |  5 +++
- tests/shtest               |  9 ++++
- 11 files changed, 228 insertions(+), 52 deletions(-)
-
-diff --git a/scripts/gen_utf8_tables.py b/scripts/gen_utf8_tables.py
-index 6fe0a53..7706462 100644
---- a/scripts/gen_utf8_tables.py
-+++ b/scripts/gen_utf8_tables.py
-@@ -16,8 +16,7 @@ def print_table(type, name, t):
- def utf8info(c):
-     if c < 0x80: return 1, mask(7)
-     if 0x80 <= c <= 0xBF: return 255, mask(6)
--    if 0xC0 <= c <= 0xC1: return 0, 0
--    if 0xC2 <= c <= 0xDF: return 2, mask(5)
-+    if 0xC0 <= c <= 0xDF: return 2, mask(5)
-     if 0xE0 <= c <= 0xEF: return 3, mask(4)
-     if 0xF0 <= c <= 0xF4: return 4, mask(3)
-     if 0xF4 <= c <= 0xFF: return 0, 0
-diff --git a/src/jv.c b/src/jv.c
-index 1f1029e..e979cc6 100644
---- a/src/jv.c
-+++ b/src/jv.c
-@@ -452,20 +452,24 @@ static jvp_string* jvp_string_alloc(uint32_t size) {
-   return s;
- }
- 
--/* Copy a UTF8 string, replacing all badly encoded points with U+FFFD */
-+/* Copy a UTF8 string, using WTF-8b to replace all UTF-8 errors */
- static jv jvp_string_copy_replace_bad(const char* data, uint32_t length) {
-   const char* end = data + length;
-   const char* i = data;
-   const char* cstart;
- 
--  uint32_t maxlength = length * 3 + 1; // worst case: all bad bytes, each becomes a 3-byte U+FFFD
-+  uint32_t maxlength = length * 2 + 1; // worst case: all bad bytes, each becomes a 2-byte overlong U+XX
-   jvp_string* s = jvp_string_alloc(maxlength);
-   char* out = s->data;
-   int c = 0;
- 
--  while ((i = jvp_utf8_next((cstart = i), end, &c))) {
-+  while ((i = jvp_utf8_extended_next((cstart = i), end, 0, &c))) {
-     if (c == -1) {
--      c = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER
-+      int error = (unsigned char)*cstart;
-+      assert(error >= 0x80 && error <= 0xFF);
-+      c = -error;
-+      /* Ensure each UTF-8 error byte is consumed separately */
-+      i = cstart + 1;
-     }
-     out += jvp_utf8_encode(c, out);
-     assert(out < s->data + maxlength);
-@@ -477,8 +481,8 @@ static jv jvp_string_copy_replace_bad(const char* data, uint32_t length) {
-   return r;
- }
- 
--/* Assumes valid UTF8 */
--static jv jvp_string_new(const char* data, uint32_t length) {
-+/* Assumes valid WTF-8b */
-+jv jv_string_extended_sized(const char* data, int length) {
-   jvp_string* s = jvp_string_alloc(length);
-   s->length_hashed = length << 1;
-   if (data != NULL)
-@@ -618,7 +622,7 @@ static int jvp_string_equal(jv a, jv b) {
- jv jv_string_sized(const char* str, int len) {
-   return
-     jvp_utf8_is_valid(str, str+len) ?
--    jvp_string_new(str, len) :
-+    jv_string_extended_sized(str, len) :
-     jvp_string_copy_replace_bad(str, len);
- }
- 
-@@ -682,14 +686,14 @@ jv jv_string_split(jv j, jv sep) {
- 
-   if (seplen == 0) {
-     int c;
--    while ((jstr = jvp_utf8_next(jstr, jend, &c)))
-+    while ((jstr = jvp_utf8_extended_next(jstr, jend, JVP_UTF8_ERRORS_ALL, &c)))
-       a = jv_array_append(a, jv_string_append_codepoint(jv_string(""), c));
-   } else {
-     for (p = jstr; p < jend; p = s + seplen) {
-       s = _jq_memmem(p, jend - p, sepstr, seplen);
-       if (s == NULL)
-         s = jend;
--      a = jv_array_append(a, jv_string_sized(p, s - p));
-+      a = jv_array_append(a, jv_string_extended_sized(p, s - p));
-       // Add an empty string to denote that j ends on a sep
-       if (s + seplen == jend && seplen != 0)
-         a = jv_array_append(a, jv_string(""));
-@@ -760,7 +764,7 @@ jv jv_string_slice(jv j, int start, int end) {
- 
-   /* Look for byte offset corresponding to start codepoints */
-   for (p = s, i = 0; i < start; i++) {
--    p = jvp_utf8_next(p, s + len, &c);
-+    p = jvp_utf8_extended_next(p, s + len, JVP_UTF8_ERRORS_ALL, &c);
-     if (p == NULL) {
-       jv_free(j);
-       return jv_string_empty(16);
-@@ -772,7 +776,7 @@ jv jv_string_slice(jv j, int start, int end) {
-   }
-   /* Look for byte offset corresponding to end codepoints */
-   for (e = p; e != NULL && i < end; i++) {
--    e = jvp_utf8_next(e, s + len, &c);
-+    e = jvp_utf8_extended_next(e, s + len, JVP_UTF8_ERRORS_ALL, &c);
-     if (e == NULL) {
-       e = s + len;
-       break;
-@@ -790,7 +794,7 @@ jv jv_string_slice(jv j, int start, int end) {
-    * memory like a drunken navy programmer.  There's probably nothing we
-    * can do about it.
-    */
--  res = jv_string_sized(p, e - p);
-+  res = jv_string_extended_sized(p, e - p);
-   jv_free(j);
-   return res;
- }
-diff --git a/src/jv.h b/src/jv.h
-index d111c80..2aed1ae 100644
---- a/src/jv.h
-+++ b/src/jv.h
-@@ -104,6 +104,7 @@ jv jv_array_indexes(jv, jv);
- 
- jv jv_string(const char*);
- jv jv_string_sized(const char*, int);
-+jv jv_string_extended_sized(const char*, int);
- jv jv_string_empty(int len);
- int jv_string_length_bytes(jv);
- int jv_string_length_codepoints(jv);
-diff --git a/src/jv_parse.c b/src/jv_parse.c
-index 51ad9f0..194efaf 100644
---- a/src/jv_parse.c
-+++ b/src/jv_parse.c
-@@ -397,7 +397,7 @@ static void tokenadd(struct jv_parser* p, char c) {
-   p->tokenbuf[p->tokenpos++] = c;
- }
- 
--static int unhex4(char* hex) {
-+static int unhex4(const char* hex) {
-   int r = 0;
-   for (int i=0; i<4; i++) {
-     char c = *hex++;
-@@ -413,15 +413,19 @@ static int unhex4(char* hex) {
- }
- 
- static pfunc found_string(struct jv_parser* p) {
--  char* in = p->tokenbuf;
--  char* out = p->tokenbuf;
--  char* end = p->tokenbuf + p->tokenpos;
--
--  while (in < end) {
--    char c = *in++;
-+  const char* in = p->tokenbuf;
-+  // start by writing to tokenbuf, only allocate in case that output size is greater than input size (possible only when input has UTF-8 errors)
-+  char* newbuf = NULL;
-+  char* buf = p->tokenbuf;
-+  char* out = buf;
-+  const char* end = p->tokenbuf + p->tokenpos;
-+  const char* cstart;
-+  int c;
-+
-+  while ((in = jvp_utf8_extended_next((cstart = in), end, 0, &c))) {
-     if (c == '\\') {
-       if (in >= end)
--        return "Expected escape character at end of string";
-+        return jv_mem_free(newbuf), "Expected escape character at end of string";
-       c = *in++;
-       switch (c) {
-       case '\\':
-@@ -436,38 +440,61 @@ static pfunc found_string(struct jv_parser* p) {
-       case 'u':
-         /* ahh, the complicated case */
-         if (in + 4 > end)
--          return "Invalid \\uXXXX escape";
-+          return jv_mem_free(newbuf), "Invalid \\uXXXX escape";
-         int hexvalue = unhex4(in);
-         if (hexvalue < 0)
--          return "Invalid characters in \\uXXXX escape";
-+          return jv_mem_free(newbuf), "Invalid characters in \\uXXXX escape";
-         unsigned long codepoint = (unsigned long)hexvalue;
-         in += 4;
-+        // leading surrogate
-         if (0xD800 <= codepoint && codepoint <= 0xDBFF) {
--          /* who thought UTF-16 surrogate pairs were a good idea? */
--          if (in + 6 > end || in[0] != '\\' || in[1] != 'u')
--            return "Invalid \\uXXXX\\uXXXX surrogate pair escape";
--          unsigned long surrogate = unhex4(in+2);
--          if (!(0xDC00 <= surrogate && surrogate <= 0xDFFF))
--            return "Invalid \\uXXXX\\uXXXX surrogate pair escape";
--          in += 6;
--          codepoint = 0x10000 + (((codepoint - 0xD800) << 10)
--                                 |(surrogate - 0xDC00));
-+          // look ahead for trailing surrogate and decode as UTF-16, otherwise encode this lone surrogate as WTF-8
-+          if (in + 6 <= end && in[0] == '\\' && in[1] == 'u') {
-+            unsigned long surrogate = unhex4(in+2);
-+            if (0xDC00 <= surrogate && surrogate <= 0xDFFF) {
-+              in += 6;
-+              codepoint = 0x10000 + (((codepoint - 0xD800) << 10)
-+                                     |(surrogate - 0xDC00));
-+            }
-+          }
-         }
--        if (codepoint > 0x10FFFF)
--          codepoint = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER
-+        // UTF-16 surrogates can not encode a greater codepoint
-+        assert(codepoint <= 0x10FFFF);
-+        // NOTE: a leading or trailing surrogate here (0xD800 <= codepoint && codepoint <= 0xDFFF) is encoded as WTF-8
-         out += jvp_utf8_encode(codepoint, out);
-         break;
- 
-       default:
--        return "Invalid escape";
-+        return jv_mem_free(newbuf), "Invalid escape";
-       }
-     } else {
-       if (c > 0 && c < 0x001f)
--        return "Invalid string: control characters from U+0000 through U+001F must be escaped";
--      *out++ = c;
-+        return jv_mem_free(newbuf), "Invalid string: control characters from U+0000 through U+001F must be escaped";
-+      if (c == -1) {
-+        int error = (unsigned char)*cstart;
-+        assert(error >= 0x80 && error <= 0xFF);
-+        c = -error;
-+        /* Ensure each UTF-8 error byte is consumed separately */
-+        const int wtf8_length = 2;
-+        assert(jvp_utf8_encode_length(c) == wtf8_length);
-+        in = cstart + 1;
-+        if (newbuf == NULL && out + wtf8_length > in) {
-+          /* Output is about to overflow input, move output to temporary buffer */
-+          int current_size = out - p->tokenbuf;
-+          int remaining = end - cstart;
-+          newbuf = jv_mem_alloc(current_size + remaining * wtf8_length); // worst case: all remaining bad bytes, each becomes a 2-byte overlong U+XX
-+          memcpy(newbuf, buf, current_size);
-+          buf = newbuf;
-+          out = buf + current_size;
-+        }
-+      } else
-+        assert(jvp_utf8_encode_length(c) == in - cstart);
-+      out += jvp_utf8_encode(c, out);
-     }
-   }
--  TRY(value(p, jv_string_sized(p->tokenbuf, out - p->tokenbuf)));
-+  jv v = jv_string_extended_sized(buf, out - buf);
-+  jv_mem_free(newbuf);
-+  TRY(value(p, v));
-   p->tokenpos = 0;
-   return 0;
- }
-diff --git a/src/jv_print.c b/src/jv_print.c
-index 5ebc01e..dfa1f05 100644
---- a/src/jv_print.c
-+++ b/src/jv_print.c
-@@ -98,6 +98,16 @@ static void put_char(char c, FILE* fout, jv* strout, int T) {
-   put_buf(&c, 1, fout, strout, T);
- }
- 
-+static void put_invalid_utf8_byte(int c, FILE* fout, jv* strout, int T) {
-+  assert(c >= 0x80 && c <= 0xFF);
-+  if (strout) {
-+    // encode as an invalid UTF-8 byte in output
-+    *strout = jv_string_append_codepoint(*strout, -c);
-+  } else {
-+    put_char(c, fout, strout, T);
-+  }
-+}
-+
- static void put_str(const char* s, FILE* fout, jv* strout, int T) {
-   put_buf(s, strlen(s), fout, strout, T);
- }
-@@ -121,7 +131,7 @@ static void jvp_dump_string(jv str, int ascii_only, FILE* F, jv* S, int T) {
-   int c = 0;
-   char buf[32];
-   put_char('"', F, S, T);
--  while ((i = jvp_utf8_next((cstart = i), end, &c))) {
-+  while ((i = jvp_utf8_extended_next((cstart = i), end, JVP_UTF8_ERRORS_ALL, &c))) {
-     assert(c != -1);
-     int unicode_escape = 0;
-     if (0x20 <= c && c <= 0x7E) {
-@@ -130,6 +140,17 @@ static void jvp_dump_string(jv str, int ascii_only, FILE* F, jv* S, int T) {
-         put_char('\\', F, S, T);
-       }
-       put_char(c, F, S, T);
-+    } else if (c >= -0xFF && c <= -0x80) {
-+      // Invalid UTF-8 byte
-+      if (ascii_only) {
-+        // refusing to emit invalid UTF-8
-+        // TODO: convince the world to adopt a "\xXX" notation for JSON?
-+        c = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER
-+        unicode_escape = 1;
-+      } else {
-+        // pass through
-+        put_invalid_utf8_byte(-c, F, S, T);
-+      }
-     } else if (c < 0x20 || c == 0x7F) {
-       // ASCII control character
-       switch (c) {
-@@ -160,6 +181,9 @@ static void jvp_dump_string(jv str, int ascii_only, FILE* F, jv* S, int T) {
-     } else {
-       if (ascii_only) {
-         unicode_escape = 1;
-+      } else if (c >= 0xD800 && c <= 0xDFFF) {
-+        // lone surrogate; can't be encoded to UTF-8
-+        unicode_escape = 1;
-       } else {
-         put_buf(cstart, i - cstart, F, S, T);
-       }
-diff --git a/src/jv_unicode.c b/src/jv_unicode.c
-index d197349..8c47536 100644
---- a/src/jv_unicode.c
-+++ b/src/jv_unicode.c
-@@ -27,6 +27,56 @@ const char* jvp_utf8_backtrack(const char* start, const char* min, int *missing_
- }
- 
- const char* jvp_utf8_next(const char* in, const char* end, int* codepoint_ret) {
-+  return jvp_utf8_extended_next(in, end, JVP_UTF8_REPLACE, codepoint_ret);
-+}
-+
-+/*
-+  The internal representation of jv strings uses an encoding that is hereby
-+  referred to as "WTF-8b" (until someone demonstrates use of another term to
-+  refer to the same encoding).
-+
-+  WTF-8b is an extension of WTF-8, which is an extension of UTF-8. Any sequence
-+  of Unicode scalar values is represented by the same bytes in UTF-8, WTF-8 and
-+  WTF-8b, therefore any well-formed UTF-8 string is interpreted as the same
-+  sequence of Unicode scalar values (roughly, code points) in WTF-8b.
-+
-+  Like WTF-8, WTF-8b is able to encode UTF-16 errors (lone surrogates) using
-+  the "generalized UTF-8" representation of code points between U+D800 and
-+  U+DFFF. These errors occur in JSON terms such as:
-+    "_\uD8AB_\uDBCD_"
-+
-+  Unlike WTF-8, WTF-8b is also able to encode UTF-8 errors (bytes 0x80 to 0xFF
-+  that are not part of a valid UTF-8 sequence) using the first 128 "overlong"
-+  codings (unused 2-byte representations of U+00 to U+7F). These errors can
-+  occur in any byte stream that is interpreted as UTF-8, for example:
-+    "\xED\xA2\xAB"
-+  The above example is in fact the WTF-8b (and WTF-8) encoding for the lone
-+  UTF-16 surrogate "\uD8AB", which demonstrates the need for a distinct
-+  encoding of UTF-8 errors. If a distinction were not made, then "\xED\xA2\xAB"
-+  and "\uD8AB" would be interpreted as the same string, so at least one of the
-+  forms would not be preserved when printed as JSON output.
-+
-+  It should also be noted that the process of converting from invalid UTF-8 to
-+  WTF-8b is not (and cannot be) idempotent, since the "generalized UTF-8"
-+  representations of UTF-16 surrogates are intentionally not able to be
-+  generated from invalid UTF-8, only through some other means (usually "\uXXXX"
-+  notation).
-+
-+  Each UTF-16 error is encoded as 3 WTF-8b (or WTF-8) bytes.
-+  Each UTF-8 error is encoded as 2 WTF-8b bytes.
-+
-+  When iterating over code points using `JVP_UTF8_ERRORS_UTF16`, encoded UTF-16
-+  errors are emitted in the form of code points in the range U+D800 to U+DFFF.
-+  These code points can be reencoded as usual using `jvp_utf8_encode`.
-+
-+  When iterating over code points using `JVP_UTF8_ERRORS_UTF8`, encoded UTF-8
-+  errors are emitted in the form of code points in the negative range -0x80 to
-+  -0xFF. These negative code points can be negated to determine the original
-+  error bytes. These code points can be reencoded as usual using
-+  `jvp_utf8_encode`.
-+*/
-+
-+const char* jvp_utf8_extended_next(const char* in, const char* end, enum jvp_utf8_flags flags, int* codepoint_ret) {
-   assert(in <= end);
-   if (in == end) {
-     return 0;
-@@ -40,9 +90,11 @@ const char* jvp_utf8_next(const char* in, const char* end, int* codepoint_ret) {
-     length = 1;
-   } else if (length == 0 || length == UTF8_CONTINUATION_BYTE) {
-     /* Bad single byte - either an invalid byte or an out-of-place continuation byte */
-+    if (flags & JVP_UTF8_ERRORS_ALL) assert(0 && "Invalid WTF-8b sequence: bad single byte");
-     length = 1;
-   } else if (in + length > end) {
-     /* String ends before UTF8 sequence ends */
-+    if (flags & JVP_UTF8_ERRORS_ALL) assert(0 && "Invalid WTF-8b sequence: underrun");
-     length = end - in;
-   } else {
-     codepoint = ((unsigned)in[0]) & utf8_coding_bits[first];
-@@ -50,6 +102,7 @@ const char* jvp_utf8_next(const char* in, const char* end, int* codepoint_ret) {
-       unsigned ch = (unsigned char)in[i];
-       if (utf8_coding_length[ch] != UTF8_CONTINUATION_BYTE){
-         /* Invalid UTF8 sequence - not followed by the right number of continuation bytes */
-+        if (flags & JVP_UTF8_ERRORS_ALL) assert(0 && "Invalid WTF-8b sequence: wrong bytes");
-         codepoint = -1;
-         length = i;
-         break;
-@@ -58,17 +111,29 @@ const char* jvp_utf8_next(const char* in, const char* end, int* codepoint_ret) {
-     }
-     if (codepoint < utf8_first_codepoint[length]) {
-       /* Overlong UTF8 sequence */
--      codepoint = -1;
-+      if ((flags & JVP_UTF8_ERRORS_UTF8) && 0x00 <= codepoint && codepoint <= 0x7F) {
-+        /* UTF-8 error is emitted as a negative codepoint */
-+        codepoint = -(codepoint + 0x80);
-+      } else {
-+        if (flags & JVP_UTF8_ERRORS_ALL) assert(0 && "Invalid WTF-8b sequence: overlong");
-+        codepoint = -1;
-+      }
-     }
-     if (0xD800 <= codepoint && codepoint <= 0xDFFF) {
--      /* Surrogate codepoints can't be encoded in UTF8 */
--      codepoint = -1;
-+      /* Surrogate codepoints are allowed in WTF-8/WTF-8b */
-+      if (!(flags & JVP_UTF8_ERRORS_UTF16)) {
-+        /* Surrogate codepoints can't be encoded in UTF8 */
-+        codepoint = -1;
-+      }
-     }
-     if (codepoint > 0x10FFFF) {
-       /* Outside Unicode range */
-+      if (flags & JVP_UTF8_ERRORS_ALL) assert(0 && "Invalid WTF-8b sequence: out of range");
-       codepoint = -1;
-     }
-   }
-+  if (codepoint == -1 && (flags & JVP_UTF8_REPLACE))
-+    codepoint = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER
-   assert(length > 0);
-   *codepoint_ret = codepoint;
-   return in + length;
-@@ -76,7 +141,7 @@ const char* jvp_utf8_next(const char* in, const char* end, int* codepoint_ret) {
- 
- int jvp_utf8_is_valid(const char* in, const char* end) {
-   int codepoint;
--  while ((in = jvp_utf8_next(in, end, &codepoint))) {
-+  while ((in = jvp_utf8_extended_next(in, end, 0, &codepoint))) {
-     if (codepoint == -1) return 0;
-   }
-   return 1;
-@@ -91,20 +156,24 @@ int jvp_utf8_decode_length(char startchar) {
- }
- 
- int jvp_utf8_encode_length(int codepoint) {
--  if (codepoint <= 0x7F) return 1;
-+  if (codepoint >= 0 && codepoint <= 0x7F) return 1;
-   else if (codepoint <= 0x7FF) return 2;
-   else if (codepoint <= 0xFFFF) return 3;
-   else return 4;
- }
- 
- int jvp_utf8_encode(int codepoint, char* out) {
--  assert(codepoint >= 0 && codepoint <= 0x10FFFF);
-+  assert((codepoint >= 0 && codepoint <= 0x10FFFF) || (codepoint >= -0xFF && codepoint <= -0x80));
-   char* start = out;
--  if (codepoint <= 0x7F) {
-+  if (codepoint >= 0 && codepoint <= 0x7F) {
-     *out++ = codepoint;
-   } else if (codepoint <= 0x7FF) {
--    *out++ = 0xC0 + ((codepoint & 0x7C0) >> 6);
--    *out++ = 0x80 + ((codepoint & 0x03F));
-+    // encode UTF-8 errors as overlong representations of U+00 to U+7F
-+    int cp = codepoint >= -0xFF && codepoint <= -0x80?
-+      -codepoint - 0x80 :
-+      codepoint;
-+    *out++ = 0xC0 + ((cp & 0x7C0) >> 6);
-+    *out++ = 0x80 + ((cp & 0x03F));
-   } else if(codepoint <= 0xFFFF) {
-     *out++ = 0xE0 + ((codepoint & 0xF000) >> 12);
-     *out++ = 0x80 + ((codepoint & 0x0FC0) >> 6);
-diff --git a/src/jv_unicode.h b/src/jv_unicode.h
-index 558721a..37c7fc0 100644
---- a/src/jv_unicode.h
-+++ b/src/jv_unicode.h
-@@ -1,7 +1,18 @@
- #ifndef JV_UNICODE_H
- #define JV_UNICODE_H
- 
-+enum jvp_utf8_flags {
-+  /* Emit replacement character instead of -1 for errors */
-+  JVP_UTF8_REPLACE = 1,
-+  /* Treat input as WTF-8b, emit 0xD800 to 0xDFFF to denote encoded UTF-16 errors */
-+  JVP_UTF8_ERRORS_UTF16 = 2,
-+  /* Treat input as WTF-8b, emit -0x80 to -0xFF to denote encoded UTF-8 errors */
-+  JVP_UTF8_ERRORS_UTF8 = 4,
-+  JVP_UTF8_ERRORS_ALL = JVP_UTF8_ERRORS_UTF16 | JVP_UTF8_ERRORS_UTF8
-+};
-+
- const char* jvp_utf8_backtrack(const char* start, const char* min, int *missing_bytes);
-+const char* jvp_utf8_extended_next(const char* in, const char* end, enum jvp_utf8_flags flags, int* codepoint);
- const char* jvp_utf8_next(const char* in, const char* end, int* codepoint);
- int jvp_utf8_is_valid(const char* in, const char* end);
- 
-diff --git a/src/jv_utf8_tables.h b/src/jv_utf8_tables.h
-index f1a4252..7c68749 100644
---- a/src/jv_utf8_tables.h
-+++ b/src/jv_utf8_tables.h
-@@ -12,7 +12,7 @@ static const unsigned char utf8_coding_length[] =
-   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-   0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
--  0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-+  0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-   0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-   0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
-   0x04, 0x04, 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
-@@ -29,7 +29,7 @@ static const unsigned char utf8_coding_bits[] =
-   0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
-   0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
-   0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
--  0x00, 0x00, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
-+  0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
-   0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
-   0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-   0x07, 0x07, 0x07, 0x07, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
-diff --git a/src/main.c b/src/main.c
-index b154689..5fa5c4f 100644
---- a/src/main.c
-+++ b/src/main.c
-@@ -30,6 +30,7 @@
- #include "jv.h"
- #include "jq.h"
- #include "jv_alloc.h"
-+#include "jv_unicode.h"
- #include "util.h"
- #include "src/version.h"
- 
-@@ -161,6 +162,30 @@ static const char *skip_shebang(const char *p) {
-   return n+1;
- }
- 
-+static void jvp_dump_raw_string(const char* start, const char* end, FILE* f) {
-+  static const unsigned char UTF8_REPLACEMENT[] = {0xEF,0xBF,0xBD}; // U+FFFD REPLACEMENT CHARACTER
-+
-+  const char* i = start;
-+  const char* cstart;
-+  int c;
-+
-+  while ((i = jvp_utf8_extended_next((cstart = i), end, JVP_UTF8_ERRORS_ALL, &c))) {
-+    if (c >= -0xFF && c <= -0x80) {
-+      // invalid UTF-8 byte; pass through
-+      fwrite(start, 1, cstart - start, f);
-+      start = i;
-+      fputc(-c, f);
-+    } else if ((c >= 0xD800 && c <= 0xDFFF) || c == -1) {
-+      // lone surrogate; can't be encoded to UTF-8
-+      fwrite(start, 1, cstart - start, f);
-+      start = i;
-+      fwrite(UTF8_REPLACEMENT, 1, sizeof(UTF8_REPLACEMENT), f);
-+    } else
-+      continue;
-+  }
-+  fwrite(start, 1, end - start, f);
-+}
-+
- static int process(jq_state *jq, jv value, int flags, int dumpopts) {
-   int ret = 14; // No valid results && -e -> exit(4)
-   jq_start(jq, value, flags);
-@@ -170,7 +195,9 @@ static int process(jq_state *jq, jv value, int flags, int dumpopts) {
-       if (options & ASCII_OUTPUT) {
-         jv_dumpf(result, stdout, JV_PRINT_ASCII);
-       } else {
--        fwrite(jv_string_value(result), 1, jv_string_length_bytes(jv_copy(result)), stdout);
-+        const char *start = jv_string_value(result);
-+        const char *end = start + jv_string_length_bytes(jv_copy(result));
-+        jvp_dump_raw_string(start, end, stdout);
-       }
-       ret = 0;
-       jv_free(result);
-diff --git a/tests/jq.test b/tests/jq.test
-index 7e2dd43..c882fd2 100644
---- a/tests/jq.test
-+++ b/tests/jq.test
-@@ -57,6 +57,11 @@ null
- "Aa\r\n\t\b\f\u03bc"
- "Aa\u000d\u000a\u0009\u0008\u000c\u03bc"
- 
-+# Check that unpaired surrogates are preserved in output
-+"\u2200\ud800\u2203\udc00\u2205\udfff"
-+null
-+"∀\ud800∃\udc00∅\udfff"
-+
- "inter\("pol" + "ation")"
- null
- "interpolation"
-diff --git a/tests/shtest b/tests/shtest
-index 86fec33..4c8b57e 100755
---- a/tests/shtest
-+++ b/tests/shtest
-@@ -130,6 +130,15 @@ printf "[1,2][3,4]\n" | $JQ -cs add > $d/out 2>&1
- cmp $d/out $d/expected
- 
- 
-+clean=false
-+# Invalid UTF-8 bytes are preserved when encoding/decoding JSON
-+dd if=/dev/urandom bs=1024 count=1024 >$d/rand 2>/dev/null
-+$VALGRIND $Q $JQ -sR . $d/rand >$d/out.json
-+$VALGRIND $Q $JQ -j . $d/out.json >$d/out
-+cmp $d/out $d/rand
-+clean=true
-+
-+
- ## Test streaming parser
- 
- ## If we add an option to stream to the `import ... as $symbol;` directive
diff --git a/CVE-2024-53427-pre.patch b/CVE-2024-53427-pre.patch
new file mode 100644
index 0000000000000000000000000000000000000000..4fc5a632205bed626c5ed3035defdda402513ac3
--- /dev/null
+++ b/CVE-2024-53427-pre.patch
@@ -0,0 +1,68 @@
+From b86ff49f46a4a37e5a8e75a140cb5fd6e1331384 Mon Sep 17 00:00:00 2001
+From: itchyny <itchyny@cybozu.co.jp>
+Date: Sun, 16 Feb 2025 22:08:36 +0900
+Subject: [PATCH] fix: `jv_number_value` should cache the double value of
+ literal numbers (#3245)
+
+The code of `jv_number_value` is intended to cache the double value of
+literal numbers, but it does not work because it accepts the `jv` struct
+by value. This patch fixes the behavior by checking if the double value
+is `NaN`, which indicates the unconverted value. This patch improves the
+performance of major use cases; e.g. `range(1000000)` runs 25% faster.
+
+Origin: https://github.com/jqlang/jq/commit/b86ff49f46a4a37e5a8e75a140cb5fd6e1331384
+---
+ src/jv.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/src/jv.c b/src/jv.c
+index e23d8ec..9329eae 100644
+--- a/src/jv.c
++++ b/src/jv.c
+@@ -206,9 +206,6 @@ enum {
+   JVP_NUMBER_DECIMAL = 1
+ };
+ 
+-#define JV_NUMBER_SIZE_INIT      (0)
+-#define JV_NUMBER_SIZE_CONVERTED (1)
+-
+ #define JVP_FLAGS_NUMBER_NATIVE       JVP_MAKE_FLAGS(JV_KIND_NUMBER, JVP_MAKE_PFLAGS(JVP_NUMBER_NATIVE, 0))
+ #define JVP_FLAGS_NUMBER_LITERAL      JVP_MAKE_FLAGS(JV_KIND_NUMBER, JVP_MAKE_PFLAGS(JVP_NUMBER_DECIMAL, 1))
+ 
+@@ -589,8 +586,12 @@ static jv jvp_literal_number_new(const char * literal) {
+     jv_mem_free(n);
+     return JV_INVALID;
+   }
++  if (decNumberIsNaN(&n->num_decimal)) {
++    jv_mem_free(n);
++    return jv_number(NAN);
++  }
+ 
+-  jv r = {JVP_FLAGS_NUMBER_LITERAL, 0, 0, JV_NUMBER_SIZE_INIT, {&n->refcnt}};
++  jv r = {JVP_FLAGS_NUMBER_LITERAL, 0, 0, 0, {&n->refcnt}};
+   return r;
+ }
+ 
+@@ -698,9 +699,8 @@ double jv_number_value(jv j) {
+   if (JVP_HAS_FLAGS(j, JVP_FLAGS_NUMBER_LITERAL)) {
+     jvp_literal_number* n = jvp_literal_number_ptr(j);
+ 
+-    if (j.size != JV_NUMBER_SIZE_CONVERTED) {
++    if (isnan(n->num_double)) {
+       n->num_double = jvp_literal_number_to_double(j);
+-      j.size = JV_NUMBER_SIZE_CONVERTED;
+     }
+ 
+     return n->num_double;
+@@ -731,7 +731,7 @@ int jvp_number_is_nan(jv n) {
+     return decNumberIsNaN(pdec);
+   }
+ #endif
+-  return n.u.number != n.u.number;
++  return isnan(n.u.number);
+ }
+ 
+ int jvp_number_cmp(jv a, jv b) {
+-- 
+2.48.1
+
diff --git a/CVE-2024-53427.patch b/CVE-2024-53427.patch
new file mode 100644
index 0000000000000000000000000000000000000000..6046eba7e4b4f000e931fb40f518999880dc212f
--- /dev/null
+++ b/CVE-2024-53427.patch
@@ -0,0 +1,77 @@
+From a09a4dfd55e6c24d04b35062ccfe4509748b1dd3 Mon Sep 17 00:00:00 2001
+From: itchyny <itchyny@cybozu.co.jp>
+Date: Wed, 5 Mar 2025 07:43:54 +0900
+Subject: [PATCH] Reject NaN with payload while parsing JSON
+
+This commit drops support for parsing NaN with payload in JSON like
+`NaN123` and fixes CVE-2024-53427. Other JSON extensions like `NaN` and
+`Infinity` are still supported. Fixes #3023, fixes #3196, fixes #3246.
+
+Origin: https://github.com/jqlang/jq/commit/a09a4dfd55e6c24d04b35062ccfe4509748b1dd3
+---
+ src/jv.c      |  5 +++++
+ tests/jq.test | 14 ++++++++++----
+ tests/shtest  |  5 -----
+ 3 files changed, 15 insertions(+), 9 deletions(-)
+
+diff --git a/src/jv.c b/src/jv.c
+index 9329eae..e26f74d 100644
+--- a/src/jv.c
++++ b/src/jv.c
+@@ -587,6 +587,11 @@ static jv jvp_literal_number_new(const char * literal) {
+     return JV_INVALID;
+   }
+   if (decNumberIsNaN(&n->num_decimal)) {
++    // Reject NaN with payload.
++    if (n->num_decimal.digits > 1 || *n->num_decimal.lsu != 0) {
++      jv_mem_free(n);
++      return JV_INVALID;
++    }
+     jv_mem_free(n);
+     return jv_number(NAN);
+   }
+diff --git a/tests/jq.test b/tests/jq.test
+index 7036df2..d052b22 100644
+--- a/tests/jq.test
++++ b/tests/jq.test
+@@ -1938,11 +1938,17 @@ tojson | fromjson
+ {"a":nan}
+ {"a":null}
+ 
+-# also "nan with payload" #2985
+-fromjson | isnan
+-"nan1234"
++# NaN with payload is not parsed
++.[] | try (fromjson | isnan) catch .
++["NaN","-NaN","NaN1","NaN10","NaN100","NaN1000","NaN10000","NaN100000"]
+ true
+-
++true
++"Invalid numeric literal at EOF at line 1, column 4 (while parsing 'NaN1')"
++"Invalid numeric literal at EOF at line 1, column 5 (while parsing 'NaN10')"
++"Invalid numeric literal at EOF at line 1, column 6 (while parsing 'NaN100')"
++"Invalid numeric literal at EOF at line 1, column 7 (while parsing 'NaN1000')"
++"Invalid numeric literal at EOF at line 1, column 8 (while parsing 'NaN10000')"
++"Invalid numeric literal at EOF at line 1, column 9 (while parsing 'NaN100000')"
+ 
+ # calling input/0, or debug/0 in a test doesn't crash jq
+ 
+diff --git a/tests/shtest b/tests/shtest
+index 14aafbf..a471889 100755
+--- a/tests/shtest
++++ b/tests/shtest
+@@ -594,11 +594,6 @@ if ! x=$($JQ -n "1 # foo$cr + 2") || [ "$x" != 1 ]; then
+   exit 1
+ fi
+ 
+-# CVE-2023-50268: No stack overflow comparing a nan with a large payload
+-$VALGRIND $Q $JQ '1 != .' <<\EOF >/dev/null
+-Nan4000
+-EOF
+-
+ # Allow passing the inline jq script before -- #2919
+ if ! r=$($JQ --args -rn -- '$ARGS.positional[0]' bar) || [ "$r" != bar ]; then
+     echo "passing the inline script after -- didn't work"
+-- 
+2.48.1
+
diff --git a/Correct-UTF-8-and-UTF-16-errors-during-concatenation.patch b/Correct-UTF-8-and-UTF-16-errors-during-concatenation.patch
deleted file mode 100644
index 6ceeda158004badc622261d49bed3e0b1d96fa42..0000000000000000000000000000000000000000
--- a/Correct-UTF-8-and-UTF-16-errors-during-concatenation.patch
+++ /dev/null
@@ -1,388 +0,0 @@
-From 8829368f14943b8d2674c75805b27e56a569ad2c Mon Sep 17 00:00:00 2001
-From: Max Zerzouri <maxdamantus@gmail.com>
-Date: Tue, 25 May 2021 22:59:59 +1200
-Subject: [PATCH] Correct UTF-8 and UTF-16 errors during concatenation
-
-UTF-8 errors and UTF-16 errors that were previously encoded into the
-ends of strings will now potentially be used to form correct code
-points.
-
-This is mostly a matter of making string equality behave as expected, since
-without this normalisation, it is possible to produce `jv` strings that are
-converted to UTF-8 or UTF-16 the same way but are not equal due to well-formed
-code units that may or may not be encoded as errors.
----
- src/jv.c         |  13 ++-
- src/jv_unicode.c | 248 ++++++++++++++++++++++++++++++++++++++---------
- src/jv_unicode.h |   3 +
- tests/jq.test    |  15 +++
- 4 files changed, 230 insertions(+), 49 deletions(-)
-
-diff --git a/src/jv.c b/src/jv.c
-index e979cc6..67d86fb 100644
---- a/src/jv.c
-+++ b/src/jv.c
-@@ -522,20 +522,27 @@ static jv jvp_string_append(jv string, const char* data, uint32_t len) {
-   jvp_string* s = jvp_string_ptr(string);
-   uint32_t currlen = jvp_string_length(s);
- 
-+  char join_buf[4];
-+  int join_len = jvp_utf8_extended_join(s->data, &currlen, &data, &len, join_buf);
-+
-   if (jvp_refcnt_unshared(string.u.ptr) &&
--      jvp_string_remaining_space(s) >= len) {
-+      jvp_string_remaining_space(s) >= join_len + len) {
-     // the next string fits at the end of a
-+    memcpy(s->data + currlen, join_buf, join_len);
-+    currlen += join_len;
-     memcpy(s->data + currlen, data, len);
-     s->data[currlen + len] = 0;
-     s->length_hashed = (currlen + len) << 1;
-     return string;
-   } else {
-     // allocate a bigger buffer and copy
--    uint32_t allocsz = (currlen + len) * 2;
-+    uint32_t allocsz = (currlen + join_len + len) * 2;
-     if (allocsz < 32) allocsz = 32;
-     jvp_string* news = jvp_string_alloc(allocsz);
--    news->length_hashed = (currlen + len) << 1;
-+    news->length_hashed = (currlen + join_len + len) << 1;
-     memcpy(news->data, s->data, currlen);
-+    memcpy(news->data + currlen, join_buf, join_len);
-+    currlen += join_len;
-     memcpy(news->data + currlen, data, len);
-     news->data[currlen + len] = 0;
-     jvp_string_free(string);
-diff --git a/src/jv_unicode.c b/src/jv_unicode.c
-index 8c47536..7d67300 100644
---- a/src/jv_unicode.c
-+++ b/src/jv_unicode.c
-@@ -1,8 +1,72 @@
- #include <stdio.h>
-+#include <string.h>
- #include <assert.h>
- #include "jv_unicode.h"
- #include "jv_utf8_tables.h"
- 
-+// length of encoding of erroneous UTF-8 byte
-+#define UTF8_ERR_LEN 2
-+// length of encoding of erroneous UTF-16 surrogate
-+#define UTF16_ERR_LEN 3
-+
-+#define U32(a, b, c, d) ( \
-+  (uint32_t) (a) << 0 | \
-+  (uint32_t) (b) << 8 | \
-+  (uint32_t) (c) << 16 | \
-+  (uint32_t) (d) << 24 \
-+)
-+
-+#define BYTE(u32, n) ((uint32_t) (((u32) >> (n)*8) & 0xFF))
-+
-+#define B0 0x00 // 00000000
-+#define B1 0x80 // 10000000
-+#define B2 0xC0 // 11000000
-+#define B3 0xE0 // 11100000
-+#define B4 0xF0 // 11110000
-+#define B5 0xF8 // 11111000
-+
-+// NOTE: these flags are likely to be optimised out as `decode` gets inlined
-+enum decode_flags {
-+  DECODE_1 = 1,
-+  DECODE_2 = 2,
-+  DECODE_3 = 8,
-+  DECODE_4 = 16
-+};
-+
-+// decode up to 4 bytes of "generalised UTF-8"; no checking for overlong
-+// codings or out-of-range code points, works by testing all fixed bits in each
-+// of the 4 coding patterns, then shifting the value bits according to the
-+// pattern
-+static int decode(enum decode_flags flags, uint32_t data, int* codepoint_ret) {
-+  if((flags & DECODE_1) && (data & U32(B1, B0, B0, B0)) == 0){
-+    *codepoint_ret = BYTE(data, 0);
-+    return 1;
-+  }
-+  if((flags & DECODE_2) && (data & U32(B3, B2, B0, B0)) == U32(B2, B1, B0, B0)){
-+    *codepoint_ret =
-+      (BYTE(data, 0) & ~B3) << 6 |
-+      (BYTE(data, 1) & ~B2) << 0;
-+    return 2;
-+  }
-+  if((flags & DECODE_3) && (data & U32(B4, B2, B2, B0)) == U32(B3, B1, B1, B0)){
-+    *codepoint_ret =
-+      (BYTE(data, 0) & ~B4) << 12 |
-+      (BYTE(data, 1) & ~B2) << 6 |
-+      (BYTE(data, 2) & ~B2) << 0;
-+    return 3;
-+  }
-+  if((flags & DECODE_4) && (data & U32(B5, B2, B2, B2)) == U32(B4, B1, B1, B1)){
-+    *codepoint_ret =
-+      (BYTE(data, 0) & ~B5) << 18 |
-+      (BYTE(data, 1) & ~B2) << 12 |
-+      (BYTE(data, 2) & ~B2) << 6 |
-+      (BYTE(data, 3) & ~B2) << 0;
-+    return 4;
-+  }
-+  *codepoint_ret = -1;
-+  return 1;
-+}
-+
- // jvp_utf8_backtrack returns the beginning of the last codepoint in the
- // string, assuming that start is the last byte in the string.
- // If the last codepoint is incomplete, returns the number of missing bytes via
-@@ -81,56 +145,42 @@ const char* jvp_utf8_extended_next(const char* in, const char* end, enum jvp_utf
-   if (in == end) {
-     return 0;
-   }
--  int codepoint = -1;
--  unsigned char first = (unsigned char)in[0];
--  int length = utf8_coding_length[first];
--  if ((first & 0x80) == 0) {
-+  uint32_t data = in[0] & 0xFF;
-+  if ((data & B1) == 0) {
-     /* Fast-path for ASCII */
--    codepoint = first;
--    length = 1;
--  } else if (length == 0 || length == UTF8_CONTINUATION_BYTE) {
--    /* Bad single byte - either an invalid byte or an out-of-place continuation byte */
--    if (flags & JVP_UTF8_ERRORS_ALL) assert(0 && "Invalid WTF-8b sequence: bad single byte");
--    length = 1;
--  } else if (in + length > end) {
--    /* String ends before UTF8 sequence ends */
--    if (flags & JVP_UTF8_ERRORS_ALL) assert(0 && "Invalid WTF-8b sequence: underrun");
--    length = end - in;
--  } else {
--    codepoint = ((unsigned)in[0]) & utf8_coding_bits[first];
--    for (int i=1; i<length; i++) {
--      unsigned ch = (unsigned char)in[i];
--      if (utf8_coding_length[ch] != UTF8_CONTINUATION_BYTE){
--        /* Invalid UTF8 sequence - not followed by the right number of continuation bytes */
--        if (flags & JVP_UTF8_ERRORS_ALL) assert(0 && "Invalid WTF-8b sequence: wrong bytes");
--        codepoint = -1;
--        length = i;
--        break;
--      }
--      codepoint = (codepoint << 6) | (ch & 0x3f);
--    }
--    if (codepoint < utf8_first_codepoint[length]) {
--      /* Overlong UTF8 sequence */
--      if ((flags & JVP_UTF8_ERRORS_UTF8) && 0x00 <= codepoint && codepoint <= 0x7F) {
--        /* UTF-8 error is emitted as a negative codepoint */
--        codepoint = -(codepoint + 0x80);
--      } else {
--        if (flags & JVP_UTF8_ERRORS_ALL) assert(0 && "Invalid WTF-8b sequence: overlong");
--        codepoint = -1;
--      }
--    }
--    if (0xD800 <= codepoint && codepoint <= 0xDFFF) {
--      /* Surrogate codepoints are allowed in WTF-8/WTF-8b */
--      if (!(flags & JVP_UTF8_ERRORS_UTF16)) {
--        /* Surrogate codepoints can't be encoded in UTF8 */
--        codepoint = -1;
--      }
-+    *codepoint_ret = data;
-+    return in + 1;
-+  }
-+  switch (end - in) {
-+    default: // fall through
-+    case 4: data |= (uint32_t)(in[3] & 0xFF) << 24; // fall through
-+    case 3: data |= (uint32_t)(in[2] & 0xFF) << 16; // fall through
-+    case 2: data |= (uint32_t)(in[1] & 0xFF) << 8; // fall through
-+    case 1: break;
-+  }
-+  int codepoint;
-+  int length = decode(DECODE_2 | DECODE_3 | DECODE_4, data, &codepoint);
-+  if (codepoint == -1) {
-+    if (flags & JVP_UTF8_ERRORS_UTF8) assert(0 && "Invalid WTF-8b sequence: no match");
-+  } else if (codepoint < utf8_first_codepoint[length]) {
-+    /* Overlong UTF-8 sequence */
-+    if ((flags & JVP_UTF8_ERRORS_UTF8) && length == UTF8_ERR_LEN && 0x00 <= codepoint && codepoint <= 0x7F) {
-+      /* UTF-8 error is emitted as a negative codepoint */
-+      codepoint = -(codepoint + 0x80);
-+    } else {
-+      if (flags & JVP_UTF8_ERRORS_ALL) assert(0 && "Invalid WTF-8b sequence: overlong");
-+      codepoint = -1;
-     }
--    if (codepoint > 0x10FFFF) {
--      /* Outside Unicode range */
--      if (flags & JVP_UTF8_ERRORS_ALL) assert(0 && "Invalid WTF-8b sequence: out of range");
-+  } else if (0xD800 <= codepoint && codepoint <= 0xDFFF) {
-+    /* Surrogate codepoints are allowed in WTF-8/WTF-8b */
-+    if (!(flags & JVP_UTF8_ERRORS_UTF16)) {
-+      /* Surrogate codepoints can't be encoded in UTF8 */
-       codepoint = -1;
-     }
-+  } else if (codepoint > 0x10FFFF) {
-+    /* Outside Unicode range */
-+    if (flags & JVP_UTF8_ERRORS_ALL) assert(0 && "Invalid WTF-8b sequence: out of range");
-+    codepoint = -1;
-   }
-   if (codepoint == -1 && (flags & JVP_UTF8_REPLACE))
-     codepoint = 0xFFFD; // U+FFFD REPLACEMENT CHARACTER
-@@ -139,6 +189,112 @@ const char* jvp_utf8_extended_next(const char* in, const char* end, enum jvp_utf
-   return in + length;
- }
- 
-+// assumes two bytes are readable from `in`
-+static int decode_utf8_error(const char* in) {
-+  uint32_t data = U32(in[0]  & 0xFF, in[1] & 0xFF, 0, 0);
-+  int codepoint;
-+  if (decode(DECODE_2, data, &codepoint) == UTF8_ERR_LEN && codepoint < 0x80)
-+    return codepoint + 0x80;
-+  return -1;
-+}
-+
-+// assumes three bytes are readable from `in`
-+static int decode_utf16_error(const char* in) {
-+  uint32_t data = U32(in[0] & 0xFF, in[1] & 0xFF, in[2] & 0xFF, 0);
-+  int codepoint;
-+  if (decode(DECODE_3, data, &codepoint) == UTF16_ERR_LEN && codepoint >= 0xD800 && codepoint < 0xDFFF)
-+    return codepoint;
-+  return -1;
-+}
-+
-+// jvp_utf8_extended_join attempts to turn errors at the end of `a` and the
-+// beginning of `b` into a valid code point. if a correction is possible,
-+// `*alen_io`, `*bstart_io` and `*blen_io` are updated to exclude the existing
-+// errors, and the UTF-8 encoding of the code point to insert is stored in
-+// `out`. the number of bytes that should be inserted from `out` into the
-+// middle of the strings is returned (up to 4). this will be 0 if there are no
-+// bytes to insert.
-+int jvp_utf8_extended_join(const char* astart, uint32_t* alen_io, const char** bstart_io, uint32_t* blen_io, char* out) {
-+  const char* aend = astart + *alen_io;
-+  const char* bstart = *bstart_io;
-+  const char* bend = bstart + *blen_io;
-+  int bcp;
-+  bstart = jvp_utf8_extended_next(bstart, bend, JVP_UTF8_ERRORS_ALL, &bcp);
-+  if (!bstart) {
-+    // end of string
-+    return 0;
-+  }
-+  if (bcp >= 0xDC00 && bcp <= 0xDFFF) {
-+    // UTF-16 tail surrogate, look for lead surrogate at the end of `a`
-+    assert(bstart == *bstart_io + UTF16_ERR_LEN);
-+    if (aend - astart < UTF16_ERR_LEN)
-+      return 0;
-+    int acp = decode_utf16_error(aend - UTF16_ERR_LEN);
-+    if (acp >= 0xD800 && acp <= 0xDBFF) {
-+      // UTF-16 lead surrogate, decode matching UTF-16 pair
-+      *alen_io -= UTF16_ERR_LEN;
-+      *blen_io -= UTF16_ERR_LEN;
-+      *bstart_io += UTF16_ERR_LEN;
-+      int codepoint = 0x10000 + (((acp - 0xD800) << 10) | (bcp - 0xDC00));
-+      return jvp_utf8_encode(codepoint, out);
-+    }
-+    return 0;
-+  }
-+  if (bcp >= -0xFF && bcp <= -0x80) {
-+    // UTF-8 error, if it's a continuation byte, search backwards in `a` for the leading byte
-+    bcp = -bcp;
-+    assert(bstart == *bstart_io + UTF8_ERR_LEN);
-+    if (utf8_coding_length[bcp] != UTF8_CONTINUATION_BYTE)
-+      return 0;
-+    // if there's a correctable error, we will consume up to 4 encoded error bytes total, with up to 3 bytes from each of `a` and `b`
-+    unsigned char buf[6];
-+    unsigned char* bufstart = buf + 3;
-+    unsigned char* bufend = bufstart;
-+    *bufend++ = bcp;
-+    int length;
-+    // search backwards in `a` for a leading byte
-+    for (;;) {
-+      if (aend - astart < UTF8_ERR_LEN)
-+        return 0; // `a` is too short
-+      int acp = decode_utf8_error(aend - UTF8_ERR_LEN);
-+      if (acp == -1)
-+        return 0; // not a UTF-8 error
-+      aend -= UTF8_ERR_LEN;
-+      length = utf8_coding_length[acp];
-+      if (length == 0)
-+        return 0; // not a possible UTF-8 byte
-+      *--bufstart = acp;
-+      if (length != UTF8_CONTINUATION_BYTE)
-+        break; // found leading byte
-+      if (bufstart == buf)
-+        return 0; // too many continuation bytes
-+    }
-+    if (bufend - bufstart > length)
-+      return 0; // too many continuation bytes
-+    // search forwards in `b` for any more needed continuation bytes
-+    while (bufend - bufstart < length) {
-+      if (bend - bstart < UTF8_ERR_LEN)
-+        return 0; // `b` is too short
-+      bcp = decode_utf8_error(bstart);
-+      if (bcp == -1 || utf8_coding_length[bcp] != UTF8_CONTINUATION_BYTE)
-+        return 0; // not a UTF-8 error, didn't find enough continuation bytes
-+      bstart += UTF8_ERR_LEN;
-+      *bufend++ = bcp;
-+    }
-+    int codepoint;
-+    // check that the bytes are strict UTF-8
-+    jvp_utf8_extended_next((char*)bufstart, (char*)bufend, 0, &codepoint);
-+    if (codepoint != -1) {
-+      memcpy(out, bufstart, 4);
-+      *alen_io = aend - astart;
-+      *blen_io = bend - bstart;
-+      *bstart_io = bstart;
-+      return bufend - bufstart;
-+    }
-+  }
-+  return 0;
-+}
-+
- int jvp_utf8_is_valid(const char* in, const char* end) {
-   int codepoint;
-   while ((in = jvp_utf8_extended_next(in, end, 0, &codepoint))) {
-diff --git a/src/jv_unicode.h b/src/jv_unicode.h
-index 37c7fc0..ff2a437 100644
---- a/src/jv_unicode.h
-+++ b/src/jv_unicode.h
-@@ -1,6 +1,8 @@
- #ifndef JV_UNICODE_H
- #define JV_UNICODE_H
- 
-+#include <stdint.h>
-+
- enum jvp_utf8_flags {
-   /* Emit replacement character instead of -1 for errors */
-   JVP_UTF8_REPLACE = 1,
-@@ -14,6 +16,7 @@ enum jvp_utf8_flags {
- const char* jvp_utf8_backtrack(const char* start, const char* min, int *missing_bytes);
- const char* jvp_utf8_extended_next(const char* in, const char* end, enum jvp_utf8_flags flags, int* codepoint);
- const char* jvp_utf8_next(const char* in, const char* end, int* codepoint);
-+int jvp_utf8_extended_join(const char* astart, uint32_t* alen, const char** bstart, uint32_t* blen, char* out);
- int jvp_utf8_is_valid(const char* in, const char* end);
- 
- int jvp_utf8_decode_length(char startchar);
-diff --git a/tests/jq.test b/tests/jq.test
-index c882fd2..9e6c896 100644
---- a/tests/jq.test
-+++ b/tests/jq.test
-@@ -62,6 +62,11 @@ null
- null
- "∀\ud800∃\udc00∅\udfff"
- 
-+# Check that unpaired surrogates are paired when concatenated
-+add
-+["\ud83d","\ude43","\ud83e","\udd11","\ud83e","\udd17","\ud83e","\udd14","\ud83e","\udd10","\ud83d","\ude44","\ud83e","\udd12","\ud83e","\udd15","\ud83e","\udd13","\ud83e","\udd16","\ud83e","\udd18","\ud83c","\udffb","\ud83c","\udffc"]
-+"🙃🤑🤗🤔🤐🙄🤒🤕🤓🤖🤘🏻🏼"
-+
- "inter\("pol" + "ation")"
- null
- "interpolation"
-@@ -87,6 +92,16 @@ null
- "Zm/Ds2Jhcgo="
- "foóbar\n"
- 
-+# test correction of UTF-8 errors when concatenating as binary data (input is a random sequence of code points)
-+. as $text | @base64 | . as $b64 | [range(0, 300)] | map($b64[(.*4):((. + 1)*4)] | @base64d) | add | . == $text
-+"򍨼衍򙮬񪜁򻴠󖂡󔁰񗏷󛊭񢠃򍧝𭌞󹰞󙴋𿋓󧜹򳔎񦰓򅆹򽐟󂑛򶃯㾱ꕽ񂊛򉙲򅤎􃖣󻣸󁸦򴏜򽃿􄑏󠦱񄛲񄕵񡿚򮩒񡏂򨆯򶚒󎮆󉨗򡮟򆿴񬏪򻀅㫑񉒗󴍶󬪸񝶑񂾑򇔣򉩉􂞇𲡀𨫆򤵇𲺝\u001c񖂟񳐉󲔹𳨬􀮔𸒙񜶻㊬񓐊񽒬󑀧󗧚󞌶󦥥𗌽𘀍󴼹􌇺򫗛񂷶󏷕񜁍񥬟󼁁󓺉𗟒򷝊𩕃񞝏񧄀󁲩򐀄򳂸񲊷򃀋񃫫𝷏򏖝򷂍󢭣􋛨𞪒򁁅勸󯩥󵪭񚮚򻡍騎񾊯򪓚񗡈񎕫򡯬񋫠ᕴ𞨹󾄇񩠶𙯾񢥱𚯴񬥷󢶖񾹌񡈟򧓑񒾘𚸯񳗺񭟡𫸬񷤖񷆐𖋌񦰃椀𫎾󗚋𿋆󈝰񺥲򝕊𵯮򙧚󬱃󍗞󱆃󂟙󟆺񻢬󸮤󗗉񉛮𺵡𰣒􁋙񻍛􇡘ᮍ񕥸񨵂盕嗪𻸮򶆍򊈤񽓎󙴐𗬜󾱒󷹰􇡈񨦎􏥩񴲡𨑮򱏝𭢊󕁶򣙥󶡮󮰌󿙾氕񼻘􆔪񢕀񊿃󮨝񑛖󣴊󎎏򳞓㊁󒭀󇜳𯄌𻙩"
-+true
-+
-+# test preservation of binary data when concatenating (input is a random sequence of UTF-16 surrogates encoded in WTF-8, should be treated as regular UTF-8 errors)
-+@base64d | . as $text | @base64 | . as $b64 | [range(0, 300)] | map($b64[(.*4):((. + 1)*4)] | @base64d) | add | . == $text
-+"7bKv7aiz7auX7aG37aO77aOe7auy7bmm7bqk7aG87bSH7a6m7bmc7bum7bqj7au+7bqf7aap7buC7byq7aS37aCp7aSl7a+a7bur7aGV7bGl7b6M7biB7aOe7ayR7amW7aOX7b637a+P7bu+7ayP7bOw7ba/7ayp7b6G7aqd7bG37bK57b6O7bq27a+u7a2N7ayu7bKK"
-+true
-+
- @uri
- "\u03bc"
- "%CE%BC"
diff --git a/Update-base64-utf8bytelength-and-fromjson-to-handlebinary-strings.patch b/Update-base64-utf8bytelength-and-fromjson-to-handlebinary-strings.patch
deleted file mode 100644
index f3cd1dcf863aa1f9f21f8c98201015d360abca9f..0000000000000000000000000000000000000000
--- a/Update-base64-utf8bytelength-and-fromjson-to-handlebinary-strings.patch
+++ /dev/null
@@ -1,210 +0,0 @@
-From a6ccbaad05bea30c5700b10bd51e46d390496a9b Mon Sep 17 00:00:00 2001
-From: Max Zerzouri <maxdamantus@gmail.com>
-Date: Sun, 16 May 2021 09:18:51 +0000
-Subject: [PATCH] Update `@base64`, `utf8bytelength` and `fromjson` to handle
- binary strings
-
----
- docs/content/3.manual/manual.yml |   1 -
- src/builtin.c                    | 107 ++++++++++++++++++++++++++-----
- tests/base64.test                |  10 +++
- tests/shtest                     |  19 ++++--
- 4 files changed, 116 insertions(+), 21 deletions(-)
-
-diff --git a/docs/content/3.manual/manual.yml b/docs/content/3.manual/manual.yml
-index bfb17f4..1258dbf 100644
---- a/docs/content/3.manual/manual.yml
-+++ b/docs/content/3.manual/manual.yml
-@@ -1843,7 +1843,6 @@ sections:
-           * `@base64d`:
- 
-             The inverse of `@base64`, input is decoded as specified by RFC 4648.
--            Note\: If the decoded string is not UTF-8, the results are undefined.
- 
-           This syntax can be combined with string interpolation in a
-           useful way. You can follow a `@foo` token with a string
-diff --git a/src/builtin.c b/src/builtin.c
-index c6c8c2e..975bf49 100644
---- a/src/builtin.c
-+++ b/src/builtin.c
-@@ -409,10 +409,55 @@ static jv f_dump(jq_state *jq, jv input) {
- static jv f_json_parse(jq_state *jq, jv input) {
-   if (jv_get_kind(input) != JV_KIND_STRING)
-     return type_error(input, "only strings can be parsed");
--  jv res = jv_parse_sized(jv_string_value(input),
--                          jv_string_length_bytes(jv_copy(input)));
-+
-+  const char* i = jv_string_value(input);
-+  const char* end = i + jv_string_length_bytes(jv_copy(input));
-+
-+  struct jv_parser* parser = jv_parser_new(0);
-+  int count = 0;
-+  jv value = jv_invalid();
-+  while (i != NULL) {
-+    const int max_utf8_len = 4;
-+    unsigned char buf[100 + max_utf8_len];
-+    int buflen = 0;
-+    int c;
-+    while ((buflen + max_utf8_len < sizeof(buf)) && (i = jvp_utf8_extended_next(i, end, JVP_UTF8_REPLACE | JVP_UTF8_ERRORS_UTF8, &c))) {
-+      if (c >= -0xFF && c <= -0x80) {
-+        // Invalid UTF-8 byte, pass through
-+        buf[buflen++] = -c;
-+      } else
-+        buflen += jvp_utf8_encode(c, buf + buflen);
-+    }
-+    jv_parser_set_buf(parser, buf, buflen, i != NULL);
-+    for (;;) {
-+      jv next = jv_parser_next(parser);
-+      if (!jv_is_valid(next)) {
-+        if (jv_invalid_has_msg(jv_copy(next))) {
-+          count++;
-+          jv_free(value);
-+          value = next;
-+          i = NULL;
-+        }
-+        break;
-+      }
-+      jv_free(value);
-+      if (count++ == 0)
-+        value = next;
-+      else {
-+        jv_free(next);
-+        value = jv_invalid_with_msg(jv_string("Unexpected extra JSON values"));
-+        i = NULL;
-+        break;
-+      }
-+    }
-+  }
-+  jv_parser_free(parser);
-   jv_free(input);
--  return res;
-+  if (count == 0) {
-+    jv_free(value);
-+    value = jv_invalid_with_msg(jv_string("Expected JSON value"));
-+  }
-+  return value;
- }
- 
- static jv f_tonumber(jq_state *jq, jv input) {
-@@ -457,7 +502,19 @@ static jv f_tostring(jq_state *jq, jv input) {
- static jv f_utf8bytelength(jq_state *jq, jv input) {
-   if (jv_get_kind(input) != JV_KIND_STRING)
-     return type_error(input, "only strings have UTF-8 byte length");
--  return jv_number(jv_string_length_bytes(input));
-+  const char* i = jv_string_value(input);
-+  const char* end = i + jv_string_length_bytes(jv_copy(input));
-+  int len = 0;
-+  int c;
-+  while ((i = jvp_utf8_extended_next(i, end, JVP_UTF8_REPLACE | JVP_UTF8_ERRORS_UTF8, &c))) {
-+    if (c >= -0xFF && c <= -0x80) {
-+      // Invalid UTF-8 byte, will be passed through
-+      len++;
-+    } else
-+      len += jvp_utf8_encode_length(c);
-+  }
-+  jv_free(input);
-+  return jv_number(len);
- }
- 
- #define CHARS_ALPHANUM "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
-@@ -632,21 +689,41 @@ static jv f_format(jq_state *jq, jv input, jv fmt) {
-     jv_free(fmt);
-     input = f_tostring(jq, input);
-     jv line = jv_string("");
--    const unsigned char* data = (const unsigned char*)jv_string_value(input);
--    int len = jv_string_length_bytes(jv_copy(input));
--    for (int i=0; i<len; i+=3) {
--      uint32_t code = 0;
--      int n = len - i >= 3 ? 3 : len-i;
--      for (int j=0; j<3; j++) {
-+    const char* i = jv_string_value(input);
-+    const char* end = i + jv_string_length_bytes(jv_copy(input));
-+    uint32_t code = 0;
-+    int n = 0;
-+    int c;
-+    while ((i = jvp_utf8_extended_next(i, end, JVP_UTF8_REPLACE | JVP_UTF8_ERRORS_UTF8, &c))) {
-+      unsigned char ubuf[4];
-+      int len = 0;
-+      if (c >= -0xFF && c <= -0x80) {
-+        // Invalid UTF-8 byte, pass through
-+        ubuf[len++] = -c;
-+      } else
-+        len += jvp_utf8_encode(c, ubuf);
-+      for (int x = 0; x < len; x++) {
-         code <<= 8;
--        code |= j < n ? (unsigned)data[i+j] : 0;
-+        code |= ubuf[x];
-+        if (++n == 3) {
-+          char buf[4];
-+          for (int j = 0; j < 4; j++)
-+            buf[j] = BASE64_ENCODE_TABLE[(code >> (18 - j*6)) & 0x3f];
-+          line = jv_string_append_buf(line, buf, sizeof(buf));
-+          n = 0;
-+          code = 0;
-+        }
-       }
-+    }
-+    if (n > 0) {
-+      assert(n < 3);
-+      code <<= 8*(3 - n);
-       char buf[4];
--      for (int j=0; j<4; j++) {
-+      for (int j = 0; j < 4; j++)
-         buf[j] = BASE64_ENCODE_TABLE[(code >> (18 - j*6)) & 0x3f];
--      }
--      if (n < 3) buf[3] = '=';
--      if (n < 2) buf[2] = '=';
-+      buf[3] = '=';
-+      if (n < 2)
-+        buf[2] = '=';
-       line = jv_string_append_buf(line, buf, sizeof(buf));
-     }
-     jv_free(input);
-diff --git a/tests/base64.test b/tests/base64.test
-index 0f82b0b..6507bb8 100644
---- a/tests/base64.test
-+++ b/tests/base64.test
-@@ -33,3 +33,13 @@
- . | try @base64d catch .
- "QUJDa"
- "string (\"QUJDa\") trailing base64 byte found"
-+
-+# random binary data
-+(. | @base64d | @base64) == .
-+"zns0Su1i4JjDfGiR95WOcU8iiPMOrfJTUBm9P1ot2qIMiyk04b0WSIFNTMD7w9ziMV8nSbwpPqNl3JKF1eWZrRRg24rbvh66O1e7Z1xIGPNqTqm+jdzRCkWSryR+67wXRVgD6Q=="
-+true
-+
-+# replace lone surrogates
-+@base64
-+"foo\udca9\ud83dbar"
-+"Zm9v77+977+9YmFy"
-diff --git a/tests/shtest b/tests/shtest
-index 4c8b57e..7de61e4 100755
---- a/tests/shtest
-+++ b/tests/shtest
-@@ -131,11 +131,20 @@ cmp $d/out $d/expected
- 
- 
- clean=false
--# Invalid UTF-8 bytes are preserved when encoding/decoding JSON
--dd if=/dev/urandom bs=1024 count=1024 >$d/rand 2>/dev/null
--$VALGRIND $Q $JQ -sR . $d/rand >$d/out.json
--$VALGRIND $Q $JQ -j . $d/out.json >$d/out
--cmp $d/out $d/rand
-+# Invalid UTF-8 bytes are preserved when encoding/decoding JSON and base64 and concatenating binary strings
-+if dd if=/dev/urandom bs=1024 count=1024 >$d/rand 2>/dev/null; then
-+    $VALGRIND $Q $JQ -sR . $d/rand >$d/out.json
-+    $VALGRIND $Q $JQ -j . $d/out.json >$d/out
-+    cmp $d/out $d/rand
-+    $VALGRIND $Q $JQ -jR fromjson $d/out.json >$d/out
-+    cmp $d/out $d/rand
-+    $VALGRIND $Q $JQ -j '@base64 | @base64d' $d/out.json >$d/out
-+    cmp $d/out $d/rand
-+    base64 $d/rand | $VALGRIND $Q $JQ -R '@base64d' | $VALGRIND $Q $JQ -sj 'add' >$d/out
-+    cmp $d/out $d/rand
-+    $VALGRIND $Q $JQ -nj '$a' --rawfile a $d/rand >$d/out
-+    cmp $d/out $d/rand
-+fi
- clean=true
- 
- 
diff --git a/jq.spec b/jq.spec
index 37538216d0388ddcbc976ca29854f4e7cb363d8b..02a26b7229611717fe64c8fe60804d76169d310e 100644
--- a/jq.spec
+++ b/jq.spec
@@ -1,10 +1,12 @@
 Name:           jq
 Version:        1.7.1
-Release:        2
+Release:        3
 Summary:        A lightweight and flexible command-line JSON processor
 License:        MIT and ASL 2.0 and CC-BY-3.0 and GPLv3
 URL:            http://stedolan.github.io/jq/
 Source0:        https://github.com/jqlang/jq/releases/download/jq-%{version}/jq-%{version}.tar.gz
+Patch0:         CVE-2024-53427-pre.patch
+Patch1:         CVE-2024-53427.patch
 BuildRequires:  make flex bison gcc chrpath oniguruma-devel
 %ifarch %{valgrind_arches}
 BuildRequires:  valgrind
@@ -74,6 +76,9 @@ make check
 
 
 %changelog
+* Fri Mar 07 2025 yaoxin <1024769339@qq.com> - 1.7.1-3
+- Fix CVE-2024-53427
+
 * Mon Sep 09 2024 laokz <zhangkai@iscas.ac.cn> - 1.7.1-2
 - Let valgrind depend on system arch macro