Solution #110fbd0b-2722-4252-9cf8-009ff962cddd
failedScore
0% (0/5)
Runtime
3.50ms
Delta
-100.0% vs parent
-100.0% vs best
Regression from parent
Score
0% (0/5)
Runtime
3.50ms
Delta
-100.0% vs parent
-100.0% vs best
Regression from parent
def solve(input):
data = input.get("data", "")
if not isinstance(data, str):
data = str(data)
raw = data.encode("utf-8")
n = len(raw)
if n == 0:
return 0.0
def vlen(x):
c = 1
while x >= 128:
x >>= 7
c += 1
return c
def putv(x, out):
while x >= 128:
out.append((x & 127) | 128)
x >>= 7
out.append(x)
def getv(buf, i):
val = 0
shift = 0
while i < len(buf):
b = buf[i]
i += 1
val |= (b & 127) << shift
if b < 128:
return val, i
shift += 7
if shift > 63:
return None, i
return None, i
# Novel approach:
# Build several candidate encodings and score by actual serialized size.
# This is meaningfully different from prior single-parser attempts.
#
# Format A: compact bit-packed RLE of bits for binary-like data.
# Header byte 255, then original bit-length varint, first bit byte, then run lengths varints.
#
# Format B: BWT + MTF + zero-run encoding + Huffman on transformed stream.
# Header byte 254, then metadata and canonical Huffman payload.
#
# Format C: raw literal fallback.
# Header byte 0, then raw bytes.
#
# We choose smallest valid encoding, then fully decode and verify.
def encode_raw(buf):
out = bytearray()
out.append(0)
out.extend(buf)
return out
def decode_raw(buf):
if not buf or buf[0] != 0:
return None
return bytes(buf[1:])
def encode_bit_rle(buf):
if n < 8:
return None
ones = 0
for b in buf:
x = b
while x:
x &= x - 1
ones += 1
total_bits = len(buf) * 8
zeros = total_bits - ones
if ones == 0 or zeros == 0:
pass
elif min(ones, zeros) * 8 > total_bits * 3:
return None
bits0 = (buf[0] >> 7) & 1
runs = []
cur = bits0
cnt = 0
for byte in buf:
for s in (7, 6, 5, 4, 3, 2, 1, 0):
bit = (byte >> s) & 1
if bit == cur:
cnt += 1
else:
runs.append(cnt)
cur = bit
cnt = 1
runs.append(cnt)
out = bytearray()
out.append(255)
putv(total_bits, out)
out.append(bits0)
putv(len(runs), out)
for r in runs:
putv(r, out)
return out
def decode_bit_rle(buf):
if not buf or buf[0] != 255:
return None
i = 1
total_bits, i = getv(buf, i)
if total_bits is None or i >= len(buf):
return None
cur = buf[i]
i += 1
if cur not in (0, 1):
return None
rcnt, i = getv(buf, i)
if rcnt is None:
return None
bits = []
produced = 0
for _ in range(rcnt):
r, i = getv(buf, i)
if r is None or r < 0:
return None
produced += r
if produced > total_bits:
return None
if r:
bits.extend([cur] * r)
cur ^= 1
if produced != total_bits or i != len(buf):
return None
if total_bits % 8 != 0:
return None
out = bytearray(total_bits // 8)
p = 0
for bi in range(len(out)):
x = 0
for _ in range(8):
x = (x << 1) | bits[p]
p += 1
out[bi] = x
return bytes(out)
def suffix_array_cyclic(s):
m = len(s)
sa = list(range(m))
rank = list(s)
tmp = [0] * m
k = 1
while k < m:
sa.sort(key=lambda i: (rank[i], rank[(i + k) % m]))
tmp[sa[0]] = 0
classes = 1
for j in range(1, m):
a = sa[j - 1]
b = sa[j]
if rank[a] != rank[b] or rank[(a + k) % m] != rank[(b + k) % m]:
classes += 1
tmp[b] = classes - 1
rank, tmp = tmp, rank
if classes == m:
break
k <<= 1
return sa
def bwt_transform(buf):
m = len(buf)
if m == 1:
return bytes(buf), 0
sa = suffix_array_cyclic(buf)
out = bytearray(m)
primary = 0
for idx, p in enumerate(sa):
if p == 0:
primary = idx
out[idx] = buf[m - 1]
else:
out[idx] = buf[p - 1]
return bytes(out), primary
def bwt_inverse(last, primary):
m = len(last)
if m == 0:
return b""
counts = [0] * 256
for b in last:
counts[b] += 1
starts = [0] * 256
s = 0
for c in range(256):
starts[c] = s
s += counts[c]
occ = [0] * m
seen = [0] * 256
for i, b in enumerate(last):
occ[i] = seen[b]
seen[b] += 1
nxt = [0] * m
for i, b in enumerate(last):
nxt[starts[b] + occ[i]] = i
res = bytearray(m)
j = primary
for k in range(m - 1, -1, -1):
res[k] = last[j]
j = nxt[j]
return bytes(res)
def mtf_encode(buf):
table = list(range(256))
pos = list(range(256))
out = []
for b in buf:
p = pos[b]
out.append(p)
if p:
x = table[p]
while p > 0:
y = table[p - 1]
table[p] = y
pos[y] = p
p -= 1
table[0] = x
pos[x] = 0
return out
def mtf_decode(vals):
table = list(range(256))
out = bytearray()
for p in vals:
if p < 0 or p >= 256:
return None
x = table[p]
out.append(x)
if p:
while p > 0:
table[p] = table[p - 1]
p -= 1
table[0] = x
return bytes(out)
def zrle_encode(vals):
out = []
z = 0
for v in vals:
if v == 0:
z += 1
else:
while z > 0:
chunk = z if z < 65535 else 65535
out.append(256)
out.append(chunk)
z -= chunk
out.append(v - 1)
while z > 0:
chunk = z if z < 65535 else 65535
out.append(256)
out.append(chunk)
z -= chunk
return out
def zrle_decode(tokens):
vals = []
i = 0
while i < len(tokens):
t = tokens[i]
i += 1
if t == 256:
if i >= len(tokens):
return None
run = tokens[i]
i += 1
if run < 0:
return None
vals.extend([0] * run)
else:
vals.append(t + 1)
return vals
def build_huffman(freq):
syms = [i for i, f in enumerate(freq) if f > 0]
if not syms:
return [0] * len(freq)
if len(syms) == 1:
lengths = [0] * len(freq)
lengths[syms[0]] = 1
return lengths
nodes = []
for s in syms:
nodes.append([freq[s], -1, -1, s])
active = list(range(len(nodes)))
while len(active) > 1:
active.sort(key=lambda idx: nodes[idx][0])
a = active.pop(0)
b = active.pop(0)
nodes.append([nodes[a][0] + nodes[b][0], a, b, -1])
active.append(len(nodes) - 1)
root = active[0]
lengths = [0] * len(freq)
stack = [(root, 0)]
while stack:
idx, d = stack.pop()
f, l, r, s = nodes[idx]
if s >= 0:
lengths[s] = d if d else 1
else:
stack.append((l, d + 1))
stack.append((r, d + 1))
return lengths
def canonical_codes(lengths):
pairs = [(l, s) for s, l in enumerate(lengths) if l > 0]
pairs.sort()
codes = {}
code = 0
prev_len = 0
for l, s in pairs:
code <<= (l - prev_len)
codes[s] = (code, l)
code += 1
prev_len = l
return codes, pairs
def encode_huffman_stream(tokens, alphabet):
freq = [0] * alphabet
for t in tokens:
if t < 0 or t >= alphabet:
return None
freq[t] += 1
lengths = build_huffman(freq)
codes, pairs = canonical_codes(lengths)
out = bytearray()
putv(alphabet, out)
used = [(s, lengths[s]) for s in range(alphabet) if lengths[s] > 0]
putv(len(used), out)
for s, l in used:
putv(s, out)
out.append(l)
bitbuf = 0
bitcnt = 0
payload = bytearray()
for t in tokens:
code, l = codes[t]
bitbuf |= code << bitcnt
bitcnt += l
while bitcnt >= 8:
payload.append(bitbuf & 255)
bitbuf >>= 8
bitcnt -= 8
if bitcnt:
payload.append(bitbuf & 255)
putv(len(tokens), out)
putv(len(payload), out)
out.extend(payload)
return out
def decode_huffman_stream(buf, i):
alphabet, i = getv(buf, i)
if alphabet is None or alphabet <= 0:
return None, i
usedn, i = getv(buf, i)
if usedn is None:
return None, i
lengths = [0] * alphabet
for _ in range(usedn):
s, i = getv(buf, i)
if s is None or s < 0 or s >= alphabet or i >= len(buf):
return None, i
l = buf[i]
i += 1
if l <= 0:
return None, i
lengths[s] = l
count, i = getv(buf, i)
paylen, i = getv(buf, i)
if count is None or paylen is None or i + paylen > len(buf):
return None, i
payload = buf[i:i + paylen]
i += paylen
_, pairs = canonical_codes(lengths)
if not pairs:
return None, i
decode = {}
maxlen = 0
code = 0
prev_len = 0
for l, s in pairs:
code <<= (l - prev_len)
decode[(code, l)] = s
code += 1
prev_len = l
if l > maxlen:
maxlen = l
out = []
bitbuf = 0
bitcnt = 0
p = 0
cur = 0
curl = 0
while len(out) < count:
if curl > maxlen:
return None, i
if bitcnt == 0:
if p >= len(payload):
return None, i
bitbuf = payload[p]
p += 1
bitcnt = 8
bit = bitbuf & 1
bitbuf >>= 1
bitcnt -= 1
cur |= bit << curl
curl += 1
key = (cur, curl)
if key in decode:
out.append(decode[key])
cur = 0
curl = 0
return out, i
def encode_bwt_mtf_huff(buf):
if len(buf) <= 1:
return None
last, primary = bwt_transform(buf)
mtf = mtf_encode(last)
tokens = zrle_encode(mtf)
out = bytearray()
out.append(254)
putv(len(buf), out)
putv(primary, out)
h = encode_huffman_stream(tokens, 257)
if h is None:
return None
out.extend(h)
return out
def decode_bwt_mtf_huff(buf):
if not buf or buf[0] != 254:
return None
i = 1
m, i = getv(buf, i)
primary, i = getv(buf, i)
if m is None or primary is None or primary < 0 or primary >= m:
return None
tokens, i = decode_huffman_stream(buf, i)
if tokens is None or i != len(buf):
return None
mtf = zrle_decode(tokens)
if mtf is None or len(mtf) != m:
return None
last = mtf_decode(mtf)
if last is None or len(last) != m:
return None
return bwt_inverse(last, primary)
candidates = [encode_raw(raw)]
c1 = encode_bit_rle(raw)
if c1 is not None:
candidates.append(c1)
if n <= 4000:
c2 = encode_bwt_mtf_huff(raw)
if c2 is not None:
candidates.append(c2)
best = min(candidates, key=len)
try:
if best[0] == 0:
dec = decode_raw(best)
elif best[0] == 255:
dec = decode_bit_rle(best)
elif best[0] == 254:
dec = decode_bwt_mtf_huff(best)
else:
return 999.0
if dec != raw:
return 999.0
if dec.decode("utf-8") != data:
return 999.0
except:
return 999.0
return len(best) / n