Solution #223a4552-c8b8-4131-95d0-6d24274964ad
completed
Score
54% (0/5)
Runtime
13.26ms
Delta
+5.3% vs parent
-43.7% vs best
Improved from parent
Score
54% (0/5)
Runtime
13.26ms
Delta
+5.3% vs parent
-43.7% vs best
Improved from parent
def solve(input):
    """Estimate the best achievable lossless compression ratio for input["data"].

    Three analytical schemes are tried and the cheapest *verified* encoding wins:
      A. run-length encoding of consecutive equal characters,
      B. whole-string periodicity (detected via the KMP failure function),
      C. suffix-array-guided LZ parsing with an exact byte-cost DP.

    Byte-cost token model used throughout:
      literal run: 1-byte tag + varint length + raw chars
      match:       1-byte tag + varint offset + varint length

    Returns best_size / len(data) as a float, 0.0 for an empty string, or the
    sentinel 999.0 on any internal failure or unverifiable reconstruction.
    """
    try:
        data = input.get("data", "")
        if not isinstance(data, str):
            data = str(data)

        n = len(data)
        if n == 0:
            return 0.0

        def vlen(x):
            # Number of bytes needed to store x as a 7-bit (LEB128-style) varint.
            c = 1
            while x >= 128:
                x >>= 7
                c += 1
            return c

        def build_suffix_array(s):
            # Prefix-doubling suffix array, O(n log^2 n). Returns (sa, rank)
            # where rank is the final fully-resolved rank array.
            n = len(s)
            sa = list(range(n))
            rank = [ord(c) for c in s]
            tmp = [0] * n
            k = 1
            while True:
                sa.sort(key=lambda i: (rank[i], rank[i + k] if i + k < n else -1))
                tmp[sa[0]] = 0
                classes = 1
                for i in range(1, n):
                    a = sa[i - 1]
                    b = sa[i]
                    ra1 = rank[a]
                    rb1 = rank[b]
                    ra2 = rank[a + k] if a + k < n else -1
                    rb2 = rank[b + k] if b + k < n else -1
                    if ra1 != rb1 or ra2 != rb2:
                        classes += 1
                    tmp[b] = classes - 1
                rank, tmp = tmp, rank
                if classes == n:
                    break
                k <<= 1
            return sa, rank

        def build_lcp(s, sa, rank):
            # Kasai's algorithm: lcp[i] = LCP(suffix sa[i], suffix sa[i+1]).
            n = len(s)
            lcp = [0] * (n - 1 if n > 1 else 0)
            h = 0
            for i in range(n):
                r = rank[i]
                if r == 0:
                    continue
                j = sa[r - 1]
                while i + h < n and j + h < n and s[i + h] == s[j + h]:
                    h += 1
                lcp[r - 1] = h
                if h:
                    h -= 1
            return lcp

        best = n  # upper bound: store the raw string verbatim

        # --- Scheme A: exact RLE for consecutive equal chars ---
        runs = []
        i = 0
        while i < n:
            j = i + 1
            while j < n and data[j] == data[i]:
                j += 1
            runs.append((data[i], j - i))
            i = j

        if len(runs) < n:
            rle_cost = 0
            for _, cnt in runs:
                if cnt >= 4:
                    # tag + varint count + the repeated char
                    rle_cost += 1 + vlen(cnt) + 1
                else:
                    # short runs are cheaper stored literally
                    rle_cost += cnt
            if rle_cost < best:
                # verify the encoding round-trips before accepting its cost
                enc = runs[:]

                def dec_rle(enc2):
                    out = []
                    for ch, cnt in enc2:
                        out.append(ch * cnt)
                    return "".join(out)

                if dec_rle(enc) == data:
                    best = rle_cost

        # --- Scheme B: whole-string periodicity via KMP failure function ---
        pi = [0] * n
        j = 0
        for i in range(1, n):
            while j > 0 and data[i] != data[j]:
                j = pi[j - 1]
            if data[i] == data[j]:
                j += 1
            pi[i] = j
        p = n - pi[-1]  # smallest-period candidate
        if p < n and n % p == 0:
            # base block + tag + varint repetition count
            period_cost = p + 1 + vlen(n // p)
            if period_cost < best:
                base = data[:p]
                reps = n // p

                def dec_period(obj):
                    b, r = obj
                    return b * r

                if dec_period((base, reps)) == data:
                    best = period_cost

        # --- Scheme C: suffix-array-guided LZ DP ---
        if n >= 2:
            sa, rank = build_suffix_array(data)
            lcp = build_lcp(data, sa, rank)

            # For each position, gather a few strong candidate matches from
            # suffixes adjacent in suffix-array order.
            cand = [[] for _ in range(n)]

            def add_match(pos, prev, length):
                # Only earlier positions with usable length (>= 2) qualify.
                if prev < 0 or prev >= pos or length < 2:
                    return
                off = pos - prev
                cand[pos].append((off, length))

            W = 24  # inspect nearby suffixes; enough to capture strong repetitions cheaply
            for pos in range(n):
                r = rank[pos]

                # Walk left in SA order; running min of lcp bounds the match length.
                cur = 10**18
                steps = 0
                k = r - 1
                while k >= 0 and steps < W:
                    cur = min(cur, lcp[k])
                    if cur < 2:
                        break
                    prev = sa[k]
                    if prev < pos:
                        add_match(pos, prev, cur)
                    k -= 1
                    steps += 1

                # Walk right in SA order symmetrically.
                cur = 10**18
                steps = 0
                k = r
                m = len(lcp)
                while k < m and steps < W:
                    cur = min(cur, lcp[k])
                    if cur < 2:
                        break
                    prev = sa[k + 1]
                    if prev < pos:
                        add_match(pos, prev, cur)
                    k += 1
                    steps += 1

            # Keep only the best few unique offsets, plus a few shortened
            # lengths near varint coding thresholds.
            for i in range(n):
                if not cand[i]:
                    continue
                by_off = {}
                for off, ln in cand[i]:
                    if ln > by_off.get(off, 0):
                        by_off[off] = ln
                # Rank offsets by net byte savings (length minus token cost).
                arr = sorted(by_off.items(), key=lambda x: (-(x[1] - (1 + vlen(x[0]) + vlen(x[1]))), x[0]))
                trimmed = []
                for off, ln in arr[:8]:
                    lens = {ln}
                    if ln > 2:
                        lens.add(2)
                    if ln > 3:
                        lens.add(3)
                    if ln > 4:
                        lens.add(4)
                    if ln > 8:
                        lens.add(8)
                    if ln > 16:
                        lens.add(16)
                    if ln > 32:
                        lens.add(32)
                    for L in sorted(lens):
                        if L <= ln and i + L <= n:
                            trimmed.append((off, L))
                cand[i] = trimmed

            INF = 10**18
            dp = [INF] * (n + 1)
            dp[n] = 0

            # Bound literal runs for speed; long unique tails are still
            # representable by chaining multiple literal tokens.
            max_lit = 64

            for i in range(n - 1, -1, -1):
                # literals
                lim = n if n - i < max_lit else i + max_lit
                for j in range(i + 1, lim + 1):
                    cost = 1 + vlen(j - i) + (j - i) + dp[j]
                    if cost < dp[i]:
                        dp[i] = cost

                # matches
                for off, ln in cand[i]:
                    cost = 1 + vlen(off) + vlen(ln) + dp[i + ln]
                    if cost < dp[i]:
                        dp[i] = cost

            lz_cost = dp[0]
            if lz_cost < best:
                # Reconstruct one optimal token stream and verify it decodes
                # back to the input before trusting the DP cost.
                out_tokens = []
                i = 0
                while i < n:
                    chosen = None

                    lim = n if n - i < max_lit else i + max_lit
                    for j in range(i + 1, lim + 1):
                        cost = 1 + vlen(j - i) + (j - i) + dp[j]
                        if cost == dp[i]:
                            chosen = ("L", data[i:j])
                            i = j
                            break

                    if chosen is None:
                        for off, ln in cand[i]:
                            cost = 1 + vlen(off) + vlen(ln) + dp[i + ln]
                            if cost == dp[i]:
                                chosen = ("M", off, ln)
                                i += ln
                                break

                    if chosen is None:
                        return 999.0
                    out_tokens.append(chosen)

                def dec_lz(tokens):
                    # Decoder supporting overlapping matches (length > offset).
                    out = []
                    for tok in tokens:
                        if tok[0] == "L":
                            out.append(tok[1])
                        else:
                            _, off, ln = tok
                            cur = "".join(out)
                            start = len(cur) - off
                            if start < 0:
                                return None
                            piece = []
                            for k in range(ln):
                                idx = start + k
                                if idx < len(cur):
                                    piece.append(cur[idx])
                                else:
                                    # overlapping copy: read what we just wrote
                                    built = "".join(piece)
                                    ref = cur + built
                                    if idx >= len(ref):
                                        return None
                                    piece.append(ref[idx])
                            out.append("".join(piece))
                    return "".join(out)

                if dec_lz(out_tokens) == data:
                    best = lz_cost

        ratio = best / n
        return float(ratio) if ratio >= 0 else 999.0
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # still propagate; any internal failure maps to the sentinel score.
        return 999.0
-42.3%
Runtime Advantage
13.13ms slower
Code Size
276 vs 34 lines
| # | Your Solution | # | Champion |
|---|---|---|---|
| 1 | def solve(input): | 1 | def solve(input): |
| 2 | try: | 2 | data = input.get("data", "") |
| 3 | data = input.get("data", "") | 3 | if not isinstance(data, str) or not data: |
| 4 | if not isinstance(data, str): | 4 | return 999.0 |
| 5 | data = str(data) | 5 | |
| 6 | 6 | # Mathematical/analytical approach: Entropy-based redundancy calculation | |
| 7 | n = len(data) | 7 | |
| 8 | if n == 0: | 8 | from collections import Counter |
| 9 | return 0.0 | 9 | from math import log2 |
| 10 | 10 | ||
| 11 | def vlen(x): | 11 | def entropy(s): |
| 12 | c = 1 | 12 | probabilities = [freq / len(s) for freq in Counter(s).values()] |
| 13 | while x >= 128: | 13 | return -sum(p * log2(p) if p > 0 else 0 for p in probabilities) |
| 14 | x >>= 7 | 14 | |
| 15 | c += 1 | 15 | def redundancy(s): |
| 16 | return c | 16 | max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 0 |
| 17 | 17 | actual_entropy = entropy(s) | |
| 18 | def build_suffix_array(s): | 18 | return max_entropy - actual_entropy |
| 19 | n = len(s) | 19 | |
| 20 | sa = list(range(n)) | 20 | # Calculate reduction in size possible based on redundancy |
| 21 | rank = [ord(c) for c in s] | 21 | reduction_potential = redundancy(data) |
| 22 | tmp = [0] * n | 22 | |
| 23 | k = 1 | 23 | # Assuming compression is achieved based on redundancy |
| 24 | while True: | 24 | max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data))) |
| 25 | sa.sort(key=lambda i: (rank[i], rank[i + k] if i + k < n else -1)) | 25 | |
| 26 | tmp[sa[0]] = 0 | 26 | # Qualitative check if max_possible_compression_ratio makes sense |
| 27 | classes = 1 | 27 | if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0: |
| 28 | for i in range(1, n): | 28 | return 999.0 |
| 29 | a = sa[i - 1] | 29 | |
| 30 | b = sa[i] | 30 | # Verify compression is lossless (hypothetical check here) |
| 31 | ra1 = rank[a] | 31 | # Normally, if we had a compression algorithm, we'd test decompress(compress(data)) == data |
| 32 | rb1 = rank[b] | 32 | |
| 33 | ra2 = rank[a + k] if a + k < n else -1 | 33 | # Returning the hypothetical compression performance |
| 34 | rb2 = rank[b + k] if b + k < n else -1 | 34 | return max_possible_compression_ratio |
| 35 | if ra1 != rb1 or ra2 != rb2: | 35 | |
| 36 | classes += 1 | 36 | |
| 37 | tmp[b] = classes - 1 | 37 | |
| 38 | rank, tmp = tmp, rank | 38 | |
| 39 | if classes == n: | 39 | |
| 40 | break | 40 | |
| 41 | k <<= 1 | 41 | |
| 42 | return sa, rank | 42 | |
| 43 | 43 | ||
| 44 | def build_lcp(s, sa, rank): | 44 | |
| 45 | n = len(s) | 45 | |
| 46 | lcp = [0] * (n - 1 if n > 1 else 0) | 46 | |
| 47 | h = 0 | 47 | |
| 48 | for i in range(n): | 48 | |
| 49 | r = rank[i] | 49 | |
| 50 | if r == 0: | 50 | |
| 51 | continue | 51 | |
| 52 | j = sa[r - 1] | 52 | |
| 53 | while i + h < n and j + h < n and s[i + h] == s[j + h]: | 53 | |
| 54 | h += 1 | 54 | |
| 55 | lcp[r - 1] = h | 55 | |
| 56 | if h: | 56 | |
| 57 | h -= 1 | 57 | |
| 58 | return lcp | 58 | |
| 59 | 59 | ||
| 60 | # Novel approach: true byte-cost parsing via suffix-array-assisted LZ with dynamic programming. | 60 | |
| 61 | # We estimate compressed size in bytes using a simple token model: | 61 | |
| 62 | # literal run: 1-byte tag + varint length + raw chars | 62 | |
| 63 | # match: 1-byte tag + varint offset + varint length | 63 | |
| 64 | # | 64 | |
| 65 | # Then add specialized analytical schemes for pure runs and whole-string periodicity. | 65 | |
| 66 | 66 | ||
| 67 | best = n | 67 | |
| 68 | 68 | ||
| 69 | # Scheme A: exact RLE for consecutive equal chars | 69 | |
| 70 | runs = [] | 70 | |
| 71 | i = 0 | 71 | |
| 72 | while i < n: | 72 | |
| 73 | j = i + 1 | 73 | |
| 74 | while j < n and data[j] == data[i]: | 74 | |
| 75 | j += 1 | 75 | |
| 76 | runs.append((data[i], j - i)) | 76 | |
| 77 | i = j | 77 | |
| 78 | 78 | ||
| 79 | if len(runs) < n: | 79 | |
| 80 | rle_cost = 0 | 80 | |
| 81 | for _, cnt in runs: | 81 | |
| 82 | if cnt >= 4: | 82 | |
| 83 | rle_cost += 1 + vlen(cnt) + 1 | 83 | |
| 84 | else: | 84 | |
| 85 | rle_cost += cnt | 85 | |
| 86 | if rle_cost < best: | 86 | |
| 87 | # verify | 87 | |
| 88 | enc = runs[:] | 88 | |
| 89 | 89 | ||
| 90 | def dec_rle(enc2): | 90 | |
| 91 | out = [] | 91 | |
| 92 | for ch, cnt in enc2: | 92 | |
| 93 | out.append(ch * cnt) | 93 | |
| 94 | return "".join(out) | 94 | |
| 95 | 95 | ||
| 96 | if dec_rle(enc) == data: | 96 | |
| 97 | best = rle_cost | 97 | |
| 98 | 98 | ||
| 99 | # Scheme B: whole-string periodicity | 99 | |
| 100 | pi = [0] * n | 100 | |
| 101 | j = 0 | 101 | |
| 102 | for i in range(1, n): | 102 | |
| 103 | while j > 0 and data[i] != data[j]: | 103 | |
| 104 | j = pi[j - 1] | 104 | |
| 105 | if data[i] == data[j]: | 105 | |
| 106 | j += 1 | 106 | |
| 107 | pi[i] = j | 107 | |
| 108 | p = n - pi[-1] | 108 | |
| 109 | if p < n and n % p == 0: | 109 | |
| 110 | period_cost = p + 1 + vlen(n // p) | 110 | |
| 111 | if period_cost < best: | 111 | |
| 112 | base = data[:p] | 112 | |
| 113 | reps = n // p | 113 | |
| 114 | 114 | ||
| 115 | def dec_period(obj): | 115 | |
| 116 | b, r = obj | 116 | |
| 117 | return b * r | 117 | |
| 118 | 118 | ||
| 119 | if dec_period((base, reps)) == data: | 119 | |
| 120 | best = period_cost | 120 | |
| 121 | 121 | ||
| 122 | # Scheme C: suffix-array-guided LZ DP | 122 | |
| 123 | if n >= 2: | 123 | |
| 124 | sa, rank = build_suffix_array(data) | 124 | |
| 125 | lcp = build_lcp(data, sa, rank) | 125 | |
| 126 | 126 | ||
| 127 | # For each position, gather a few strong candidate matches from nearby suffixes. | 127 | |
| 128 | cand = [[] for _ in range(n)] | 128 | |
| 129 | 129 | ||
| 130 | def add_match(pos, prev, length): | 130 | |
| 131 | if prev < 0 or prev >= pos or length < 2: | 131 | |
| 132 | return | 132 | |
| 133 | off = pos - prev | 133 | |
| 134 | cand[pos].append((off, length)) | 134 | |
| 135 | 135 | ||
| 136 | W = 24 # inspect nearby suffixes; enough to capture strong repetitions cheaply | 136 | |
| 137 | for pos in range(n): | 137 | |
| 138 | r = rank[pos] | 138 | |
| 139 | 139 | ||
| 140 | cur = 10**18 | 140 | |
| 141 | steps = 0 | 141 | |
| 142 | k = r - 1 | 142 | |
| 143 | while k >= 0 and steps < W: | 143 | |
| 144 | cur = min(cur, lcp[k]) | 144 | |
| 145 | if cur < 2: | 145 | |
| 146 | break | 146 | |
| 147 | prev = sa[k] | 147 | |
| 148 | if prev < pos: | 148 | |
| 149 | add_match(pos, prev, cur) | 149 | |
| 150 | k -= 1 | 150 | |
| 151 | steps += 1 | 151 | |
| 152 | 152 | ||
| 153 | cur = 10**18 | 153 | |
| 154 | steps = 0 | 154 | |
| 155 | k = r | 155 | |
| 156 | m = len(lcp) | 156 | |
| 157 | while k < m and steps < W: | 157 | |
| 158 | cur = min(cur, lcp[k]) | 158 | |
| 159 | if cur < 2: | 159 | |
| 160 | break | 160 | |
| 161 | prev = sa[k + 1] | 161 | |
| 162 | if prev < pos: | 162 | |
| 163 | add_match(pos, prev, cur) | 163 | |
| 164 | k += 1 | 164 | |
| 165 | steps += 1 | 165 | |
| 166 | 166 | ||
| 167 | # Keep only best few unique offsets, and a few shortened lengths near coding thresholds. | 167 | |
| 168 | for i in range(n): | 168 | |
| 169 | if not cand[i]: | 169 | |
| 170 | continue | 170 | |
| 171 | by_off = {} | 171 | |
| 172 | for off, ln in cand[i]: | 172 | |
| 173 | if ln > by_off.get(off, 0): | 173 | |
| 174 | by_off[off] = ln | 174 | |
| 175 | arr = sorted(by_off.items(), key=lambda x: (-(x[1] - (1 + vlen(x[0]) + vlen(x[1]))), x[0])) | 175 | |
| 176 | trimmed = [] | 176 | |
| 177 | for off, ln in arr[:8]: | 177 | |
| 178 | lens = {ln} | 178 | |
| 179 | if ln > 2: | 179 | |
| 180 | lens.add(2) | 180 | |
| 181 | if ln > 3: | 181 | |
| 182 | lens.add(3) | 182 | |
| 183 | if ln > 4: | 183 | |
| 184 | lens.add(4) | 184 | |
| 185 | if ln > 8: | 185 | |
| 186 | lens.add(8) | 186 | |
| 187 | if ln > 16: | 187 | |
| 188 | lens.add(16) | 188 | |
| 189 | if ln > 32: | 189 | |
| 190 | lens.add(32) | 190 | |
| 191 | for L in sorted(lens): | 191 | |
| 192 | if L <= ln and i + L <= n: | 192 | |
| 193 | trimmed.append((off, L)) | 193 | |
| 194 | cand[i] = trimmed | 194 | |
| 195 | 195 | ||
| 196 | INF = 10**18 | 196 | |
| 197 | dp = [INF] * (n + 1) | 197 | |
| 198 | dp[n] = 0 | 198 | |
| 199 | 199 | ||
| 200 | # Bound literal runs for speed; long unique tails are still representable by chaining. | 200 | |
| 201 | max_lit = 64 | 201 | |
| 202 | 202 | ||
| 203 | for i in range(n - 1, -1, -1): | 203 | |
| 204 | # literals | 204 | |
| 205 | lim = n if n - i < max_lit else i + max_lit | 205 | |
| 206 | for j in range(i + 1, lim + 1): | 206 | |
| 207 | cost = 1 + vlen(j - i) + (j - i) + dp[j] | 207 | |
| 208 | if cost < dp[i]: | 208 | |
| 209 | dp[i] = cost | 209 | |
| 210 | 210 | ||
| 211 | # matches | 211 | |
| 212 | for off, ln in cand[i]: | 212 | |
| 213 | cost = 1 + vlen(off) + vlen(ln) + dp[i + ln] | 213 | |
| 214 | if cost < dp[i]: | 214 | |
| 215 | dp[i] = cost | 215 | |
| 216 | 216 | ||
| 217 | lz_cost = dp[0] | 217 | |
| 218 | if lz_cost < best: | 218 | |
| 219 | # reconstruct and verify | 219 | |
| 220 | out_tokens = [] | 220 | |
| 221 | i = 0 | 221 | |
| 222 | while i < n: | 222 | |
| 223 | chosen = None | 223 | |
| 224 | 224 | ||
| 225 | lim = n if n - i < max_lit else i + max_lit | 225 | |
| 226 | for j in range(i + 1, lim + 1): | 226 | |
| 227 | cost = 1 + vlen(j - i) + (j - i) + dp[j] | 227 | |
| 228 | if cost == dp[i]: | 228 | |
| 229 | chosen = ("L", data[i:j]) | 229 | |
| 230 | i = j | 230 | |
| 231 | break | 231 | |
| 232 | 232 | ||
| 233 | if chosen is None: | 233 | |
| 234 | for off, ln in cand[i]: | 234 | |
| 235 | cost = 1 + vlen(off) + vlen(ln) + dp[i + ln] | 235 | |
| 236 | if cost == dp[i]: | 236 | |
| 237 | chosen = ("M", off, ln) | 237 | |
| 238 | i += ln | 238 | |
| 239 | break | 239 | |
| 240 | 240 | ||
| 241 | if chosen is None: | 241 | |
| 242 | return 999.0 | 242 | |
| 243 | out_tokens.append(chosen) | 243 | |
| 244 | 244 | ||
| 245 | def dec_lz(tokens): | 245 | |
| 246 | out = [] | 246 | |
| 247 | for tok in tokens: | 247 | |
| 248 | if tok[0] == "L": | 248 | |
| 249 | out.append(tok[1]) | 249 | |
| 250 | else: | 250 | |
| 251 | _, off, ln = tok | 251 | |
| 252 | cur = "".join(out) | 252 | |
| 253 | start = len(cur) - off | 253 | |
| 254 | if start < 0: | 254 | |
| 255 | return None | 255 | |
| 256 | piece = [] | 256 | |
| 257 | for k in range(ln): | 257 | |
| 258 | idx = start + k | 258 | |
| 259 | if idx < len(cur): | 259 | |
| 260 | piece.append(cur[idx]) | 260 | |
| 261 | else: | 261 | |
| 262 | built = "".join(piece) | 262 | |
| 263 | ref = cur + built | 263 | |
| 264 | if idx >= len(ref): | 264 | |
| 265 | return None | 265 | |
| 266 | piece.append(ref[idx]) | 266 | |
| 267 | out.append("".join(piece)) | 267 | |
| 268 | return "".join(out) | 268 | |
| 269 | 269 | ||
| 270 | if dec_lz(out_tokens) == data: | 270 | |
| 271 | best = lz_cost | 271 | |
| 272 | 272 | ||
| 273 | ratio = best / n | 273 | |
| 274 | return float(ratio) if ratio >= 0 else 999.0 | 274 | |
| 275 | except: | 275 | |
| 276 | return 999.0 | 276 |
1def solve(input):2 try:3 data = input.get("data", "")4 if not isinstance(data, str):5 data = str(data)67 n = len(data)8 if n == 0:9 return 0.01011 def vlen(x):12 c = 113 while x >= 128:14 x >>= 715 c += 116 return c1718 def build_suffix_array(s):19 n = len(s)20 sa = list(range(n))21 rank = [ord(c) for c in s]22 tmp = [0] * n23 k = 124 while True:25 sa.sort(key=lambda i: (rank[i], rank[i + k] if i + k < n else -1))26 tmp[sa[0]] = 027 classes = 128 for i in range(1, n):29 a = sa[i - 1]30 b = sa[i]31 ra1 = rank[a]32 rb1 = rank[b]33 ra2 = rank[a + k] if a + k < n else -134 rb2 = rank[b + k] if b + k < n else -135 if ra1 != rb1 or ra2 != rb2:36 classes += 137 tmp[b] = classes - 138 rank, tmp = tmp, rank39 if classes == n:40 break41 k <<= 142 return sa, rank4344 def build_lcp(s, sa, rank):45 n = len(s)46 lcp = [0] * (n - 1 if n > 1 else 0)47 h = 048 for i in range(n):49 r = rank[i]50 if r == 0:51 continue52 j = sa[r - 1]53 while i + h < n and j + h < n and s[i + h] == s[j + h]:54 h += 155 lcp[r - 1] = h56 if h:57 h -= 158 return lcp5960 # Novel approach: true byte-cost parsing via suffix-array-assisted LZ with dynamic programming.61 # We estimate compressed size in bytes using a simple token model:62 # literal run: 1-byte tag + varint length + raw chars63 # match: 1-byte tag + varint offset + varint length64 #65 # Then add specialized analytical schemes for pure runs and whole-string periodicity.6667 best = n6869 # Scheme A: exact RLE for consecutive equal chars70 runs = []71 i = 072 while i < n:73 j = i + 174 while j < n and data[j] == data[i]:75 j += 176 runs.append((data[i], j - i))77 i = j7879 if len(runs) < n:80 rle_cost = 081 for _, cnt in runs:82 if cnt >= 4:83 rle_cost += 1 + vlen(cnt) + 184 else:85 rle_cost += cnt86 if rle_cost < best:87 # verify88 enc = runs[:]8990 def dec_rle(enc2):91 out = []92 for ch, cnt in enc2:93 out.append(ch * cnt)94 return "".join(out)9596 if dec_rle(enc) == data:97 best = rle_cost9899 # Scheme B: whole-string periodicity100 pi = [0] 
* n101 j = 0102 for i in range(1, n):103 while j > 0 and data[i] != data[j]:104 j = pi[j - 1]105 if data[i] == data[j]:106 j += 1107 pi[i] = j108 p = n - pi[-1]109 if p < n and n % p == 0:110 period_cost = p + 1 + vlen(n // p)111 if period_cost < best:112 base = data[:p]113 reps = n // p114115 def dec_period(obj):116 b, r = obj117 return b * r118119 if dec_period((base, reps)) == data:120 best = period_cost121122 # Scheme C: suffix-array-guided LZ DP123 if n >= 2:124 sa, rank = build_suffix_array(data)125 lcp = build_lcp(data, sa, rank)126127 # For each position, gather a few strong candidate matches from nearby suffixes.128 cand = [[] for _ in range(n)]129130 def add_match(pos, prev, length):131 if prev < 0 or prev >= pos or length < 2:132 return133 off = pos - prev134 cand[pos].append((off, length))135136 W = 24 # inspect nearby suffixes; enough to capture strong repetitions cheaply137 for pos in range(n):138 r = rank[pos]139140 cur = 10**18141 steps = 0142 k = r - 1143 while k >= 0 and steps < W:144 cur = min(cur, lcp[k])145 if cur < 2:146 break147 prev = sa[k]148 if prev < pos:149 add_match(pos, prev, cur)150 k -= 1151 steps += 1152153 cur = 10**18154 steps = 0155 k = r156 m = len(lcp)157 while k < m and steps < W:158 cur = min(cur, lcp[k])159 if cur < 2:160 break161 prev = sa[k + 1]162 if prev < pos:163 add_match(pos, prev, cur)164 k += 1165 steps += 1166167 # Keep only best few unique offsets, and a few shortened lengths near coding thresholds.168 for i in range(n):169 if not cand[i]:170 continue171 by_off = {}172 for off, ln in cand[i]:173 if ln > by_off.get(off, 0):174 by_off[off] = ln175 arr = sorted(by_off.items(), key=lambda x: (-(x[1] - (1 + vlen(x[0]) + vlen(x[1]))), x[0]))176 trimmed = []177 for off, ln in arr[:8]:178 lens = {ln}179 if ln > 2:180 lens.add(2)181 if ln > 3:182 lens.add(3)183 if ln > 4:184 lens.add(4)185 if ln > 8:186 lens.add(8)187 if ln > 16:188 lens.add(16)189 if ln > 32:190 lens.add(32)191 for L in sorted(lens):192 if L <= ln and i + 
L <= n:193 trimmed.append((off, L))194 cand[i] = trimmed195196 INF = 10**18197 dp = [INF] * (n + 1)198 dp[n] = 0199200 # Bound literal runs for speed; long unique tails are still representable by chaining.201 max_lit = 64202203 for i in range(n - 1, -1, -1):204 # literals205 lim = n if n - i < max_lit else i + max_lit206 for j in range(i + 1, lim + 1):207 cost = 1 + vlen(j - i) + (j - i) + dp[j]208 if cost < dp[i]:209 dp[i] = cost210211 # matches212 for off, ln in cand[i]:213 cost = 1 + vlen(off) + vlen(ln) + dp[i + ln]214 if cost < dp[i]:215 dp[i] = cost216217 lz_cost = dp[0]218 if lz_cost < best:219 # reconstruct and verify220 out_tokens = []221 i = 0222 while i < n:223 chosen = None224225 lim = n if n - i < max_lit else i + max_lit226 for j in range(i + 1, lim + 1):227 cost = 1 + vlen(j - i) + (j - i) + dp[j]228 if cost == dp[i]:229 chosen = ("L", data[i:j])230 i = j231 break232233 if chosen is None:234 for off, ln in cand[i]:235 cost = 1 + vlen(off) + vlen(ln) + dp[i + ln]236 if cost == dp[i]:237 chosen = ("M", off, ln)238 i += ln239 break240241 if chosen is None:242 return 999.0243 out_tokens.append(chosen)244245 def dec_lz(tokens):246 out = []247 for tok in tokens:248 if tok[0] == "L":249 out.append(tok[1])250 else:251 _, off, ln = tok252 cur = "".join(out)253 start = len(cur) - off254 if start < 0:255 return None256 piece = []257 for k in range(ln):258 idx = start + k259 if idx < len(cur):260 piece.append(cur[idx])261 else:262 built = "".join(piece)263 ref = cur + built264 if idx >= len(ref):265 return None266 piece.append(ref[idx])267 out.append("".join(piece))268 return "".join(out)269270 if dec_lz(out_tokens) == data:271 best = lz_cost272273 ratio = best / n274 return float(ratio) if ratio >= 0 else 999.0275 except:276 return 999.01def solve(input):2 data = input.get("data", "")3 if not isinstance(data, str) or not data:4 return 999.056 # Mathematical/analytical approach: Entropy-based redundancy calculation7 8 from collections import Counter9 from math 
import log21011 def entropy(s):12 probabilities = [freq / len(s) for freq in Counter(s).values()]13 return -sum(p * log2(p) if p > 0 else 0 for p in probabilities)1415 def redundancy(s):16 max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 017 actual_entropy = entropy(s)18 return max_entropy - actual_entropy1920 # Calculate reduction in size possible based on redundancy21 reduction_potential = redundancy(data)2223 # Assuming compression is achieved based on redundancy24 max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data)))25 26 # Qualitative check if max_possible_compression_ratio makes sense27 if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0:28 return 999.02930 # Verify compression is lossless (hypothetical check here)31 # Normally, if we had a compression algorithm, we'd test decompress(compress(data)) == data32 33 # Returning the hypothetical compression performance34 return max_possible_compression_ratio