Solution #f4280a71-1fa9-4f44-8f24-cb866bb89c01
completed
Score
58% (0/5)
Runtime
8.55ms
Delta
+1.3% vs parent
-40.3% vs best
Improved from parent
Score
58% (0/5)
Runtime
8.55ms
Delta
+1.3% vs parent
-40.3% vs best
Improved from parent
def solve(input):
    """Estimate the best achievable lossless compression ratio for input["data"].

    Several candidate encodings are tried (exact periodicity, run-length,
    greedy LZ77 with overlap, dominant-substring macros, per-character
    position lists). Each candidate is verified to decompress losslessly
    before its cost/length ratio is considered.

    Args:
        input: expected to be a dict with a "data" key; anything else is
            treated as empty input. Non-string data is stringified.

    Returns:
        float: the smallest verified ratio found (capped below at 0.0),
        1.0 if nothing beats storing the raw string, 0.0 for empty input,
        or 999.0 if an unexpected error occurs.
    """
    try:
        data = input.get("data", "") if isinstance(input, dict) else ""
        if not isinstance(data, str):
            data = str(data)

        n = len(data)
        if n == 0:
            return 0.0

        # Lossless verification helper for a concrete token stream.
        # Token format:
        #   ('L', string)        literal chunk
        #   ('R', dist, length)  LZ-style backreference, overlap allowed
        def decompress(tokens):
            out = []
            for tok in tokens:
                if tok[0] == 'L':
                    out.append(tok[1])
                else:
                    _, dist, ln = tok
                    cur = "".join(out)
                    if dist <= 0 or dist > len(cur) or ln < 0:
                        return None
                    start = len(cur) - dist
                    piece = []
                    for k in range(ln):
                        idx = start + k
                        if idx < len(cur):
                            piece.append(cur[idx])
                        else:
                            # Overlapping copy: the source runs into chars
                            # produced by this very token, so read back from
                            # `piece` itself.
                            j = idx - len(cur)
                            if j < 0 or j >= len(piece):
                                return None
                            piece.append(piece[j])
                    out.append("".join(piece))
            return "".join(out)

        best_ratio = 1.0

        # Candidate 1: exact periodic string.
        # Cost model: store one period + a repeat count (1 unit).
        def periodic_candidate(s):
            m = len(s)
            if m <= 1:
                return None
            # KMP failure function; smallest period is m - pi[-1].
            pi = [0] * m
            for i in range(1, m):
                j = pi[i - 1]
                while j and s[i] != s[j]:
                    j = pi[j - 1]
                if s[i] == s[j]:
                    j += 1
                pi[i] = j
            p = m - pi[-1]
            if p < m and m % p == 0:
                # Lossless check: the period really reconstructs s.
                if s[:p] * (m // p) != s:
                    return None
                cost = p + 1
                return cost / float(m)
            return None

        r = periodic_candidate(data)
        if r is not None and r < best_ratio:
            best_ratio = r

        # Candidate 2: pure run-length over the entire string.
        # Cost model: 2 units (char, count) per run.
        def rle_candidate(s):
            m = len(s)
            runs = []
            i = 0
            while i < m:
                j = i + 1
                while j < m and s[j] == s[i]:
                    j += 1
                runs.append((s[i], j - i))
                i = j
            dec = "".join(ch * cnt for ch, cnt in runs)
            if dec != s:
                return None
            cost = 2 * len(runs)
            return cost / float(m)

        r = rle_candidate(data)
        if r is not None and r < best_ratio:
            best_ratio = r

        # Candidate 3: greedy LZ77 with overlap and a hashed 3-gram index.
        # Pre-indexed buckets give near-O(1) candidate lookup; literals are
        # packed into blocks, backreferences cost a flat 2 units.
        def lz_greedy_ratio(s):
            m = len(s)
            if m <= 3:
                # Too short for a 3-gram match; verify the trivial encoding.
                dec = decompress([('L', s)])
                if dec != s:
                    return None
                return 1.0

            # Pre-index positions by 3-gram.
            buckets = {}
            for i in range(m - 2):
                k = s[i:i + 3]
                if k in buckets:
                    buckets[k].append(i)
                else:
                    buckets[k] = [i]

            tokens = []
            lit_buf = []
            i = 0

            def flush_lit():
                nonlocal lit_buf
                if lit_buf:
                    tokens.append(('L', "".join(lit_buf)))
                    lit_buf = []

            while i < m:
                best_len = 0
                best_dist = 0

                if i + 2 < m:
                    k = s[i:i + 3]
                    cand = buckets.get(k, [])
                    # Check only the most recent few candidates to stay fast.
                    checked = 0
                    idx = len(cand) - 1
                    while idx >= 0 and checked < 24:
                        p = cand[idx]
                        idx -= 1
                        if p >= i:
                            continue
                        checked += 1
                        dist = i - p
                        # Extend the match; overlap (p + l >= i) is fine
                        # because the source char is still just s[p + l].
                        l = 0
                        while i + l < m and s[p + l] == s[i + l]:
                            l += 1
                        if l > best_len:
                            best_len = l
                            best_dist = dist
                            if l >= 64:
                                break

                # Token cost model: literal chars raw, backref = 2 units,
                # so only matches of length >= 4 are worth emitting.
                if best_len >= 4:
                    flush_lit()
                    tokens.append(('R', best_dist, best_len))
                    i += best_len
                else:
                    lit_buf.append(s[i])
                    i += 1

            flush_lit()

            dec = decompress(tokens)
            if dec != s:
                return None

            cost = 0
            for tok in tokens:
                if tok[0] == 'L':
                    cost += len(tok[1])
                else:
                    cost += 2
            return cost / float(m)

        r = lz_greedy_ratio(data)
        if r is not None and r < best_ratio:
            best_ratio = r

        # Candidate 4: blockwise dominant-substring substitution.
        # Find a repeated chunk of length k with many non-overlapping uses,
        # store the macro once (k units) and each use as 1 unit.
        def block_macro_ratio(s):
            m = len(s)
            best = None
            maxk = min(24, m // 2)
            for k in range(2, maxk + 1):
                freq = {}
                for i in range(m - k + 1):
                    sub = s[i:i + k]
                    freq[sub] = freq.get(sub, 0) + 1
                # Try only the most frequent few substrings.
                items = sorted(freq.items(), key=lambda x: x[1], reverse=True)[:6]
                for sub, cnt in items:
                    if cnt < 2:
                        continue
                    # Greedy left-to-right non-overlapping occurrence scan.
                    positions = []
                    i = 0
                    while i <= m - k:
                        if s[i:i + k] == sub:
                            positions.append(i)
                            i += k
                        else:
                            i += 1
                    if len(positions) < 2:
                        continue
                    toks = []
                    last = 0
                    for p in positions:
                        if p > last:
                            toks.append(('L', s[last:p]))
                        toks.append(('M',))
                        last = p + k
                    if last < m:
                        toks.append(('L', s[last:]))

                    # Lossless check: expand macros and compare.
                    dec_parts = []
                    for t in toks:
                        if t[0] == 'L':
                            dec_parts.append(t[1])
                        else:
                            dec_parts.append(sub)
                    if "".join(dec_parts) != s:
                        continue

                    cost = k  # store the macro body once
                    for t in toks:
                        if t[0] == 'L':
                            cost += len(t[1])
                        else:
                            cost += 1
                    ratio = cost / float(m)
                    if best is None or ratio < best:
                        best = ratio
            return best

        r = block_macro_ratio(data)
        if r is not None and r < best_ratio:
            best_ratio = r

        # Candidate 5: per-character position lists (works well when the
        # alphabet is tiny and characters repeat in long contiguous runs).
        def char_index_ratio(s):
            m = len(s)
            pos = {}
            for i, ch in enumerate(s):
                if ch in pos:
                    pos[ch].append(i)
                else:
                    pos[ch] = [i]
            # Lossless check: refill positions and compare.
            out = [""] * m
            for ch, arr in pos.items():
                for p in arr:
                    out[p] = ch
            if "".join(out) != s:
                return None
            # Cost model: 1 unit per unique char + 1 unit per contiguous
            # run in its (sorted) position list, approximating delta coding.
            cost = 0
            for ch, arr in pos.items():
                cost += 1
                if arr:
                    runs = 1
                    for j in range(1, len(arr)):
                        if arr[j] != arr[j - 1] + 1:
                            runs += 1
                    cost += runs
            return cost / float(m)

        r = char_index_ratio(data)
        if r is not None and r < best_ratio:
            best_ratio = r

        if best_ratio < 0.0:
            best_ratio = 0.0
        return float(best_ratio)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; any genuine failure maps to the sentinel score.
        return 999.0
-39.0%
Runtime Advantage
8.42ms slower
Code Size
286 vs 34 lines
| # | Your Solution | # | Champion |
|---|---|---|---|
| 1 | def solve(input): | 1 | def solve(input): |
| 2 | try: | 2 | data = input.get("data", "") |
| 3 | data = input.get("data", "") if isinstance(input, dict) else "" | 3 | if not isinstance(data, str) or not data: |
| 4 | if not isinstance(data, str): | 4 | return 999.0 |
| 5 | data = str(data) | 5 | |
| 6 | 6 | # Mathematical/analytical approach: Entropy-based redundancy calculation | |
| 7 | n = len(data) | 7 | |
| 8 | if n == 0: | 8 | from collections import Counter |
| 9 | return 0.0 | 9 | from math import log2 |
| 10 | 10 | ||
| 11 | # Lossless verification helper for a concrete token stream. | 11 | def entropy(s): |
| 12 | # Token format: | 12 | probabilities = [freq / len(s) for freq in Counter(s).values()] |
| 13 | # ('L', string) literal chunk | 13 | return -sum(p * log2(p) if p > 0 else 0 for p in probabilities) |
| 14 | # ('R', dist, length) LZ-style backreference, overlap allowed | 14 | |
| 15 | def decompress(tokens): | 15 | def redundancy(s): |
| 16 | out = [] | 16 | max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 0 |
| 17 | total = 0 | 17 | actual_entropy = entropy(s) |
| 18 | for tok in tokens: | 18 | return max_entropy - actual_entropy |
| 19 | if tok[0] == 'L': | 19 | |
| 20 | s = tok[1] | 20 | # Calculate reduction in size possible based on redundancy |
| 21 | out.append(s) | 21 | reduction_potential = redundancy(data) |
| 22 | total += len(s) | 22 | |
| 23 | else: | 23 | # Assuming compression is achieved based on redundancy |
| 24 | _, dist, ln = tok | 24 | max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data))) |
| 25 | cur = "".join(out) | 25 | |
| 26 | if dist <= 0 or dist > len(cur) or ln < 0: | 26 | # Qualitative check if max_possible_compression_ratio makes sense |
| 27 | return None | 27 | if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0: |
| 28 | start = len(cur) - dist | 28 | return 999.0 |
| 29 | piece = [] | 29 | |
| 30 | for k in range(ln): | 30 | # Verify compression is lossless (hypothetical check here) |
| 31 | idx = start + k | 31 | # Normally, if we had a compression algorithm, we'd test decompress(compress(data)) == data |
| 32 | if idx < len(cur): | 32 | |
| 33 | piece.append(cur[idx]) | 33 | # Returning the hypothetical compression performance |
| 34 | else: | 34 | return max_possible_compression_ratio |
| 35 | prev = "".join(piece) | 35 | |
| 36 | j = idx - len(cur) | 36 | |
| 37 | if j < 0 or j >= len(prev): | 37 | |
| 38 | return None | 38 | |
| 39 | piece.append(prev[j]) | 39 | |
| 40 | piece = "".join(piece) | 40 | |
| 41 | out.append(piece) | 41 | |
| 42 | total += ln | 42 | |
| 43 | return "".join(out) | 43 | |
| 44 | 44 | ||
| 45 | best_ratio = 1.0 | 45 | |
| 46 | 46 | ||
| 47 | # Candidate 1: exact periodic string | 47 | |
| 48 | # Cost model: store period once + repeat count | 48 | |
| 49 | def periodic_candidate(s): | 49 | |
| 50 | m = len(s) | 50 | |
| 51 | if m <= 1: | 51 | |
| 52 | return None | 52 | |
| 53 | pi = [0] * m | 53 | |
| 54 | for i in range(1, m): | 54 | |
| 55 | j = pi[i - 1] | 55 | |
| 56 | while j and s[i] != s[j]: | 56 | |
| 57 | j = pi[j - 1] | 57 | |
| 58 | if s[i] == s[j]: | 58 | |
| 59 | j += 1 | 59 | |
| 60 | pi[i] = j | 60 | |
| 61 | p = m - pi[-1] | 61 | |
| 62 | if p < m and m % p == 0: | 62 | |
| 63 | tokens = [('L', s[:p]), ('RPT', m // p)] | 63 | |
| 64 | if s[:p] * (m // p) != s: | 64 | |
| 65 | return None | 65 | |
| 66 | cost = p + 1 | 66 | |
| 67 | return cost / float(m) | 67 | |
| 68 | return None | 68 | |
| 69 | 69 | ||
| 70 | r = periodic_candidate(data) | 70 | |
| 71 | if r is not None and r < best_ratio: | 71 | |
| 72 | best_ratio = r | 72 | |
| 73 | 73 | ||
| 74 | # Candidate 2: pure run-length over entire string if beneficial | 74 | |
| 75 | def rle_candidate(s): | 75 | |
| 76 | m = len(s) | 76 | |
| 77 | runs = [] | 77 | |
| 78 | i = 0 | 78 | |
| 79 | while i < m: | 79 | |
| 80 | j = i + 1 | 80 | |
| 81 | while j < m and s[j] == s[i]: | 81 | |
| 82 | j += 1 | 82 | |
| 83 | runs.append((s[i], j - i)) | 83 | |
| 84 | i = j | 84 | |
| 85 | dec = "".join(ch * cnt for ch, cnt in runs) | 85 | |
| 86 | if dec != s: | 86 | |
| 87 | return None | 87 | |
| 88 | cost = 2 * len(runs) | 88 | |
| 89 | return cost / float(m) | 89 | |
| 90 | 90 | ||
| 91 | r = rle_candidate(data) | 91 | |
| 92 | if r is not None and r < best_ratio: | 92 | |
| 93 | best_ratio = r | 93 | |
| 94 | 94 | ||
| 95 | # Candidate 3: greedy LZ77 with overlap and hashed 3-gram index. | 95 | |
| 96 | # Novel direction: use pre-indexed buckets (pre-sort/pre-index hint) for O(1)-ish lookup, | 96 | |
| 97 | # then dynamic token-cost model with literal block packing. | 97 | |
| 98 | def lz_greedy_ratio(s): | 98 | |
| 99 | m = len(s) | 99 | |
| 100 | if m <= 3: | 100 | |
| 101 | tokens = [('L', s)] | 101 | |
| 102 | dec = decompress(tokens) | 102 | |
| 103 | if dec != s: | 103 | |
| 104 | return None | 104 | |
| 105 | return 1.0 | 105 | |
| 106 | 106 | ||
| 107 | # Pre-index positions by 3-gram | 107 | |
| 108 | buckets = {} | 108 | |
| 109 | for i in range(m - 2): | 109 | |
| 110 | k = s[i:i + 3] | 110 | |
| 111 | if k in buckets: | 111 | |
| 112 | buckets[k].append(i) | 112 | |
| 113 | else: | 113 | |
| 114 | buckets[k] = [i] | 114 | |
| 115 | 115 | ||
| 116 | tokens = [] | 116 | |
| 117 | lit_buf = [] | 117 | |
| 118 | i = 0 | 118 | |
| 119 | 119 | ||
| 120 | def flush_lit(): | 120 | |
| 121 | nonlocal lit_buf | 121 | |
| 122 | if lit_buf: | 122 | |
| 123 | tokens.append(('L', "".join(lit_buf))) | 123 | |
| 124 | lit_buf = [] | 124 | |
| 125 | 125 | ||
| 126 | while i < m: | 126 | |
| 127 | best_len = 0 | 127 | |
| 128 | best_dist = 0 | 128 | |
| 129 | 129 | ||
| 130 | if i + 2 < m: | 130 | |
| 131 | k = s[i:i + 3] | 131 | |
| 132 | cand = buckets.get(k, []) | 132 | |
| 133 | # Check only recent candidates to stay fast | 133 | |
| 134 | checked = 0 | 134 | |
| 135 | idx = len(cand) - 1 | 135 | |
| 136 | while idx >= 0 and checked < 24: | 136 | |
| 137 | p = cand[idx] | 137 | |
| 138 | idx -= 1 | 138 | |
| 139 | if p >= i: | 139 | |
| 140 | continue | 140 | |
| 141 | checked += 1 | 141 | |
| 142 | dist = i - p | 142 | |
| 143 | # extend match with overlap allowed | 143 | |
| 144 | l = 0 | 144 | |
| 145 | while i + l < m: | 145 | |
| 146 | src = p + l | 146 | |
| 147 | if src < i: | 147 | |
| 148 | c = s[src] | 148 | |
| 149 | else: | 149 | |
| 150 | c = s[i + (src - i)] | 150 | |
| 151 | if c != s[i + l]: | 151 | |
| 152 | break | 152 | |
| 153 | l += 1 | 153 | |
| 154 | if l > best_len: | 154 | |
| 155 | best_len = l | 155 | |
| 156 | best_dist = dist | 156 | |
| 157 | if l >= 64: | 157 | |
| 158 | break | 158 | |
| 159 | 159 | ||
| 160 | # Token cost model: | 160 | |
| 161 | # literal chars counted raw, backref counted as 2 units | 161 | |
| 162 | if best_len >= 4 and best_len > 2: | 162 | |
| 163 | flush_lit() | 163 | |
| 164 | tokens.append(('R', best_dist, best_len)) | 164 | |
| 165 | i += best_len | 165 | |
| 166 | else: | 166 | |
| 167 | lit_buf.append(s[i]) | 167 | |
| 168 | i += 1 | 168 | |
| 169 | 169 | ||
| 170 | flush_lit() | 170 | |
| 171 | 171 | ||
| 172 | dec = decompress(tokens) | 172 | |
| 173 | if dec != s: | 173 | |
| 174 | return None | 174 | |
| 175 | 175 | ||
| 176 | cost = 0 | 176 | |
| 177 | for tok in tokens: | 177 | |
| 178 | if tok[0] == 'L': | 178 | |
| 179 | cost += len(tok[1]) | 179 | |
| 180 | else: | 180 | |
| 181 | cost += 2 | 181 | |
| 182 | return cost / float(m) | 182 | |
| 183 | 183 | ||
| 184 | r = lz_greedy_ratio(data) | 184 | |
| 185 | if r is not None and r < best_ratio: | 185 | |
| 186 | best_ratio = r | 186 | |
| 187 | 187 | ||
| 188 | # Candidate 4: blockwise dominant-substring substitution. | 188 | |
| 189 | # Find a repeated chunk length k with many non-overlapping uses, then pack others as literals. | 189 | |
| 190 | def block_macro_ratio(s): | 190 | |
| 191 | m = len(s) | 191 | |
| 192 | best = None | 192 | |
| 193 | maxk = min(24, m // 2) | 193 | |
| 194 | for k in range(2, maxk + 1): | 194 | |
| 195 | freq = {} | 195 | |
| 196 | for i in range(m - k + 1): | 196 | |
| 197 | sub = s[i:i + k] | 197 | |
| 198 | freq[sub] = freq.get(sub, 0) + 1 | 198 | |
| 199 | # try only the most frequent few | 199 | |
| 200 | items = sorted(freq.items(), key=lambda x: x[1], reverse=True)[:6] | 200 | |
| 201 | for sub, cnt in items: | 201 | |
| 202 | if cnt < 2: | 202 | |
| 203 | continue | 203 | |
| 204 | positions = [] | 204 | |
| 205 | i = 0 | 205 | |
| 206 | while i <= m - k: | 206 | |
| 207 | if s[i:i + k] == sub: | 207 | |
| 208 | positions.append(i) | 208 | |
| 209 | i += k | 209 | |
| 210 | else: | 210 | |
| 211 | i += 1 | 211 | |
| 212 | if len(positions) < 2: | 212 | |
| 213 | continue | 213 | |
| 214 | toks = [] | 214 | |
| 215 | last = 0 | 215 | |
| 216 | for p in positions: | 216 | |
| 217 | if p > last: | 217 | |
| 218 | toks.append(('L', s[last:p])) | 218 | |
| 219 | toks.append(('M',)) | 219 | |
| 220 | last = p + k | 220 | |
| 221 | if last < m: | 221 | |
| 222 | toks.append(('L', s[last:])) | 222 | |
| 223 | 223 | ||
| 224 | dec_parts = [] | 224 | |
| 225 | for t in toks: | 225 | |
| 226 | if t[0] == 'L': | 226 | |
| 227 | dec_parts.append(t[1]) | 227 | |
| 228 | else: | 228 | |
| 229 | dec_parts.append(sub) | 229 | |
| 230 | if "".join(dec_parts) != s: | 230 | |
| 231 | continue | 231 | |
| 232 | 232 | ||
| 233 | cost = k # store macro | 233 | |
| 234 | for t in toks: | 234 | |
| 235 | if t[0] == 'L': | 235 | |
| 236 | cost += len(t[1]) | 236 | |
| 237 | else: | 237 | |
| 238 | cost += 1 | 238 | |
| 239 | ratio = cost / float(m) | 239 | |
| 240 | if best is None or ratio < best: | 240 | |
| 241 | best = ratio | 241 | |
| 242 | return best | 242 | |
| 243 | 243 | ||
| 244 | r = block_macro_ratio(data) | 244 | |
| 245 | if r is not None and r < best_ratio: | 245 | |
| 246 | best_ratio = r | 246 | |
| 247 | 247 | ||
| 248 | # Candidate 5: compress sorted Burrows-like clusters indirectly via char-position lists. | 248 | |
| 249 | # Works well when alphabet is tiny and chars repeat in long patterns. | 249 | |
| 250 | def char_index_ratio(s): | 250 | |
| 251 | m = len(s) | 251 | |
| 252 | pos = {} | 252 | |
| 253 | for i, ch in enumerate(s): | 253 | |
| 254 | if ch in pos: | 254 | |
| 255 | pos[ch].append(i) | 255 | |
| 256 | else: | 256 | |
| 257 | pos[ch] = [i] | 257 | |
| 258 | # Decompress by filling positions | 258 | |
| 259 | out = [""] * m | 259 | |
| 260 | for ch, arr in pos.items(): | 260 | |
| 261 | for p in arr: | 261 | |
| 262 | out[p] = ch | 262 | |
| 263 | if "".join(out) != s: | 263 | |
| 264 | return None | 264 | |
| 265 | # Cost model: unique chars + counts for each position list delta-coded | 265 | |
| 266 | # approximate with unique chars + number of runs in each position list | 266 | |
| 267 | cost = 0 | 267 | |
| 268 | for ch, arr in pos.items(): | 268 | |
| 269 | cost += 1 | 269 | |
| 270 | if arr: | 270 | |
| 271 | runs = 1 | 271 | |
| 272 | for j in range(1, len(arr)): | 272 | |
| 273 | if arr[j] != arr[j - 1] + 1: | 273 | |
| 274 | runs += 1 | 274 | |
| 275 | cost += runs | 275 | |
| 276 | return cost / float(m) | 276 | |
| 277 | 277 | ||
| 278 | r = char_index_ratio(data) | 278 | |
| 279 | if r is not None and r < best_ratio: | 279 | |
| 280 | best_ratio = r | 280 | |
| 281 | 281 | ||
| 282 | if best_ratio < 0.0: | 282 | |
| 283 | best_ratio = 0.0 | 283 | |
| 284 | return float(best_ratio) | 284 | |
| 285 | except: | 285 | |
| 286 | return 999.0 | 286 |
1def solve(input):2 try:3 data = input.get("data", "") if isinstance(input, dict) else ""4 if not isinstance(data, str):5 data = str(data)67 n = len(data)8 if n == 0:9 return 0.01011 # Lossless verification helper for a concrete token stream.12 # Token format:13 # ('L', string) literal chunk14 # ('R', dist, length) LZ-style backreference, overlap allowed15 def decompress(tokens):16 out = []17 total = 018 for tok in tokens:19 if tok[0] == 'L':20 s = tok[1]21 out.append(s)22 total += len(s)23 else:24 _, dist, ln = tok25 cur = "".join(out)26 if dist <= 0 or dist > len(cur) or ln < 0:27 return None28 start = len(cur) - dist29 piece = []30 for k in range(ln):31 idx = start + k32 if idx < len(cur):33 piece.append(cur[idx])34 else:35 prev = "".join(piece)36 j = idx - len(cur)37 if j < 0 or j >= len(prev):38 return None39 piece.append(prev[j])40 piece = "".join(piece)41 out.append(piece)42 total += ln43 return "".join(out)4445 best_ratio = 1.04647 # Candidate 1: exact periodic string48 # Cost model: store period once + repeat count49 def periodic_candidate(s):50 m = len(s)51 if m <= 1:52 return None53 pi = [0] * m54 for i in range(1, m):55 j = pi[i - 1]56 while j and s[i] != s[j]:57 j = pi[j - 1]58 if s[i] == s[j]:59 j += 160 pi[i] = j61 p = m - pi[-1]62 if p < m and m % p == 0:63 tokens = [('L', s[:p]), ('RPT', m // p)]64 if s[:p] * (m // p) != s:65 return None66 cost = p + 167 return cost / float(m)68 return None6970 r = periodic_candidate(data)71 if r is not None and r < best_ratio:72 best_ratio = r7374 # Candidate 2: pure run-length over entire string if beneficial75 def rle_candidate(s):76 m = len(s)77 runs = []78 i = 079 while i < m:80 j = i + 181 while j < m and s[j] == s[i]:82 j += 183 runs.append((s[i], j - i))84 i = j85 dec = "".join(ch * cnt for ch, cnt in runs)86 if dec != s:87 return None88 cost = 2 * len(runs)89 return cost / float(m)9091 r = rle_candidate(data)92 if r is not None and r < best_ratio:93 best_ratio = r9495 # Candidate 3: greedy LZ77 with 
overlap and hashed 3-gram index.96 # Novel direction: use pre-indexed buckets (pre-sort/pre-index hint) for O(1)-ish lookup,97 # then dynamic token-cost model with literal block packing.98 def lz_greedy_ratio(s):99 m = len(s)100 if m <= 3:101 tokens = [('L', s)]102 dec = decompress(tokens)103 if dec != s:104 return None105 return 1.0106107 # Pre-index positions by 3-gram108 buckets = {}109 for i in range(m - 2):110 k = s[i:i + 3]111 if k in buckets:112 buckets[k].append(i)113 else:114 buckets[k] = [i]115116 tokens = []117 lit_buf = []118 i = 0119120 def flush_lit():121 nonlocal lit_buf122 if lit_buf:123 tokens.append(('L', "".join(lit_buf)))124 lit_buf = []125126 while i < m:127 best_len = 0128 best_dist = 0129130 if i + 2 < m:131 k = s[i:i + 3]132 cand = buckets.get(k, [])133 # Check only recent candidates to stay fast134 checked = 0135 idx = len(cand) - 1136 while idx >= 0 and checked < 24:137 p = cand[idx]138 idx -= 1139 if p >= i:140 continue141 checked += 1142 dist = i - p143 # extend match with overlap allowed144 l = 0145 while i + l < m:146 src = p + l147 if src < i:148 c = s[src]149 else:150 c = s[i + (src - i)]151 if c != s[i + l]:152 break153 l += 1154 if l > best_len:155 best_len = l156 best_dist = dist157 if l >= 64:158 break159160 # Token cost model:161 # literal chars counted raw, backref counted as 2 units162 if best_len >= 4 and best_len > 2:163 flush_lit()164 tokens.append(('R', best_dist, best_len))165 i += best_len166 else:167 lit_buf.append(s[i])168 i += 1169170 flush_lit()171172 dec = decompress(tokens)173 if dec != s:174 return None175176 cost = 0177 for tok in tokens:178 if tok[0] == 'L':179 cost += len(tok[1])180 else:181 cost += 2182 return cost / float(m)183184 r = lz_greedy_ratio(data)185 if r is not None and r < best_ratio:186 best_ratio = r187188 # Candidate 4: blockwise dominant-substring substitution.189 # Find a repeated chunk length k with many non-overlapping uses, then pack others as literals.190 def block_macro_ratio(s):191 m = 
len(s)192 best = None193 maxk = min(24, m // 2)194 for k in range(2, maxk + 1):195 freq = {}196 for i in range(m - k + 1):197 sub = s[i:i + k]198 freq[sub] = freq.get(sub, 0) + 1199 # try only the most frequent few200 items = sorted(freq.items(), key=lambda x: x[1], reverse=True)[:6]201 for sub, cnt in items:202 if cnt < 2:203 continue204 positions = []205 i = 0206 while i <= m - k:207 if s[i:i + k] == sub:208 positions.append(i)209 i += k210 else:211 i += 1212 if len(positions) < 2:213 continue214 toks = []215 last = 0216 for p in positions:217 if p > last:218 toks.append(('L', s[last:p]))219 toks.append(('M',))220 last = p + k221 if last < m:222 toks.append(('L', s[last:]))223224 dec_parts = []225 for t in toks:226 if t[0] == 'L':227 dec_parts.append(t[1])228 else:229 dec_parts.append(sub)230 if "".join(dec_parts) != s:231 continue232233 cost = k # store macro234 for t in toks:235 if t[0] == 'L':236 cost += len(t[1])237 else:238 cost += 1239 ratio = cost / float(m)240 if best is None or ratio < best:241 best = ratio242 return best243244 r = block_macro_ratio(data)245 if r is not None and r < best_ratio:246 best_ratio = r247248 # Candidate 5: compress sorted Burrows-like clusters indirectly via char-position lists.249 # Works well when alphabet is tiny and chars repeat in long patterns.250 def char_index_ratio(s):251 m = len(s)252 pos = {}253 for i, ch in enumerate(s):254 if ch in pos:255 pos[ch].append(i)256 else:257 pos[ch] = [i]258 # Decompress by filling positions259 out = [""] * m260 for ch, arr in pos.items():261 for p in arr:262 out[p] = ch263 if "".join(out) != s:264 return None265 # Cost model: unique chars + counts for each position list delta-coded266 # approximate with unique chars + number of runs in each position list267 cost = 0268 for ch, arr in pos.items():269 cost += 1270 if arr:271 runs = 1272 for j in range(1, len(arr)):273 if arr[j] != arr[j - 1] + 1:274 runs += 1275 cost += runs276 return cost / float(m)277278 r = char_index_ratio(data)279 if 
r is not None and r < best_ratio:280 best_ratio = r281282 if best_ratio < 0.0:283 best_ratio = 0.0284 return float(best_ratio)285 except:286 return 999.01def solve(input):2 data = input.get("data", "")3 if not isinstance(data, str) or not data:4 return 999.056 # Mathematical/analytical approach: Entropy-based redundancy calculation7 8 from collections import Counter9 from math import log21011 def entropy(s):12 probabilities = [freq / len(s) for freq in Counter(s).values()]13 return -sum(p * log2(p) if p > 0 else 0 for p in probabilities)1415 def redundancy(s):16 max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 017 actual_entropy = entropy(s)18 return max_entropy - actual_entropy1920 # Calculate reduction in size possible based on redundancy21 reduction_potential = redundancy(data)2223 # Assuming compression is achieved based on redundancy24 max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data)))25 26 # Qualitative check if max_possible_compression_ratio makes sense27 if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0:28 return 999.02930 # Verify compression is lossless (hypothetical check here)31 # Normally, if we had a compression algorithm, we'd test decompress(compress(data)) == data32 33 # Returning the hypothetical compression performance34 return max_possible_compression_ratio