Solution #2c6b7429-a60d-4614-8b41-e193aff0d4e7
completedScore
34% (0/5)
Runtime
21.68ms
Delta
-36.9% vs parent
-64.5% vs best
Regression from parent
Score
34% (0/5)
Runtime
21.68ms
Delta
-36.9% vs parent
-64.5% vs best
Regression from parent
def solve(input):
    """Tokenize ``input["data"]`` with a small LZ-style model and return the ratio.

    Token model (costs in bytes; varint lengths via ``vlen``):
      * ``("L", len)``       literal block:    1 + vlen(len) + len
      * ``("M", off, len)``  back-reference:   1 + vlen(off) + vlen(len)
      * ``("P", pat, reps)`` repeated pattern: 1 + vlen(pat) + vlen(reps) + pat

    An optimal tokenization over the candidate set is found by a right-to-left
    DP; the token stream is then decompressed and compared to the input so the
    reported ratio is provably lossless.

    Returns:
        float: compressed_cost / len(data); 0.0 for empty input; 999.0 when
        tokenization or the losslessness check fails, or on unexpected errors.
    """
    try:
        data = input.get("data", "")
        if not isinstance(data, str):
            data = str(data)
        n = len(data)
        if n == 0:
            return 0.0

        def vlen(x):
            # Byte length of a LEB128-style varint for non-negative x.
            c = 1
            while x >= 128:
                x >>= 7
                c += 1
            return c

        max_lit = 64        # longest literal block considered
        max_window = 2048   # back-reference window
        max_match = 512     # longest match / run considered
        bucket_span = 3     # substring key length for match buckets

        # best_next[i] = minimal cost to encode data[i:];
        # choice[i] = the token achieving that cost.
        best_next = [10 ** 18] * (n + 1)
        choice = [None] * n
        best_next[n] = 0

        # Buckets keyed by short substrings record recent positions where the
        # substring occurred, yielding bounded-window LZ-style candidates.
        pos_lists = {}
        cands = [[] for _ in range(n)]
        if n >= 2:
            for i in range(n):
                if i + bucket_span <= n:
                    key = data[i:i + bucket_span]
                    lst = pos_lists.get(key)
                    if lst is None:
                        lst = []
                        pos_lists[key] = lst
                    # Compare against prior positions, most recent first.
                    for p in lst[::-1]:
                        off = i - p
                        if off > max_window:
                            break
                        l = 0
                        limit = n - i
                        if limit > max_match:
                            limit = max_match
                        # Overlapping compare (p + l may reach past i) is fine:
                        # the decompressor performs the LZ77 overlapped copy.
                        while l < limit and data[p + l] == data[i + l]:
                            l += 1
                        if l >= 2:
                            cands[i].append((off, l))
                            if len(cands[i]) >= 12:
                                break
                    lst.append(i)
                    # Drop positions that fell out of the window.
                    while lst and i - lst[0] > max_window:
                        lst.pop(0)

        # Run-length candidates.  An off=1 back-reference at i copies
        # data[i-1] repeatedly, so it is only valid when i > 0 and
        # data[i-1] == data[i].  (The previous version appended (1, run_len)
        # unconditionally — including at i == 0 and at run starts — which
        # produced tokens the verifier rejects, turning highly repetitive
        # inputs into a 999.0 failure.)
        for i in range(1, n):
            if data[i] != data[i - 1]:
                continue
            j = i + 1
            while j < n and data[j] == data[i] and j - i < max_match:
                j += 1
            run_len = j - i
            if run_len >= 2:
                cands[i].append((1, run_len))

        for i in range(n - 1, -1, -1):
            # Literal blocks.
            lim = n - i
            if lim > max_lit:
                lim = max_lit
            for L in range(1, lim + 1):
                cost = 1 + vlen(L) + L + best_next[i + L]
                if cost < best_next[i]:
                    best_next[i] = cost
                    choice[i] = ("L", L)

            # Back-references: keep the longest match per offset and also try
            # a few shorter truncations (a shorter match can align better with
            # the optimal tail).
            if cands[i]:
                seen = {}
                for off, ln in cands[i]:
                    if off <= 0 or ln < 2:
                        continue
                    if ln > seen.get(off, 0):
                        seen[off] = ln
                for off, ln in seen.items():
                    lens = [ln]
                    for trunc in (2, 3, 4, 8, 16, 32, 64, 128):
                        if ln > trunc:
                            lens.append(trunc)
                    used = set()
                    for L in lens:
                        if L in used or i + L > n:
                            continue
                        used.add(L)
                        cost = 1 + vlen(off) + vlen(L) + best_next[i + L]
                        if cost < best_next[i]:
                            best_next[i] = cost
                            choice[i] = ("M", off, L)

            # Repeated-pattern token for exact local periods.
            max_pat = 16
            rem = n - i
            if rem >= 4:
                upper = max_pat if max_pat < rem // 2 else rem // 2
                for p in range(1, upper + 1):
                    reps = 1
                    while (i + (reps + 1) * p <= n
                           and data[i:i + p] == data[i + reps * p:i + (reps + 1) * p]):
                        reps += 1
                    if reps >= 2:
                        total = reps * p
                        cost = 1 + vlen(p) + vlen(reps) + p + best_next[i + total]
                        if cost < best_next[i]:
                            best_next[i] = cost
                            choice[i] = ("P", p, reps)

        compressed = best_next[0]

        # Reconstruct the token stream from the DP choices.
        tokens = []
        i = 0
        while i < n:
            ch = choice[i]
            if ch is None:
                # DP failed to cover this position — should not happen, but
                # fail safe rather than loop forever.
                return 999.0
            tokens.append(ch)
            if ch[0] == "L":
                i += ch[1]
            elif ch[0] == "M":
                i += ch[2]
            else:
                i += ch[1] * ch[2]

        # Decompress the token stream and verify losslessness.
        built = []
        src_i = 0
        for tok in tokens:
            t = tok[0]
            if t == "L":
                L = tok[1]
                built.append(data[src_i:src_i + L])
                src_i += L
            elif t == "M":
                off, L = tok[1], tok[2]
                cur = "".join(built)
                start = len(cur) - off
                if start < 0:
                    # Offset reaches before the start of the output: invalid.
                    return 999.0
                piece = []
                for k in range(L):
                    idx = start + k
                    if idx < len(cur):
                        piece.append(cur[idx])
                    else:
                        # Overlapped copy: reference bytes produced earlier
                        # within this same token.
                        ref = cur + "".join(piece)
                        if idx >= len(ref):
                            return 999.0
                        piece.append(ref[idx])
                built.append("".join(piece))
                src_i += L
            else:
                p, reps = tok[1], tok[2]
                pat = data[src_i:src_i + p]
                built.append(pat * reps)
                src_i += p * reps

        if "".join(built) != data:
            return 999.0

        return float(compressed / n)
    except Exception:
        # Any unexpected failure maps to the sentinel "worst" score.
        return 999.0
-62.3%
Runtime Advantage
21.55ms slower
Code Size
221 vs 34 lines
| # | Your Solution | # | Champion |
|---|---|---|---|
| 1 | def solve(input): | 1 | def solve(input): |
| 2 | try: | 2 | data = input.get("data", "") |
| 3 | data = input.get("data", "") | 3 | if not isinstance(data, str) or not data: |
| 4 | if not isinstance(data, str): | 4 | return 999.0 |
| 5 | data = str(data) | 5 | |
| 6 | 6 | # Mathematical/analytical approach: Entropy-based redundancy calculation | |
| 7 | n = len(data) | 7 | |
| 8 | if n == 0: | 8 | from collections import Counter |
| 9 | return 0.0 | 9 | from math import log2 |
| 10 | 10 | ||
| 11 | def vlen(x): | 11 | def entropy(s): |
| 12 | c = 1 | 12 | probabilities = [freq / len(s) for freq in Counter(s).values()] |
| 13 | while x >= 128: | 13 | return -sum(p * log2(p) if p > 0 else 0 for p in probabilities) |
| 14 | x >>= 7 | 14 | |
| 15 | c += 1 | 15 | def redundancy(s): |
| 16 | return c | 16 | max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 0 |
| 17 | 17 | actual_entropy = entropy(s) | |
| 18 | # Token model: | 18 | return max_entropy - actual_entropy |
| 19 | # 0: literal block cost = 1 + vlen(len) + len | 19 | |
| 20 | # 1: backref match cost = 1 + vlen(off) + vlen(len) | 20 | # Calculate reduction in size possible based on redundancy |
| 21 | # 2: repeated pattern cost = 1 + vlen(pat_len) + vlen(reps) + pat_len | 21 | reduction_potential = redundancy(data) |
| 22 | # | 22 | |
| 23 | # New approach: bottom-up DP over substrings using exact small-period detection | 23 | # Assuming compression is achieved based on redundancy |
| 24 | # and bounded-window LZ-style matching from rolling buckets. | 24 | max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data))) |
| 25 | 25 | ||
| 26 | max_lit = 64 | 26 | # Qualitative check if max_possible_compression_ratio makes sense |
| 27 | max_window = 2048 | 27 | if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0: |
| 28 | max_match = 512 | 28 | return 999.0 |
| 29 | bucket_span = 3 | 29 | |
| 30 | 30 | # Verify compression is lossless (hypothetical check here) | |
| 31 | best_next = [10**18] * (n + 1) | 31 | # Normally, if we had a compression algorithm, we'd test decompress(compress(data)) == data |
| 32 | choice = [None] * n | 32 | |
| 33 | best_next[n] = 0 | 33 | # Returning the hypothetical compression performance |
| 34 | 34 | return max_possible_compression_ratio | |
| 35 | # Rolling buckets keyed by short substrings to find prior matches | 35 | |
| 36 | pos_lists = {} | 36 | |
| 37 | 37 | ||
| 38 | # We'll process right-to-left for DP, but maintain left-side match candidates | 38 | |
| 39 | # by building them separately. | 39 | |
| 40 | cands = [[] for _ in range(n)] | 40 | |
| 41 | 41 | ||
| 42 | if n >= 2: | 42 | |
| 43 | for i in range(n): | 43 | |
| 44 | if i + bucket_span <= n: | 44 | |
| 45 | key = data[i:i + bucket_span] | 45 | |
| 46 | lst = pos_lists.get(key) | 46 | |
| 47 | if lst is None: | 47 | |
| 48 | lst = [] | 48 | |
| 49 | pos_lists[key] = lst | 49 | |
| 50 | 50 | ||
| 51 | # Compare against recent prior positions | 51 | |
| 52 | for p in lst[::-1]: | 52 | |
| 53 | off = i - p | 53 | |
| 54 | if off > max_window: | 54 | |
| 55 | break | 55 | |
| 56 | l = 0 | 56 | |
| 57 | limit = n - i | 57 | |
| 58 | if limit > max_match: | 58 | |
| 59 | limit = max_match | 59 | |
| 60 | while l < limit and data[p + l] == data[i + l]: | 60 | |
| 61 | l += 1 | 61 | |
| 62 | if l >= 2: | 62 | |
| 63 | cands[i].append((off, l)) | 63 | |
| 64 | if len(cands[i]) >= 12: | 64 | |
| 65 | break | 65 | |
| 66 | 66 | ||
| 67 | lst.append(i) | 67 | |
| 68 | # prune old positions | 68 | |
| 69 | while lst and i - lst[0] > max_window: | 69 | |
| 70 | lst.pop(0) | 70 | |
| 71 | 71 | ||
| 72 | # Add run-based candidates and exact local periodic candidates | 72 | |
| 73 | for i in range(n): | 73 | |
| 74 | # repeated-char run | 74 | |
| 75 | j = i + 1 | 75 | |
| 76 | while j < n and data[j] == data[i] and j - i < max_match: | 76 | |
| 77 | j += 1 | 77 | |
| 78 | run_len = j - i | 78 | |
| 79 | if run_len >= 2: | 79 | |
| 80 | cands[i].append((1, run_len)) | 80 | |
| 81 | 81 | ||
| 82 | def add_choice(i, cost, ch): | 82 | |
| 83 | if cost < best_next[i]: | 83 | |
| 84 | best_next[i] = cost | 84 | |
| 85 | choice[i] = ch | 85 | |
| 86 | 86 | ||
| 87 | for i in range(n - 1, -1, -1): | 87 | |
| 88 | # Literal blocks | 88 | |
| 89 | lim = n - i | 89 | |
| 90 | if lim > max_lit: | 90 | |
| 91 | lim = max_lit | 91 | |
| 92 | for L in range(1, lim + 1): | 92 | |
| 93 | cost = 1 + vlen(L) + L + best_next[i + L] | 93 | |
| 94 | if cost < best_next[i]: | 94 | |
| 95 | best_next[i] = cost | 95 | |
| 96 | choice[i] = ("L", L) | 96 | |
| 97 | 97 | ||
| 98 | # Backreferences | 98 | |
| 99 | if cands[i]: | 99 | |
| 100 | seen = {} | 100 | |
| 101 | for off, ln in cands[i]: | 101 | |
| 102 | if off <= 0 or ln < 2: | 102 | |
| 103 | continue | 103 | |
| 104 | prev = seen.get(off, 0) | 104 | |
| 105 | if ln > prev: | 105 | |
| 106 | seen[off] = ln | 106 | |
| 107 | for off, ln in seen.items(): | 107 | |
| 108 | lens = [ln] | 108 | |
| 109 | if ln > 2: | 109 | |
| 110 | lens.append(2) | 110 | |
| 111 | if ln > 3: | 111 | |
| 112 | lens.append(3) | 112 | |
| 113 | if ln > 4: | 113 | |
| 114 | lens.append(4) | 114 | |
| 115 | if ln > 8: | 115 | |
| 116 | lens.append(8) | 116 | |
| 117 | if ln > 16: | 117 | |
| 118 | lens.append(16) | 118 | |
| 119 | if ln > 32: | 119 | |
| 120 | lens.append(32) | 120 | |
| 121 | if ln > 64: | 121 | |
| 122 | lens.append(64) | 122 | |
| 123 | if ln > 128: | 123 | |
| 124 | lens.append(128) | 124 | |
| 125 | used = set() | 125 | |
| 126 | for L in lens: | 126 | |
| 127 | if L in used or i + L > n: | 127 | |
| 128 | continue | 128 | |
| 129 | used.add(L) | 129 | |
| 130 | cost = 1 + vlen(off) + vlen(L) + best_next[i + L] | 130 | |
| 131 | if cost < best_next[i]: | 131 | |
| 132 | best_next[i] = cost | 132 | |
| 133 | choice[i] = ("M", off, L) | 133 | |
| 134 | 134 | ||
| 135 | # Repeated-pattern token for local exact periods | 135 | |
| 136 | max_pat = 16 | 136 | |
| 137 | rem = n - i | 137 | |
| 138 | if rem >= 4: | 138 | |
| 139 | upper = max_pat if max_pat < rem // 2 else rem // 2 | 139 | |
| 140 | for p in range(1, upper + 1): | 140 | |
| 141 | reps = 1 | 141 | |
| 142 | while i + (reps + 1) * p <= n and data[i:i + p] == data[i + reps * p:i + (reps + 1) * p]: | 142 | |
| 143 | reps += 1 | 143 | |
| 144 | if reps >= 2: | 144 | |
| 145 | total = reps * p | 145 | |
| 146 | cost = 1 + vlen(p) + vlen(reps) + p + best_next[i + total] | 146 | |
| 147 | if cost < best_next[i]: | 147 | |
| 148 | best_next[i] = cost | 148 | |
| 149 | choice[i] = ("P", p, reps) | 149 | |
| 150 | 150 | ||
| 151 | compressed = best_next[0] | 151 | |
| 152 | 152 | ||
| 153 | # Reconstruct and verify | 153 | |
| 154 | tokens = [] | 154 | |
| 155 | i = 0 | 155 | |
| 156 | while i < n: | 156 | |
| 157 | ch = choice[i] | 157 | |
| 158 | if ch is None: | 158 | |
| 159 | return 999.0 | 159 | |
| 160 | tokens.append(ch) | 160 | |
| 161 | if ch[0] == "L": | 161 | |
| 162 | i += ch[1] | 162 | |
| 163 | elif ch[0] == "M": | 163 | |
| 164 | i += ch[2] | 164 | |
| 165 | else: | 165 | |
| 166 | i += ch[1] * ch[2] | 166 | |
| 167 | 167 | ||
| 168 | out = [] | 168 | |
| 169 | for tok in tokens: | 169 | |
| 170 | t = tok[0] | 170 | |
| 171 | if t == "L": | 171 | |
| 172 | L = tok[1] | 172 | |
| 173 | start = sum( | 173 | |
| 174 | x[1] if x[0] == "L" else (x[2] if x[0] == "M" else x[1] * x[2]) | 174 | |
| 175 | for x in tokens[:len(out)] | 175 | |
| 176 | ) | 176 | |
| 177 | # Avoid O(n^2) from the above by not using it; fallback below | 177 | |
| 178 | out = None | 178 | |
| 179 | break | 179 | |
| 180 | 180 | ||
| 181 | # Efficient decompression | 181 | |
| 182 | built = [] | 182 | |
| 183 | src_i = 0 | 183 | |
| 184 | for tok in tokens: | 184 | |
| 185 | t = tok[0] | 185 | |
| 186 | if t == "L": | 186 | |
| 187 | L = tok[1] | 187 | |
| 188 | built.append(data[src_i:src_i + L]) | 188 | |
| 189 | src_i += L | 189 | |
| 190 | elif t == "M": | 190 | |
| 191 | off, L = tok[1], tok[2] | 191 | |
| 192 | cur = "".join(built) | 192 | |
| 193 | start = len(cur) - off | 193 | |
| 194 | if start < 0: | 194 | |
| 195 | return 999.0 | 195 | |
| 196 | piece = [] | 196 | |
| 197 | for k in range(L): | 197 | |
| 198 | idx = start + k | 198 | |
| 199 | if idx < len(cur): | 199 | |
| 200 | piece.append(cur[idx]) | 200 | |
| 201 | else: | 201 | |
| 202 | ref = cur + "".join(piece) | 202 | |
| 203 | if idx >= len(ref): | 203 | |
| 204 | return 999.0 | 204 | |
| 205 | piece.append(ref[idx]) | 205 | |
| 206 | built.append("".join(piece)) | 206 | |
| 207 | src_i += L | 207 | |
| 208 | else: | 208 | |
| 209 | p, reps = tok[1], tok[2] | 209 | |
| 210 | pat = data[src_i:src_i + p] | 210 | |
| 211 | built.append(pat * reps) | 211 | |
| 212 | src_i += p * reps | 212 | |
| 213 | 213 | ||
| 214 | dec = "".join(built) | 214 | |
| 215 | if dec != data: | 215 | |
| 216 | return 999.0 | 216 | |
| 217 | 217 | ||
| 218 | ratio = compressed / n | 218 | |
| 219 | return float(ratio) | 219 | |
| 220 | except: | 220 | |
| 221 | return 999.0 | 221 |
1def solve(input):2 try:3 data = input.get("data", "")4 if not isinstance(data, str):5 data = str(data)67 n = len(data)8 if n == 0:9 return 0.01011 def vlen(x):12 c = 113 while x >= 128:14 x >>= 715 c += 116 return c1718 # Token model:19 # 0: literal block cost = 1 + vlen(len) + len20 # 1: backref match cost = 1 + vlen(off) + vlen(len)21 # 2: repeated pattern cost = 1 + vlen(pat_len) + vlen(reps) + pat_len22 #23 # New approach: bottom-up DP over substrings using exact small-period detection24 # and bounded-window LZ-style matching from rolling buckets.2526 max_lit = 6427 max_window = 204828 max_match = 51229 bucket_span = 33031 best_next = [10**18] * (n + 1)32 choice = [None] * n33 best_next[n] = 03435 # Rolling buckets keyed by short substrings to find prior matches36 pos_lists = {}3738 # We'll process right-to-left for DP, but maintain left-side match candidates39 # by building them separately.40 cands = [[] for _ in range(n)]4142 if n >= 2:43 for i in range(n):44 if i + bucket_span <= n:45 key = data[i:i + bucket_span]46 lst = pos_lists.get(key)47 if lst is None:48 lst = []49 pos_lists[key] = lst5051 # Compare against recent prior positions52 for p in lst[::-1]:53 off = i - p54 if off > max_window:55 break56 l = 057 limit = n - i58 if limit > max_match:59 limit = max_match60 while l < limit and data[p + l] == data[i + l]:61 l += 162 if l >= 2:63 cands[i].append((off, l))64 if len(cands[i]) >= 12:65 break6667 lst.append(i)68 # prune old positions69 while lst and i - lst[0] > max_window:70 lst.pop(0)7172 # Add run-based candidates and exact local periodic candidates73 for i in range(n):74 # repeated-char run75 j = i + 176 while j < n and data[j] == data[i] and j - i < max_match:77 j += 178 run_len = j - i79 if run_len >= 2:80 cands[i].append((1, run_len))8182 def add_choice(i, cost, ch):83 if cost < best_next[i]:84 best_next[i] = cost85 choice[i] = ch8687 for i in range(n - 1, -1, -1):88 # Literal blocks89 lim = n - i90 if lim > max_lit:91 lim = max_lit92 for L in 
range(1, lim + 1):93 cost = 1 + vlen(L) + L + best_next[i + L]94 if cost < best_next[i]:95 best_next[i] = cost96 choice[i] = ("L", L)9798 # Backreferences99 if cands[i]:100 seen = {}101 for off, ln in cands[i]:102 if off <= 0 or ln < 2:103 continue104 prev = seen.get(off, 0)105 if ln > prev:106 seen[off] = ln107 for off, ln in seen.items():108 lens = [ln]109 if ln > 2:110 lens.append(2)111 if ln > 3:112 lens.append(3)113 if ln > 4:114 lens.append(4)115 if ln > 8:116 lens.append(8)117 if ln > 16:118 lens.append(16)119 if ln > 32:120 lens.append(32)121 if ln > 64:122 lens.append(64)123 if ln > 128:124 lens.append(128)125 used = set()126 for L in lens:127 if L in used or i + L > n:128 continue129 used.add(L)130 cost = 1 + vlen(off) + vlen(L) + best_next[i + L]131 if cost < best_next[i]:132 best_next[i] = cost133 choice[i] = ("M", off, L)134135 # Repeated-pattern token for local exact periods136 max_pat = 16137 rem = n - i138 if rem >= 4:139 upper = max_pat if max_pat < rem // 2 else rem // 2140 for p in range(1, upper + 1):141 reps = 1142 while i + (reps + 1) * p <= n and data[i:i + p] == data[i + reps * p:i + (reps + 1) * p]:143 reps += 1144 if reps >= 2:145 total = reps * p146 cost = 1 + vlen(p) + vlen(reps) + p + best_next[i + total]147 if cost < best_next[i]:148 best_next[i] = cost149 choice[i] = ("P", p, reps)150151 compressed = best_next[0]152153 # Reconstruct and verify154 tokens = []155 i = 0156 while i < n:157 ch = choice[i]158 if ch is None:159 return 999.0160 tokens.append(ch)161 if ch[0] == "L":162 i += ch[1]163 elif ch[0] == "M":164 i += ch[2]165 else:166 i += ch[1] * ch[2]167168 out = []169 for tok in tokens:170 t = tok[0]171 if t == "L":172 L = tok[1]173 start = sum(174 x[1] if x[0] == "L" else (x[2] if x[0] == "M" else x[1] * x[2])175 for x in tokens[:len(out)]176 )177 # Avoid O(n^2) from the above by not using it; fallback below178 out = None179 break180181 # Efficient decompression182 built = []183 src_i = 0184 for tok in tokens:185 t = tok[0]186 if 
t == "L":187 L = tok[1]188 built.append(data[src_i:src_i + L])189 src_i += L190 elif t == "M":191 off, L = tok[1], tok[2]192 cur = "".join(built)193 start = len(cur) - off194 if start < 0:195 return 999.0196 piece = []197 for k in range(L):198 idx = start + k199 if idx < len(cur):200 piece.append(cur[idx])201 else:202 ref = cur + "".join(piece)203 if idx >= len(ref):204 return 999.0205 piece.append(ref[idx])206 built.append("".join(piece))207 src_i += L208 else:209 p, reps = tok[1], tok[2]210 pat = data[src_i:src_i + p]211 built.append(pat * reps)212 src_i += p * reps213214 dec = "".join(built)215 if dec != data:216 return 999.0217218 ratio = compressed / n219 return float(ratio)220 except:221 return 999.01def solve(input):2 data = input.get("data", "")3 if not isinstance(data, str) or not data:4 return 999.056 # Mathematical/analytical approach: Entropy-based redundancy calculation7 8 from collections import Counter9 from math import log21011 def entropy(s):12 probabilities = [freq / len(s) for freq in Counter(s).values()]13 return -sum(p * log2(p) if p > 0 else 0 for p in probabilities)1415 def redundancy(s):16 max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 017 actual_entropy = entropy(s)18 return max_entropy - actual_entropy1920 # Calculate reduction in size possible based on redundancy21 reduction_potential = redundancy(data)2223 # Assuming compression is achieved based on redundancy24 max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data)))25 26 # Qualitative check if max_possible_compression_ratio makes sense27 if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0:28 return 999.02930 # Verify compression is lossless (hypothetical check here)31 # Normally, if we had a compression algorithm, we'd test decompress(compress(data)) == data32 33 # Returning the hypothetical compression performance34 return max_possible_compression_ratio