Solution #1265a3fc-1854-4d14-b020-8f8ca014ff5d
Completed
Score
48% (0/5)
Runtime
38.27ms
Delta
+44.9% vs parent
-50.1% vs best
Improved from parent
Score
48% (0/5)
Runtime
38.27ms
Delta
+44.9% vs parent
-50.1% vs best
Improved from parent
def solve(input):
    """Estimate a compression ratio for input["data"] via an LZ-style optimal parse.

    Runs a shortest-path dynamic program over string positions using three
    token types — literal runs, single-char RLE runs, and windowed
    back-references discovered through a trie of recent suffixes — then
    decompresses the chosen token stream to verify the parse is lossless.

    Args:
        input: expected to be a dict carrying a string under "data"; any
            other shape degrades to "" (non-str values are str()-coerced).

    Returns:
        float: total token cost divided by input length (lower is better),
        0.0 for empty input, or 999.0 on any failure/verification mismatch.
    """
    try:
        data = input.get("data", "") if isinstance(input, dict) else ""
        if not isinstance(data, str):
            data = str(data)
        n = len(data)
        if n == 0:
            return 0.0
        # Abstract cost model, mirrored exactly by the verifier below:
        #   Literal token ('L', text):     cost = 1 + len(text)
        #   Run token     ('R', ch, cnt):  cost = 3 + len(str(cnt))
        #   Copy token    ('C', dist, ln): cost = 3 + len(str(dist)) + len(str(ln))
        max_window = min(n, 2048)  # farthest back a copy may reach
        max_match = min(n, 256)    # longest match the trie can represent
        INF = 10 ** 18
        dp = [INF] * (n + 1)       # dp[i] = min cost to encode data[:i]
        prev = [None] * (n + 1)    # back-pointers for token reconstruction
        dp[0] = 0

        # Trie over recent suffixes, stored as parallel arrays:
        # children[node] maps char -> child index; ends[node] keeps up to
        # the 8 most recent start positions of suffixes through that node.
        children = []
        ends = []

        def new_node():
            children.append({})
            ends.append([])
            return len(children) - 1

        root = new_node()

        def trie_add_suffix(start, current_pos):
            # Insert data[start:start+max_match], recording current_pos at
            # every node on the path (bounded history of 8 per node).
            node = root
            lim = min(n, start + max_match)
            for j in range(start, lim):
                ch = data[j]
                nxt = children[node].get(ch)
                if nxt is None:
                    nxt = new_node()
                    children[node][ch] = nxt
                node = nxt
                lst = ends[node]
                lst.append(current_pos)
                if len(lst) > 8:
                    del lst[0]

        def trie_matches(i):
            # Walk the trie along data[i:] collecting (length, distance)
            # back-reference candidates inside the window; return at most 6
            # deduplicated candidates, longest first (nearest as tiebreak).
            node = root
            best = []
            lim = min(n, i + max_match)
            for j in range(i, lim):
                ch = data[j]
                nxt = children[node].get(ch)
                if nxt is None:
                    break
                node = nxt
                ln = j - i + 1
                for pos in ends[node]:
                    dist = i - pos
                    if 0 < dist <= max_window:
                        best.append((ln, dist))
            if not best:
                return []
            best.sort(key=lambda x: (x[0], -x[1]), reverse=True)
            out = []
            seen = set()
            for ln, dist in best:
                key = (ln, dist)
                if key not in seen:
                    seen.add(key)
                    out.append((ln, dist))
                    if len(out) >= 6:
                        break
            return out

        # Lazily seed the trie with suffixes starting before the current
        # index (only starts within the copy window are inserted).
        inserted_upto = 0
        for i in range(n):
            while inserted_upto < i:
                if inserted_upto >= i - max_window:
                    trie_add_suffix(inserted_upto, inserted_upto)
                inserted_upto += 1
            if dp[i] >= INF:
                continue
            # 1) Literal runs of up to 32 chars.
            max_lit = min(32, n - i)
            for l in range(1, max_lit + 1):
                j = i + l
                cost = dp[i] + 1 + l
                if cost < dp[j]:
                    dp[j] = cost
                    prev[j] = ('L', i, j)
            # 2) Single-char run (RLE); besides the full run, try the 1- and
            # 2-digit truncations so the count's digit cost is explored.
            ch = data[i]
            r = i + 1
            while r < n and data[r] == ch and r - i < 9999:
                r += 1
            run_len = r - i
            if run_len >= 3:
                for l in (run_len, min(run_len, 9), min(run_len, 99)):
                    if l >= 3:
                        j = i + l
                        cost = dp[i] + 3 + len(str(l))
                        if cost < dp[j]:
                            dp[j] = cost
                            prev[j] = ('R', ch, l, i)
            # 3) Back-references discovered via the trie (min length 3).
            for ln, dist in trie_matches(i):
                if ln >= 3:
                    j = i + ln
                    cost = dp[i] + 3 + len(str(dist)) + len(str(ln))
                    if cost < dp[j]:
                        dp[j] = cost
                        prev[j] = ('C', dist, ln, i)

        if dp[n] >= INF:
            return 999.0

        # Reconstruct the token stream by following back-pointers.
        tokens = []
        cur = n
        while cur > 0:
            p = prev[cur]
            if p is None:
                return 999.0
            tokens.append(p)
            if p[0] == 'L':
                cur = p[1]
            else:
                # 'R' and 'C' both store their start position at index 3.
                cur = p[3]
        tokens.reverse()

        # Decompress the token stream and verify it restores data exactly.
        out = []
        built = ""
        for tok in tokens:
            kind = tok[0]
            if kind == 'L':
                s = data[tok[1]:tok[2]]
                out.append(s)
                built += s
            elif kind == 'R':
                ch, l = tok[1], tok[2]
                s = ch * l
                out.append(s)
                built += s
            elif kind == 'C':
                dist, ln = tok[1], tok[2]
                cur_len = len(built)
                if dist <= 0 or dist > cur_len:
                    return 999.0
                start = cur_len - dist
                s_parts = []
                for k in range(ln):
                    idx = start + k
                    if idx < len(built):
                        s_parts.append(built[idx])
                    else:
                        # Overlapping copy: read chars this token produced.
                        s_parts.append(s_parts[idx - len(built)])
                s = "".join(s_parts)
                out.append(s)
                built += s
            else:
                return 999.0

        restored = "".join(out)
        if restored != data:
            return 999.0
        return float(dp[n] / n)
    except Exception:
        # Bug fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        return 999.0
-48.4%
Runtime Advantage
38.14ms slower
Code Size
197 vs 34 lines
| # | Your Solution | # | Champion |
|---|---|---|---|
| 1 | def solve(input): | 1 | def solve(input): |
| 2 | try: | 2 | data = input.get("data", "") |
| 3 | data = input.get("data", "") if isinstance(input, dict) else "" | 3 | if not isinstance(data, str) or not data: |
| 4 | if not isinstance(data, str): | 4 | return 999.0 |
| 5 | data = str(data) | 5 | |
| 6 | 6 | # Mathematical/analytical approach: Entropy-based redundancy calculation | |
| 7 | n = len(data) | 7 | |
| 8 | if n == 0: | 8 | from collections import Counter |
| 9 | return 0.0 | 9 | from math import log2 |
| 10 | 10 | ||
| 11 | # Novel approach: suffix-trie-like rolling window graph parse. | 11 | def entropy(s): |
| 12 | # We build a compact directed parse using: | 12 | probabilities = [freq / len(s) for freq in Counter(s).values()] |
| 13 | # - literal runs | 13 | return -sum(p * log2(p) if p > 0 else 0 for p in probabilities) |
| 14 | # - run-length repeats of one char | 14 | |
| 15 | # - backreferences found via a trie over recent suffixes | 15 | def redundancy(s): |
| 16 | # Then compute shortest path cost on positions. | 16 | max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 0 |
| 17 | 17 | actual_entropy = entropy(s) | |
| 18 | # Cost model is abstract but consistent with decompressor: | 18 | return max_entropy - actual_entropy |
| 19 | # Literal token: ('L', text) cost = 1 + len(text) | 19 | |
| 20 | # Run token: ('R', ch, cnt) cost = 3 + len(str(cnt)) | 20 | # Calculate reduction in size possible based on redundancy |
| 21 | # Copy token: ('C', dist, ln) cost = 3 + len(str(dist)) + len(str(ln)) | 21 | reduction_potential = redundancy(data) |
| 22 | # | 22 | |
| 23 | # To improve over prior attempts, we use a graph of candidate edges | 23 | # Assuming compression is achieved based on redundancy |
| 24 | # generated from a recent-substring trie and dynamic programming. | 24 | max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data))) |
| 25 | 25 | ||
| 26 | max_window = min(n, 2048) | 26 | # Qualitative check if max_possible_compression_ratio makes sense |
| 27 | max_match = min(n, 256) | 27 | if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0: |
| 28 | 28 | return 999.0 | |
| 29 | # Build edges lazily. dp[i] = min cost to encode data[:i] | 29 | |
| 30 | INF = 10**18 | 30 | # Verify compression is lossless (hypothetical check here) |
| 31 | dp = [INF] * (n + 1) | 31 | # Normally, if we had a compression algorithm, we'd test decompress(compress(data)) == data |
| 32 | prev = [None] * (n + 1) | 32 | |
| 33 | dp[0] = 0 | 33 | # Returning the hypothetical compression performance |
| 34 | 34 | return max_possible_compression_ratio | |
| 35 | # Trie nodes: {char: child_index, '_pos': latest end position list} | 35 | |
| 36 | children = [] | 36 | |
| 37 | ends = [] | 37 | |
| 38 | 38 | ||
| 39 | def new_node(): | 39 | |
| 40 | children.append({}) | 40 | |
| 41 | ends.append([]) | 41 | |
| 42 | return len(children) - 1 | 42 | |
| 43 | 43 | ||
| 44 | root = new_node() | 44 | |
| 45 | 45 | ||
| 46 | def trie_add_suffix(start, current_pos): | 46 | |
| 47 | node = root | 47 | |
| 48 | lim = min(n, start + max_match) | 48 | |
| 49 | for j in range(start, lim): | 49 | |
| 50 | ch = data[j] | 50 | |
| 51 | nxt = children[node].get(ch) | 51 | |
| 52 | if nxt is None: | 52 | |
| 53 | nxt = new_node() | 53 | |
| 54 | children[node][ch] = nxt | 54 | |
| 55 | node = nxt | 55 | |
| 56 | lst = ends[node] | 56 | |
| 57 | lst.append(current_pos) | 57 | |
| 58 | if len(lst) > 8: | 58 | |
| 59 | del lst[0] | 59 | |
| 60 | 60 | ||
| 61 | def trie_matches(i): | 61 | |
| 62 | node = root | 62 | |
| 63 | best = [] | 63 | |
| 64 | lim = min(n, i + max_match) | 64 | |
| 65 | for j in range(i, lim): | 65 | |
| 66 | ch = data[j] | 66 | |
| 67 | nxt = children[node].get(ch) | 67 | |
| 68 | if nxt is None: | 68 | |
| 69 | break | 69 | |
| 70 | node = nxt | 70 | |
| 71 | ln = j - i + 1 | 71 | |
| 72 | for pos in ends[node]: | 72 | |
| 73 | dist = i - pos | 73 | |
| 74 | if 0 < dist <= max_window: | 74 | |
| 75 | best.append((ln, dist)) | 75 | |
| 76 | # keep only strongest few candidates | 76 | |
| 77 | if not best: | 77 | |
| 78 | return [] | 78 | |
| 79 | best.sort(key=lambda x: (x[0], -x[1]), reverse=True) | 79 | |
| 80 | out = [] | 80 | |
| 81 | seen = set() | 81 | |
| 82 | for ln, dist in best: | 82 | |
| 83 | key = (ln, dist) | 83 | |
| 84 | if key not in seen: | 84 | |
| 85 | seen.add(key) | 85 | |
| 86 | out.append((ln, dist)) | 86 | |
| 87 | if len(out) >= 6: | 87 | |
| 88 | break | 88 | |
| 89 | return out | 89 | |
| 90 | 90 | ||
| 91 | # Seed trie with suffixes ending before current index as we advance. | 91 | |
| 92 | inserted_upto = 0 | 92 | |
| 93 | 93 | ||
| 94 | for i in range(n): | 94 | |
| 95 | # Ensure trie contains suffixes starting in recent window before i | 95 | |
| 96 | while inserted_upto < i: | 96 | |
| 97 | if inserted_upto >= i - max_window: | 97 | |
| 98 | trie_add_suffix(inserted_upto, inserted_upto) | 98 | |
| 99 | inserted_upto += 1 | 99 | |
| 100 | 100 | ||
| 101 | if dp[i] >= INF: | 101 | |
| 102 | continue | 102 | |
| 103 | 103 | ||
| 104 | # 1) Literal runs | 104 | |
| 105 | max_lit = min(32, n - i) | 105 | |
| 106 | for l in range(1, max_lit + 1): | 106 | |
| 107 | j = i + l | 107 | |
| 108 | cost = dp[i] + 1 + l | 108 | |
| 109 | if cost < dp[j]: | 109 | |
| 110 | dp[j] = cost | 110 | |
| 111 | prev[j] = ('L', i, j) | 111 | |
| 112 | 112 | ||
| 113 | # 2) Single-char run | 113 | |
| 114 | ch = data[i] | 114 | |
| 115 | r = i + 1 | 115 | |
| 116 | while r < n and data[r] == ch and r - i < 9999: | 116 | |
| 117 | r += 1 | 117 | |
| 118 | run_len = r - i | 118 | |
| 119 | if run_len >= 3: | 119 | |
| 120 | for l in (run_len, min(run_len, 9), min(run_len, 99)): | 120 | |
| 121 | if l >= 3: | 121 | |
| 122 | j = i + l | 122 | |
| 123 | cost = dp[i] + 3 + len(str(l)) | 123 | |
| 124 | if cost < dp[j]: | 124 | |
| 125 | dp[j] = cost | 125 | |
| 126 | prev[j] = ('R', ch, l, i) | 126 | |
| 127 | 127 | ||
| 128 | # 3) Trie-discovered copies | 128 | |
| 129 | for ln, dist in trie_matches(i): | 129 | |
| 130 | if ln >= 3: | 130 | |
| 131 | j = i + ln | 131 | |
| 132 | cost = dp[i] + 3 + len(str(dist)) + len(str(ln)) | 132 | |
| 133 | if cost < dp[j]: | 133 | |
| 134 | dp[j] = cost | 134 | |
| 135 | prev[j] = ('C', dist, ln, i) | 135 | |
| 136 | 136 | ||
| 137 | if dp[n] >= INF: | 137 | |
| 138 | return 999.0 | 138 | |
| 139 | 139 | ||
| 140 | # Reconstruct token stream | 140 | |
| 141 | tokens = [] | 141 | |
| 142 | cur = n | 142 | |
| 143 | while cur > 0: | 143 | |
| 144 | p = prev[cur] | 144 | |
| 145 | if p is None: | 145 | |
| 146 | return 999.0 | 146 | |
| 147 | kind = p[0] | 147 | |
| 148 | tokens.append(p) | 148 | |
| 149 | if kind == 'L': | 149 | |
| 150 | cur = p[1] | 150 | |
| 151 | elif kind == 'R': | 151 | |
| 152 | cur = p[3] | 152 | |
| 153 | else: | 153 | |
| 154 | cur = p[3] | 154 | |
| 155 | tokens.reverse() | 155 | |
| 156 | 156 | ||
| 157 | # Decompress and verify | 157 | |
| 158 | out = [] | 158 | |
| 159 | built = "" | 159 | |
| 160 | for tok in tokens: | 160 | |
| 161 | kind = tok[0] | 161 | |
| 162 | if kind == 'L': | 162 | |
| 163 | s = data[tok[1]:tok[2]] | 163 | |
| 164 | out.append(s) | 164 | |
| 165 | built += s | 165 | |
| 166 | elif kind == 'R': | 166 | |
| 167 | ch, l = tok[1], tok[2] | 167 | |
| 168 | s = ch * l | 168 | |
| 169 | out.append(s) | 169 | |
| 170 | built += s | 170 | |
| 171 | elif kind == 'C': | 171 | |
| 172 | dist, ln = tok[1], tok[2] | 172 | |
| 173 | cur_len = len(built) | 173 | |
| 174 | if dist <= 0 or dist > cur_len: | 174 | |
| 175 | return 999.0 | 175 | |
| 176 | start = cur_len - dist | 176 | |
| 177 | s_parts = [] | 177 | |
| 178 | for k in range(ln): | 178 | |
| 179 | idx = start + k | 179 | |
| 180 | if idx < len(built): | 180 | |
| 181 | s_parts.append(built[idx]) | 181 | |
| 182 | else: | 182 | |
| 183 | # allow overlapping copy semantics | 183 | |
| 184 | s_parts.append(s_parts[idx - len(built)]) | 184 | |
| 185 | s = "".join(s_parts) | 185 | |
| 186 | out.append(s) | 186 | |
| 187 | built += s | 187 | |
| 188 | else: | 188 | |
| 189 | return 999.0 | 189 | |
| 190 | 190 | ||
| 191 | restored = "".join(out) | 191 | |
| 192 | if restored != data: | 192 | |
| 193 | return 999.0 | 193 | |
| 194 | 194 | ||
| 195 | return float(dp[n] / n) | 195 | |
| 196 | except: | 196 | |
| 197 | return 999.0 | 197 |
1def solve(input):2 try:3 data = input.get("data", "") if isinstance(input, dict) else ""4 if not isinstance(data, str):5 data = str(data)67 n = len(data)8 if n == 0:9 return 0.01011 # Novel approach: suffix-trie-like rolling window graph parse.12 # We build a compact directed parse using:13 # - literal runs14 # - run-length repeats of one char15 # - backreferences found via a trie over recent suffixes16 # Then compute shortest path cost on positions.1718 # Cost model is abstract but consistent with decompressor:19 # Literal token: ('L', text) cost = 1 + len(text)20 # Run token: ('R', ch, cnt) cost = 3 + len(str(cnt))21 # Copy token: ('C', dist, ln) cost = 3 + len(str(dist)) + len(str(ln))22 #23 # To improve over prior attempts, we use a graph of candidate edges24 # generated from a recent-substring trie and dynamic programming.2526 max_window = min(n, 2048)27 max_match = min(n, 256)2829 # Build edges lazily. dp[i] = min cost to encode data[:i]30 INF = 10**1831 dp = [INF] * (n + 1)32 prev = [None] * (n + 1)33 dp[0] = 03435 # Trie nodes: {char: child_index, '_pos': latest end position list}36 children = []37 ends = []3839 def new_node():40 children.append({})41 ends.append([])42 return len(children) - 14344 root = new_node()4546 def trie_add_suffix(start, current_pos):47 node = root48 lim = min(n, start + max_match)49 for j in range(start, lim):50 ch = data[j]51 nxt = children[node].get(ch)52 if nxt is None:53 nxt = new_node()54 children[node][ch] = nxt55 node = nxt56 lst = ends[node]57 lst.append(current_pos)58 if len(lst) > 8:59 del lst[0]6061 def trie_matches(i):62 node = root63 best = []64 lim = min(n, i + max_match)65 for j in range(i, lim):66 ch = data[j]67 nxt = children[node].get(ch)68 if nxt is None:69 break70 node = nxt71 ln = j - i + 172 for pos in ends[node]:73 dist = i - pos74 if 0 < dist <= max_window:75 best.append((ln, dist))76 # keep only strongest few candidates77 if not best:78 return []79 best.sort(key=lambda x: (x[0], -x[1]), reverse=True)80 out 
= []81 seen = set()82 for ln, dist in best:83 key = (ln, dist)84 if key not in seen:85 seen.add(key)86 out.append((ln, dist))87 if len(out) >= 6:88 break89 return out9091 # Seed trie with suffixes ending before current index as we advance.92 inserted_upto = 09394 for i in range(n):95 # Ensure trie contains suffixes starting in recent window before i96 while inserted_upto < i:97 if inserted_upto >= i - max_window:98 trie_add_suffix(inserted_upto, inserted_upto)99 inserted_upto += 1100101 if dp[i] >= INF:102 continue103104 # 1) Literal runs105 max_lit = min(32, n - i)106 for l in range(1, max_lit + 1):107 j = i + l108 cost = dp[i] + 1 + l109 if cost < dp[j]:110 dp[j] = cost111 prev[j] = ('L', i, j)112113 # 2) Single-char run114 ch = data[i]115 r = i + 1116 while r < n and data[r] == ch and r - i < 9999:117 r += 1118 run_len = r - i119 if run_len >= 3:120 for l in (run_len, min(run_len, 9), min(run_len, 99)):121 if l >= 3:122 j = i + l123 cost = dp[i] + 3 + len(str(l))124 if cost < dp[j]:125 dp[j] = cost126 prev[j] = ('R', ch, l, i)127128 # 3) Trie-discovered copies129 for ln, dist in trie_matches(i):130 if ln >= 3:131 j = i + ln132 cost = dp[i] + 3 + len(str(dist)) + len(str(ln))133 if cost < dp[j]:134 dp[j] = cost135 prev[j] = ('C', dist, ln, i)136137 if dp[n] >= INF:138 return 999.0139140 # Reconstruct token stream141 tokens = []142 cur = n143 while cur > 0:144 p = prev[cur]145 if p is None:146 return 999.0147 kind = p[0]148 tokens.append(p)149 if kind == 'L':150 cur = p[1]151 elif kind == 'R':152 cur = p[3]153 else:154 cur = p[3]155 tokens.reverse()156157 # Decompress and verify158 out = []159 built = ""160 for tok in tokens:161 kind = tok[0]162 if kind == 'L':163 s = data[tok[1]:tok[2]]164 out.append(s)165 built += s166 elif kind == 'R':167 ch, l = tok[1], tok[2]168 s = ch * l169 out.append(s)170 built += s171 elif kind == 'C':172 dist, ln = tok[1], tok[2]173 cur_len = len(built)174 if dist <= 0 or dist > cur_len:175 return 999.0176 start = cur_len - dist177 
s_parts = []178 for k in range(ln):179 idx = start + k180 if idx < len(built):181 s_parts.append(built[idx])182 else:183 # allow overlapping copy semantics184 s_parts.append(s_parts[idx - len(built)])185 s = "".join(s_parts)186 out.append(s)187 built += s188 else:189 return 999.0190191 restored = "".join(out)192 if restored != data:193 return 999.0194195 return float(dp[n] / n)196 except:197 return 999.01def solve(input):2 data = input.get("data", "")3 if not isinstance(data, str) or not data:4 return 999.056 # Mathematical/analytical approach: Entropy-based redundancy calculation7 8 from collections import Counter9 from math import log21011 def entropy(s):12 probabilities = [freq / len(s) for freq in Counter(s).values()]13 return -sum(p * log2(p) if p > 0 else 0 for p in probabilities)1415 def redundancy(s):16 max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 017 actual_entropy = entropy(s)18 return max_entropy - actual_entropy1920 # Calculate reduction in size possible based on redundancy21 reduction_potential = redundancy(data)2223 # Assuming compression is achieved based on redundancy24 max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data)))25 26 # Qualitative check if max_possible_compression_ratio makes sense27 if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0:28 return 999.02930 # Verify compression is lossless (hypothetical check here)31 # Normally, if we had a compression algorithm, we'd test decompress(compress(data)) == data32 33 # Returning the hypothetical compression performance34 return max_possible_compression_ratio