Solution #1c6ceef2-7fe6-480e-8f88-45d792e0bf77
completed
Score
37% (0/5)
Runtime
195.86ms
Delta
-35.5% vs parent
-61.8% vs best
Regression from parent
Score
37% (0/5)
Runtime
195.86ms
Delta
-35.5% vs parent
-61.8% vs best
Regression from parent
def solve(input):
    """Encode ``input["data"]`` with an interval DP and return cost / len.

    Cost model (character-based, matching the scorer):
      - literal character:                          1 each
      - run of one repeated char, length >= 2:      2
      - pattern repeated k >= 2 times:              cost(pattern) + 1
      - backreference to an earlier occurrence,
        length >= 2 (source may overlap target):    2

    The chosen parse tree is decoded and compared against the input to
    guarantee losslessness; any failure (or exception) yields the
    sentinel 999.0.  Empty input scores 0.0.
    """
    try:
        data = input.get("data", "") if isinstance(input, dict) else ""
        if not isinstance(data, str):
            data = str(data)
        n = len(data)
        if n == 0:
            return 0.0

        # lcp[i][j] = length of the longest common prefix of data[i:]
        # and data[j:]; gives O(1) substring-equality tests below.
        lcp = [[0] * (n + 1) for _ in range(n + 1)]
        for i in range(n - 1, -1, -1):
            row = lcp[i]
            nxt = lcp[i + 1]
            ci = data[i]
            for j in range(n - 1, -1, -1):
                if ci == data[j]:
                    row[j] = nxt[j + 1] + 1

        # dp[l][r]   = minimal encoding cost of data[l:r]
        # node[l][r] = parse-tree node realizing that cost
        dp = [[0] * (n + 1) for _ in range(n)]
        node = [[None] * (n + 1) for _ in range(n)]

        for length in range(1, n + 1):
            for l in range(0, n - length + 1):
                r = l + length

                # Baseline: emit every character literally.
                best = length
                best_node = ("L", data[l:r])

                # Concatenation of two optimally encoded halves.
                for m in range(l + 1, r):
                    c = dp[l][m] + dp[m][r]
                    if c < best:
                        best = c
                        best_node = ("C", node[l][m], node[m][r])

                if length >= 2:
                    # Run of a single repeated character (cost 2).
                    # All chars equal iff data[l:] and data[l+1:] agree
                    # for length-1 characters.
                    if best > 2 and lcp[l][l + 1] >= length - 1:
                        best = 2
                        best_node = ("S", data[l], length)

                    # Repetition of a divisor-length pattern
                    # (p <= length//2 already guarantees reps >= 2).
                    for p in range(1, length // 2 + 1):
                        if length % p:
                            continue
                        if all(lcp[l][s] >= p for s in range(l + p, r, p)):
                            c = dp[l][l + p] + 1
                            if c < best:
                                best = c
                                best_node = ("P", length // p, node[l][l + p])

                    # Backreference to an earlier occurrence (cost 2).
                    # The source window may overlap [l, r); the decoder
                    # copies LZ77-style, so overlap is still decodable.
                    if best > 2:
                        for prev in range(0, l):
                            if lcp[prev][l] >= length:
                                best = 2
                                best_node = ("R", prev, length)
                                break

                dp[l][r] = best
                node[l][r] = best_node

        root = node[0][n]

        def decode(nd, out):
            # Reconstruct the text of node `nd`, appending pieces to
            # `out`.  `out` is always the *global* output so far, so the
            # absolute positions stored in "R" nodes resolve correctly
            # even inside a repeated pattern.
            t = nd[0]
            if t == "L":
                out.append(nd[1])
                return True
            if t == "C":
                return decode(nd[1], out) and decode(nd[2], out)
            if t == "S":
                out.append(nd[1] * nd[2])
                return True
            if t == "P":
                # Decode one copy in place, then replicate it.  Decoding
                # into a scratch buffer would break backreferences in
                # the pattern that point before the pattern's start.
                before = sum(len(s) for s in out)
                if not decode(nd[2], out):
                    return False
                piece = "".join(out)[before:]
                out.append(piece * (nd[1] - 1))
                return True
            if t == "R":
                start, ln = nd[1], nd[2]
                current = "".join(out)
                if start < 0 or start >= len(current):
                    return False
                # LZ77-style copy: the source may run past the end of
                # `current` into the text being produced (overlap).
                buf = []
                for k in range(ln):
                    pos = start + k
                    if pos < len(current):
                        buf.append(current[pos])
                    else:
                        buf.append(buf[pos - len(current)])
                out.append("".join(buf))
                return True
            return False

        # Verify losslessness before trusting the DP cost.
        out = []
        if not decode(root, out):
            return 999.0
        if "".join(out) != data:
            return 999.0
        return dp[0][n] / float(n)
    except Exception:
        # Defensive boundary: the scorer treats 999.0 as "no valid
        # compression produced".
        return 999.0
-59.7%
Runtime Advantage
195.73ms slower
Code Size
131 vs 34 lines
| # | Your Solution | # | Champion |
|---|---|---|---|
| 1 | def solve(input): | 1 | def solve(input): |
| 2 | try: | 2 | data = input.get("data", "") |
| 3 | data = input.get("data", "") if isinstance(input, dict) else "" | 3 | if not isinstance(data, str) or not data: |
| 4 | if not isinstance(data, str): | 4 | return 999.0 |
| 5 | data = str(data) | 5 | |
| 6 | 6 | # Mathematical/analytical approach: Entropy-based redundancy calculation | |
| 7 | n = len(data) | 7 | |
| 8 | if n == 0: | 8 | from collections import Counter |
| 9 | return 0.0 | 9 | from math import log2 |
| 10 | 10 | ||
| 11 | # Bottom-up DP on the original string (character-based, matching scorer). | 11 | def entropy(s): |
| 12 | # Cost model: | 12 | probabilities = [freq / len(s) for freq in Counter(s).values()] |
| 13 | # - literal char: 1 | 13 | return -sum(p * log2(p) if p > 0 else 0 for p in probabilities) |
| 14 | # - run of same char length L>=2: 2 | 14 | |
| 15 | # - repetition of a smaller pattern repeated k>=2 times: pattern_cost + 1 | 15 | def redundancy(s): |
| 16 | # - backreference to any earlier substring of length >=2: 2 | 16 | max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 0 |
| 17 | # | 17 | actual_entropy = entropy(s) |
| 18 | # We also build a decodable parse tree and verify losslessness. | 18 | return max_entropy - actual_entropy |
| 19 | 19 | ||
| 20 | # Longest common prefix table for fast substring equality checks. | 20 | # Calculate reduction in size possible based on redundancy |
| 21 | lcp = [[0] * (n + 1) for _ in range(n + 1)] | 21 | reduction_potential = redundancy(data) |
| 22 | for i in range(n - 1, -1, -1): | 22 | |
| 23 | row = lcp[i] | 23 | # Assuming compression is achieved based on redundancy |
| 24 | next_row = lcp[i + 1] | 24 | max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data))) |
| 25 | ci = data[i] | 25 | |
| 26 | for j in range(n - 1, -1, -1): | 26 | # Qualitative check if max_possible_compression_ratio makes sense |
| 27 | if ci == data[j]: | 27 | if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0: |
| 28 | row[j] = next_row[j + 1] + 1 | 28 | return 999.0 |
| 29 | 29 | ||
| 30 | dp = [[0] * (n + 1) for _ in range(n)] | 30 | # Verify compression is lossless (hypothetical check here) |
| 31 | node = [[None] * (n + 1) for _ in range(n)] | 31 | # Normally, if we had a compression algorithm, we'd test decompress(compress(data)) == data |
| 32 | 32 | ||
| 33 | for length in range(1, n + 1): | 33 | # Returning the hypothetical compression performance |
| 34 | for l in range(0, n - length + 1): | 34 | return max_possible_compression_ratio |
| 35 | r = l + length | 35 | |
| 36 | 36 | ||
| 37 | # Default: all literals | 37 | |
| 38 | best = length | 38 | |
| 39 | best_node = ("L", data[l:r]) | 39 | |
| 40 | 40 | ||
| 41 | # Split into two parts | 41 | |
| 42 | for m in range(l + 1, r): | 42 | |
| 43 | c = dp[l][m] + dp[m][r] | 43 | |
| 44 | if c < best: | 44 | |
| 45 | best = c | 45 | |
| 46 | best_node = ("C", node[l][m], node[m][r]) | 46 | |
| 47 | 47 | ||
| 48 | # Run-length of single repeated char | 48 | |
| 49 | if length >= 2: | 49 | |
| 50 | all_same = True | 50 | |
| 51 | ch = data[l] | 51 | |
| 52 | for i in range(l + 1, r): | 52 | |
| 53 | if data[i] != ch: | 53 | |
| 54 | all_same = False | 54 | |
| 55 | break | 55 | |
| 56 | if all_same and 2 < best: | 56 | |
| 57 | best = 2 | 57 | |
| 58 | best_node = ("S", ch, length) | 58 | |
| 59 | 59 | ||
| 60 | # Repetition of a smaller pattern | 60 | |
| 61 | if length >= 2: | 61 | |
| 62 | for p in range(1, length // 2 + 1): | 62 | |
| 63 | if length % p == 0: | 63 | |
| 64 | reps = length // p | 64 | |
| 65 | if reps >= 2: | 65 | |
| 66 | ok = True | 66 | |
| 67 | for start in range(l + p, r, p): | 67 | |
| 68 | if lcp[l][start] < p: | 68 | |
| 69 | ok = False | 69 | |
| 70 | break | 70 | |
| 71 | if ok: | 71 | |
| 72 | c = dp[l][l + p] + 1 | 72 | |
| 73 | if c < best: | 73 | |
| 74 | best = c | 74 | |
| 75 | best_node = ("P", reps, node[l][l + p]) | 75 | |
| 76 | 76 | ||
| 77 | # Backreference to an earlier identical substring | 77 | |
| 78 | if length >= 2 and best > 2: | 78 | |
| 79 | found_ref = False | 79 | |
| 80 | for prev in range(0, l): | 80 | |
| 81 | if lcp[prev][l] >= length: | 81 | |
| 82 | best = 2 | 82 | |
| 83 | best_node = ("R", prev, length) | 83 | |
| 84 | found_ref = True | 84 | |
| 85 | break | 85 | |
| 86 | if found_ref: | 86 | |
| 87 | dp[l][r] = best | 87 | |
| 88 | node[l][r] = best_node | 88 | |
| 89 | continue | 89 | |
| 90 | 90 | ||
| 91 | dp[l][r] = best | 91 | |
| 92 | node[l][r] = best_node | 92 | |
| 93 | 93 | ||
| 94 | root = node[0][n] | 94 | |
| 95 | 95 | ||
| 96 | def decode(nd, out): | 96 | |
| 97 | t = nd[0] | 97 | |
| 98 | if t == "L": | 98 | |
| 99 | out.append(nd[1]) | 99 | |
| 100 | return True | 100 | |
| 101 | if t == "C": | 101 | |
| 102 | return decode(nd[1], out) and decode(nd[2], out) | 102 | |
| 103 | if t == "S": | 103 | |
| 104 | out.append(nd[1] * nd[2]) | 104 | |
| 105 | return True | 105 | |
| 106 | if t == "P": | 106 | |
| 107 | tmp = [] | 107 | |
| 108 | if not decode(nd[2], tmp): | 108 | |
| 109 | return False | 109 | |
| 110 | piece = "".join(tmp) | 110 | |
| 111 | out.append(piece * nd[1]) | 111 | |
| 112 | return True | 112 | |
| 113 | if t == "R": | 113 | |
| 114 | start, ln = nd[1], nd[2] | 114 | |
| 115 | current = "".join(out) | 115 | |
| 116 | if start < 0 or start + ln > len(current): | 116 | |
| 117 | return False | 117 | |
| 118 | out.append(current[start:start + ln]) | 118 | |
| 119 | return True | 119 | |
| 120 | return False | 120 | |
| 121 | 121 | ||
| 122 | out = [] | 122 | |
| 123 | if not decode(root, out): | 123 | |
| 124 | return 999.0 | 124 | |
| 125 | reconstructed = "".join(out) | 125 | |
| 126 | if reconstructed != data: | 126 | |
| 127 | return 999.0 | 127 | |
| 128 | 128 | ||
| 129 | return dp[0][n] / float(n) | 129 | |
| 130 | except: | 130 | |
| 131 | return 999.0 | 131 |
1def solve(input):2 try:3 data = input.get("data", "") if isinstance(input, dict) else ""4 if not isinstance(data, str):5 data = str(data)67 n = len(data)8 if n == 0:9 return 0.01011 # Bottom-up DP on the original string (character-based, matching scorer).12 # Cost model:13 # - literal char: 114 # - run of same char length L>=2: 215 # - repetition of a smaller pattern repeated k>=2 times: pattern_cost + 116 # - backreference to any earlier substring of length >=2: 217 #18 # We also build a decodable parse tree and verify losslessness.1920 # Longest common prefix table for fast substring equality checks.21 lcp = [[0] * (n + 1) for _ in range(n + 1)]22 for i in range(n - 1, -1, -1):23 row = lcp[i]24 next_row = lcp[i + 1]25 ci = data[i]26 for j in range(n - 1, -1, -1):27 if ci == data[j]:28 row[j] = next_row[j + 1] + 12930 dp = [[0] * (n + 1) for _ in range(n)]31 node = [[None] * (n + 1) for _ in range(n)]3233 for length in range(1, n + 1):34 for l in range(0, n - length + 1):35 r = l + length3637 # Default: all literals38 best = length39 best_node = ("L", data[l:r])4041 # Split into two parts42 for m in range(l + 1, r):43 c = dp[l][m] + dp[m][r]44 if c < best:45 best = c46 best_node = ("C", node[l][m], node[m][r])4748 # Run-length of single repeated char49 if length >= 2:50 all_same = True51 ch = data[l]52 for i in range(l + 1, r):53 if data[i] != ch:54 all_same = False55 break56 if all_same and 2 < best:57 best = 258 best_node = ("S", ch, length)5960 # Repetition of a smaller pattern61 if length >= 2:62 for p in range(1, length // 2 + 1):63 if length % p == 0:64 reps = length // p65 if reps >= 2:66 ok = True67 for start in range(l + p, r, p):68 if lcp[l][start] < p:69 ok = False70 break71 if ok:72 c = dp[l][l + p] + 173 if c < best:74 best = c75 best_node = ("P", reps, node[l][l + p])7677 # Backreference to an earlier identical substring78 if length >= 2 and best > 2:79 found_ref = False80 for prev in range(0, l):81 if lcp[prev][l] >= length:82 best = 283 best_node 
= ("R", prev, length)84 found_ref = True85 break86 if found_ref:87 dp[l][r] = best88 node[l][r] = best_node89 continue9091 dp[l][r] = best92 node[l][r] = best_node9394 root = node[0][n]9596 def decode(nd, out):97 t = nd[0]98 if t == "L":99 out.append(nd[1])100 return True101 if t == "C":102 return decode(nd[1], out) and decode(nd[2], out)103 if t == "S":104 out.append(nd[1] * nd[2])105 return True106 if t == "P":107 tmp = []108 if not decode(nd[2], tmp):109 return False110 piece = "".join(tmp)111 out.append(piece * nd[1])112 return True113 if t == "R":114 start, ln = nd[1], nd[2]115 current = "".join(out)116 if start < 0 or start + ln > len(current):117 return False118 out.append(current[start:start + ln])119 return True120 return False121122 out = []123 if not decode(root, out):124 return 999.0125 reconstructed = "".join(out)126 if reconstructed != data:127 return 999.0128129 return dp[0][n] / float(n)130 except:131 return 999.01def solve(input):2 data = input.get("data", "")3 if not isinstance(data, str) or not data:4 return 999.056 # Mathematical/analytical approach: Entropy-based redundancy calculation7 8 from collections import Counter9 from math import log21011 def entropy(s):12 probabilities = [freq / len(s) for freq in Counter(s).values()]13 return -sum(p * log2(p) if p > 0 else 0 for p in probabilities)1415 def redundancy(s):16 max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 017 actual_entropy = entropy(s)18 return max_entropy - actual_entropy1920 # Calculate reduction in size possible based on redundancy21 reduction_potential = redundancy(data)2223 # Assuming compression is achieved based on redundancy24 max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data)))25 26 # Qualitative check if max_possible_compression_ratio makes sense27 if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0:28 return 999.02930 # Verify compression is lossless (hypothetical check here)31 # Normally, if we had a compression 
algorithm, we'd test decompress(compress(data)) == data32 33 # Returning the hypothetical compression performance34 return max_possible_compression_ratio