Solution #88c52dca-8a73-45af-a579-cf1a6dc3d013
completedScore
49% (0/5)
Runtime
350.32ms
Delta
No change vs parent
-49.7% vs best
Same as parent
Score
49% (0/5)
Runtime
350.32ms
Delta
No change vs parent
-49.7% vs best
Same as parent
def solve(input):
try:
data = input.get("data", "") if isinstance(input, dict) else ""
if not isinstance(data, str):
data = str(data)
n = len(data)
if n == 0:
return 0.0
# Novel approach:
# Build a suffix-automaton-like DP using exact substring equality via rolling hash,
# but optimize repeated-pattern discovery with union-find over equal-adjacent positions.
# Cost model counts compressed "symbols":
# - literal char: 1
# - repeated-char run: 1 + digits(count)
# - repeated block: cost(block) + digits(repetitions)
# - concatenation: additive
# We verify by reconstructing the exact original; otherwise return 999.0.
digits = [0] * (n + 1)
for i in range(1, n + 1):
if i < 10:
digits[i] = 1
elif i < 100:
digits[i] = 2
elif i < 1000:
digits[i] = 3
elif i < 10000:
digits[i] = 4
else:
digits[i] = len(str(i))
# Rolling hash for O(1) substring equality checks
B1 = 911382323
M1 = 1000000007
B2 = 972663749
M2 = 1000000009
p1 = [1] * (n + 1)
p2 = [1] * (n + 1)
h1 = [0] * (n + 1)
h2 = [0] * (n + 1)
for i, ch in enumerate(data):
x = ord(ch) + 1
p1[i + 1] = (p1[i] * B1) % M1
p2[i + 1] = (p2[i] * B2) % M2
h1[i + 1] = (h1[i] * B1 + x) % M1
h2[i + 1] = (h2[i] * B2 + x) % M2
def same(i, j, L):
return ((h1[i + L] - h1[i] * p1[L]) % M1 == (h1[j + L] - h1[j] * p1[L]) % M1 and
(h2[i + L] - h2[i] * p2[L]) % M2 == (h2[j + L] - h2[j] * p2[L]) % M2)
# Union-find over equal adjacent chars to quickly identify long constant runs
parent = list(range(n + 1))
size = [1] * (n + 1)
def find(x):
while parent[x] != x:
parent[x] = parent[parent[x]]
x = parent[x]
return x
def union(a, b):
ra = find(a)
rb = find(b)
if ra == rb:
return
if size[ra] < size[rb]:
ra, rb = rb, ra
parent[rb] = ra
size[ra] += size[rb]
for i in range(n - 1):
if data[i] == data[i + 1]:
union(i, i + 1)
# Compute maximal same-char run starting at each position
runlen = [1] * n
i = 0
while i < n:
j = i + 1
while j < n and data[j] == data[i]:
j += 1
L = j - i
for k in range(i, j):
runlen[k] = j - k
i = j
# Divisors for repetition candidates
divisors = [[] for _ in range(n + 1)]
d = 1
while d <= n:
m = d + d
while m <= n:
divisors[m].append(d)
m += d
d += 1
cost = [[0] * (n + 1) for _ in range(n)]
typ = [[0] * (n + 1) for _ in range(n)] # 0 lit, 1 cat, 2 run, 3 rep
a1 = [[0] * (n + 1) for _ in range(n)]
a2 = [[0] * (n + 1) for _ in range(n)]
for L in range(1, n + 1):
for i in range(0, n - L + 1):
j = i + L
best = L
bt = 0
x1 = 0
x2 = 0
# repeated-char run
if runlen[i] >= L and L >= 2:
c = 1 + digits[L]
if c < best:
best = c
bt = 2
x1 = ord(data[i])
x2 = L
# repeated block
if L >= 2:
for p in divisors[L]:
reps = L // p
ok = True
pos = i + p
while pos < j:
if not same(i, pos, p):
ok = False
break
pos += p
if ok:
c = cost[i][i + p] + digits[reps]
if c < best:
best = c
bt = 3
x1 = p
x2 = reps
break
# concatenation
for k in range(i + 1, j):
c = cost[i][k] + cost[k][j]
if c < best:
best = c
bt = 1
x1 = k
x2 = 0
cost[i][j] = best
typ[i][j] = bt
a1[i][j] = x1
a2[i][j] = x2
def build(i, j):
t = typ[i][j]
if t == 0:
return data[i:j]
if t == 1:
k = a1[i][j]
return build(i, k) + build(k, j)
if t == 2:
return chr(a1[i][j]) * a2[i][j]
p = a1[i][j]
reps = a2[i][j]
return build(i, i + p) * reps
out = build(0, n)
if out != data:
return 999.0
return cost[0][n] / float(n)
except:
return 999.0Score Difference
-48.0%
Runtime Advantage
350.19ms slower
Code Size
177 vs 34 lines
| # | Your Solution | # | Champion |
|---|---|---|---|
| 1 | def solve(input): | 1 | def solve(input): |
| 2 | try: | 2 | data = input.get("data", "") |
| 3 | data = input.get("data", "") if isinstance(input, dict) else "" | 3 | if not isinstance(data, str) or not data: |
| 4 | if not isinstance(data, str): | 4 | return 999.0 |
| 5 | data = str(data) | 5 | |
| 6 | 6 | # Mathematical/analytical approach: Entropy-based redundancy calculation | |
| 7 | n = len(data) | 7 | |
| 8 | if n == 0: | 8 | from collections import Counter |
| 9 | return 0.0 | 9 | from math import log2 |
| 10 | 10 | ||
| 11 | # Novel approach: | 11 | def entropy(s): |
| 12 | # Build a suffix-automaton-like DP using exact substring equality via rolling hash, | 12 | probabilities = [freq / len(s) for freq in Counter(s).values()] |
| 13 | # but optimize repeated-pattern discovery with union-find over equal-adjacent positions. | 13 | return -sum(p * log2(p) if p > 0 else 0 for p in probabilities) |
| 14 | # Cost model counts compressed "symbols": | 14 | |
| 15 | # - literal char: 1 | 15 | def redundancy(s): |
| 16 | # - repeated-char run: 1 + digits(count) | 16 | max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 0 |
| 17 | # - repeated block: cost(block) + digits(repetitions) | 17 | actual_entropy = entropy(s) |
| 18 | # - concatenation: additive | 18 | return max_entropy - actual_entropy |
| 19 | # We verify by reconstructing the exact original; otherwise return 999.0. | 19 | |
| 20 | 20 | # Calculate reduction in size possible based on redundancy | |
| 21 | digits = [0] * (n + 1) | 21 | reduction_potential = redundancy(data) |
| 22 | for i in range(1, n + 1): | 22 | |
| 23 | if i < 10: | 23 | # Assuming compression is achieved based on redundancy |
| 24 | digits[i] = 1 | 24 | max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data))) |
| 25 | elif i < 100: | 25 | |
| 26 | digits[i] = 2 | 26 | # Qualitative check if max_possible_compression_ratio makes sense |
| 27 | elif i < 1000: | 27 | if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0: |
| 28 | digits[i] = 3 | 28 | return 999.0 |
| 29 | elif i < 10000: | 29 | |
| 30 | digits[i] = 4 | 30 | # Verify compression is lossless (hypothetical check here) |
| 31 | else: | 31 | # Normally, if we had a compression algorithm, we'd test decompress(compress(data)) == data |
| 32 | digits[i] = len(str(i)) | 32 | |
| 33 | 33 | # Returning the hypothetical compression performance | |
| 34 | # Rolling hash for O(1) substring equality checks | 34 | return max_possible_compression_ratio |
| 35 | B1 = 911382323 | 35 | |
| 36 | M1 = 1000000007 | 36 | |
| 37 | B2 = 972663749 | 37 | |
| 38 | M2 = 1000000009 | 38 | |
| 39 | 39 | ||
| 40 | p1 = [1] * (n + 1) | 40 | |
| 41 | p2 = [1] * (n + 1) | 41 | |
| 42 | h1 = [0] * (n + 1) | 42 | |
| 43 | h2 = [0] * (n + 1) | 43 | |
| 44 | 44 | ||
| 45 | for i, ch in enumerate(data): | 45 | |
| 46 | x = ord(ch) + 1 | 46 | |
| 47 | p1[i + 1] = (p1[i] * B1) % M1 | 47 | |
| 48 | p2[i + 1] = (p2[i] * B2) % M2 | 48 | |
| 49 | h1[i + 1] = (h1[i] * B1 + x) % M1 | 49 | |
| 50 | h2[i + 1] = (h2[i] * B2 + x) % M2 | 50 | |
| 51 | 51 | ||
| 52 | def same(i, j, L): | 52 | |
| 53 | return ((h1[i + L] - h1[i] * p1[L]) % M1 == (h1[j + L] - h1[j] * p1[L]) % M1 and | 53 | |
| 54 | (h2[i + L] - h2[i] * p2[L]) % M2 == (h2[j + L] - h2[j] * p2[L]) % M2) | 54 | |
| 55 | 55 | ||
| 56 | # Union-find over equal adjacent chars to quickly identify long constant runs | 56 | |
| 57 | parent = list(range(n + 1)) | 57 | |
| 58 | size = [1] * (n + 1) | 58 | |
| 59 | 59 | ||
| 60 | def find(x): | 60 | |
| 61 | while parent[x] != x: | 61 | |
| 62 | parent[x] = parent[parent[x]] | 62 | |
| 63 | x = parent[x] | 63 | |
| 64 | return x | 64 | |
| 65 | 65 | ||
| 66 | def union(a, b): | 66 | |
| 67 | ra = find(a) | 67 | |
| 68 | rb = find(b) | 68 | |
| 69 | if ra == rb: | 69 | |
| 70 | return | 70 | |
| 71 | if size[ra] < size[rb]: | 71 | |
| 72 | ra, rb = rb, ra | 72 | |
| 73 | parent[rb] = ra | 73 | |
| 74 | size[ra] += size[rb] | 74 | |
| 75 | 75 | ||
| 76 | for i in range(n - 1): | 76 | |
| 77 | if data[i] == data[i + 1]: | 77 | |
| 78 | union(i, i + 1) | 78 | |
| 79 | 79 | ||
| 80 | # Compute maximal same-char run starting at each position | 80 | |
| 81 | runlen = [1] * n | 81 | |
| 82 | i = 0 | 82 | |
| 83 | while i < n: | 83 | |
| 84 | j = i + 1 | 84 | |
| 85 | while j < n and data[j] == data[i]: | 85 | |
| 86 | j += 1 | 86 | |
| 87 | L = j - i | 87 | |
| 88 | for k in range(i, j): | 88 | |
| 89 | runlen[k] = j - k | 89 | |
| 90 | i = j | 90 | |
| 91 | 91 | ||
| 92 | # Divisors for repetition candidates | 92 | |
| 93 | divisors = [[] for _ in range(n + 1)] | 93 | |
| 94 | d = 1 | 94 | |
| 95 | while d <= n: | 95 | |
| 96 | m = d + d | 96 | |
| 97 | while m <= n: | 97 | |
| 98 | divisors[m].append(d) | 98 | |
| 99 | m += d | 99 | |
| 100 | d += 1 | 100 | |
| 101 | 101 | ||
| 102 | cost = [[0] * (n + 1) for _ in range(n)] | 102 | |
| 103 | typ = [[0] * (n + 1) for _ in range(n)] # 0 lit, 1 cat, 2 run, 3 rep | 103 | |
| 104 | a1 = [[0] * (n + 1) for _ in range(n)] | 104 | |
| 105 | a2 = [[0] * (n + 1) for _ in range(n)] | 105 | |
| 106 | 106 | ||
| 107 | for L in range(1, n + 1): | 107 | |
| 108 | for i in range(0, n - L + 1): | 108 | |
| 109 | j = i + L | 109 | |
| 110 | best = L | 110 | |
| 111 | bt = 0 | 111 | |
| 112 | x1 = 0 | 112 | |
| 113 | x2 = 0 | 113 | |
| 114 | 114 | ||
| 115 | # repeated-char run | 115 | |
| 116 | if runlen[i] >= L and L >= 2: | 116 | |
| 117 | c = 1 + digits[L] | 117 | |
| 118 | if c < best: | 118 | |
| 119 | best = c | 119 | |
| 120 | bt = 2 | 120 | |
| 121 | x1 = ord(data[i]) | 121 | |
| 122 | x2 = L | 122 | |
| 123 | 123 | ||
| 124 | # repeated block | 124 | |
| 125 | if L >= 2: | 125 | |
| 126 | for p in divisors[L]: | 126 | |
| 127 | reps = L // p | 127 | |
| 128 | ok = True | 128 | |
| 129 | pos = i + p | 129 | |
| 130 | while pos < j: | 130 | |
| 131 | if not same(i, pos, p): | 131 | |
| 132 | ok = False | 132 | |
| 133 | break | 133 | |
| 134 | pos += p | 134 | |
| 135 | if ok: | 135 | |
| 136 | c = cost[i][i + p] + digits[reps] | 136 | |
| 137 | if c < best: | 137 | |
| 138 | best = c | 138 | |
| 139 | bt = 3 | 139 | |
| 140 | x1 = p | 140 | |
| 141 | x2 = reps | 141 | |
| 142 | break | 142 | |
| 143 | 143 | ||
| 144 | # concatenation | 144 | |
| 145 | for k in range(i + 1, j): | 145 | |
| 146 | c = cost[i][k] + cost[k][j] | 146 | |
| 147 | if c < best: | 147 | |
| 148 | best = c | 148 | |
| 149 | bt = 1 | 149 | |
| 150 | x1 = k | 150 | |
| 151 | x2 = 0 | 151 | |
| 152 | 152 | ||
| 153 | cost[i][j] = best | 153 | |
| 154 | typ[i][j] = bt | 154 | |
| 155 | a1[i][j] = x1 | 155 | |
| 156 | a2[i][j] = x2 | 156 | |
| 157 | 157 | ||
| 158 | def build(i, j): | 158 | |
| 159 | t = typ[i][j] | 159 | |
| 160 | if t == 0: | 160 | |
| 161 | return data[i:j] | 161 | |
| 162 | if t == 1: | 162 | |
| 163 | k = a1[i][j] | 163 | |
| 164 | return build(i, k) + build(k, j) | 164 | |
| 165 | if t == 2: | 165 | |
| 166 | return chr(a1[i][j]) * a2[i][j] | 166 | |
| 167 | p = a1[i][j] | 167 | |
| 168 | reps = a2[i][j] | 168 | |
| 169 | return build(i, i + p) * reps | 169 | |
| 170 | 170 | ||
| 171 | out = build(0, n) | 171 | |
| 172 | if out != data: | 172 | |
| 173 | return 999.0 | 173 | |
| 174 | 174 | ||
| 175 | return cost[0][n] / float(n) | 175 | |
| 176 | except: | 176 | |
| 177 | return 999.0 | 177 |
1def solve(input):2 try:3 data = input.get("data", "") if isinstance(input, dict) else ""4 if not isinstance(data, str):5 data = str(data)67 n = len(data)8 if n == 0:9 return 0.01011 # Novel approach:12 # Build a suffix-automaton-like DP using exact substring equality via rolling hash,13 # but optimize repeated-pattern discovery with union-find over equal-adjacent positions.14 # Cost model counts compressed "symbols":15 # - literal char: 116 # - repeated-char run: 1 + digits(count)17 # - repeated block: cost(block) + digits(repetitions)18 # - concatenation: additive19 # We verify by reconstructing the exact original; otherwise return 999.0.2021 digits = [0] * (n + 1)22 for i in range(1, n + 1):23 if i < 10:24 digits[i] = 125 elif i < 100:26 digits[i] = 227 elif i < 1000:28 digits[i] = 329 elif i < 10000:30 digits[i] = 431 else:32 digits[i] = len(str(i))3334 # Rolling hash for O(1) substring equality checks35 B1 = 91138232336 M1 = 100000000737 B2 = 97266374938 M2 = 10000000093940 p1 = [1] * (n + 1)41 p2 = [1] * (n + 1)42 h1 = [0] * (n + 1)43 h2 = [0] * (n + 1)4445 for i, ch in enumerate(data):46 x = ord(ch) + 147 p1[i + 1] = (p1[i] * B1) % M148 p2[i + 1] = (p2[i] * B2) % M249 h1[i + 1] = (h1[i] * B1 + x) % M150 h2[i + 1] = (h2[i] * B2 + x) % M25152 def same(i, j, L):53 return ((h1[i + L] - h1[i] * p1[L]) % M1 == (h1[j + L] - h1[j] * p1[L]) % M1 and54 (h2[i + L] - h2[i] * p2[L]) % M2 == (h2[j + L] - h2[j] * p2[L]) % M2)5556 # Union-find over equal adjacent chars to quickly identify long constant runs57 parent = list(range(n + 1))58 size = [1] * (n + 1)5960 def find(x):61 while parent[x] != x:62 parent[x] = parent[parent[x]]63 x = parent[x]64 return x6566 def union(a, b):67 ra = find(a)68 rb = find(b)69 if ra == rb:70 return71 if size[ra] < size[rb]:72 ra, rb = rb, ra73 parent[rb] = ra74 size[ra] += size[rb]7576 for i in range(n - 1):77 if data[i] == data[i + 1]:78 union(i, i + 1)7980 # Compute maximal same-char run starting at each position81 runlen = [1] * n82 i = 083 while i < n:84 j = i + 185 while j < n and data[j] == data[i]:86 j += 187 L = j - i88 for k in range(i, j):89 runlen[k] = j - k90 i = j9192 # Divisors for repetition candidates93 divisors = [[] for _ in range(n + 1)]94 d = 195 while d <= n:96 m = d + d97 while m <= n:98 divisors[m].append(d)99 m += d100 d += 1101102 cost = [[0] * (n + 1) for _ in range(n)]103 typ = [[0] * (n + 1) for _ in range(n)] # 0 lit, 1 cat, 2 run, 3 rep104 a1 = [[0] * (n + 1) for _ in range(n)]105 a2 = [[0] * (n + 1) for _ in range(n)]106107 for L in range(1, n + 1):108 for i in range(0, n - L + 1):109 j = i + L110 best = L111 bt = 0112 x1 = 0113 x2 = 0114115 # repeated-char run116 if runlen[i] >= L and L >= 2:117 c = 1 + digits[L]118 if c < best:119 best = c120 bt = 2121 x1 = ord(data[i])122 x2 = L123124 # repeated block125 if L >= 2:126 for p in divisors[L]:127 reps = L // p128 ok = True129 pos = i + p130 while pos < j:131 if not same(i, pos, p):132 ok = False133 break134 pos += p135 if ok:136 c = cost[i][i + p] + digits[reps]137 if c < best:138 best = c139 bt = 3140 x1 = p141 x2 = reps142 break143144 # concatenation145 for k in range(i + 1, j):146 c = cost[i][k] + cost[k][j]147 if c < best:148 best = c149 bt = 1150 x1 = k151 x2 = 0152153 cost[i][j] = best154 typ[i][j] = bt155 a1[i][j] = x1156 a2[i][j] = x2157158 def build(i, j):159 t = typ[i][j]160 if t == 0:161 return data[i:j]162 if t == 1:163 k = a1[i][j]164 return build(i, k) + build(k, j)165 if t == 2:166 return chr(a1[i][j]) * a2[i][j]167 p = a1[i][j]168 reps = a2[i][j]169 return build(i, i + p) * reps170171 out = build(0, n)172 if out != data:173 return 999.0174175 return cost[0][n] / float(n)176 except:177 return 999.01def solve(input):2 data = input.get("data", "")3 if not isinstance(data, str) or not data:4 return 999.056 # Mathematical/analytical approach: Entropy-based redundancy calculation7 8 from collections import Counter9 from math import log21011 def entropy(s):12 probabilities = [freq / len(s) for freq in Counter(s).values()]13 return -sum(p * log2(p) if p > 0 else 0 for p in probabilities)1415 def redundancy(s):16 max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 017 actual_entropy = entropy(s)18 return max_entropy - actual_entropy1920 # Calculate reduction in size possible based on redundancy21 reduction_potential = redundancy(data)2223 # Assuming compression is achieved based on redundancy24 max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data)))25 26 # Qualitative check if max_possible_compression_ratio makes sense27 if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0:28 return 999.02930 # Verify compression is lossless (hypothetical check here)31 # Normally, if we had a compression algorithm, we'd test decompress(compress(data)) == data32 33 # Returning the hypothetical compression performance34 return max_possible_compression_ratio