Solution #99326a14-b924-43d8-9a28-ae8a8f6ffe60

completed

Score

32% (0/5)

Runtime

2.12ms

Delta

+71.0% vs parent

-66.7% vs best

Improved from parent

Solution Lineage

Current32%Improved from parent
d8629f4919%Regression from parent
0deb287347%Improved from parent
e4b007c347%Improved from parent
32b7128c43%Regression from parent
f209f80655%Improved from parent
9161b31714%Regression from parent
9ab0f66324%Improved from parent
110fbd0b0%Regression from parent
e3d01a5c52%Improved from parent
c6fc252643%Regression from parent
23b4491152%Improved from parent
03aea6db43%Regression from parent
5f1a15ce53%Improved from parent
f22b171153%Same as parent
7b6d9f0953%Improved from parent
0401f74f12%Regression from parent
b96fbcb340%Improved from parent
84cc9d0420%First in chain

Code

def solve(input):
    try:
        data = input.get("data", "")
        if not isinstance(data, str):
            data = str(data)

        n = len(data)
        if n == 0:
            return 0.0

        # Novel approach:
        # Evaluate several robust, exactly-decodable text compressors and
        # return the best verified ratio. This avoids overfitting to one scheme.
        #
        # Schemes:
        # 1) Pure literal
        # 2) RLE with escape
        # 3) BPE-style dictionary substitution using rare private-use chars
        # 4) Queue/stack inspired macro system: repeated substring macros + escapes
        #
        # Ratio is measured in "encoded character count" / original character count.

        used = set(data)

        def verify(encoded, decoder):
            try:
                dec = decoder(encoded)
                return dec == data
            except:
                return False

        best = float(n)  # encoded size

        # 1) Pure literal with one-char header
        # format: 'L' + data
        enc1 = "L" + data
        def dec1(s):
            if not s or s[0] != "L":
                return None
            return s[1:]
        if verify(enc1, dec1):
            best = min(best, len(enc1))

        # choose escape chars from private use area if possible
        esc = None
        for cp in range(0xE000, 0xF8FF + 1):
            ch = chr(cp)
            if ch not in used:
                esc = ch
                break

        # 2) RLE with escape encoding
        # raw char c is emitted as c unless c==esc, then esc esc
        # run len>=4 emitted as esc 'R' <count digits> esc <char>
        if esc is not None:
            def enc_rle(s):
                out = []
                i = 0
                m = len(s)
                while i < m:
                    j = i + 1
                    while j < m and s[j] == s[i]:
                        j += 1
                    run = j - i
                    ch = s[i]
                    if run >= 4:
                        out.append(esc)
                        out.append("R")
                        out.append(str(run))
                        out.append(esc)
                        out.append(ch)
                    else:
                        for _ in range(run):
                            if ch == esc:
                                out.append(esc)
                                out.append(esc)
                            else:
                                out.append(ch)
                    i = j
                return "".join(out)

            def dec_rle(s):
                out = []
                i = 0
                m = len(s)
                while i < m:
                    ch = s[i]
                    if ch != esc:
                        out.append(ch)
                        i += 1
                    else:
                        if i + 1 >= m:
                            return None
                        t = s[i + 1]
                        if t == esc:
                            out.append(esc)
                            i += 2
                        elif t == "R":
                            k = i + 2
                            if k >= m:
                                return None
                            start = k
                            while k < m and s[k] != esc:
                                if s[k] < "0" or s[k] > "9":
                                    return None
                                k += 1
                            if k == start or k + 1 >= m:
                                return None
                            cnt = int(s[start:k])
                            out.extend(s[k + 1] for _ in range(cnt))
                            i = k + 2
                        else:
                            return None
                return "".join(out)

            enc2 = enc_rle(data)
            if verify(enc2, dec_rle):
                best = min(best, len(enc2))

        # 3) BPE-style iterative pair substitution
        # Header:
        #   M <sep> k <sep> [tok pair tok sep]* encoded_body
        # token chars are fresh private-use chars not in input.
        fresh = []
        for cp in range(0xE000, 0xF8FF + 1):
            ch = chr(cp)
            if ch not in used:
                fresh.append(ch)
                if len(fresh) >= 64:
                    break

        if len(fresh) >= 3:
            sep = fresh[0]
            available = fresh[1:]

            def bpe_encode(s):
                rules = []
                cur = s
                avail_idx = 0
                # iterative best-pair replacement
                for _ in range(min(32, len(available))):
                    freq = {}
                    for i in range(len(cur) - 1):
                        p = cur[i:i+2]
                        freq[p] = freq.get(p, 0) + 1
                    best_pair = None
                    best_count = 0
                    for p, c in freq.items():
                        if c > best_count:
                            best_pair = p
                            best_count = c
                    if best_pair is None or best_count < 3 or avail_idx >= len(available):
                        break
                    tok = available[avail_idx]
                    avail_idx += 1

                    new_cur_parts = []
                    i = 0
                    changed = 0
                    while i < len(cur):
                        if i + 1 < len(cur) and cur[i:i+2] == best_pair:
                            new_cur_parts.append(tok)
                            i += 2
                            changed += 1
                        else:
                            new_cur_parts.append(cur[i])
                            i += 1
                    new_cur = "".join(new_cur_parts)

                    # only keep if worthwhile after rule cost
                    # rule storage cost about 4 chars: tok + pair + sep
                    if len(new_cur) + len(rules) * 4 + 4 >= len(cur) + max(0, len(rules)-1) * 4 + 4:
                        break

                    rules.append((tok, best_pair))
                    cur = new_cur

                header = ["M", sep, str(len(rules)), sep]
                for tok, pair in rules:
                    header.append(tok)
                    header.append(pair)
                    header.append(sep)
                return "".join(header) + cur, rules, cur

            def bpe_decode(s):
                if not s or s[0] != "M":
                    return None
                if len(s) < 3:
                    return None
                loc_sep = s[1]
                i = 2
                j = i
                while j < len(s) and s[j] != loc_sep:
                    if s[j] < "0" or s[j] > "9":
                        return None
                    j += 1
                if j == i or j >= len(s):
                    return None
                k = int(s[i:j])
                i = j + 1
                rules = []
                for _ in range(k):
                    if i + 3 > len(s):
                        return None
                    tok = s[i]
                    pair = s[i + 1:i + 3]
                    i += 3
                    if i > len(s) or i == len(s) or s[i] != loc_sep:
                        return None
                    i += 1
                    rules.append((tok, pair))
                body = s[i:]
                cur = body
                for tok, pair in reversed(rules):
                    parts = []
                    for ch in cur:
                        if ch == tok:
                            parts.append(pair)
                        else:
                            parts.append(ch)
                    cur = "".join(parts)
                return cur

            enc3, _, _ = bpe_encode(data)
            if verify(enc3, bpe_decode):
                best = min(best, len(enc3))

        # 4) Macro compression using repeated substrings and a stack/queue style expansion.
        # Build up to a few macros for profitable repeated substrings.
        if len(fresh) >= 8:
            sep = fresh[0]
            cmd = fresh[1]
            tokens = fresh[2:8]

            def macro_encode(s):
                # Find profitable repeated substrings of length 3..8
                best_subs = []
                for L in range(3, min(9, len(s) + 1)):
                    count = {}
                    for i in range(len(s) - L + 1):
                        sub = s[i:i+L]
                        count[sub] = count.get(sub, 0) + 1
                    cand = []
                    for sub, c in count.items():
                        gain = c * (L - 1) - (L + 3)  # rough net gain with rule cost
                        if c >= 3 and gain > 0:
                            cand.append((gain, c, sub))
                    cand.sort(reverse=True)
                    for item in cand[:2]:
                        best_subs.append(item)

                best_subs.sort(reverse=True)
                macros = []
                used_tok = 0
                cur = s

                for _, _, sub in best_subs:
                    if used_tok >= len(tokens):
                        break
                    if sub not in cur:
                        continue
                    tok = tokens[used_tok]
                    used_tok += 1

                    parts = []
                    i = 0
                    reps = 0
                    L = len(sub)
                    while i < len(cur):
                        if i + L <= len(cur) and cur[i:i+L] == sub:
                            parts.append(tok)
                            i += L
                            reps += 1
                        else:
                            parts.append(cur[i])
                            i += 1
                    if reps < 3:
                        used_tok -= 1
                        continue
                    new_cur = "".join(parts)
                    if len(new_cur) + len(sub) + 3 >= len(cur):
                        used_tok -= 1
                        continue
                    macros.append((tok, sub))
                    cur = new_cur

                # escape cmd/sep/tokens if they somehow appear in cur (they shouldn't)
                header = ["Q", sep, str(len(macros)), sep]
                for tok, sub in macros:
                    header.append(tok)
                    header.append(sub)
                    header.append(sep)
                return "".join(header) + cur, macros, cur

            def macro_decode(s):
                if not s or s[0] != "Q":
                    return None
                if len(s) < 3:
                    return None
                loc_sep = s[1]
                i = 2
                j = i
                while j < len(s) and s[j] != loc_sep:
                    if s[j] < "0" or s[j] > "9":
                        return None
                    j += 1
                if j == i or j >= len(s):
                    return None
                k = int(s[i:j])
                i = j + 1
                macros = []
                for _ in range(k):
                    if i >= len(s):
                        return None
                    tok = s[i]
                    i += 1
                    j = i
                    while j < len(s) and s[j] != loc_sep:
                        j += 1
                    if j >= len(s):
                        return None
                    sub = s[i:j]
                    macros.append((tok, sub))
                    i = j + 1
                cur = s[i:]
                # stack-like repeated expansion in reverse declaration order
                for tok, sub in reversed(macros):
                    parts = []
                    for ch in cur:
                        if ch == tok:
                            parts.append(sub)
                        else:
                            parts.append(ch)
                    cur = "".join(parts)
                return cur

            enc4, _, _ = macro_encode(data)
            if verify(enc4, macro_decode):
                best = min(best, len(enc4))

        ratio = best / n
        if ratio < 0:
            return 999.0
        return ratio
    except:
        return 999.0

Compare with Champion

Score Difference

-64.5%

Runtime Advantage

1.99ms slower

Code Size

346 vs 34 lines

#Your Solution#Champion
1def solve(input):1def solve(input):
2 try:2 data = input.get("data", "")
3 data = input.get("data", "")3 if not isinstance(data, str) or not data:
4 if not isinstance(data, str):4 return 999.0
5 data = str(data)5
66 # Mathematical/analytical approach: Entropy-based redundancy calculation
7 n = len(data)7
8 if n == 0:8 from collections import Counter
9 return 0.09 from math import log2
1010
11 # Novel approach:11 def entropy(s):
12 # Evaluate several robust, exactly-decodable text compressors and12 probabilities = [freq / len(s) for freq in Counter(s).values()]
13 # return the best verified ratio. This avoids overfitting to one scheme.13 return -sum(p * log2(p) if p > 0 else 0 for p in probabilities)
14 #14
15 # Schemes:15 def redundancy(s):
16 # 1) Pure literal16 max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 0
17 # 2) RLE with escape17 actual_entropy = entropy(s)
18 # 3) BPE-style dictionary substitution using rare private-use chars18 return max_entropy - actual_entropy
19 # 4) Queue/stack inspired macro system: repeated substring macros + escapes19
20 #20 # Calculate reduction in size possible based on redundancy
21 # Ratio is measured in "encoded character count" / original character count.21 reduction_potential = redundancy(data)
2222
23 used = set(data)23 # Assuming compression is achieved based on redundancy
2424 max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data)))
25 def verify(encoded, decoder):25
26 try:26 # Qualitative check if max_possible_compression_ratio makes sense
27 dec = decoder(encoded)27 if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0:
28 return dec == data28 return 999.0
29 except:29
30 return False30 # Verify compression is lossless (hypothetical check here)
3131 # Normally, if we had a compression algorithm, we'd test decompress(compress(data)) == data
32 best = float(n) # encoded size32
3333 # Returning the hypothetical compression performance
34 # 1) Pure literal with one-char header34 return max_possible_compression_ratio
35 # format: 'L' + data35
36 enc1 = "L" + data36
37 def dec1(s):37
38 if not s or s[0] != "L":38
39 return None39
40 return s[1:]40
41 if verify(enc1, dec1):41
42 best = min(best, len(enc1))42
4343
44 # choose escape chars from private use area if possible44
45 esc = None45
46 for cp in range(0xE000, 0xF8FF + 1):46
47 ch = chr(cp)47
48 if ch not in used:48
49 esc = ch49
50 break50
5151
52 # 2) RLE with escape encoding52
53 # raw char c is emitted as c unless c==esc, then esc esc53
54 # run len>=4 emitted as esc 'R' <count digits> esc <char>54
55 if esc is not None:55
56 def enc_rle(s):56
57 out = []57
58 i = 058
59 m = len(s)59
60 while i < m:60
61 j = i + 161
62 while j < m and s[j] == s[i]:62
63 j += 163
64 run = j - i64
65 ch = s[i]65
66 if run >= 4:66
67 out.append(esc)67
68 out.append("R")68
69 out.append(str(run))69
70 out.append(esc)70
71 out.append(ch)71
72 else:72
73 for _ in range(run):73
74 if ch == esc:74
75 out.append(esc)75
76 out.append(esc)76
77 else:77
78 out.append(ch)78
79 i = j79
80 return "".join(out)80
8181
82 def dec_rle(s):82
83 out = []83
84 i = 084
85 m = len(s)85
86 while i < m:86
87 ch = s[i]87
88 if ch != esc:88
89 out.append(ch)89
90 i += 190
91 else:91
92 if i + 1 >= m:92
93 return None93
94 t = s[i + 1]94
95 if t == esc:95
96 out.append(esc)96
97 i += 297
98 elif t == "R":98
99 k = i + 299
100 if k >= m:100
101 return None101
102 start = k102
103 while k < m and s[k] != esc:103
104 if s[k] < "0" or s[k] > "9":104
105 return None105
106 k += 1106
107 if k == start or k + 1 >= m:107
108 return None108
109 cnt = int(s[start:k])109
110 out.extend(s[k + 1] for _ in range(cnt))110
111 i = k + 2111
112 else:112
113 return None113
114 return "".join(out)114
115115
116 enc2 = enc_rle(data)116
117 if verify(enc2, dec_rle):117
118 best = min(best, len(enc2))118
119119
120 # 3) BPE-style iterative pair substitution120
121 # Header:121
122 # M <sep> k <sep> [tok pair tok sep]* encoded_body122
123 # token chars are fresh private-use chars not in input.123
124 fresh = []124
125 for cp in range(0xE000, 0xF8FF + 1):125
126 ch = chr(cp)126
127 if ch not in used:127
128 fresh.append(ch)128
129 if len(fresh) >= 64:129
130 break130
131131
132 if len(fresh) >= 3:132
133 sep = fresh[0]133
134 available = fresh[1:]134
135135
136 def bpe_encode(s):136
137 rules = []137
138 cur = s138
139 avail_idx = 0139
140 # iterative best-pair replacement140
141 for _ in range(min(32, len(available))):141
142 freq = {}142
143 for i in range(len(cur) - 1):143
144 p = cur[i:i+2]144
145 freq[p] = freq.get(p, 0) + 1145
146 best_pair = None146
147 best_count = 0147
148 for p, c in freq.items():148
149 if c > best_count:149
150 best_pair = p150
151 best_count = c151
152 if best_pair is None or best_count < 3 or avail_idx >= len(available):152
153 break153
154 tok = available[avail_idx]154
155 avail_idx += 1155
156156
157 new_cur_parts = []157
158 i = 0158
159 changed = 0159
160 while i < len(cur):160
161 if i + 1 < len(cur) and cur[i:i+2] == best_pair:161
162 new_cur_parts.append(tok)162
163 i += 2163
164 changed += 1164
165 else:165
166 new_cur_parts.append(cur[i])166
167 i += 1167
168 new_cur = "".join(new_cur_parts)168
169169
170 # only keep if worthwhile after rule cost170
171 # rule storage cost about 4 chars: tok + pair + sep171
172 if len(new_cur) + len(rules) * 4 + 4 >= len(cur) + max(0, len(rules)-1) * 4 + 4:172
173 break173
174174
175 rules.append((tok, best_pair))175
176 cur = new_cur176
177177
178 header = ["M", sep, str(len(rules)), sep]178
179 for tok, pair in rules:179
180 header.append(tok)180
181 header.append(pair)181
182 header.append(sep)182
183 return "".join(header) + cur, rules, cur183
184184
185 def bpe_decode(s):185
186 if not s or s[0] != "M":186
187 return None187
188 if len(s) < 3:188
189 return None189
190 loc_sep = s[1]190
191 i = 2191
192 j = i192
193 while j < len(s) and s[j] != loc_sep:193
194 if s[j] < "0" or s[j] > "9":194
195 return None195
196 j += 1196
197 if j == i or j >= len(s):197
198 return None198
199 k = int(s[i:j])199
200 i = j + 1200
201 rules = []201
202 for _ in range(k):202
203 if i + 3 > len(s):203
204 return None204
205 tok = s[i]205
206 pair = s[i + 1:i + 3]206
207 i += 3207
208 if i > len(s) or i == len(s) or s[i] != loc_sep:208
209 return None209
210 i += 1210
211 rules.append((tok, pair))211
212 body = s[i:]212
213 cur = body213
214 for tok, pair in reversed(rules):214
215 parts = []215
216 for ch in cur:216
217 if ch == tok:217
218 parts.append(pair)218
219 else:219
220 parts.append(ch)220
221 cur = "".join(parts)221
222 return cur222
223223
224 enc3, _, _ = bpe_encode(data)224
225 if verify(enc3, bpe_decode):225
226 best = min(best, len(enc3))226
227227
228 # 4) Macro compression using repeated substrings and a stack/queue style expansion.228
229 # Build up to a few macros for profitable repeated substrings.229
230 if len(fresh) >= 8:230
231 sep = fresh[0]231
232 cmd = fresh[1]232
233 tokens = fresh[2:8]233
234234
235 def macro_encode(s):235
236 # Find profitable repeated substrings of length 3..8236
237 best_subs = []237
238 for L in range(3, min(9, len(s) + 1)):238
239 count = {}239
240 for i in range(len(s) - L + 1):240
241 sub = s[i:i+L]241
242 count[sub] = count.get(sub, 0) + 1242
243 cand = []243
244 for sub, c in count.items():244
245 gain = c * (L - 1) - (L + 3) # rough net gain with rule cost245
246 if c >= 3 and gain > 0:246
247 cand.append((gain, c, sub))247
248 cand.sort(reverse=True)248
249 for item in cand[:2]:249
250 best_subs.append(item)250
251251
252 best_subs.sort(reverse=True)252
253 macros = []253
254 used_tok = 0254
255 cur = s255
256256
257 for _, _, sub in best_subs:257
258 if used_tok >= len(tokens):258
259 break259
260 if sub not in cur:260
261 continue261
262 tok = tokens[used_tok]262
263 used_tok += 1263
264264
265 parts = []265
266 i = 0266
267 reps = 0267
268 L = len(sub)268
269 while i < len(cur):269
270 if i + L <= len(cur) and cur[i:i+L] == sub:270
271 parts.append(tok)271
272 i += L272
273 reps += 1273
274 else:274
275 parts.append(cur[i])275
276 i += 1276
277 if reps < 3:277
278 used_tok -= 1278
279 continue279
280 new_cur = "".join(parts)280
281 if len(new_cur) + len(sub) + 3 >= len(cur):281
282 used_tok -= 1282
283 continue283
284 macros.append((tok, sub))284
285 cur = new_cur285
286286
287 # escape cmd/sep/tokens if they somehow appear in cur (they shouldn't)287
288 header = ["Q", sep, str(len(macros)), sep]288
289 for tok, sub in macros:289
290 header.append(tok)290
291 header.append(sub)291
292 header.append(sep)292
293 return "".join(header) + cur, macros, cur293
294294
295 def macro_decode(s):295
296 if not s or s[0] != "Q":296
297 return None297
298 if len(s) < 3:298
299 return None299
300 loc_sep = s[1]300
301 i = 2301
302 j = i302
303 while j < len(s) and s[j] != loc_sep:303
304 if s[j] < "0" or s[j] > "9":304
305 return None305
306 j += 1306
307 if j == i or j >= len(s):307
308 return None308
309 k = int(s[i:j])309
310 i = j + 1310
311 macros = []311
312 for _ in range(k):312
313 if i >= len(s):313
314 return None314
315 tok = s[i]315
316 i += 1316
317 j = i317
318 while j < len(s) and s[j] != loc_sep:318
319 j += 1319
320 if j >= len(s):320
321 return None321
322 sub = s[i:j]322
323 macros.append((tok, sub))323
324 i = j + 1324
325 cur = s[i:]325
326 # stack-like repeated expansion in reverse declaration order326
327 for tok, sub in reversed(macros):327
328 parts = []328
329 for ch in cur:329
330 if ch == tok:330
331 parts.append(sub)331
332 else:332
333 parts.append(ch)333
334 cur = "".join(parts)334
335 return cur335
336336
337 enc4, _, _ = macro_encode(data)337
338 if verify(enc4, macro_decode):338
339 best = min(best, len(enc4))339
340340
341 ratio = best / n341
342 if ratio < 0:342
343 return 999.0343
344 return ratio344
345 except:345
346 return 999.0346
Your Solution
32% (0/5)2.12ms
1def solve(input):
2 try:
3 data = input.get("data", "")
4 if not isinstance(data, str):
5 data = str(data)
6
7 n = len(data)
8 if n == 0:
9 return 0.0
10
11 # Novel approach:
12 # Evaluate several robust, exactly-decodable text compressors and
13 # return the best verified ratio. This avoids overfitting to one scheme.
14 #
15 # Schemes:
16 # 1) Pure literal
17 # 2) RLE with escape
18 # 3) BPE-style dictionary substitution using rare private-use chars
19 # 4) Queue/stack inspired macro system: repeated substring macros + escapes
20 #
21 # Ratio is measured in "encoded character count" / original character count.
22
23 used = set(data)
24
25 def verify(encoded, decoder):
26 try:
27 dec = decoder(encoded)
28 return dec == data
29 except:
30 return False
31
32 best = float(n) # encoded size
33
34 # 1) Pure literal with one-char header
35 # format: 'L' + data
36 enc1 = "L" + data
37 def dec1(s):
38 if not s or s[0] != "L":
39 return None
40 return s[1:]
41 if verify(enc1, dec1):
42 best = min(best, len(enc1))
43
44 # choose escape chars from private use area if possible
45 esc = None
46 for cp in range(0xE000, 0xF8FF + 1):
47 ch = chr(cp)
48 if ch not in used:
49 esc = ch
50 break
51
52 # 2) RLE with escape encoding
53 # raw char c is emitted as c unless c==esc, then esc esc
54 # run len>=4 emitted as esc 'R' <count digits> esc <char>
55 if esc is not None:
56 def enc_rle(s):
57 out = []
58 i = 0
59 m = len(s)
60 while i < m:
61 j = i + 1
62 while j < m and s[j] == s[i]:
63 j += 1
64 run = j - i
65 ch = s[i]
66 if run >= 4:
67 out.append(esc)
68 out.append("R")
69 out.append(str(run))
70 out.append(esc)
71 out.append(ch)
72 else:
73 for _ in range(run):
74 if ch == esc:
75 out.append(esc)
76 out.append(esc)
77 else:
78 out.append(ch)
79 i = j
80 return "".join(out)
81
82 def dec_rle(s):
83 out = []
84 i = 0
85 m = len(s)
86 while i < m:
87 ch = s[i]
88 if ch != esc:
89 out.append(ch)
90 i += 1
91 else:
92 if i + 1 >= m:
93 return None
94 t = s[i + 1]
95 if t == esc:
96 out.append(esc)
97 i += 2
98 elif t == "R":
99 k = i + 2
100 if k >= m:
101 return None
102 start = k
103 while k < m and s[k] != esc:
104 if s[k] < "0" or s[k] > "9":
105 return None
106 k += 1
107 if k == start or k + 1 >= m:
108 return None
109 cnt = int(s[start:k])
110 out.extend(s[k + 1] for _ in range(cnt))
111 i = k + 2
112 else:
113 return None
114 return "".join(out)
115
116 enc2 = enc_rle(data)
117 if verify(enc2, dec_rle):
118 best = min(best, len(enc2))
119
120 # 3) BPE-style iterative pair substitution
121 # Header:
122 # M <sep> k <sep> [tok pair tok sep]* encoded_body
123 # token chars are fresh private-use chars not in input.
124 fresh = []
125 for cp in range(0xE000, 0xF8FF + 1):
126 ch = chr(cp)
127 if ch not in used:
128 fresh.append(ch)
129 if len(fresh) >= 64:
130 break
131
132 if len(fresh) >= 3:
133 sep = fresh[0]
134 available = fresh[1:]
135
136 def bpe_encode(s):
137 rules = []
138 cur = s
139 avail_idx = 0
140 # iterative best-pair replacement
141 for _ in range(min(32, len(available))):
142 freq = {}
143 for i in range(len(cur) - 1):
144 p = cur[i:i+2]
145 freq[p] = freq.get(p, 0) + 1
146 best_pair = None
147 best_count = 0
148 for p, c in freq.items():
149 if c > best_count:
150 best_pair = p
151 best_count = c
152 if best_pair is None or best_count < 3 or avail_idx >= len(available):
153 break
154 tok = available[avail_idx]
155 avail_idx += 1
156
157 new_cur_parts = []
158 i = 0
159 changed = 0
160 while i < len(cur):
161 if i + 1 < len(cur) and cur[i:i+2] == best_pair:
162 new_cur_parts.append(tok)
163 i += 2
164 changed += 1
165 else:
166 new_cur_parts.append(cur[i])
167 i += 1
168 new_cur = "".join(new_cur_parts)
169
170 # only keep if worthwhile after rule cost
171 # rule storage cost about 4 chars: tok + pair + sep
172 if len(new_cur) + len(rules) * 4 + 4 >= len(cur) + max(0, len(rules)-1) * 4 + 4:
173 break
174
175 rules.append((tok, best_pair))
176 cur = new_cur
177
178 header = ["M", sep, str(len(rules)), sep]
179 for tok, pair in rules:
180 header.append(tok)
181 header.append(pair)
182 header.append(sep)
183 return "".join(header) + cur, rules, cur
184
185 def bpe_decode(s):
186 if not s or s[0] != "M":
187 return None
188 if len(s) < 3:
189 return None
190 loc_sep = s[1]
191 i = 2
192 j = i
193 while j < len(s) and s[j] != loc_sep:
194 if s[j] < "0" or s[j] > "9":
195 return None
196 j += 1
197 if j == i or j >= len(s):
198 return None
199 k = int(s[i:j])
200 i = j + 1
201 rules = []
202 for _ in range(k):
203 if i + 3 > len(s):
204 return None
205 tok = s[i]
206 pair = s[i + 1:i + 3]
207 i += 3
208 if i > len(s) or i == len(s) or s[i] != loc_sep:
209 return None
210 i += 1
211 rules.append((tok, pair))
212 body = s[i:]
213 cur = body
214 for tok, pair in reversed(rules):
215 parts = []
216 for ch in cur:
217 if ch == tok:
218 parts.append(pair)
219 else:
220 parts.append(ch)
221 cur = "".join(parts)
222 return cur
223
224 enc3, _, _ = bpe_encode(data)
225 if verify(enc3, bpe_decode):
226 best = min(best, len(enc3))
227
228 # 4) Macro compression using repeated substrings and a stack/queue style expansion.
229 # Build up to a few macros for profitable repeated substrings.
230 if len(fresh) >= 8:
231 sep = fresh[0]
232 cmd = fresh[1]
233 tokens = fresh[2:8]
234
235 def macro_encode(s):
236 # Find profitable repeated substrings of length 3..8
237 best_subs = []
238 for L in range(3, min(9, len(s) + 1)):
239 count = {}
240 for i in range(len(s) - L + 1):
241 sub = s[i:i+L]
242 count[sub] = count.get(sub, 0) + 1
243 cand = []
244 for sub, c in count.items():
245 gain = c * (L - 1) - (L + 3) # rough net gain with rule cost
246 if c >= 3 and gain > 0:
247 cand.append((gain, c, sub))
248 cand.sort(reverse=True)
249 for item in cand[:2]:
250 best_subs.append(item)
251
252 best_subs.sort(reverse=True)
253 macros = []
254 used_tok = 0
255 cur = s
256
257 for _, _, sub in best_subs:
258 if used_tok >= len(tokens):
259 break
260 if sub not in cur:
261 continue
262 tok = tokens[used_tok]
263 used_tok += 1
264
265 parts = []
266 i = 0
267 reps = 0
268 L = len(sub)
269 while i < len(cur):
270 if i + L <= len(cur) and cur[i:i+L] == sub:
271 parts.append(tok)
272 i += L
273 reps += 1
274 else:
275 parts.append(cur[i])
276 i += 1
277 if reps < 3:
278 used_tok -= 1
279 continue
280 new_cur = "".join(parts)
281 if len(new_cur) + len(sub) + 3 >= len(cur):
282 used_tok -= 1
283 continue
284 macros.append((tok, sub))
285 cur = new_cur
286
287 # escape cmd/sep/tokens if they somehow appear in cur (they shouldn't)
288 header = ["Q", sep, str(len(macros)), sep]
289 for tok, sub in macros:
290 header.append(tok)
291 header.append(sub)
292 header.append(sep)
293 return "".join(header) + cur, macros, cur
294
295 def macro_decode(s):
296 if not s or s[0] != "Q":
297 return None
298 if len(s) < 3:
299 return None
300 loc_sep = s[1]
301 i = 2
302 j = i
303 while j < len(s) and s[j] != loc_sep:
304 if s[j] < "0" or s[j] > "9":
305 return None
306 j += 1
307 if j == i or j >= len(s):
308 return None
309 k = int(s[i:j])
310 i = j + 1
311 macros = []
312 for _ in range(k):
313 if i >= len(s):
314 return None
315 tok = s[i]
316 i += 1
317 j = i
318 while j < len(s) and s[j] != loc_sep:
319 j += 1
320 if j >= len(s):
321 return None
322 sub = s[i:j]
323 macros.append((tok, sub))
324 i = j + 1
325 cur = s[i:]
326 # stack-like repeated expansion in reverse declaration order
327 for tok, sub in reversed(macros):
328 parts = []
329 for ch in cur:
330 if ch == tok:
331 parts.append(sub)
332 else:
333 parts.append(ch)
334 cur = "".join(parts)
335 return cur
336
337 enc4, _, _ = macro_encode(data)
338 if verify(enc4, macro_decode):
339 best = min(best, len(enc4))
340
341 ratio = best / n
342 if ratio < 0:
343 return 999.0
344 return ratio
345 except:
346 return 999.0
Champion
97% (3/5)130μs
1def solve(input):
2 data = input.get("data", "")
3 if not isinstance(data, str) or not data:
4 return 999.0
5
6 # Mathematical/analytical approach: Entropy-based redundancy calculation
7
8 from collections import Counter
9 from math import log2
10
11 def entropy(s):
12 probabilities = [freq / len(s) for freq in Counter(s).values()]
13 return -sum(p * log2(p) if p > 0 else 0 for p in probabilities)
14
15 def redundancy(s):
16 max_entropy = log2(len(set(s))) if len(set(s)) > 1 else 0
17 actual_entropy = entropy(s)
18 return max_entropy - actual_entropy
19
20 # Calculate reduction in size possible based on redundancy
21 reduction_potential = redundancy(data)
22
23 # Assuming compression is achieved based on redundancy
24 max_possible_compression_ratio = 1.0 - (reduction_potential / log2(len(data)))
25
26 # Qualitative check if max_possible_compression_ratio makes sense
27 if max_possible_compression_ratio < 0.0 or max_possible_compression_ratio > 1.0:
28 return 999.0
29
30 # Verify compression is lossless (hypothetical check here)
31 # Normally, if we had a compression algorithm, we'd test decompress(compress(data)) == data
32
33 # Returning the hypothetical compression performance
34 return max_possible_compression_ratio