TQLCTF-Wordle 题目出自当下火热的填字游戏
填字游戏的规则为输入五个字母,然后分别返回五个颜色
- 绿色:位置和字母均正确
- 黄色:字母正确位置不正确
- 灰色:字母和位置均不正确
我们的任务就是用最少的次数,把答案找出来
在该题目中,颜色可以通过字节流分辨出来。在这之前,我在 B 站大学看到过一个讲述相关技术的视频,于是我就翻了下。
利用信息论解决 Wordle 问题
理论存在,实践开始
于是,又写了一个 10K 的代码,发现自己的代码只能做到平均 4.3 左右。
好废物啊我
于是上 Github 找轮子 Wordle-solver
这个开源项目能做到平均 3.4,然后就有了如下 10K 代码
import os import re import pwn import time import string import random import argparse from collections.abc import Sequence, Mapping, Iterable from typing import Optional from array import array ALL_LETTERS = 'abcdefghijklmnopqrstuvwxyz' GREEN = '\x1b[42m \x1b[0m' YELLOW = '\x1b[43m \x1b[0m' WHITE = '\x1b[47m \x1b[0m' class WordleSolver: def __init__(self, wordlen: int = 5, dictfile_solutions: str = './valid_words.txt', dictfile_guesses: Optional[str] = './allowed_guesses.txt', allow_dup_letters: bool = True, hard_mode: bool = False, const_first_guess: Optional[str] = 'crane'): self.wordlen = wordlen self.hard_mode = hard_mode self.all_solution_words = self._load_words(dictfile_solutions, allow_dup_letters) if dictfile_guesses == None: self.all_guess_words = self.all_solution_words.copy() else: self.all_guess_words = self._load_words(dictfile_guesses, allow_dup_letters) self._fast_word_result_buf = array('b', [ 0 for i in range(256) ]) self.const_first_guess = const_first_guess self.reset() def _load_words(self, dictfile: str, allow_dup_letters: bool) -> None: with open(dictfile, 'r') as f: all_words = [ line.lower().strip() for line in f if re.fullmatch(r'[a-z]+', line.strip()) ] all_words = [ word for word in all_words if len(word) == self.wordlen ] all_words = list(set(all_words)) def has_dup_letters(word): letterset = set() for l in word: if l in letterset: return True letterset.add(l) return False if not allow_dup_letters: all_words = [ word for word in all_words if not has_dup_letters(word) ] return all_words def _get_letter_counts(word: str, all_letters: bool = False) -> dict[str, int]: r = { l : 0 for l in ALL_LETTERS } if all_letters else {} for l in word: r[l] = r.get(l, 0) + 1 return r def _get_letter_count_ranges_of_words(words: Sequence[str]) -> dict[str, tuple[int, int]]: r = {} for word in words: for letter, count in WordleSolver._get_letter_counts(word, True).items(): if letter in r: c = r[letter] if count < c[0]: c = (count, c[1]) if count > c[1]: c = (c[0], count) r[letter] = c else: r[letter] = (count, count) for letter in ALL_LETTERS: if letter not in r: r[letter] = (0, 0) return r def reset(self) -> None: self.positions = [] for i in range(self.wordlen): self.positions.append(set(ALL_LETTERS)) self.letter_counts = WordleSolver._get_letter_count_ranges_of_words(self.all_solution_words) self.tried_words = set() self.tried_word_list = [] self.potential_solutions = set(self.all_solution_words) self.solved = False self.first_word_queue = [ self.const_first_guess ] if self.const_first_guess and self.const_first_guess in self.all_guess_words else [] self.potential_guesses = set(self.all_guess_words) def _fast_word_result(self, guess: str, target: str): retval = 0 target_lcounts = self._fast_word_result_buf for i in range(self.wordlen): target_lcounts[ord(target[i])] += 1 placeval = 1 for i in range(self.wordlen): if guess[i] == target[i]: retval += placeval * 2 target_lcounts[ord(target[i])] -= 1 placeval *= 3 placeval = 1 for i in range(self.wordlen): g = guess[i] t = target[i] ordg = ord(g) if g != t and target_lcounts[ordg] > 0: retval += placeval target_lcounts[ordg] -= 1 placeval *= 3 for i in range(self.wordlen): target_lcounts[ord(target[i])] = 0 return retval def update(self, guessed_word: str, result: str) -> None: assert(len(guessed_word) == self.wordlen) assert(len(result) == self.wordlen) assert(re.fullmatch(r'[CLX]+', result)) guess_lcounts = WordleSolver._get_letter_counts(guessed_word, True) result_lcounts = { l : 0 for l in ALL_LETTERS } for letter, rchar in zip(guessed_word, result): if rchar == 'C' or rchar == 'L': result_lcounts[letter] += 1 for letter in guess_lcounts: gc = guess_lcounts[letter] rc = result_lcounts[letter] crange = self.letter_counts[letter] assert(gc >= rc) if gc > rc: crange = (rc, rc) else: crange = (rc, crange[1]) self.letter_counts[letter] = crange for i, (letter, rchar) in enumerate(zip(guessed_word, result)): if rchar == 'C': self.positions[i] = set([ letter ]) else: self.positions[i].discard(letter) lbound_sum = sum(( lbound for lbound, ubound in self.letter_counts.values() )) if lbound_sum >= self.wordlen: self.letter_counts = { letter : ( lbound, lbound ) for letter, (lbound, ubound) in self.letter_counts.items() } for letter, (lbound, ubound) in self.letter_counts.items(): nexclusive = sum(( 1 if letter in lset and len(lset) == 1 else 0 for lset in self.positions )) if nexclusive >= ubound: for lset in self.positions: if not (letter in lset and len(lset) == 1): lset.discard(letter) self.tried_words.add(guessed_word) self.tried_word_list.append(guessed_word) self._filter_words_by_known_info(self.potential_solutions) if self.hard_mode: self._filter_words_by_known_info(self.potential_guesses) self.letter_counts = WordleSolver._get_letter_count_ranges_of_words(list(self.potential_solutions)) if result == 'C' * self.wordlen: self.solved = True self.potential_solutions = set([ guessed_word ]) def _filter_words_by_known_info(self, words: set[str]) -> None: regex_str = ''.join([ '[' + ''.join(list(letterset)) + ']' for letterset in self.positions ]) rx = re.compile(regex_str) def word_within_bounds(word): lcounts = WordleSolver._get_letter_counts(word, True) for letter, lcount in lcounts.items(): lbound, ubound = self.letter_counts[letter] if not (lbound <= lcount <= ubound): return False return True for word in list(words): if not (rx.fullmatch(word) and word not in self.tried_words and word_within_bounds(word)): words.discard(word) def get_guess(self) -> str: if len(self.first_word_queue): return self.first_word_queue.pop(0) if len(self.potential_solutions) == 0: raise Exception('Answer unknown') elif len(self.potential_solutions) <= 2: return list(self.potential_solutions)[0] best_word = None best_score = -1 for word in self.potential_guesses: solution_group_counts: dict[str, int] = {} for potsol in self.potential_solutions: resstr = self._fast_word_result(word, potsol) solution_group_counts[resstr] = solution_group_counts.get(resstr, 0) + 1 avg_expected_group_size = sum(( s * s for s in solution_group_counts.values() )) / sum(( s for s in solution_group_counts.values() )) word_score = avg_expected_group_size if word in self.potential_solutions: word_score -= 0.01 if word_score < best_score or best_score == -1: best_score = word_score best_word = word return best_word def get_word_result(guess: str, target: str) -> str: r_list = [ 'X' ] * len(target) target_lcounts = WordleSolver._get_letter_counts(target, True) for i, (guess_letter, target_letter) in enumerate(zip(guess, target)): if guess_letter == target_letter: r_list[i] = 'C' target_lcounts[target_letter] -= 1 for i, (guess_letter, target_letter) in enumerate(zip(guess, target)): if guess_letter != target_letter and target_lcounts[guess_letter] > 0: r_list[i] = 'L' target_lcounts[guess_letter] -= 1 return ''.join(r_list) def run_auto(self, target_word: str) -> int: self.reset() nguesses = 0 while True: nguesses += 1 guess = self.get_guess() if guess == target_word: break res = WordleSolver.get_word_result(guess, target_word) self.update(guess, res) return nguesses host = '47.106.102.129' port = 43511 while(1): nc = pwn.remote(host, port) nc.recv() nc.sendline('2') print(nc.recvline()) i=0 while(i<512): solver = WordleSolver() while True: guess = solver.get_guess() print(guess) nc.sendline(guess) tmp=nc.recvline() tmp=tmp.replace(GREEN.encode(),"C".encode()) tmp=tmp.replace(YELLOW.encode(),"L".encode()) tmp=tmp.replace(WHITE.encode(),"X".encode()) tmp=tmp.replace("Correct!".encode(),"".encode()) tmp=tmp.replace("Wrong!".encode(),"".encode()) tmp=tmp.replace(" ".encode(),"".encode()) tmp=tmp.replace("\n".encode(),"".encode()) tmp=tmp.replace(">".encode(),"".encode()) print(tmp) res = tmp.decode() if(res=="CCCCC"): print(nc.recvline()) break if re.fullmatch(r'[a-z]{' + str(solver.wordlen) + '} [CXL]{' + str(solver.wordlen) + '}', res): parts = res.split(' ') nc.sendline(parts[0]) solver.update(parts[0], parts[1]) else: solver.update(guess, res) i=i+1 nc.interactive() nc.close()
这个代码能够稳定 1 难度和概率 2(30%)难度,以及基本上没可能 3 难度
于是只能得到一个残缺的 Flag 以及出题人的嘲笑
1 得到的嘲笑:UWNYZ1c5dzR3UWQ9dj9oY3Rhdy9tb2MuZWJ1dHVveS53d3cvLzpzcHR0aA==
2 得到的残缺的 Flag:F7_7__S324rsT3_T} L3_CUt1R~s_tn@WITO_eCbQ {rRh1lty1EDlF5
在比赛结束后,有疑似非预期解
Python-random-module-cracker
通过 Round ID
的随机数反推答案,也真有一手的
2022/2/22:
正解 WP 节选如下,代码就不展示了
从这⾥可以看出,randrange 实际上就是在调⽤ getrandbits (32),所以这⾥就可以套⽤ Python random module cracker 来预测答案!
总结⼀下:
通过两轮 Easy Mode 获得前 624 道题的答案,结合 ID 反推 getrandbits (32) 的值并丢进 random-cracker 获得预测随机数的能⼒
注意到 40902^{20}
离 2^{32}
还有⼀定的差距,结合代码可知如果 getrandbits (32) ⼤于 40602^{20}
,则该数将会被丢弃导致预测随机数失败。经过计算可知,成功率为 \left(\frac{4090} {4096}\right)^{624}=0.4006237250
,⼗分可观。
exp: https://github.com/Konano/CTF-challenges/blob/master/wordle/exp.py
顺便还埋了⼀些彩蛋:
- Easy Mode 通关后会获得「NULL」 Normal Mode 通关后会获得 「UWNYZ1c5dzR3UWQ9dj9oY3Rhdy9tb2MuZWJ1dHVveS53d3cvLzpzcHR0aA==」
- Hard Mode 通关后会获得经过 random.shuffle 后的 flag
- Insane Mode 通关后会获得完整的 flag