XCTF lambda 题解

2023-03-31
#ctf #re

题目给了一个可执行程序和一份源码(一般有源码的题目都不容易)。源码可以看到是实现了一个lambda的解释器。

附件地址

lambda表达式结构体如下

typedef enum _ExpType
{
	kSymbol, kLambda, kCall
}ExpType;

typedef unsigned char Symbol;
typedef struct _Exp Exp;
typedef struct _Closure Closure;
typedef struct _Env Env;
typedef struct _LazyArg LazyArg;

// Expression represents AST of lambda calculus
struct _Exp
{
	ExpType type;
	union
	{
		Symbol symbol;
		struct
		{
			Symbol arg;
			const Exp* body;
		}lambda;
		struct
		{
			const Exp* rator;
			const Exp* rand;
		}call;
	}u;
};

程序运行时会将输入的字符转换成Church Number,放入程序内置的表达式的指定位置。因此第一步是要将数据段的lambda表达式提取出来。 程序会将输入分成11段,放入11个lambda表达式中进行校验,每个lambda表达式会返回一个Church Boolean。 先编写简单的python脚本把程序内的11段lambda表达式提取出来

import dump
import sys
sys.setrecursionlimit(50000)
def get_index(addr):
    return (addr-0x004040a0)//8

class Exp:
    def to_str(self) -> str:
        pass

class Symbol(Exp):
    c:str

    def __init__(self, c):
        self.c=c
    def to_str(self):
        return self.c

class Call(Exp):
    rator: Exp
    rand: Exp
    def __init__(self, rator, rand):
        self.rator=rator
        self.rand=rand
    def to_str(self):
        return '(' + self.rator.to_str() + ' ' + self.rand.to_str() + ')'

class Lambda(Exp):
    arg: str
    body: Exp
    def __init__(self, c, body):
        self.arg=c
        self.body=body
    def to_str(self):
        return '( \\' + self.arg + '. ' + self.body.to_str() + ')'

input_list=[ 
    0x000000000040b9a8, 0x000000000040b978, 0x000000000040b948,
    0x0000000000427938, 0x0000000000427908, 0x00000000004278d8,
    0x0000000000445de8, 0x0000000000445db8, 0x0000000000445d88,
    0x00000000004615f8, 0x00000000004615c8, 0x0000000000461598,
    0x000000000047cad0, 0x000000000047caa0, 0x000000000047ca70,
    0x00000000004a5ca8, 0x00000000004a5c78, 0x00000000004a5c48,
    0x00000000004c3448, 0x00000000004c3418, 0x00000000004c33e8,
    0x00000000004e9260, 0x00000000004e9230, 0x00000000004e9200,
    0x0000000000505eb0, 0x0000000000505e80, 0x0000000000505e50,
    0x000000000051f438, 0x000000000051f408, 0x000000000051f3d8,
    0x0000000000536518, 0x00000000005364e8, 0x00000000005364b8]

def get_exp(addr) -> Exp:
    starti=get_index(addr)
    match dump.code[starti]:
        case 0:
            return Symbol('var_%03d' % dump.code[starti+1])
        case 1:
            return Lambda('var_%03d'%dump.code[starti+1], get_exp(dump.code[starti+2]))
        case 2:
            if addr in input_list:
                return Call(get_exp(dump.code[starti+1]),Symbol('input'+str(input_list.index(addr))))

            return Call(get_exp(dump.code[starti+1]),get_exp(dump.code[starti+2]))


start_points=[
    0x0000000000420030, 0x000000000043e528, 0x0000000000459db0,
    0x0000000000475158, 0x000000000049e448, 0x00000000004bba58,
    0x00000000004e1928, 0x00000000004fe5a8, 0x0000000000517b38,
    0x000000000052eb58, 0x0000000000558400]
for start_point in start_points:
    root=get_exp(start_point)
    print(root.to_str())

得到11段非常相似的表达式,大致如下

((( \var_188. ( \var_027. ((( \var_182. ( \var_219. ((var_182 var_219) var_182))) (( \var_139. ((var_139 ( \var_106. ( \var_029. ( \var_066. var_066)))) ( \var_025. ( \var_128. var_025)))) ((( \var_227. ( ...
((( \var_176. ( \var_170. ((( \var_103. ( \var_112. ((var_103 var_112) var_103))) (( \var_223. ((var_223 ( \var_183. ( \var_246. ( \var_057. var_057)))) ( \var_054. ( \var_044. var_054)))) ((( \var_228. ( ...
((( \var_201. ( \var_071. ((( \var_077. ( \var_234. ((var_077 var_234) var_077))) (( \var_034. ((var_034 ( \var_251. ( \var_093. ( \var_043. var_043)))) ( \var_024. ( \var_031. var_024)))) ((( \var_118. ( ...
((( \var_061. ( \var_230. ((( \var_144. ( \var_226. ((var_144 var_226) var_144))) (( \var_153. ((var_153 ( \var_127. ( \var_246. ( \var_169. var_169)))) ( \var_105. ( \var_197. var_105)))) ((( \var_002. ( ...
((( \var_211. ( \var_143. ((( \var_035. ( \var_115. ((var_035 var_115) var_035))) (( \var_252. ((var_252 ( \var_233. ( \var_041. ( \var_047. var_047)))) ( \var_020. ( \var_214. var_020)))) ((( \var_102. ( ...
((( \var_120. ( \var_051. ((( \var_229. ( \var_195. ((var_229 var_195) var_229))) (( \var_187. ((var_187 ( \var_099. ( \var_031. ( \var_223. var_223)))) ( \var_079. ( \var_050. var_079)))) ((( \var_104. ( ...
((( \var_049. ( \var_081. ((( \var_153. ( \var_015. ((var_153 var_015) var_153))) (( \var_198. ((var_198 ( \var_072. ( \var_163. ( \var_049. var_049)))) ( \var_133. ( \var_206. var_133)))) ((( \var_084. ( ...
((( \var_094. ( \var_214. ((( \var_146. ( \var_181. ((var_146 var_181) var_146))) (( \var_096. ((var_096 ( \var_080. ( \var_073. ( \var_144. var_144)))) ( \var_042. ( \var_200. var_042)))) ((( \var_190. ( ...
((( \var_162. ( \var_017. ((( \var_254. ( \var_074. ((var_254 var_074) var_254))) (( \var_161. ((var_161 ( \var_185. ( \var_124. ( \var_074. var_074)))) ( \var_034. ( \var_027. var_034)))) ((( \var_163. ( ...
((( \var_021. ( \var_046. ((( \var_033. ( \var_125. ((var_033 var_125) var_033))) (( \var_213. ((var_213 ( \var_220. ( \var_045. ( \var_251. var_251)))) ( \var_057. ( \var_242. var_057)))) ((( \var_005. ( ...
((( \var_142. ( \var_000. ((( \var_156. ( \var_255. ((var_156 var_255) var_156))) (( \var_027. ((var_027 ( \var_147. ( \var_248. ( \var_008. var_008)))) ( \var_246. ( \var_108. var_246)))) ((( \var_205. ( ...

每一段表达式都很长,发现有大量形如 $\lambda f. \lambda x. f^{\circ n} x$的表达式,用python正则可以做一个简单的匹配

def num_replace(match:re.Match[str]) -> str:
    numl=match[0].count('(')
    numr=match[0].count(')')
    return f'num_{numl-2}'+')'*(numr-numl)

input_text=re.subn(r'\( \\(?P<p1>var_[0-9]+)\. \( \\(?P<p2>var_[0-9]+)\.( \((?P=p1))+ (?P=p2)\)*', num_replace, input_text)[0]

接着继续手动分析,主要是识别出来代码中出现的组合子,详见维基百科Church encodingLambda calculus,将他们替换,如下 $$ \begin{eqnarray} true &=& \lambda a. \lambda b. a \\ false &=& \lambda a. \lambda b. b \\ zero &=& \lambda f. \lambda x. x \\ PAIR &=& \lambda x. \lambda y. \lambda z. z\ x\ y \\ if &=& \lambda x. \lambda y. \lambda z. x\ y\ z \\ is\_zero &=& \lambda n. n\ (\lambda x. false)\ true \\ first &=& \lambda n. n\ true \\ second &=& \lambda n. n\ false \\ Y &=& λg.(λx.g\ (x\ x))\ (λx.g\ (x\ x)) \\ succ &=& \lambda a. \lambda f. \lambda x. f\ (a\ f \ x) \\ mul &=& \lambda n. \lambda m. \lambda f. \lambda x. m\ (n\ f)\ x \\ plus &=& \lambda n. \lambda m. \lambda f. \lambda x. m\ f\ (n\ f\ x) \\ sub &=& \lambda n. first\ (n\ (\lambda p. PAIR\ (second\ p)\ (succ\ (second\ p)))\ (PAIR\ zero\ zero)) \\ and &=& \lambda a. \lambda b. a\ b\ a \\ equal &=& \lambda a. \lambda b. and\ (is\_zero (sub\ a\ b)\ (is\_zero (sub\ b\ a)) \\ nil &=& \lambda x. true \\ is\_nil &=& \lambda a. a\ (\lambda b. \lambda c. false) \end{eqnarray} $$ 其中falsezero是一样的,所以就不做区分了,替换代码如下

import re
with open('lambda.txt', 'r') as f:
    input_text=f.read()

def num_replace(match:re.Match[str]) -> str:
    numl=match[0].count('(')
    numr=match[0].count(')')
    return f'num_{numl-2}'+')'*(numr-numl)

input_text=re.subn(r'\( \\(?P<p1>var_[0-9]+)\. \( \\(?P<p2>var_[0-9]+)\.( \((?P=p1))+ (?P=p2)\)*', num_replace, input_text)[0]
input_text=re.subn(r'\( \\(?P<p1>var_[0-9]+)\. \( \\(?P<p2>var_[0-9]+)\. (?P=p1)\)\)','true', input_text)[0]
input_text=re.subn(r'\( \\(?P<p1>var_[0-9]+)\. \( \\(?P<p2>var_[0-9]+)\. (?P=p2)\)\)','false', input_text)[0]
input_text=re.subn(r'\( \\(?P<P1>var_[0-9]+)\. \( \\(?P<P2>var_[0-9]+)\. \( \\(?P<P3>var_[0-9]+)\. \(\((?P=P3) (?P=P1)\) (?P=P2)\)\)\)\)','PAIR', input_text)[0]
input_text=re.subn(r'\( \\(?P<P1>var_[0-9]+)\. \( \\(?P<P2>var_[0-9]+)\. \( \\(?P<P3>var_[0-9]+)\. \(\((?P=P1) (?P=P2)\) (?P=P3)\)\)\)\)','if', input_text)[0]
input_text=re.subn(r'\( \\(?P<P1>var_[0-9]+)\. \(\((?P=P1) \( \\(?P<P2>var_[0-9]+)\. false\)\) true\)\)','is_zero', input_text)[0]
input_text=re.subn(r'\( \\(?P<p1>var_[0-9]+)\. \((?P=p1) true\)\)','first', input_text)[0]
input_text=re.subn(r'\( \\(?P<p1>var_[0-9]+)\. \((?P=p1) false\)\)','second', input_text)[0]
input_text=re.subn(r'\( \\(?P<p1>var_[0-9]+)\. \(\( \\(?P<p2>var_[0-9]+)\. \((?P=p1) \((?P=p2) (?P=p2)\)\)\) \( \\(?P<p3>var_[0-9]+)\. \((?P=p1) \((?P=p3) (?P=p3)\)\)\)\)\)','Y', input_text)[0]
input_text=re.subn(r'\( \\(?P<P1>var_[0-9]+)\. \( \\(?P<P2>var_[0-9]+)\. \( \\(?P<P3>var_[0-9]+)\. \((?P=P2) \(\((?P=P1) (?P=P2)\) (?P=P3)\)\)\)\)\)','succ', input_text)[0]
input_text=re.subn(r'\( \\(?P<p1>var_[0-9]+)\. \( \\(?P<p2>var_[0-9]+)\. \( \\(?P<p3>var_[0-9]+)\. \( \\(?P<p4>var_[0-9]+)\. \(\((?P=p1) \((?P=p2) (?P=p3)\)\) (?P=p4)\)\)\)\)\)','mul', input_text)[0]
input_text=re.subn(r'\( \\(?P<P1>var_[0-9]+)\. \( \\(?P<P2>var_[0-9]+)\. \( \\(?P<P3>var_[0-9]+)\. \( \\(?P<P4>var_[0-9]+)\. \(\((?P=P1) (?P=P3)\) \(\((?P=P2) (?P=P3)\) (?P=P4)\)\)\)\)\)\)','plus', input_text)[0]
input_text=re.subn(r'\( \\(?P<p1>var_[0-9]+)\. \( \\(?P<p2>var_[0-9]+)\. \(\((?P=p2) \( \\(?P<p3>var_[0-9]+)\. \(first \(\((?P=p3) \( \\(?P<p4>var_[0-9]+)\. \(\(PAIR \(second (?P=p4)\)\) \(succ \(second (?P=p4)\)\)\)\)\) \(\(PAIR false\) false\)\)\)\)\) (?P=p1)\)\)\)','sub', input_text)[0]
input_text=re.subn(r'\( \\(?P<P1>var_[0-9]+)\. \( \\(?P<P2>var_[0-9]+)\. \(\((?P=P1) (?P=P2)\) (?P=P1)\)\)\)','and', input_text)[0]
input_text=re.subn(r'\( \\(?P<P1>var_[0-9]+)\. \( \\(?P<P2>var_[0-9]+)\. \(\(and \(is_zero \(\(sub (?P=P1)\) (?P=P2)\)\)\) \(is_zero \(\(sub (?P=P2)\) (?P=P1)\)\)\)\)\)','equal', input_text)[0]
input_text=re.subn(r'\( \\(var_[0-9]+). true\)','nil', input_text)[0]
input_text=re.subn(r'\( \\(?P<P1>var_[0-9]+)\. \((?P=P1) \( \\(?P<P2>var_[0-9]+)\. \( \\(?P<P3>var_[0-9]+)\. false\)\)\)\)','is_nil', input_text)[0]

print(input_text)

替换过后就简洁很多了,再经过整理、重命名,得到如下代码

(
    equal num_3
    (
        (
            (Y \list_equal_count. \list1. \list2. (
                ((if (is_nil list1)) num_0)
                (plus
                    (if (equal (first list1) (first list2)) num_1 num_0)
                    (list_equal_count (second list1) (second list2))
                )
            ))
            (
                Y (\op. \matrix_line. \input_list. 
                    (if (is_nil matrix_line) nil)
                    (PAIR 
                        (
                            (Y \op2. \matrix_item. \input_item. \i. (
                                if (is_nil matrix_item) num_7
                                (plus
                                    (mul 
                                        (plus (first matrix_item) (sub (mul num_2 i) num_5))
                                        (sub (first input_item) ((plus num_13) (sub num_11 (mul num_3 i))))
                                    )
                                    (op2 (second matrix_item) (second input_item) (succ i))
                                ))
                            ) (first matrix_line) input_list num_2
                        )
                        (op (second matrix_line) input_list)
                    )
                )

                (PAIR (PAIR num_4 (PAIR num_6 (PAIR num_5 nil)))
                (PAIR (PAIR num_4 (PAIR num_3 (PAIR num_6 nil))))
                (PAIR (PAIR num_7 (PAIR num_4 (PAIR num_7 nil))))
                nil)
                ((PAIR input_30) ((PAIR input_31) ((PAIR input_32) nil)))
            )
        )
        (PAIR num_1848 (PAIR num_1711 (PAIR num_2179 nil)))
    )
)

这段代码就可以直接写解密脚本了,要注意lambda里面的减法是不会有负数的

import numpy as np
import re
count=[]
def num_replace(match:re.Match[str]) -> str:
    global count
    numl=match[0].count('(')
    return numl-2
with open('lambda.txt', 'r') as f:
    for line in f.readlines():
        count=list(map(num_replace, re.finditer(r'\( \\(?P<p1>var_[0-9]+)\. \( \\(?P<p2>var_[0-9]+)\.( \((?P=p1))+ (?P=p2)\)*', line)))[-12:]
        a= np.array([[count[0],count[1]+1,count[2]+3], [count[3],count[4]+1,count[5]+3],[count[6],count[7]+1,count[8]+3]])
        A = np.matrix(a).I
        y= np.array([count[9]-7,count[10]-7,count[11]-7])
        y = np.matrix(y).T
        ans=A*y
        print(chr(round(float(ans[0]))+24-6),end='')
        print(chr(round(float(ans[1]))+24-9),end='')
        print(chr(round(float(ans[2]))+24-11),end='')
print()

得到XCTF{M4tRI1|i||l|Il|I1X_A5_YC0mb}