Training Dataset
We need training examples: math problems with their answers. Let's generate them programmatically.
python
def num_to_words(n: int) -> str:
    """Spell out an integer in lowercase English words.

    Values from 0 to 99 are written in full. Compound numbers join the
    tens word and the ones word with a single space and no hyphen
    (42 -> "forty two").

    Negative values are rendered as "negative " followed by the spelling
    of their absolute value. Values of 100 or more are not spelled out;
    they fall back to their decimal digits (100 -> "100").
    """
    if n < 0:
        return "negative " + num_to_words(-n)

    words_0_19 = (
        "zero", "one", "two", "three", "four", "five", "six", "seven",
        "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
        "fifteen", "sixteen", "seventeen", "eighteen", "nineteen",
    )
    if n < 20:
        return words_0_19[n]

    # Indexes 0 and 1 are placeholders: every value below 20 was handled above.
    tens_words = (
        "", "", "twenty", "thirty", "forty", "fifty",
        "sixty", "seventy", "eighty", "ninety",
    )
    if n < 100:
        tens, ones = divmod(n, 10)
        if ones == 0:
            return tens_words[tens]
        return f"{tens_words[tens]} {words_0_19[ones]}"

    # Out of the supported range: keep the digits rather than guess a spelling.
    return str(n)

def generate_all_combinations() -> list[dict]:
    """Generate every worded math problem whose operands and answer are 0-99.

    Three groups are produced, in this order:

    * addition ``a plus b`` with ``a + b <= 99`` (5,050 examples),
    * subtraction ``a minus b`` with ``a - b >= 0`` (5,050 examples),
    * multiplication ``a times b`` with ``a * b <= 99`` (672 examples).

    Returns:
        A list of 10,772 dicts, each with three string keys: ``"input"``
        (the problem, e.g. "two plus three"), ``"output"`` (the answer,
        e.g. "five") and ``"full"`` (``"<input> equals <output>"``).
    """
    # Every operand and every result lies in 0..99, so spell each number once
    # instead of calling num_to_words several times per example.
    words = [num_to_words(n) for n in range(100)]

    def make_example(a: int, operator: str, b: int, result: int) -> dict:
        """Build one record for ``a <operator> b = result``."""
        problem = f"{words[a]} {operator} {words[b]}"
        answer = words[result]
        return {
            "input": problem,
            "output": answer,
            "full": f"{problem} equals {answer}",
        }

    data = []

    # Addition: a + b where result <= 99
    for a in range(100):
        data.extend(make_example(a, "plus", b, a + b) for b in range(100 - a))

    # Subtraction: a - b where result >= 0
    for a in range(100):
        data.extend(make_example(a, "minus", b, a - b) for b in range(a + 1))

    # Multiplication: a * b where result <= 99
    for a in range(100):
        data.extend(
            make_example(a, "times", b, a * b)
            for b in range(100)
            if a * b <= 99
        )

    return data
# Generate dataset
all_data = generate_all_combinations()
print(f"Total examples: {len(all_data):,}")
# Total examples: 10,772

Tests
python
# tests/test_data.py
# NOTE(review): this listing does not show an import for num_to_words or
# generate_all_combinations; import them from the module that defines them.
def test_num_to_words_single_digits():
    """Single-digit numbers map straight to their English word."""
    assert num_to_words(0) == "zero"
    assert num_to_words(5) == "five"

def test_num_to_words_compound():
    """Tens and ones are joined by a single space, with no hyphen."""
    assert num_to_words(42) == "forty two"
    assert num_to_words(99) == "ninety nine"

def test_generate_has_all_operations():
    """The dataset contains at least one example for every operator word."""
    data = generate_all_combinations()
    for operator in ("plus", "minus", "times"):
        assert any(operator in d["input"] for d in data), (
            f"no example uses {operator!r}"
        )

def test_addition_example():
    """The input 'two plus three' maps to the output 'five'."""
    data = generate_all_combinations()
    two_plus_three = [d for d in data if d["input"] == "two plus three"]
    # Fail with a readable assertion, not an IndexError, if the example is missing.
    assert two_plus_three, "no example with input 'two plus three'"
    assert two_plus_three[0]["output"] == "five"


# Run tests: pytest tests/test_data.py -v
Helpful?