Eric Schorn
Technical Director
Cryptography Services Practice, NCC Group
April 15, 2023
This executable blog post is the third in a series related to machine learning and explores code generation from a 16-billion-parameter large language model (LLM). After a brief look under the hood at the LLM structure and parameter allocation, we generate a variety of Python functions and make observations related to code quality and security. As with OpenAI's ChatGPT, Google's Bard, Meta's LLaMA and GitHub's Copilot, we will see some fantastic capabilities and a few intriguing misses. The results demonstrate that human expertise remains crucial, whether it be in engineering the prompt or in evaluating the generated code for suitability.
The model used here was introduced in the paper titled CodeGen: An Open Large Language Model for Code with Multi-Turn Program Synthesis from late 2022, with code on GitHub, and is supported via the Hugging Face framework. As the paper suggests, in some respects this model's performance is comparable to that of the OpenAI Codex model which powered the GitHub Copilot product. Since this technology is still in its infancy, any misadventures should be interpreted as opportunities for further research and the need for human technical expertise, rather than as criticism.
You can run everything for yourself by loading this .ipynb file into any Jupyter-based notebook system. The goal for the code below is to utilize state-of-the-art models while maximizing simplicity, understandability, and accessibility. This is consistent with the two prior posts in the blog series.
The code starts here with importing standard machine learning frameworks, implementing a 'pretty printer' support function to help manage voluminous output, and then reporting key version information.
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
from torchinfo import summary
import warnings; warnings.filterwarnings("ignore") # Pesky Pytorch artifact
def pprint(text, start=0, stop=20): # For pretty-printing with line numbers
    lines = text.split("\n")
    for i, line in enumerate(lines):
        if i < start or i > stop: continue
        print(f"{i:>3d}: {line}")

print("PyTorch version {} with Transformers version {}".format(
    torch.__version__, transformers.__version__))
PyTorch version 1.13.1.post200 with Transformers version 4.26.1
Now the pretrained model can be loaded along with its matching tokenizer, and the total number of model parameters reported. This model was initially trained on 825GB of English text, then training continued on a subset of the BigQuery dataset containing multiple programming languages, and finally training concluded on a large amount of Python source code. While this Jupyter notebook does not require a GPU, the model does consume roughly 80GB of memory (with smaller but less capable models also available). The total parameter count is reported via our 'pretty printer' pprint() function which extracts line 419 of the model summary. The full summary is included at the very end of this post. This parameter count includes some ancillary functionality so we will only be working with the most significant digits below.
model = AutoModelForCausalLM.from_pretrained("Salesforce/codegen-16B-mono", pad_token_id=50256)
tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen-16B-mono")
model_summary = summary(model, depth=5).__str__()
pprint(model_summary, 419, 419) # Extract one specific line of output
419: Total params: 16,032,155,648
So, how does one 'spend' 16 billion parameters in an LLM?
Essentially all of the most recent and popular LLMs are based on the transformer architecture. This revolutionary architecture was launched with the paper titled Attention Is All You Need in 2017. While transformers have three broad categories (the encoder, the decoder, and the encoder-decoder), the decoder category has arguably seen the most recent publicity and is what we are using here. The basic organization is shown in the classic diagram below sourced from the detailed background available on Wikipedia.
The decoder variant of a transformer is constrained to the components shown on the above right without any of those shown on the above left (which is the encoder). The larger block is repeated many times -- 34 in our case. As suggested by the bottom-most label, during inference the output of the model is repeatedly appended to the input and the model rerun for each output token.
The tokenizer maps an input prompt string and reference vocabulary to an input list of tokens (which are just integers). Incredibly, the transformer model simply consumes the list of tokens and performs about a (figurative) trillion multiply-accumulate operations as data moves through each layer, which finally results in a prediction for the next most likely set of tokens. The sampled output token is then appended to the input token list and the prediction process rerun again and again. Ultimately, this results in the original list of tokens with the output list appended to it, which is run through a reverse tokenizer to recover the result text.
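To make the loop just described concrete, the sketch below (a simplified, hypothetical helper, not the generate() function defined later in this post) shows greedy decoding: predict the most likely next token, append it to the token list, and rerun the model.

import torch

def greedy_decode_sketch(model, tokenizer, prompt, max_new_tokens=8):
    ids = tokenizer(prompt, return_tensors="pt").input_ids
    for _ in range(max_new_tokens):
        logits = model(ids).logits                           # (1, sequence length, vocabulary size)
        next_id = logits[0, -1].argmax()                     # greedy choice of the next token
        ids = torch.cat([ids, next_id.view(1, 1)], dim=-1)   # append and rerun
    return tokenizer.decode(ids[0], skip_special_tokens=True)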
Let's look at the construction of each of the following system components (the tokenizer, the word embeddings, the repeated transformer blocks, and the output layer) while examining how 16 billion parameters are 'spent':
The tokenizer is not strictly a part of the transformer model and does not consume parameters itself. Rather, it is a parsing and mapping function that is given a reference vocabulary set along with an input text string and produces a list of tokens corresponding to the words found in the input. The reference vocabulary can loosely be thought of as a Python set of words that each have a 'reference number' (which will later become an index to an embedding array). For example, the sentence "The cat and the dog went to the beach" may correspond to [1, 76, 2, 1, 67, 42, 5, 1, 99] if the word the maps to 1, the word cat maps to 76, the word beach maps to 99, and so forth -- note the repeated the becomes a repeated 1. With a little thought, it becomes clear that there is considerable additional complexity around capitalization, verb tenses, word stems, punctuation and vocabulary size constraints which are outside the scope of this blog post.
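As a toy illustration of this mapping (with a made-up vocabulary, not the real tokenizer used below), the example sentence could be tokenized with nothing more than a dictionary lookup:

vocab = {"the": 1, "and": 2, "to": 5, "went": 42, "dog": 67, "cat": 76, "beach": 99}
sentence = "The cat and the dog went to the beach"
tokens = [vocab[word] for word in sentence.lower().split()]
print(tokens)  # [1, 76, 2, 1, 67, 42, 5, 1, 99] -- the repeated 'the' becomes a repeated 1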
The LLM we are using here utilizes Byte-Pair Encoding tokenization as described by Hugging Face. Below we see the vocabulary size reported, followed by a test prompt. This test prompt is tokenized and the resulting list of tokens shown. Each token is converted back to its vocabulary word and printed within pipes to show segmentation. Note how some tokens, such as the first two 4299 and 825 (which relate to def), may or may not consume their adjacent space. Rare words and nonsense are split, as are some number sequences. Finally, some arbitrary vocabulary tokens are chosen, decoded, and printed -- here we see some punctuation and word suffixes appear. All of our prompts below will be passed through this tokenizer, and it is interesting to note that this tokenizer is not Python specific.
print(f"Tokenizer vocabulary size is {tokenizer.vocab_size}")
test_prompt = "def def hullo(): test 1 2 3456 (4+5) muchago printf"
encoded = tokenizer.encode(test_prompt)
decoded = "|".join([tokenizer.decode(x) for x in encoded])
print(f"Test prompt: {test_prompt} \nEncoded: {encoded}\nRound trip: |{decoded}|")
arb_vocab = [tokenizer.decode(x) for x in [1,2,5,10,20,50,100,200,500,1000]]
print(f'Arb vocab: |{"|".join(arb_vocab)}|')
Tokenizer vocabulary size is 50257
Test prompt: def def hullo(): test 1 2 3456 (4+5) muchago printf 
Encoded: [4299, 825, 23644, 78, 33529, 1332, 352, 362, 513, 29228, 357, 19, 10, 20, 8, 881, 3839, 30812]
Round trip: |def| def| hull|o|():| test| 1| 2| 3|456| (|4|+|5|)| much|ago| printf|
Arb vocab: |"|#|&|+|5|S|�||ine|ale|
Word embeddings are the initial stage of the transformer model and offer the capability of expanding the meaning of each vocabulary token. Effectively, they are a learned mapping from a single token number to a real-valued word vector. Consider an array with each row corresponding to the address of a vocabulary word and the row contents (or columns) corresponding to the learned word vector of 'embedding dimension'. We present each token to the array as a row address, the array does a lookup, and returns a word vector. This is repeated for all input tokens. The transformer has a vocabulary size of 51,200 and the model has an embedding dimension of 6,144. As 'pretty printed' below, this translates into 51,200 * 6,144 or 315M learned parameters (rounded to the nearest million).
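A minimal standalone sketch of this lookup (using a freshly initialized table rather than the loaded model's learned weights) is shown below; note that the parameter count matches the embedding line in the summary extract that follows.

import torch

embedding = torch.nn.Embedding(num_embeddings=51200, embedding_dim=6144)  # ~1.2GB of float32 weights
tokens = torch.tensor([4299, 825, 23644])      # a few token numbers from the earlier test prompt
word_vectors = embedding(tokens)               # one 6,144-wide vector per input token
print(word_vectors.shape)                      # torch.Size([3, 6144])
print(sum(p.numel() for p in embedding.parameters()))  # 314572800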
pprint(model_summary, 0, 3) # Table headings
pprint(model_summary, 5, 5) # Embedding stats
  0: ===========================================================================
  1: Layer (type:depth-idx)                                   Param #
  2: ===========================================================================
  3: CodeGenForCausalLM                                       --
  5: │    └─Embedding: 2-1                                    314,572,800
The transformer architecture involves a large number of repeated blocks arranged one after another like a pipeline. Each block has two primary elements: a self-attention mechanism, and two fully-connected feed-forward layers.
The self-attention mechanism involves three equal-sized matrices called Q for query, K for key and V for value. The matrices are effectively multiplied together to calculate the attention appropriate to each element of the subsequent layer inputs. With an embedding dimension of 6,144 and three square matrices, this requires 113M learned parameters. The results are then projected through another square matrix of size 6,144 * 6,144, resulting in another 38M learned parameters.
The two fully connected layers consist of two matrices with an input and output size equal to the embedded dimension of 6,144, but with an additional and larger internal dimension. In this case it is 24,576. As a result we have 6,144 * 24,576 or 151M learned parameters. The second matrix reverses the dimensions, which requires another 151M learned parameters.
This whole block is repeated 34 times in this model, resulting in 34 * (113M + 38M + 2 * 151M) or a total of 15.4B learned parameters for the repeated blocks. The few parameters reported below relating to the normalization layer are not dealt with here.
pprint(model_summary, 0, 3) # Table headings
pprint(model_summary, 8, 19) # Block stats
  0: ===========================================================================
  1: Layer (type:depth-idx)                                   Param #
  2: ===========================================================================
  3: CodeGenForCausalLM                                       --
  8: │    │    └─CodeGenBlock: 3-1                            --
  9: │    │    │    └─LayerNorm: 4-1                          12,288
 10: │    │    │    └─CodeGenAttention: 4-2                   --
 11: │    │    │    │    └─Dropout: 5-1                       --
 12: │    │    │    │    └─Dropout: 5-2                       --
 13: │    │    │    │    └─Linear: 5-3                        113,246,208
 14: │    │    │    │    └─Linear: 5-4                        37,748,736
 15: │    │    │    └─CodeGenMLP: 4-3                         --
 16: │    │    │    │    └─Linear: 5-5                        151,019,520
 17: │    │    │    │    └─Linear: 5-6                        151,001,088
 18: │    │    │    │    └─NewGELUActivation: 5-7             --
 19: │    │    │    │    └─Dropout: 5-8                       --
The output layer is a relatively straightforward fully-connected feed-forward classifier. It is presented with 6,144 inputs from the last stacked block described above and returns the predicted probability of each of the 51,200 output tokens. This requires a matrix of 6,144 * 51,200 resulting in 315M parameters. The output is ultimately sampled and put into a "reverse tokenizer" for final text output. Note that the vocabulary size is not precisely matched between the tokenizer, input, and output -- this is an extraneous artifact for our purposes here.
pprint(model_summary, 0, 3) # Table headings
pprint(model_summary, 417, 417) # Output layer stats
  0: ===========================================================================
  1: Layer (type:depth-idx)                                   Param #
  2: ===========================================================================
  3: CodeGenForCausalLM                                       --
417: ├─Linear: 1-2                                            314,624,000
This gives us a total of 315M + 15.4B + 315M = 16B parameters!
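The arithmetic above can be double-checked with a few lines of Python (biases and the per-block layer normalization parameters are ignored, which accounts for the small difference from the reported total):

d, vocab, inner, blocks = 6144, 51200, 24576, 34

embedding = vocab * d                     # 314,572,800
attention = 3 * d * d + d * d             # Q/K/V matrices plus the output projection
mlp       = 2 * d * inner                 # the two feed-forward matrices
stack     = blocks * (attention + mlp)    # ~15.4B for the repeated blocks
output    = d * vocab                     # the final classifier

print(f"{embedding + stack + output:,}")  # 16,030,629,888 -- roughly the 16B reported above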
Now we will survey code samples generated by the 16-billion-parameter model described above. The helper function below will be central to this: the given prompt is tokenized, the tokens injected into the model repeatedly, and the final output tokens decoded into a returned result string. Note that beam search means the model tracks the 10 most probable candidate sequences at each step rather than greedily committing to a single token, which improves the results significantly.
def generate(prompt):
    input_ids = tokenizer(prompt.strip(" \n"), return_tensors="pt")
    generated_ids = model.generate(**input_ids, max_new_tokens=256, do_sample=False,
                                   num_beams=10, temperature=0.75)
    result = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return result
All of the code generation samples will begin with a prompt involving the target function signature and a docstring that describes the desired behavior. The generate() function implemented above will utilize this to produce model output which is presented through the 'pretty print' function pprint() (where the two numerical arguments indicate what line range to print).
A simple prompt to stimulate the generation of a unit conversion helper function is supplied below. The model generates what it thinks is the most likely completion, and is allowed to continue running for a few extra lines, almost as if daydreaming about related code. It is interesting to see that the functionality is correct, and that the subsequent functions are related, correct, have appropriate names and also have type hints. As the training material has relatively minimal type hints, we drop this aspect moving forward in favor of a more specific docstring.
pprint(generate("""
def min2sec(minutes: float):
    \"""Converts the input minutes into seconds.\""" """), 0, 18)
  0: def min2sec(minutes: float):
  1:     """Converts the input minutes into seconds."""
  2:     return minutes * 60
  3: 
  4: 
  5: def sec2min(seconds: float):
  6:     """Converts the input seconds into minutes."""
  7:     return seconds / 60
  8: 
  9: 
 10: def min2hour(minutes: float):
 11:     """Converts the input minutes into hours."""
 12:     return minutes / 60
 13: 
 14: 
 15: def hour2min(hours: float):
 16:     """Converts the input hours into minutes."""
 17:     return hours * 60
 18: 
A prompt to stimulate the generation of another helper function, derived from a challenge in Project Euler, is supplied below. It is interesting that the docstring contains the word "and" while the generated code includes the logical-OR operator on line 4. Whether the code is actually correct requires some context and human thought. If the operator should instead be logical-AND, then the loop range() and contents could be significantly simplified. A comprehensive (human) code review would investigate the potential differences between intent, docstring and implementation across the overall application.
pprint(generate("""
def sum_of_multiples(a, b, c):
    \"""Finds the sum of all the multiples of a and b below c.\""" """), 0, 7)
  0: def sum_of_multiples(a, b, c):
  1:     """Finds the sum of all the multiples of a and b below c."""
  2:     total = 0
  3:     for i in range(1, c):
  4:         if i % a == 0 or i % b == 0:
  5:             total += i
  6:     return total
  7: 
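If the intent really was logical-AND (i.e. multiples of both a and b), the hinted-at simplification might look like the hand-written sketch below, which sums multiples of lcm(a, b) with an arithmetic series rather than a loop (the function name is ours):

from math import lcm

def sum_of_common_multiples(a, b, c):
    """Sums the multiples of both a and b below c."""
    m = lcm(a, b)
    k = (c - 1) // m               # how many multiples of m lie below c
    return m * k * (k + 1) // 2    # m + 2m + ... + km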
A prompt to stimulate the generation of a function to validate legal email addresses is supplied below. While the docstring is simple and complete, the task is actually very complex and somewhat open-ended as evidenced by RFC 6530, RFC 6531, RFC 6532 and RFC 6533. Despite an unusual logical inversion involving the if condition and return values, and even without considering internationalization, Unicode and normalization, the generated code incorrectly validates joe@..nccgroup..helloThereThisIsATest. Furthermore, CVE-2021-31409 "Unsafe validation RegEx in EmailValidator component" suggests there can be problems lurking below the surface when using regular expressions to process attacker-controlled input.
pprint(generate("""
def validate_email(address):
    \"""Validates a legal email address.\""" """), 0, 5)
  0: def validate_email(address):
  1:     """Validates a legal email address."""
  2:     if not re.match(r"^[A-Za-z0-9\.\+_-]+@[A-Za-z0-9\._-]+\.[a-zA-Z]*$", address):
  3:         return False
  4:     return True
  5: 
Elliptic curves are an extremely specialized topic for which a very concise prompt can be supplied. A prompt to stimulate the generation of a function to calculate the group law is supplied below. While the resulting code makes some assumptions on the operand types, input validity and a supporting double() helper function, it works through the necessary corner cases properly and is indeed correct overall! While human expertise is required to determine whether affine, projective, Jacobian or other co-ordinate systems may be the most efficient for a given context, in this basic instance the generated code is a win!
pprint(generate("""
def add(p1, p2):
    \"""Adds two elliptic curve points in affine coordinates.\""" """), 0, 16)
  0: def add(p1, p2):
  1:     """Adds two elliptic curve points in affine coordinates."""
  2:     if p1 == INFINITY:
  3:         return p2
  4:     if p2 == INFINITY:
  5:         return p1
  6:     x1, y1 = p1
  7:     x2, y2 = p2
  8:     if x1 == x2 and y1 == y2:
  9:         return double(p1)
 10:     elif x1 == x2:
 11:         return INFINITY
 12:     s = (y2 - y1) / (x2 - x1)
 13:     x3 = s**2 - x1 - x2
 14:     y3 = s * (x1 - x3) - y1
 15:     return (x3, y3)
 16: 
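For completeness, the assumed double() helper might look like the hand-written sketch below, under the same assumptions as the generated code: points are (x, y) tuples whose coordinates support division, INFINITY is a sentinel for the point at infinity, and the curve coefficient A (from y^2 = x^3 + A*x + B) is available as a global.

def double(p):
    """Doubles an elliptic curve point in affine coordinates."""
    if p == INFINITY:
        return INFINITY
    x, y = p
    if y == 0:                       # vertical tangent: the result is the point at infinity
        return INFINITY
    s = (3 * x**2 + A) / (2 * y)     # slope of the tangent line
    x3 = s**2 - 2 * x
    y3 = s * (x - x3) - y
    return (x3, y3)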
A prompt to stimulate the generation of a function that tests for prime numbers is supplied below. The initial cases tested on lines 2, 4 and 6 are correct. The loop range() endpoint on line 9 for testing factors correctly involves the square root of the operand. However, this loop is inefficient in that (for example) an input of 733 will be tested for divisibility by 3, 9, and 27 among other factors, but the latter two tests cannot succeed if the former test fails (so these latter two tests would be redundant). This issue is somewhat common, so many implementations test a longer list of primes on line 6 so that the loop on line 9 can take much larger steps. In the end, for the size of integers normally used in cryptographic applications, this sort of deterministic test that checks all potential factors would 'never complete' and thus a probabilistic primality test would be required. As before, this requires human expertise to evaluate the suitability of generated code.
pprint(generate("""
def is_prime(a):
    \"""Determines whether the input a is a prime number.\""" """), 0, 13)
  0: def is_prime(a):
  1:     """Determines whether the input a is a prime number."""
  2:     if a < 2:
  3:         return False
  4:     elif a == 2:
  5:         return True
  6:     elif a % 2 == 0:
  7:         return False
  8:     else:
  9:         for i in range(3, int(a ** 0.5) + 1, 2):
 10:             if a % i == 0:
 11:                 return False
 12:         return True
 13: 
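For comparison, a hand-written sketch of the probabilistic approach mentioned above (a Miller-Rabin test with random bases, not something the model generated) could look like the following:

import random

def is_probable_prime(n, rounds=40):
    """Miller-Rabin test: composite results are certain, prime results are probabilistic."""
    if n < 2:
        return False
    for p in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37):
        if n % p == 0:
            return n == p
    d, r = n - 1, 0
    while d % 2 == 0:                # write n - 1 as d * 2^r with d odd
        d //= 2
        r += 1
    for _ in range(rounds):
        a = random.randrange(2, n - 1)
        x = pow(a, d, n)
        if x in (1, n - 1):
            continue
        for _ in range(r - 1):
            x = pow(x, 2, n)
            if x == n - 1:
                break
        else:
            return False             # a is a witness that n is composite
    return True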
A prompt to stimulate the generation of a function to calculate the square root of an integer modulo N is supplied below. The resulting code continues at length but its display is cut off at line 21 for brevity. It is interesting to see input validation done here, although assert may be more appropriate for debug rather than production. Based on the model's generated implementation of is_prime() above and the docstring specifying an odd prime, line 7 needs to be adjusted to prevent a value for N of 2. The actual logic implemented on line 10+ is simply a lookup table. While each result is indeed correct, this function is not usable in a real-world context and it is difficult to imagine the training that led to this point.
pprint(generate("""
def sqrt(input, N):
    \"""
    Calculates the square root of the input integer modulo N, where N is an
    odd prime.
    \""" """), 0, 21)
  0: def sqrt(input, N):
  1:     """
  2:     Calculates the square root of the input integer modulo N, where N is an
  3:     odd prime.
  4:     """
  5:     assert isinstance(input, int)
  6:     assert isinstance(N, int)
  7:     assert N > 1
  8:     assert is_prime(N)
  9: 
 10:     if input == 0:
 11:         return 0
 12:     elif input == 1:
 13:         return 1
 14:     elif input == 4:
 15:         return 2
 16:     elif input == 9:
 17:         return 3
 18:     elif input == 16:
 19:         return 4
 20:     elif input == 25:
 21:         return 5
A similar prompt to stimulate the generation of a function to calculate the square root modulo N is supplied below, but with an additional hint involving the standard Tonelli-Shanks algorithm. Oddly, our input validation now raises the correct exception variants, although the constant on line 11 should be 2 (as also noted above). However, the look-up style logic that follows is gibberish. The strong hint for the correct algorithm is completely missed.
pprint(generate("""
def sqrt(input, N):
    \"""
    Calculates the square root of the positive input integer modulo N using
    the Tonelli-Shanks algorithm, where N is an odd prime.
    \""" """), 0, 22)
  0: def sqrt(input, N):
  1:     """
  2:     Calculates the square root of the positive input integer modulo N using
  3:     the Tonelli-Shanks algorithm, where N is an odd prime.
  4:     """
  5:     if not isinstance(input, int):
  6:         raise TypeError("Input must be an integer.")
  7:     if not isinstance(N, int):
  8:         raise TypeError("N must be an odd prime integer.")
  9:     if input < 0:
 10:         raise ValueError("Input must be a positive integer.")
 11:     if N <= 1:
 12:         raise ValueError("N must be an odd prime integer.")
 13:     if not is_prime(N):
 14:         raise ValueError("N must be an odd prime integer.")
 15:     if input == 0:
 16:         return 0
 17:     if input == 1:
 18:         return 1
 19:     if input == 2:
 20:         return 2
 21:     if input == 3:
 22:         return 3
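For reference, a hand-written sketch of the Tonelli-Shanks algorithm the prompt was hinting at is shown below; it assumes an odd prime modulus p and raises a ValueError when no square root exists.

def tonelli_shanks(n, p):
    """Returns r such that r*r == n (mod p) for an odd prime p."""
    n %= p
    if n == 0:
        return 0
    if pow(n, (p - 1) // 2, p) != 1:          # Euler criterion: no square root exists
        raise ValueError("n is not a quadratic residue modulo p")
    if p % 4 == 3:                            # easy case
        return pow(n, (p + 1) // 4, p)
    q, s = p - 1, 0
    while q % 2 == 0:                         # write p - 1 as q * 2^s with q odd
        q //= 2
        s += 1
    z = 2
    while pow(z, (p - 1) // 2, p) != p - 1:   # find a quadratic non-residue
        z += 1
    m, c, t, r = s, pow(z, q, p), pow(n, q, p), pow(n, (q + 1) // 2, p)
    while t != 1:
        i, t2 = 0, t
        while t2 != 1:                        # find the least i with t^(2^i) == 1
            t2 = pow(t2, 2, p)
            i += 1
        b = pow(c, 1 << (m - i - 1), p)
        m, c, t, r = i, pow(b, 2, p), (t * b * b) % p, (r * b) % p
    return r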
Below we supply a prompt to stimulate the generation of a function for integer deserialization which has traditionally been an error-prone area. There are no special cases specified except for returning the deserialized integer modulo N. The generated code does not check for the input bytearray type, and would return the wrong result when given a list of large integers. Additionally, there are a host of invisible assumptions that make this code problematic, such as the big-endian format, the potential for egregiously oversized inputs leading to a (downstream) denial of service, empty input, and the unsigned (versus signed) result.
pprint(generate("""
def deserialize(input, N):
    \"""Deserializes a bytearray input into an integer modulo N.\""" """), 0, 6)
  0: def deserialize(input, N):
  1:     """Deserializes a bytearray input into an integer modulo N."""
  2:     x = 0
  3:     for c in input:
  4:         x = (x << 8) | c
  5:     return x % N
  6: 
Below, the prompt is tightened with additional constraints such as an exact length of the input bytearray type. It is encouraging to see the resulting correct input validation as well as the use of the built-in from_bytes() method with explicit choices for little-endian and unsigned.
However, what we originally wanted when writing this blog post was a function that raised an exception when the deserialized integer was equal to or larger than N. Repeated 'engineering' of the prompt was unsuccessful at generating correct code. This sort of constraint is absolutely necessary -- see Technical Advisory – Arbitrary Signature Forgery in Stark Bank ECDSA Libraries (CVE-2021-43572, CVE-2021-43570, CVE-2021-43569, CVE-2021-43568, CVE-2021-43571) and table 5 in Taming the many EdDSAs relating to non-canonical encodings. So we can approach the functionality we want, but not actually generate it.
# Unable to prompt for code that raises exception for result >= N
pprint(generate("""
def deserialize(bytes, N):
\"""
Deserializes a little-endian array of exactly 16 bytes into an
unsigned integer modulo N. This function raises a TypeError for
any input type other than a bytearray. This function raises a
ValueError for an incorrectly sized input.
\""" """), 0, 12)
  0: def deserialize(bytes, N):
  1:     """
  2:     Deserializes a little-endian array of exactly 16 bytes into an
  3:     unsigned integer modulo N. This function raises a TypeError for
  4:     any input type other than a bytearray. This function raises a
  5:     ValueError for an incorrectly sized input.
  6:     """
  7:     if not isinstance(bytes, bytearray):
  8:         raise TypeError("Input must be a bytearray")
  9:     if len(bytes)!= 16:
 10:         raise ValueError("Input must be exactly 16 bytes")
 11:     return int.from_bytes(bytes, "little", signed=False) % N
 12: 
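For completeness, the behavior we were unable to coax out of the model -- the same validation plus rejection of any non-canonical (not fully reduced) value -- might be hand-written as follows (the function name is ours):

def deserialize_canonical(data, N):
    """Deserializes exactly 16 little-endian bytes; rejects any value >= N."""
    if not isinstance(data, bytearray):
        raise TypeError("Input must be a bytearray")
    if len(data) != 16:
        raise ValueError("Input must be exactly 16 bytes")
    value = int.from_bytes(data, "little", signed=False)
    if value >= N:
        raise ValueError("Encoded value must be canonical (less than N)")
    return value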
Here encryption-related functionality is considered, starting with a prompt to stimulate the generation of a key derivation function. Presumably the passphrase would be validated prior to this for minimum length, required characters, etc. However, section 5.1.1.2 of NIST SP 800-63B describes the need to also normalize Unicode strings prior to hashing to avoid interoperability problems. The generated code below specifies 100,000 iterations but this is likely not enough as Django currently specifies 720,000. Further, sha256 is not considered memory-hard per NIST guidance. Finally, the docstring specifies an output length of 16 bytes but sha256 returns 32 bytes. As an aside, line 9 should arguably read return salt, key to return a better separated tuple and remove (visual) ambiguity around addition.
pprint(generate("""
def pbkdf(passphrase):
    \"""
    This key derivation function converts a passphrase into an ecryption key.
    The input passphrase is a unicode string and the output key is a bytearray
    of length 16.
    \""" """), 0, 10)
  0: def pbkdf(passphrase):
  1:     """
  2:     This key derivation function converts a passphrase into an ecryption key.
  3:     The input passphrase is a unicode string and the output key is a bytearray
  4:     of length 16.
  5:     """
  6:     salt = bytearray(os.urandom(16))
  7:     passphrase = passphrase.encode('utf-8')
  8:     key = hashlib.pbkdf2_hmac('sha256', passphrase, salt, 100000)
  9:     return salt + key
 10: 
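Pulling the observations above together, a hand-written sketch (not model output) might normalize the passphrase, raise the iteration count, request an explicit 16-byte output, and return the salt and key as a tuple:

import hashlib, os, unicodedata

def pbkdf_sketch(passphrase):
    """Derives a 16-byte key from a unicode passphrase; returns (salt, key)."""
    normalized = unicodedata.normalize("NFKC", passphrase).encode("utf-8")
    salt = os.urandom(16)
    key = hashlib.pbkdf2_hmac("sha256", normalized, salt, 720_000, dklen=16)
    return salt, key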
Here a prompt is supplied to stimulate the generation of a function that performs encryption. While it is encouraging to see AES utilized, the ECB mode is likely a lurking disaster as "ECB is not recommended for use in cryptographic protocols." As an aside, smaller models return a function where encryption is performed using a single XOR function.
pprint(generate("""
def encrypt(data, key):
    \"""
    Encrypts an input data string with a key. The key must be a bytearray
    of length 16.
    \""" """), 0, 7)
  0: def encrypt(data, key):
  1:     """
  2:     Encrypts an input data string with a key. The key must be a bytearray
  3:     of length 16.
  4:     """
  5:     aes = AES.new(key, AES.MODE_ECB)
  6:     return aes.encrypt(data)
  7: 
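A hand-written sketch of what a reviewer would more likely want here -- AES in GCM mode via PyCryptodome (which the generated code also appears to assume), a random 12-byte nonce, and the authentication tag carried with the ciphertext -- is shown below:

import os
from Crypto.Cipher import AES

def encrypt_gcm(data, key):
    """Encrypts data with AES-GCM; returns nonce || ciphertext || tag."""
    nonce = os.urandom(12)                           # 96-bit nonce per NIST SP 800-38D
    cipher = AES.new(key, AES.MODE_GCM, nonce=nonce)
    ciphertext, tag = cipher.encrypt_and_digest(data)
    return nonce + ciphertext + tag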
Now a more detailed prompt is supplied for decryption that uses AES in GCM mode. While the most common initialization vector (IV) length is 96 bits or 12 bytes per section 8.2 of NIST SP 800-38D, the generated code below somehow connects it to the block size which is 16 bytes. Similarly, the cipher.verify() operation on line 9 suggests a fixed tag size of 128 bits or 16 bytes, while the specification gives a broader range. Overall, this function looks very good but is still a little less than ideal.
pprint(generate("""
def aes_decrypt(ciphertext, key):
    \"""
    Decrypts the input ciphertext with a key using AES in GCM mode. The key
    must be a bytearray of length 16.
    \""" """), 0, 13)
  0: def aes_decrypt(ciphertext, key):
  1:     """
  2:     Decrypts the input ciphertext with a key using AES in GCM mode. The key
  3:     must be a bytearray of length 16.
  4:     """
  5:     iv = ciphertext[:AES.block_size]
  6:     cipher = AES.new(key, AES.MODE_GCM, iv)
  7:     plaintext = cipher.decrypt(ciphertext[AES.block_size:])
  8:     try:
  9:         cipher.verify(plaintext[-16:])
 10:         return plaintext[:-16]
 11:     except ValueError:
 12:         raise ValueError("Invalid key or ciphertext!")
 13: 
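Adjusting the generated function per the comments above (a 12-byte nonce rather than the block size, and decrypt_and_verify() so the tag is checked before any plaintext is released) gives a hand-written sketch along these lines:

from Crypto.Cipher import AES

def aes_decrypt_gcm(ciphertext, key):
    """Decrypts nonce || ciphertext || tag as produced by the encryption sketch above."""
    nonce, body, tag = ciphertext[:12], ciphertext[12:-16], ciphertext[-16:]
    cipher = AES.new(key, AES.MODE_GCM, nonce=nonce)
    try:
        return cipher.decrypt_and_verify(body, tag)
    except ValueError:
        raise ValueError("Invalid key or ciphertext!")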
If the prospect of increasing the productivity of lesser-skilled programmers is concerning, perhaps automating test case generation would be a better initial step. Here a prompt is supplied to stimulate the generation of a function that calculates the greatest common denominator of its two integer inputs -- a common programming challenge. The additional validation involving 0 is arguably correct, and this is quite nice code.
# def greatest_common_denominator(a, b):
pprint(generate("""
def gcd(a, b):
    \"""
    Finds the greatest common denominator of two positive non-zero integers
    a and b. This function raises a TypeError if both inputs are not integers.
    \""" """), 0, 20)
  0: def gcd(a, b):
  1:     """
  2:     Finds the greatest common denominator of two positive non-zero integers
  3:     a and b. This function raises a TypeError if both inputs are not integers.
  4:     """
  5:     if not isinstance(a, int) or not isinstance(b, int):
  6:         raise TypeError
  7:     if a < 0 or b < 0:
  8:         raise ValueError
  9:     if a == 0 and b == 0:
 10:         raise ValueError
 11:     if a == 0:
 12:         return b
 13:     if b == 0:
 14:         return a
 15:     if a < b:
 16:         a, b = b, a
 17:     while b!= 0:
 18:         a, b = b, a % b
 19:     return a
 20: 
The prompt below is a freeform departure from the preceding instances, and simply asks for test cases to be generated. This gives another very nice result, albeit with an uninspiring choice of test data. Clearly, a human would need to adapt the test vector values.
pprint(generate("""
Write a unit test for a function that calculates the greatest common
denominator of two positive integers. """), 0, 23)
  0: Write a unit test for a function that calculates the greatest common
  1: denominator of two positive integers. 
  2: """
  3: 
  4: import unittest
  5: from gcd import gcd
  6: 
  7: 
  8: class TestGcd(unittest.TestCase):
  9:     def test_gcd_of_1_and_1(self):
 10:         self.assertEqual(1, gcd(1, 1))
 11: 
 12:     def test_gcd_of_2_and_2(self):
 13:         self.assertEqual(2, gcd(2, 2))
 14: 
 15:     def test_gcd_of_3_and_3(self):
 16:         self.assertEqual(3, gcd(3, 3))
 17: 
 18:     def test_gcd_of_4_and_4(self):
 19:         self.assertEqual(4, gcd(4, 4))
 20: 
 21:     def test_gcd_of_5_and_5(self):
 22:         self.assertEqual(5, gcd(5, 5))
 23: 
We have now looked under the hood of an LLM to understand the basic architecture, where and how 16 billion parameters are spent, and the results of asking the model to generate a variety of code fragments. At first impression, the capabilities are absolutely stunning.
However, as the above examples have shown, human expertise is pivotal in correctly specifying the LLM prompt as well as in evaluating the generated code output. The higher-level application context typically remains unexpressed and unexamined by the model. The natural tendency for time-pressured developers to judge "that looks about right", along with commercial pressures to "ship it when it seems to work", will in the best case result in a house of cards deployed into production. In the worst case, obvious CVEs are generated.
At this point, code generation by LLMs appears to change the landscape for human expertise and deep thought, but it only increases, rather than decreases, their importance.
The author would like to thank Aleksandar Kircanski and Eli Sohl for their human expertise and detailed review. All issues remain with the author. The full model summary is shown below.
pprint(model_summary, 0, 2000)
0: =========================================================================== 1: Layer (type:depth-idx) Param # 2: =========================================================================== 3: CodeGenForCausalLM -- 4: ├─CodeGenModel: 1-1 -- 5: │ └─Embedding: 2-1 314,572,800 6: │ └─Dropout: 2-2 -- 7: │ └─ModuleList: 2-3 -- 8: │ │ └─CodeGenBlock: 3-1 -- 9: │ │ │ └─LayerNorm: 4-1 12,288 10: │ │ │ └─CodeGenAttention: 4-2 -- 11: │ │ │ │ └─Dropout: 5-1 -- 12: │ │ │ │ └─Dropout: 5-2 -- 13: │ │ │ │ └─Linear: 5-3 113,246,208 14: │ │ │ │ └─Linear: 5-4 37,748,736 15: │ │ │ └─CodeGenMLP: 4-3 -- 16: │ │ │ │ └─Linear: 5-5 151,019,520 17: │ │ │ │ └─Linear: 5-6 151,001,088 18: │ │ │ │ └─NewGELUActivation: 5-7 -- 19: │ │ │ │ └─Dropout: 5-8 -- 20: │ │ └─CodeGenBlock: 3-2 -- 21: │ │ │ └─LayerNorm: 4-4 12,288 22: │ │ │ └─CodeGenAttention: 4-5 -- 23: │ │ │ │ └─Dropout: 5-9 -- 24: │ │ │ │ └─Dropout: 5-10 -- 25: │ │ │ │ └─Linear: 5-11 113,246,208 26: │ │ │ │ └─Linear: 5-12 37,748,736 27: │ │ │ └─CodeGenMLP: 4-6 -- 28: │ │ │ │ └─Linear: 5-13 151,019,520 29: │ │ │ │ └─Linear: 5-14 151,001,088 30: │ │ │ │ └─NewGELUActivation: 5-15 -- 31: │ │ │ │ └─Dropout: 5-16 -- 32: │ │ └─CodeGenBlock: 3-3 -- 33: │ │ │ └─LayerNorm: 4-7 12,288 34: │ │ │ └─CodeGenAttention: 4-8 -- 35: │ │ │ │ └─Dropout: 5-17 -- 36: │ │ │ │ └─Dropout: 5-18 -- 37: │ │ │ │ └─Linear: 5-19 113,246,208 38: │ │ │ │ └─Linear: 5-20 37,748,736 39: │ │ │ └─CodeGenMLP: 4-9 -- 40: │ │ │ │ └─Linear: 5-21 151,019,520 41: │ │ │ │ └─Linear: 5-22 151,001,088 42: │ │ │ │ └─NewGELUActivation: 5-23 -- 43: │ │ │ │ └─Dropout: 5-24 -- 44: │ │ └─CodeGenBlock: 3-4 -- 45: │ │ │ └─LayerNorm: 4-10 12,288 46: │ │ │ └─CodeGenAttention: 4-11 -- 47: │ │ │ │ └─Dropout: 5-25 -- 48: │ │ │ │ └─Dropout: 5-26 -- 49: │ │ │ │ └─Linear: 5-27 113,246,208 50: │ │ │ │ └─Linear: 5-28 37,748,736 51: │ │ │ └─CodeGenMLP: 4-12 -- 52: │ │ │ │ └─Linear: 5-29 151,019,520 53: │ │ │ │ └─Linear: 5-30 151,001,088 54: │ │ │ │ └─NewGELUActivation: 5-31 -- 55: │ │ │ │ └─Dropout: 5-32 -- 56: │ │ └─CodeGenBlock: 3-5 -- 57: │ │ │ └─LayerNorm: 4-13 12,288 58: │ │ │ └─CodeGenAttention: 4-14 -- 59: │ │ │ │ └─Dropout: 5-33 -- 60: │ │ │ │ └─Dropout: 5-34 -- 61: │ │ │ │ └─Linear: 5-35 113,246,208 62: │ │ │ │ └─Linear: 5-36 37,748,736 63: │ │ │ └─CodeGenMLP: 4-15 -- 64: │ │ │ │ └─Linear: 5-37 151,019,520 65: │ │ │ │ └─Linear: 5-38 151,001,088 66: │ │ │ │ └─NewGELUActivation: 5-39 -- 67: │ │ │ │ └─Dropout: 5-40 -- 68: │ │ └─CodeGenBlock: 3-6 -- 69: │ │ │ └─LayerNorm: 4-16 12,288 70: │ │ │ └─CodeGenAttention: 4-17 -- 71: │ │ │ │ └─Dropout: 5-41 -- 72: │ │ │ │ └─Dropout: 5-42 -- 73: │ │ │ │ └─Linear: 5-43 113,246,208 74: │ │ │ │ └─Linear: 5-44 37,748,736 75: │ │ │ └─CodeGenMLP: 4-18 -- 76: │ │ │ │ └─Linear: 5-45 151,019,520 77: │ │ │ │ └─Linear: 5-46 151,001,088 78: │ │ │ │ └─NewGELUActivation: 5-47 -- 79: │ │ │ │ └─Dropout: 5-48 -- 80: │ │ └─CodeGenBlock: 3-7 -- 81: │ │ │ └─LayerNorm: 4-19 12,288 82: │ │ │ └─CodeGenAttention: 4-20 -- 83: │ │ │ │ └─Dropout: 5-49 -- 84: │ │ │ │ └─Dropout: 5-50 -- 85: │ │ │ │ └─Linear: 5-51 113,246,208 86: │ │ │ │ └─Linear: 5-52 37,748,736 87: │ │ │ └─CodeGenMLP: 4-21 -- 88: │ │ │ │ └─Linear: 5-53 151,019,520 89: │ │ │ │ └─Linear: 5-54 151,001,088 90: │ │ │ │ └─NewGELUActivation: 5-55 -- 91: │ │ │ │ └─Dropout: 5-56 -- 92: │ │ └─CodeGenBlock: 3-8 -- 93: │ │ │ └─LayerNorm: 4-22 12,288 94: │ │ │ └─CodeGenAttention: 4-23 -- 95: │ │ │ │ └─Dropout: 5-57 -- 96: │ │ │ │ └─Dropout: 5-58 -- 97: │ │ │ │ └─Linear: 5-59 113,246,208 98: │ │ │ │ └─Linear: 5-60 37,748,736 99: │ │ │ └─CodeGenMLP: 4-24 
-- 100: │ │ │ │ └─Linear: 5-61 151,019,520 101: │ │ │ │ └─Linear: 5-62 151,001,088 102: │ │ │ │ └─NewGELUActivation: 5-63 -- 103: │ │ │ │ └─Dropout: 5-64 -- 104: │ │ └─CodeGenBlock: 3-9 -- 105: │ │ │ └─LayerNorm: 4-25 12,288 106: │ │ │ └─CodeGenAttention: 4-26 -- 107: │ │ │ │ └─Dropout: 5-65 -- 108: │ │ │ │ └─Dropout: 5-66 -- 109: │ │ │ │ └─Linear: 5-67 113,246,208 110: │ │ │ │ └─Linear: 5-68 37,748,736 111: │ │ │ └─CodeGenMLP: 4-27 -- 112: │ │ │ │ └─Linear: 5-69 151,019,520 113: │ │ │ │ └─Linear: 5-70 151,001,088 114: │ │ │ │ └─NewGELUActivation: 5-71 -- 115: │ │ │ │ └─Dropout: 5-72 -- 116: │ │ └─CodeGenBlock: 3-10 -- 117: │ │ │ └─LayerNorm: 4-28 12,288 118: │ │ │ └─CodeGenAttention: 4-29 -- 119: │ │ │ │ └─Dropout: 5-73 -- 120: │ │ │ │ └─Dropout: 5-74 -- 121: │ │ │ │ └─Linear: 5-75 113,246,208 122: │ │ │ │ └─Linear: 5-76 37,748,736 123: │ │ │ └─CodeGenMLP: 4-30 -- 124: │ │ │ │ └─Linear: 5-77 151,019,520 125: │ │ │ │ └─Linear: 5-78 151,001,088 126: │ │ │ │ └─NewGELUActivation: 5-79 -- 127: │ │ │ │ └─Dropout: 5-80 -- 128: │ │ └─CodeGenBlock: 3-11 -- 129: │ │ │ └─LayerNorm: 4-31 12,288 130: │ │ │ └─CodeGenAttention: 4-32 -- 131: │ │ │ │ └─Dropout: 5-81 -- 132: │ │ │ │ └─Dropout: 5-82 -- 133: │ │ │ │ └─Linear: 5-83 113,246,208 134: │ │ │ │ └─Linear: 5-84 37,748,736 135: │ │ │ └─CodeGenMLP: 4-33 -- 136: │ │ │ │ └─Linear: 5-85 151,019,520 137: │ │ │ │ └─Linear: 5-86 151,001,088 138: │ │ │ │ └─NewGELUActivation: 5-87 -- 139: │ │ │ │ └─Dropout: 5-88 -- 140: │ │ └─CodeGenBlock: 3-12 -- 141: │ │ │ └─LayerNorm: 4-34 12,288 142: │ │ │ └─CodeGenAttention: 4-35 -- 143: │ │ │ │ └─Dropout: 5-89 -- 144: │ │ │ │ └─Dropout: 5-90 -- 145: │ │ │ │ └─Linear: 5-91 113,246,208 146: │ │ │ │ └─Linear: 5-92 37,748,736 147: │ │ │ └─CodeGenMLP: 4-36 -- 148: │ │ │ │ └─Linear: 5-93 151,019,520 149: │ │ │ │ └─Linear: 5-94 151,001,088 150: │ │ │ │ └─NewGELUActivation: 5-95 -- 151: │ │ │ │ └─Dropout: 5-96 -- 152: │ │ └─CodeGenBlock: 3-13 -- 153: │ │ │ └─LayerNorm: 4-37 12,288 154: │ │ │ └─CodeGenAttention: 4-38 -- 155: │ │ │ │ └─Dropout: 5-97 -- 156: │ │ │ │ └─Dropout: 5-98 -- 157: │ │ │ │ └─Linear: 5-99 113,246,208 158: │ │ │ │ └─Linear: 5-100 37,748,736 159: │ │ │ └─CodeGenMLP: 4-39 -- 160: │ │ │ │ └─Linear: 5-101 151,019,520 161: │ │ │ │ └─Linear: 5-102 151,001,088 162: │ │ │ │ └─NewGELUActivation: 5-103 -- 163: │ │ │ │ └─Dropout: 5-104 -- 164: │ │ └─CodeGenBlock: 3-14 -- 165: │ │ │ └─LayerNorm: 4-40 12,288 166: │ │ │ └─CodeGenAttention: 4-41 -- 167: │ │ │ │ └─Dropout: 5-105 -- 168: │ │ │ │ └─Dropout: 5-106 -- 169: │ │ │ │ └─Linear: 5-107 113,246,208 170: │ │ │ │ └─Linear: 5-108 37,748,736 171: │ │ │ └─CodeGenMLP: 4-42 -- 172: │ │ │ │ └─Linear: 5-109 151,019,520 173: │ │ │ │ └─Linear: 5-110 151,001,088 174: │ │ │ │ └─NewGELUActivation: 5-111 -- 175: │ │ │ │ └─Dropout: 5-112 -- 176: │ │ └─CodeGenBlock: 3-15 -- 177: │ │ │ └─LayerNorm: 4-43 12,288 178: │ │ │ └─CodeGenAttention: 4-44 -- 179: │ │ │ │ └─Dropout: 5-113 -- 180: │ │ │ │ └─Dropout: 5-114 -- 181: │ │ │ │ └─Linear: 5-115 113,246,208 182: │ │ │ │ └─Linear: 5-116 37,748,736 183: │ │ │ └─CodeGenMLP: 4-45 -- 184: │ │ │ │ └─Linear: 5-117 151,019,520 185: │ │ │ │ └─Linear: 5-118 151,001,088 186: │ │ │ │ └─NewGELUActivation: 5-119 -- 187: │ │ │ │ └─Dropout: 5-120 -- 188: │ │ └─CodeGenBlock: 3-16 -- 189: │ │ │ └─LayerNorm: 4-46 12,288 190: │ │ │ └─CodeGenAttention: 4-47 -- 191: │ │ │ │ └─Dropout: 5-121 -- 192: │ │ │ │ └─Dropout: 5-122 -- 193: │ │ │ │ └─Linear: 5-123 113,246,208 194: │ │ │ │ └─Linear: 5-124 37,748,736 195: │ │ │ └─CodeGenMLP: 4-48 -- 196: │ │ │ │ └─Linear: 
5-125 151,019,520 197: │ │ │ │ └─Linear: 5-126 151,001,088 198: │ │ │ │ └─NewGELUActivation: 5-127 -- 199: │ │ │ │ └─Dropout: 5-128 -- 200: │ │ └─CodeGenBlock: 3-17 -- 201: │ │ │ └─LayerNorm: 4-49 12,288 202: │ │ │ └─CodeGenAttention: 4-50 -- 203: │ │ │ │ └─Dropout: 5-129 -- 204: │ │ │ │ └─Dropout: 5-130 -- 205: │ │ │ │ └─Linear: 5-131 113,246,208 206: │ │ │ │ └─Linear: 5-132 37,748,736 207: │ │ │ └─CodeGenMLP: 4-51 -- 208: │ │ │ │ └─Linear: 5-133 151,019,520 209: │ │ │ │ └─Linear: 5-134 151,001,088 210: │ │ │ │ └─NewGELUActivation: 5-135 -- 211: │ │ │ │ └─Dropout: 5-136 -- 212: │ │ └─CodeGenBlock: 3-18 -- 213: │ │ │ └─LayerNorm: 4-52 12,288 214: │ │ │ └─CodeGenAttention: 4-53 -- 215: │ │ │ │ └─Dropout: 5-137 -- 216: │ │ │ │ └─Dropout: 5-138 -- 217: │ │ │ │ └─Linear: 5-139 113,246,208 218: │ │ │ │ └─Linear: 5-140 37,748,736 219: │ │ │ └─CodeGenMLP: 4-54 -- 220: │ │ │ │ └─Linear: 5-141 151,019,520 221: │ │ │ │ └─Linear: 5-142 151,001,088 222: │ │ │ │ └─NewGELUActivation: 5-143 -- 223: │ │ │ │ └─Dropout: 5-144 -- 224: │ │ └─CodeGenBlock: 3-19 -- 225: │ │ │ └─LayerNorm: 4-55 12,288 226: │ │ │ └─CodeGenAttention: 4-56 -- 227: │ │ │ │ └─Dropout: 5-145 -- 228: │ │ │ │ └─Dropout: 5-146 -- 229: │ │ │ │ └─Linear: 5-147 113,246,208 230: │ │ │ │ └─Linear: 5-148 37,748,736 231: │ │ │ └─CodeGenMLP: 4-57 -- 232: │ │ │ │ └─Linear: 5-149 151,019,520 233: │ │ │ │ └─Linear: 5-150 151,001,088 234: │ │ │ │ └─NewGELUActivation: 5-151 -- 235: │ │ │ │ └─Dropout: 5-152 -- 236: │ │ └─CodeGenBlock: 3-20 -- 237: │ │ │ └─LayerNorm: 4-58 12,288 238: │ │ │ └─CodeGenAttention: 4-59 -- 239: │ │ │ │ └─Dropout: 5-153 -- 240: │ │ │ │ └─Dropout: 5-154 -- 241: │ │ │ │ └─Linear: 5-155 113,246,208 242: │ │ │ │ └─Linear: 5-156 37,748,736 243: │ │ │ └─CodeGenMLP: 4-60 -- 244: │ │ │ │ └─Linear: 5-157 151,019,520 245: │ │ │ │ └─Linear: 5-158 151,001,088 246: │ │ │ │ └─NewGELUActivation: 5-159 -- 247: │ │ │ │ └─Dropout: 5-160 -- 248: │ │ └─CodeGenBlock: 3-21 -- 249: │ │ │ └─LayerNorm: 4-61 12,288 250: │ │ │ └─CodeGenAttention: 4-62 -- 251: │ │ │ │ └─Dropout: 5-161 -- 252: │ │ │ │ └─Dropout: 5-162 -- 253: │ │ │ │ └─Linear: 5-163 113,246,208 254: │ │ │ │ └─Linear: 5-164 37,748,736 255: │ │ │ └─CodeGenMLP: 4-63 -- 256: │ │ │ │ └─Linear: 5-165 151,019,520 257: │ │ │ │ └─Linear: 5-166 151,001,088 258: │ │ │ │ └─NewGELUActivation: 5-167 -- 259: │ │ │ │ └─Dropout: 5-168 -- 260: │ │ └─CodeGenBlock: 3-22 -- 261: │ │ │ └─LayerNorm: 4-64 12,288 262: │ │ │ └─CodeGenAttention: 4-65 -- 263: │ │ │ │ └─Dropout: 5-169 -- 264: │ │ │ │ └─Dropout: 5-170 -- 265: │ │ │ │ └─Linear: 5-171 113,246,208 266: │ │ │ │ └─Linear: 5-172 37,748,736 267: │ │ │ └─CodeGenMLP: 4-66 -- 268: │ │ │ │ └─Linear: 5-173 151,019,520 269: │ │ │ │ └─Linear: 5-174 151,001,088 270: │ │ │ │ └─NewGELUActivation: 5-175 -- 271: │ │ │ │ └─Dropout: 5-176 -- 272: │ │ └─CodeGenBlock: 3-23 -- 273: │ │ │ └─LayerNorm: 4-67 12,288 274: │ │ │ └─CodeGenAttention: 4-68 -- 275: │ │ │ │ └─Dropout: 5-177 -- 276: │ │ │ │ └─Dropout: 5-178 -- 277: │ │ │ │ └─Linear: 5-179 113,246,208 278: │ │ │ │ └─Linear: 5-180 37,748,736 279: │ │ │ └─CodeGenMLP: 4-69 -- 280: │ │ │ │ └─Linear: 5-181 151,019,520 281: │ │ │ │ └─Linear: 5-182 151,001,088 282: │ │ │ │ └─NewGELUActivation: 5-183 -- 283: │ │ │ │ └─Dropout: 5-184 -- 284: │ │ └─CodeGenBlock: 3-24 -- 285: │ │ │ └─LayerNorm: 4-70 12,288 286: │ │ │ └─CodeGenAttention: 4-71 -- 287: │ │ │ │ └─Dropout: 5-185 -- 288: │ │ │ │ └─Dropout: 5-186 -- 289: │ │ │ │ └─Linear: 5-187 113,246,208 290: │ │ │ │ └─Linear: 5-188 37,748,736 291: │ │ │ └─CodeGenMLP: 4-72 -- 292: │ │ 
│ │ └─Linear: 5-189 151,019,520 293: │ │ │ │ └─Linear: 5-190 151,001,088 294: │ │ │ │ └─NewGELUActivation: 5-191 -- 295: │ │ │ │ └─Dropout: 5-192 -- 296: │ │ └─CodeGenBlock: 3-25 -- 297: │ │ │ └─LayerNorm: 4-73 12,288 298: │ │ │ └─CodeGenAttention: 4-74 -- 299: │ │ │ │ └─Dropout: 5-193 -- 300: │ │ │ │ └─Dropout: 5-194 -- 301: │ │ │ │ └─Linear: 5-195 113,246,208 302: │ │ │ │ └─Linear: 5-196 37,748,736 303: │ │ │ └─CodeGenMLP: 4-75 -- 304: │ │ │ │ └─Linear: 5-197 151,019,520 305: │ │ │ │ └─Linear: 5-198 151,001,088 306: │ │ │ │ └─NewGELUActivation: 5-199 -- 307: │ │ │ │ └─Dropout: 5-200 -- 308: │ │ └─CodeGenBlock: 3-26 -- 309: │ │ │ └─LayerNorm: 4-76 12,288 310: │ │ │ └─CodeGenAttention: 4-77 -- 311: │ │ │ │ └─Dropout: 5-201 -- 312: │ │ │ │ └─Dropout: 5-202 -- 313: │ │ │ │ └─Linear: 5-203 113,246,208 314: │ │ │ │ └─Linear: 5-204 37,748,736 315: │ │ │ └─CodeGenMLP: 4-78 -- 316: │ │ │ │ └─Linear: 5-205 151,019,520 317: │ │ │ │ └─Linear: 5-206 151,001,088 318: │ │ │ │ └─NewGELUActivation: 5-207 -- 319: │ │ │ │ └─Dropout: 5-208 -- 320: │ │ └─CodeGenBlock: 3-27 -- 321: │ │ │ └─LayerNorm: 4-79 12,288 322: │ │ │ └─CodeGenAttention: 4-80 -- 323: │ │ │ │ └─Dropout: 5-209 -- 324: │ │ │ │ └─Dropout: 5-210 -- 325: │ │ │ │ └─Linear: 5-211 113,246,208 326: │ │ │ │ └─Linear: 5-212 37,748,736 327: │ │ │ └─CodeGenMLP: 4-81 -- 328: │ │ │ │ └─Linear: 5-213 151,019,520 329: │ │ │ │ └─Linear: 5-214 151,001,088 330: │ │ │ │ └─NewGELUActivation: 5-215 -- 331: │ │ │ │ └─Dropout: 5-216 -- 332: │ │ └─CodeGenBlock: 3-28 -- 333: │ │ │ └─LayerNorm: 4-82 12,288 334: │ │ │ └─CodeGenAttention: 4-83 -- 335: │ │ │ │ └─Dropout: 5-217 -- 336: │ │ │ │ └─Dropout: 5-218 -- 337: │ │ │ │ └─Linear: 5-219 113,246,208 338: │ │ │ │ └─Linear: 5-220 37,748,736 339: │ │ │ └─CodeGenMLP: 4-84 -- 340: │ │ │ │ └─Linear: 5-221 151,019,520 341: │ │ │ │ └─Linear: 5-222 151,001,088 342: │ │ │ │ └─NewGELUActivation: 5-223 -- 343: │ │ │ │ └─Dropout: 5-224 -- 344: │ │ └─CodeGenBlock: 3-29 -- 345: │ │ │ └─LayerNorm: 4-85 12,288 346: │ │ │ └─CodeGenAttention: 4-86 -- 347: │ │ │ │ └─Dropout: 5-225 -- 348: │ │ │ │ └─Dropout: 5-226 -- 349: │ │ │ │ └─Linear: 5-227 113,246,208 350: │ │ │ │ └─Linear: 5-228 37,748,736 351: │ │ │ └─CodeGenMLP: 4-87 -- 352: │ │ │ │ └─Linear: 5-229 151,019,520 353: │ │ │ │ └─Linear: 5-230 151,001,088 354: │ │ │ │ └─NewGELUActivation: 5-231 -- 355: │ │ │ │ └─Dropout: 5-232 -- 356: │ │ └─CodeGenBlock: 3-30 -- 357: │ │ │ └─LayerNorm: 4-88 12,288 358: │ │ │ └─CodeGenAttention: 4-89 -- 359: │ │ │ │ └─Dropout: 5-233 -- 360: │ │ │ │ └─Dropout: 5-234 -- 361: │ │ │ │ └─Linear: 5-235 113,246,208 362: │ │ │ │ └─Linear: 5-236 37,748,736 363: │ │ │ └─CodeGenMLP: 4-90 -- 364: │ │ │ │ └─Linear: 5-237 151,019,520 365: │ │ │ │ └─Linear: 5-238 151,001,088 366: │ │ │ │ └─NewGELUActivation: 5-239 -- 367: │ │ │ │ └─Dropout: 5-240 -- 368: │ │ └─CodeGenBlock: 3-31 -- 369: │ │ │ └─LayerNorm: 4-91 12,288 370: │ │ │ └─CodeGenAttention: 4-92 -- 371: │ │ │ │ └─Dropout: 5-241 -- 372: │ │ │ │ └─Dropout: 5-242 -- 373: │ │ │ │ └─Linear: 5-243 113,246,208 374: │ │ │ │ └─Linear: 5-244 37,748,736 375: │ │ │ └─CodeGenMLP: 4-93 -- 376: │ │ │ │ └─Linear: 5-245 151,019,520 377: │ │ │ │ └─Linear: 5-246 151,001,088 378: │ │ │ │ └─NewGELUActivation: 5-247 -- 379: │ │ │ │ └─Dropout: 5-248 -- 380: │ │ └─CodeGenBlock: 3-32 -- 381: │ │ │ └─LayerNorm: 4-94 12,288 382: │ │ │ └─CodeGenAttention: 4-95 -- 383: │ │ │ │ └─Dropout: 5-249 -- 384: │ │ │ │ └─Dropout: 5-250 -- 385: │ │ │ │ └─Linear: 5-251 113,246,208 386: │ │ │ │ └─Linear: 5-252 37,748,736 387: │ │ │ └─CodeGenMLP: 
4-96 -- 388: │ │ │ │ └─Linear: 5-253 151,019,520 389: │ │ │ │ └─Linear: 5-254 151,001,088 390: │ │ │ │ └─NewGELUActivation: 5-255 -- 391: │ │ │ │ └─Dropout: 5-256 -- 392: │ │ └─CodeGenBlock: 3-33 -- 393: │ │ │ └─LayerNorm: 4-97 12,288 394: │ │ │ └─CodeGenAttention: 4-98 -- 395: │ │ │ │ └─Dropout: 5-257 -- 396: │ │ │ │ └─Dropout: 5-258 -- 397: │ │ │ │ └─Linear: 5-259 113,246,208 398: │ │ │ │ └─Linear: 5-260 37,748,736 399: │ │ │ └─CodeGenMLP: 4-99 -- 400: │ │ │ │ └─Linear: 5-261 151,019,520 401: │ │ │ │ └─Linear: 5-262 151,001,088 402: │ │ │ │ └─NewGELUActivation: 5-263 -- 403: │ │ │ │ └─Dropout: 5-264 -- 404: │ │ └─CodeGenBlock: 3-34 -- 405: │ │ │ └─LayerNorm: 4-100 12,288 406: │ │ │ └─CodeGenAttention: 4-101 -- 407: │ │ │ │ └─Dropout: 5-265 -- 408: │ │ │ │ └─Dropout: 5-266 -- 409: │ │ │ │ └─Linear: 5-267 113,246,208 410: │ │ │ │ └─Linear: 5-268 37,748,736 411: │ │ │ └─CodeGenMLP: 4-102 -- 412: │ │ │ │ └─Linear: 5-269 151,019,520 413: │ │ │ │ └─Linear: 5-270 151,001,088 414: │ │ │ │ └─NewGELUActivation: 5-271 -- 415: │ │ │ │ └─Dropout: 5-272 -- 416: │ └─LayerNorm: 2-4 12,288 417: ├─Linear: 1-2 314,624,000 418: =========================================================================== 419: Total params: 16,032,155,648 420: Trainable params: 16,032,155,648 421: Non-trainable params: 0 422: ===========================================================================