From 42f01acac1a9ad8b4d93087afa24b2cee80d90a1 Mon Sep 17 00:00:00 2001
From: Jeffrey Ke
Date: Thu, 13 Mar 2025 17:31:33 -0400
Subject: [PATCH 1/4] Added -p flag to fix breaking mkdir command

---
 setup.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.sh b/setup.sh
index 83aaab7..86c6bf0 100755
--- a/setup.sh
+++ b/setup.sh
@@ -1,6 +1,6 @@
 python -m pip install -r requirements.txt
-mkdir $1/cleanfid/
-mkdir $1/cleanfid/stats/
+mkdir -p $1/cleanfid/
+mkdir -p $1/cleanfid/stats/
 mkdir -p datasets/
 rm -rf datasets/* # clear directory content
 gdown https://drive.google.com/uc\?id\=1hbzc_P1FuxMkcabkgn9ZKinBwW683j45 -O datasets/
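
Aside (not part of the patch series): plain mkdir errors out when the directory already exists or a parent is missing, and -p covers both cases, so re-running setup.sh becomes safe. For anyone driving the same setup from Python instead of the shell, a minimal equivalent sketch would be (path is made up for illustration):

    import os

    # Same semantics as `mkdir -p`: create missing parents, and do not
    # fail if the directory is already there.
    os.makedirs("out/cleanfid/stats", exist_ok=True)  # hypothetical path
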
From 05fe92843bfd9f516a36714428ff744e41242478 Mon Sep 17 00:00:00 2001
From: Jeffrey Ke
Date: Sun, 16 Mar 2025 12:30:14 -0400
Subject: [PATCH 2/4] figured out downsampleconv2d with all the pixel shuffling

---
 gan/.gitignore  |  1 +
 gan/networks.py | 70 +++++++++++++++++++++++++++++++++++++++++++------
 gan/q1_3.py     |  1 -
 3 files changed, 63 insertions(+), 9 deletions(-)
 create mode 100644 gan/.gitignore

diff --git a/gan/.gitignore b/gan/.gitignore
new file mode 100644
index 0000000..1377554
--- /dev/null
+++ b/gan/.gitignore
@@ -0,0 +1 @@
+*.swp
diff --git a/gan/networks.py b/gan/networks.py
index 97e57f9..d2bccf0 100644
--- a/gan/networks.py
+++ b/gan/networks.py
@@ -27,6 +27,10 @@ def forward(self, x):
         # (batch, channel, height*upscale_factor, width*upscale_factor)
         # 3. Apply convolution and return output
         ##################################################################
+        x = x.repeat([1,self.upscale_factor**2,1,1])
+        shuf = torch.nn.PixelShuffle(upscale_factor=self.upscale_factor)
+        out = shuf(x)
+        return self.conv(out)
         pass
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -54,6 +58,15 @@ def forward(self, x):
         # 3. Take the average across dimension 0, apply convolution,
         # and return the output
         ##################################################################
+        downshuf = torch.nn.PixelUnshuffle(downscale_factor=self.downscale_ratio)
+        out = downshuf(x)
+        B,C_rr,H,W = out.shape
+        C = C_rr // self.downscale_ratio
+        out = out.view(B, self.downscale_ratio**2, C, H, W)
+        out = torch.permute(out, (1, 0, 2, 3, 4))
+        out = torch.mean(out, dim=0)
+        out = self.conv(out)
+        return out
         pass
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -83,7 +96,15 @@ def __init__(self, input_channels, kernel_size=3, n_filters=128):
         ##################################################################
         # TODO 1.1: Setup the network layers
         ##################################################################
-        self.layers = None
+        self.upsample_residual = UpSampleConv2D(input_channels, n_filters, kernel_size)
+        self.layers = torch.nn.Sequential(
+            torch.nn.BatchNorm2d(input_channels), # apparently I need to tell it how many channels there are
+            torch.nn.ReLU(),
+            torch.nn.Conv2d(input_channels, n_filters, kernel_size, bias=False, padding=1),
+            torch.nn.BatchNorm2d(n_filters),
+            torch.nn.ReLU(),
+            UpSampleConv2D(input_channels=n_filters, n_filters=n_filters),
+        )
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -95,7 +116,17 @@ def forward(self, x):
         # connection. Make sure to upsample the residual before adding it
         # to the layer output.
         ##################################################################
-        pass
+        # aha I see: this is a res block, so the input needs to be added
+        # to the output:
+        # this is a clever idea: to make a residual connection a block,
+        # so you can just add res connections as blocks in later layers
+        # and they even give me a helpful upsample_residual thing (wait, can I even use it?)
+        # I think this block assumes a conv layer.
+        out = self.layers(x)
+        in_upsampled = self.up_sample_residual(x) # I NEED the conv2d for the res connection
+        # because out had n_filters # of channels,
+        # whereas x has input_channels
+        return out + in_upsampled
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -122,7 +153,17 @@ def __init__(self, input_channels, kernel_size=3, n_filters=128):
         ##################################################################
         # TODO 1.1: Setup the network layers
         ##################################################################
-        self.layers = None
+        self.downsample_residual = DownSampleConv2D(input_channels=n_filters,
+            n_filters=n_filters,
+            kernel_size=1,
+            padding=1
+            )
+        self.layers = torch.nn.Sequential(
+            torch.nn.ReLU(),
+            torch.nn.Conv2d(input_channels, n_filters, kernel_size, padding=1),
+            torch.nn.ReLU(),
+            DownSampleConv2D(input_channels=n_filters, n_filters=n_filters,),
+        )
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -134,7 +175,8 @@ def forward(self, x):
         # connection. Make sure to downsample the residual before adding
         # it to the layer output.
         ##################################################################
-        pass
+        out = self.layers(x)
+        return out + self.down_sample_residual(x)
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -157,7 +199,15 @@ def __init__(self, input_channels, kernel_size=3, n_filters=128):
         ##################################################################
         # TODO 1.1: Setup the network layers
         ##################################################################
-        self.layers = None
+        self.layers = torch.nn.Sequential(
+            torch.nn.ReLU(), # how come there aren't any batch norms here?
+            torch.nn.Conv2d(input_channels, n_filters, kernel_size, padding=1),
+            torch.nn.ReLU(),
+            torch.nn.Conv2d(n_filters, n_filters, kernel_size, padding=1)
+        )
+        self.reduce_channels = torch.nn.Conv2d(input_channels, n_filters, kernel_size=1)
+        # dimension check: if the input is (C1, H, W) and we have a kernel size of (3,3) and padding 1:
+        # we'll have an output shape of : (C2, H, W): we have equivalent shapes.
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -168,7 +218,8 @@ def forward(self, x):
         # TODO 1.1: Forward the conv layers. Don't forget the residual
         # connection!
         ##################################################################
-        pass
+        out = self.layers(x)
+        return out + self.reduce_channels(x)
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -237,8 +288,11 @@ def __init__(self, starting_image_size=4):
         # TODO 1.1: Set up the network layers. You should use the modules
         # you have implemented previously above.
         ##################################################################
-        self.dense = None
-        self.layers = None
+        self.dense = torch.nn.Linear(128, 2048)
+        self.layers = torch.nn.Sequential(
+            ResBlockUp(input_channels=128, n_filters=128)
+            # ah don't read the wrong layer of indent. A resblock up goes here.
+            # don't I have to reshape the noise?
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
diff --git a/gan/q1_3.py b/gan/q1_3.py
index 6cc92e5..641f490 100644
--- a/gan/q1_3.py
+++ b/gan/q1_3.py
@@ -1,7 +1,6 @@
 import argparse
 import os
 from utils import get_args
-
 import torch
 from networks import Discriminator, Generator
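
Aside (not part of the patch series): a standalone sanity check of the pixel-shuffle bookkeeping used above; the tensor sizes are made up and only torch is assumed. The channel arithmetic on the downsampling path is the detail the next patch ("proofread") corrects: PixelUnshuffle multiplies the channel count by downscale_ratio**2, so recovering the original channel count means dividing by the ratio squared, not by the ratio.

    import torch

    B, C, H, W = 2, 3, 8, 8   # made-up sizes for the check
    r = 2                     # up/downscale factor
    x = torch.randn(B, C, H, W)

    # Upsampling path from this patch: duplicate the channels r**2 times, then
    # PixelShuffle folds the extra channels into space, restoring C channels.
    up = torch.nn.PixelShuffle(r)(x.repeat(1, r ** 2, 1, 1))
    assert up.shape == (B, C, H * r, W * r)

    # Downsampling path: PixelUnshuffle packs each r x r patch into channels,
    # giving C * r**2 channels. Dividing by r (as in this patch) gives the
    # wrong channel count; dividing by r**2 (as in the next patch) recovers C.
    down = torch.nn.PixelUnshuffle(r)(x)
    C_rr = down.shape[1]
    assert C_rr == C * r ** 2
    assert C_rr // r ** 2 == C and C_rr // r != C

    # With the corrected C, the view/permute/mean recipe from the TODO yields
    # the expected (B, C, H/r, W/r) tensor before the block's conv is applied.
    grouped = down.view(B, r ** 2, C, H // r, W // r)
    pooled = grouped.permute(1, 0, 2, 3, 4).mean(dim=0)
    assert pooled.shape == (B, C, H // r, W // r)
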
From b67fa74d646af2dd21f7e49ecd27b51596ae54bd Mon Sep 17 00:00:00 2001
From: Jeffrey Ke
Date: Sun, 16 Mar 2025 18:17:57 -0400
Subject: [PATCH 3/4] proofread

---
 gan/networks.py | 93 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 65 insertions(+), 28 deletions(-)

diff --git a/gan/networks.py b/gan/networks.py
index d2bccf0..a01d29d 100644
--- a/gan/networks.py
+++ b/gan/networks.py
@@ -29,8 +29,10 @@ def forward(self, x):
         ##################################################################
         x = x.repeat([1,self.upscale_factor**2,1,1])
         shuf = torch.nn.PixelShuffle(upscale_factor=self.upscale_factor)
-        out = shuf(x)
-        return self.conv(out)
+        after_shuf = shuf(x)
+        after_conv = self.conv(after_shuf) # why the .repeat command doesn't screw us here:
+        # PixelShuffle brings the number of channels back to x's original channel count
+        return after_conv
         pass
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -59,14 +61,14 @@ def forward(self, x):
         # and return the output
         ##################################################################
         downshuf = torch.nn.PixelUnshuffle(downscale_factor=self.downscale_ratio)
-        out = downshuf(x)
-        B,C_rr,H,W = out.shape
-        C = C_rr // self.downscale_ratio
-        out = out.view(B, self.downscale_ratio**2, C, H, W)
-        out = torch.permute(out, (1, 0, 2, 3, 4))
-        out = torch.mean(out, dim=0)
-        out = self.conv(out)
-        return out
+        after_downshuf = downshuf(x)
+        B,C_rr,H,W = after_downshuf.shape
+        C = C_rr // self.downscale_ratio**2
+        after_reshape = after_downshuf.view(B, self.downscale_ratio**2, C, H, W)
+        after_permute = torch.permute(after_reshape, (1, 0, 2, 3, 4))
+        after_mean = torch.mean(after_permute, dim=0)
+        after_conv = self.conv(after_mean)
+        return after_conv
         pass
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -103,7 +105,7 @@ def __init__(self, input_channels, kernel_size=3, n_filters=128):
             torch.nn.Conv2d(input_channels, n_filters, kernel_size, bias=False, padding=1),
             torch.nn.BatchNorm2d(n_filters),
             torch.nn.ReLU(),
-            UpSampleConv2D(input_channels=n_filters, n_filters=n_filters),
+            UpSampleConv2D(input_channels=n_filters, n_filters=n_filters, padding=1),
         )
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -122,11 +124,11 @@ def forward(self, x):
         # so you can just add res connections as blocks in later layers
         # and they even give me a helpful upsample_residual thing (wait, can I even use it?)
         # I think this block assumes a conv layer.
-        out = self.layers(x)
-        in_upsampled = self.up_sample_residual(x) # I NEED the conv2d for the res connection
-        # because out had n_filters # of channels,
+        after_layers = self.layers(x)
+        res = self.upsample_residual(x) # I NEED the conv2d for the res connection
+        # because after_layers had n_filters # of channels,
         # whereas x has input_channels
-        return out + in_upsampled
+        return after_layers + res
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -153,16 +155,15 @@ def __init__(self, input_channels, kernel_size=3, n_filters=128):
         ##################################################################
         # TODO 1.1: Setup the network layers
         ##################################################################
-        self.downsample_residual = DownSampleConv2D(input_channels=n_filters,
+        self.downsample_residual = DownSampleConv2D(input_channels=input_channels,
             n_filters=n_filters,
             kernel_size=1,
-            padding=1
            )
         self.layers = torch.nn.Sequential(
             torch.nn.ReLU(),
             torch.nn.Conv2d(input_channels, n_filters, kernel_size, padding=1),
             torch.nn.ReLU(),
-            DownSampleConv2D(input_channels=n_filters, n_filters=n_filters,),
+            DownSampleConv2D(input_channels=n_filters, n_filters=n_filters, padding=1),
         )
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -175,8 +176,9 @@ def forward(self, x):
         # connection. Make sure to downsample the residual before adding
         # it to the layer output.
         ##################################################################
-        out = self.layers(x)
-        return out + self.down_sample_residual(x)
+        after_layers = self.layers(x)
+        res = self.downsample_residual(x)
+        return after_layers + res
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -218,8 +220,9 @@ def forward(self, x):
         # TODO 1.1: Forward the conv layers. Don't forget the residual
         # connection!
         ##################################################################
-        out = self.layers(x)
-        return out + self.reduce_channels(x)
+        after_layers = self.layers(x)
+        res = self.reduce_channels(x)
+        return after_layers + res
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -290,7 +293,15 @@
         ##################################################################
         self.dense = torch.nn.Linear(128, 2048)
         self.layers = torch.nn.Sequential(
-            ResBlockUp(input_channels=128, n_filters=128)
+            ResBlockUp(input_channels=128, n_filters=128),
+            ResBlockUp(input_channels=128, n_filters=128),
+            ResBlockUp(input_channels=128, n_filters=128),
+            torch.nn.BatchNorm2d(128),
+            torch.nn.ReLU(),
+            torch.nn.Conv2d(128, 3, kernel_size=3, padding=1),
+            torch.nn.Tanh()
+        )
+        self.starting_image_size = starting_image_size
         # ah don't read the wrong layer of indent. A resblock up goes here.
         # don't I have to reshape the noise?
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
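
Aside (not part of the patch series): the reason every one of these blocks carries its own shortcut module (upsample_residual, downsample_residual, reduce_channels) is that the residual add needs both operands to have matching shapes, and the main branch changes the channel count (and, for the up/down blocks, the resolution). A tiny self-contained illustration with made-up shapes:

    import torch

    x = torch.randn(2, 64, 16, 16)                 # block input: 64 channels
    main = torch.nn.Conv2d(64, 128, 3, padding=1)  # stand-in for the block's main branch
    out = main(x)                                  # (2, 128, 16, 16): channel count changed

    try:
        _ = out + x                                # 128 vs 64 channels: the add fails
    except RuntimeError as err:
        print("direct residual add fails:", err)

    shortcut = torch.nn.Conv2d(64, 128, kernel_size=1)  # 1x1 projection, like reduce_channels
    _ = out + shortcut(x)                          # shapes now agree: (2, 128, 16, 16)

For ResBlockUp/ResBlockDown the shortcut must also change the spatial size, which is why it is an UpSampleConv2D/DownSampleConv2D rather than a plain 1x1 conv.
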
@@ -304,7 +315,13 @@ def forward_given_samples(self, z):
         # been passed in. Don't forget to re-shape the output of the dense
         # layer into an image with the appropriate size!
         ##################################################################
-        pass
+        # shape is going to be (N,z) where z is 128
+        after_linear = self.dense(z)
+        after_reshape = after_linear.view(-1, 128, self.starting_image_size, self.starting_image_size)
+        # needs to be 128 channels because that's what ResBlockUp is expecting
+        after_layers = self.layers(after_reshape)
+        return after_layers
+
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -315,7 +332,9 @@ def forward(self, n_samples: int = 1024):
         # TODO 1.1: Generate n_samples latents and forward through the
         # network.
         ##################################################################
-        pass
+        samples = torch.randn(n_samples, 128)
+        after_forward_given_samples = self.forward_given_samples(samples)
+        return after_forward_given_samples
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -379,8 +398,16 @@ def __init__(self):
         # TODO 1.1: Set up the network layers. You should use the modules
         # you have implemented previously above.
         ##################################################################
-        self.dense = None
-        self.layers = None
+        self.dense = torch.nn.Linear(128, 1)
+        self.layers = torch.nn.Sequential(
+            ResBlockDown(input_channels=3, n_filters=128),
+            ResBlockDown(input_channels=128, n_filters=128), # downscaling by 2
+            ResBlock(input_channels=128, n_filters=128),
+            ResBlock(input_channels=128, n_filters=128),
+            torch.nn.ReLU()
+        )
+
+
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
@@ -392,7 +419,17 @@ def forward(self, x):
         # have been passed in. Make sure to sum across the image
         # dimensions after passing x through self.layers.
         ##################################################################
-        pass
+        # x shape is (N, 3, H, W)
+        # I have N images, trying to output (N,1)
+        # don't I want sigmoid? I'm outputting probability of fake/real?
+        # maybe I'm adding sigmoid later?
+        after_layers = self.layers(x)
+        # shape is (N, 128, H/4, W/4), need to go to (N, 128)
+        after_sum = torch.sum(after_layers, dim=(2,3))
+        after_dense = self.dense(after_sum)
+        return after_dense
+
+
         ##################################################################
         # END OF YOUR CODE #
         ##################################################################
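
Aside (not part of the patch series): on the "don't I want sigmoid?" question in Discriminator.forward — returning the raw score is the usual choice. In the BCE formulation the sigmoid is folded into the loss for numerical stability, and hinge or Wasserstein-style losses consume the raw score directly. Which loss this repo's training loop uses is not visible in these patches, so the snippet below is only a generic illustration with made-up tensors:

    import torch

    scores_real = torch.randn(8, 1)   # stand-in for discriminator(real_batch)
    scores_fake = torch.randn(8, 1)   # stand-in for discriminator(fake_batch)

    # BCE-style GAN loss: the sigmoid lives inside BCEWithLogitsLoss, so the
    # discriminator itself should output unbounded logits, not probabilities.
    bce = torch.nn.BCEWithLogitsLoss()
    d_loss_bce = (bce(scores_real, torch.ones_like(scores_real))
                  + bce(scores_fake, torch.zeros_like(scores_fake)))

    # Hinge loss (SN-GAN/BigGAN-style discriminators): also takes the raw
    # scores, with no sigmoid anywhere.
    d_loss_hinge = (torch.relu(1.0 - scores_real).mean()
                    + torch.relu(1.0 + scores_fake).mean())
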
From cde84b5f20a8a2cfda09d5085687ab36d31eee4f Mon Sep 17 00:00:00 2001
From: Jeffrey Ke
Date: Sun, 16 Mar 2025 18:18:10 -0400
Subject: [PATCH 4/4] added lr scheduler

---
 gan/train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gan/train.py b/gan/train.py
index eb2ce23..0ab3ce6 100644
--- a/gan/train.py
+++ b/gan/train.py
@@ -28,8 +28,8 @@ def get_optimizers_and_schedulers(gen, disc):
     # The learning rate for the generator should be decayed to 0 over
     # 100K iterations.
     ##################################################################
-    scheduler_discriminator = None
-    scheduler_generator = None
+    scheduler_discriminator = torch.optim.lr_scheduler.CosineAnnealingLR(optim_discriminator, 500000)  # what are the schedulers that the later gan papers used?
+    scheduler_generator = torch.optim.lr_scheduler.CosineAnnealingLR(optim_generator, 100000)
     ##################################################################
     # END OF YOUR CODE #
     ##################################################################
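
Aside (not part of the patch series): CosineAnnealingLR does reach its minimum (eta_min defaults to 0) at T_max, so the generator schedule above does decay to 0 over 100K iterations, just along a cosine curve; the 500K horizon for the discriminator is this commit's own choice rather than something the TODO specifies. If a straight linear decay to zero were wanted instead, a LambdaLR sketch along these lines would do it (the dummy optimizer below only makes the snippet self-contained; in train.py the real optim_generator would be used, with one scheduler.step() per training iteration):

    import torch

    # Dummy parameter/optimizer so the sketch runs on its own.
    params = [torch.nn.Parameter(torch.zeros(1))]
    optim_generator = torch.optim.Adam(params, lr=2e-4)

    def linear_to_zero(total_iters):
        # LambdaLR multiplies the base lr by this factor:
        # 1.0 at step 0, 0.0 once total_iters steps have been taken.
        return lambda step: max(0.0, 1.0 - step / total_iters)

    scheduler_generator = torch.optim.lr_scheduler.LambdaLR(
        optim_generator, lr_lambda=linear_to_zero(100_000))

    for _ in range(3):              # one scheduler step per training iteration
        optim_generator.step()
        scheduler_generator.step()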