diff --git a/CITATION b/CITATION index 3139b19..9c29544 100644 --- a/CITATION +++ b/CITATION @@ -1,3 +1,3 @@ Please cite as: -Ed Bennett, Lester Hedges, Matt Williams, "Introduction to automated testing and continuous integration in Python" +Ed Bennett, Lester Hedges, Julian Lenz, Matt Williams, "Introduction to automated testing and continuous integration in Python" diff --git a/_episodes/02-pytest-functionality.md b/_episodes/02-pytest-functionality.md index af90e2c..7f7a238 100644 --- a/_episodes/02-pytest-functionality.md +++ b/_episodes/02-pytest-functionality.md @@ -36,7 +36,7 @@ Lets add a second test to check a different set of inputs and outputs to the ~~~ from arrays import add_arrays -def test_add_arrays1(): +def test_add_arrays_positive(): a = [1, 2, 3] b = [4, 5, 6] expect = [5, 7, 9] @@ -45,7 +45,7 @@ def test_add_arrays1(): assert output == expect -def test_add_arrays2(): +def test_add_arrays_negative(): a = [-1, -5, -3] b = [-4, -3, 0] expect = [-5, -8, -3] @@ -73,8 +73,8 @@ rootdir: /home/matt/projects/courses/software_engineering_best_practices plugins: requests-mock-1.8.0 collected 2 items -test_arrays.py::test_add_arrays1 PASSED [ 50%] -test_arrays.py::test_add_arrays2 PASSED [100%] +test_arrays.py::test_add_arrays_positive PASSED [ 50%] +test_arrays.py::test_add_arrays_negative PASSED [100%] ==================== 2 passed in 0.07s ===================== ~~~ @@ -166,6 +166,57 @@ test_arrays.py::test_add_arrays[a1-b1-expect1] PASSED [100%] We see that both tests have the same name (`test_arrays.py::test_add_arrays`) but each parametrization is differentiated with some square brackets. +Unfortunately, in the current form this differentiation is not very helpful. If +you run this test later, you might not remember what `a0-b0-expect0` means, let +alone the precise numbers or the motivation for choosing them. Were those the +positive inputs or the negative ones? Did I choose them after fixing a +particular bug, because they are an important use case, or were they just random +numbers? + +Luckily, we are not the first ones to realise that the above form of +parametrization misses the expressiveness of explicit function names. That's why +there is an additional `ids` keyword argument: the following code + +~~~ +import pytest + +from arrays import add_arrays + +@pytest.mark.parametrize("a, b, expect", [ + ([1, 2, 3], [4, 5, 6], [5, 7, 9]), + ([-1, -5, -3], [-4, -3, 0], [-5, -8, -3]), +], ids=['positive', 'negative']) +def test_add_arrays(a, b, expect): + output = add_arrays(a, b) + + assert output == expect +~~~ +{: .language-python} + +now results in the significantly more expressive + +~~~ +=================== test session starts ==================== +platform linux -- Python 3.8.5, pytest-6.0.1, py-1.9.0, pluggy-0.13.1 -- /usr/bin/python3 +cachedir: .pytest_cache +rootdir: /home/matt/projects/courses/software_engineering_best_practices +plugins: requests-mock-1.8.0 +collected 2 items + +test_arrays.py::test_add_arrays[positive] PASSED [ 50%] +test_arrays.py::test_add_arrays[negative] PASSED [100%] + +==================== 2 passed in 0.03s ===================== +~~~ +{: .output} + +If the arguments are more naturally represented as strings than the lists in our +example here, `pytest` often does a reasonably good job of generating `ids` +automatically from the values (we will see some examples of this in the next +section).
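For instance (a hypothetical illustration, not part of the `arrays` example used in this lesson), string parameters show up directly in the generated test ids:

~~~
import pytest


@pytest.mark.parametrize("word, expect", [
    ("hello", 5),
    ("python", 6),
])
def test_word_length(word, expect):
    assert len(word) == expect
~~~
{: .language-python}

Here `pytest` would build the ids from the values themselves and report something like `test_word_length[hello-5]` and `test_word_length[python-6]`.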
But this still lacks the intentional communication that is associated +with manually chosen `ids`, so we strongly recommend using `ids` in all but the +most trivial cases. > ## More parameters > @@ -185,6 +236,7 @@ but each parametrization is differentiated with some square brackets. >> ([-1, -5, -3], [-4, -3, 0], [-5, -8, -3]), # Test zeros >> ([41, 0, 3], [4, 76, 32], [45, 76, 35]), # Test larger numbers >> ([], [], []), # Test empty lists ->> ]) +>> ], ids=["positive", "negative", "larger numbers", "empty lists"]) >> def test_add_arrays(a, b, expect): >> output = add_arrays(a, b) @@ -195,7 +247,6 @@ but each parametrization is differentiated with some square brackets. > {: .solution} {: .challenge} - ## Failing correctly The interface of a function is made up of the _parameters_ it expects and the @@ -280,6 +331,7 @@ from arrays import add_arrays @pytest.mark.parametrize("a, b, expect", [ ([1, 2, 3], [4, 5, 6], [5, 7, 9]), ([-1, -5, -3], [-4, -3, 0], [-5, -8, -3]), -]) +], ids=["positive", "negative"]) def test_add_arrays(a, b, expect): output = add_arrays(a, b) @@ -307,8 +359,8 @@ rootdir: /home/matt/projects/courses/software_engineering_best_practices plugins: requests-mock-1.8.0 collected 3 items -test_arrays.py::test_add_arrays[a0-b0-expect0] PASSED [ 33%] -test_arrays.py::test_add_arrays[a1-b1-expect1] PASSED [ 66%] +test_arrays.py::test_add_arrays[positive] PASSED [ 33%] +test_arrays.py::test_add_arrays[negative] PASSED [ 66%] test_arrays.py::test_add_arrays_error PASSED [100%] ==================== 3 passed in 0.03s ===================== @@ -326,6 +378,7 @@ test_arrays.py::test_add_arrays_error PASSED [100%] >> @pytest.mark.parametrize("a, b, expected_error", [ >> ([1, 2, 3], [4, 5], ValueError), >> ([1, 2], [4, 5, 6], ValueError), ->> ]) +>> ], ids=['second shorter', 'first shorter']) >> def test_add_arrays_error(a, b, expected_error): >> with pytest.raises(expected_error): @@ -354,6 +407,7 @@ test_arrays.py::test_add_arrays_error PASSED [100%] >> ([6], [3], [2]), # Test single-element lists >> ([1, 2, 3], [4, 5, 6], [0.25, 0.4, 0.5]), # Test non-integers >> ([], [], []), # Test empty lists ->> ]) +>> ], ids=["int", "negative int", "single-element", "non-int", "empty lists"]) >> def test_divide_arrays(a, b, expect): >> output = divide_arrays(a, b) @@ -365,6 +419,7 @@ test_arrays.py::test_add_arrays_error PASSED [100%] >> ([1, 2, 3], [4, 5], ValueError), >> ([1, 2], [4, 5, 6], ValueError), >> ([1, 2, 3], [0, 1, 2], ZeroDivisionError), ->> ]) +>> ], ids=['second shorter', 'first shorter', 'zero division']) >> def test_divide_arrays_error(a, b, expected_error): >> with pytest.raises(expected_error): diff --git a/_episodes/03-fixtures.md b/_episodes/03-fixtures.md index 1d4658f..89cc0c3 100644 --- a/_episodes/03-fixtures.md +++ b/_episodes/03-fixtures.md @@ -414,7 +414,8 @@ being done once. > behaviour of the tests, and pytest prioritises correctness of the tests over > their performance. > -> What sort of behavior would functions have that failed in this way? +> What sort of behavior would functions have that failed in this way? Can you +> come up with example code for this? > >> ## Solution >> @@ -425,6 +426,80 @@ being done once. >> >> Fixtures should only be re-used within groups of tests that do not mutate >> them.
+>> +>> ~~~ +>> @pytest.fixture(scope="session") +>> def initially_empty_list(): +>> return [] +>> +>> +>> @pytest.mark.parametrize("letter", ["a", "b", "c"]) +>> def test_append_letter(initially_empty_list, letter): +>> initially_empty_list.append(letter) +>> assert initially_empty_list == [letter] +>> ~~~ +>> {: .language-python} +> {: .solution} +{: .challenge} + +> ## Better ways to (unit) test +> +> The above example was explicitly constructed to acquire an expensive resource +> and exhibit a big advantage when using a fixture, but is it actually a good +> way to test the `word_counts` function? Think about what `word_counts` is +> supposed to do. Do you need a whole book to test this? +> +> List advantages and disadvantages of the above approach. Then, come up with +> another way of testing it that cures the disadvantages (maybe also losing +> some of the advantages). Is your approach simpler and less error-prone? +> +> It is safe to assume that whenever you test such a function, it is supposed to +> be used in a larger project. Can you think of a test scenario where the +> original method is the best? +> +>> ## Solution +>> +>> The `word_counts` function is designed to count words in any string. It does +>> not need a whole book to test counting, so we could have also used tiny test +>> strings like `""`, `"hello world"`, `"hello, hello world"` to test all +>> functionality of `word_counts` (see the sketch below). In fact, the original approach has a number +>> of disadvantages: +>> +>> * It is (time) expensive because it needs to download the book every time the +>> test suite is run. (2s for a single test is a very long time if you want to run +>> a test suite of hundreds of such tests every few minutes.) +>> * It is brittle in various ways: +>> - If you don't have an internet connection, your test fails. +>> - If the URL changes, your test fails. +>> - If the content changes, your test fails (we had that a few times). +>> * It is very obscure because you cannot know if the numbers we have given you +>> are correct. Maybe the function has a bug that we don't know about because +>> admittedly we also just used the output of that function to generate our +>> test cases. +>> +>> The one big advantage of the above is that you are using realistic test data. +>> As opposed to the string `"hello world"`, the book likely contains a lot of +>> different words, potentially different capitalisation and spellings, +>> additional punctuation and maybe special characters that your function may or +>> may not handle correctly. You might need a lot of different test strings to +>> cover all these cases (and combinations thereof). +>> +>> The alternative approach with tiny test strings cures all of the +>> disadvantages listed above, and the tests will be easy to read, understand and +>> verify, particularly if you use expressive test function names and parameter +>> `ids`. This is the best way to write a unit test, i.e. a test that is +>> concerned with a single unit of functionality in isolation and will likely +>> be run hundreds of times during a coding session. +>> +>> Nevertheless, in a bigger project you would want to have other kinds of +>> tests, too. The `word_counts` functionality will probably be integrated into +>> a larger aspect of functionality, e.g., a statistical analysis of books. In +>> such a case, it is equally important to test that the integration of the +>> various individually tested units works correctly.
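To make the tiny-string approach concrete, here is a minimal sketch of such unit tests. It assumes that `word_counts(text)` returns a dictionary-like mapping of words to counts and that it can be imported as below (the module name `books` is a placeholder); the expected values would need adjusting to however the real function handles punctuation and capitalisation:

~~~
import pytest

from books import word_counts


@pytest.mark.parametrize("text, expect", [
    ("", {}),
    ("hello world", {"hello": 1, "world": 1}),
    ("hello hello world", {"hello": 2, "world": 1}),
], ids=["empty string", "two distinct words", "repeated word"])
def test_word_counts_tiny_strings(text, expect):
    # Each case is small enough to verify by eye, runs instantly,
    # and needs no network access.
    assert word_counts(text) == expect
~~~
{: .language-python}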
Such integration tests (as opposed to the unit tests sketched above) +>> will be run less often and can be more meaningful because they exercise more +>> realistic circumstances. For such tests -- and definitely for the even broader +>> end-to-end tests that run a whole program from the (simulated) user input to +>> a final output -- the original approach is well-suited. > {: .solution} {: .challenge} diff --git a/_episodes/04-edges.md b/_episodes/04-edges.md index 7b4228c..f19270b 100644 --- a/_episodes/04-edges.md +++ b/_episodes/04-edges.md @@ -114,7 +114,7 @@ def test_left_edge(): assert c.neighbours() == 3 # Check the coordinates of the neighbours. - assert c.left() == None + assert c.left() is None assert c.right() == (1, 2) assert c.up() == (0, 3) assert c.down() == (0, 1) @@ -146,10 +146,10 @@ def test_bottom_left_corner(): assert c.neighbours() == 2 # Check the coordinates of the neighbours. - assert c.left() == None + assert c.left() is None assert c.right() == (1, 0) assert c.up() == (0, 1) - assert c.down() == None + assert c.down() is None ~~~ {: .language-python} diff --git a/_episodes/05-randomness.md b/_episodes/05-randomness.md index 4e40863..a3eab52 100644 --- a/_episodes/05-randomness.md +++ b/_episodes/05-randomness.md @@ -173,11 +173,14 @@ that it is relatively bug-free for the cases we've tested for. Of course, so far we've only tested 6-sided dice—we have no guarantee that it works for other numbers of sides, yet. -You can extend this approach to any programming problem where you don't know the -exact answer up front, including those that are random and those that are just -exploratory. Start by focusing on what you do know, and write tests for that. As -you understand more what the expected results are, you can expand the test -suite. +The important upshot of this approach is that, even though we could not +predict the exact return value of our function, we were still able to test +invariants and guarantees that it is known to uphold. You can extend this +approach to any programming problem where the exact return value of a function +cannot be meaningfully tested for, including those that are random or out of +your control and those that are just exploratory. Start by focusing on what you +do know, and write tests for that. As you understand more what the expected +results are, you can expand the test suite. > ## Two six-sided dice > diff --git a/_episodes/06-continuous-integration.md b/_episodes/06-continuous-integration.md index 67c7011..03e3033 100644 --- a/_episodes/06-continuous-integration.md +++ b/_episodes/06-continuous-integration.md @@ -338,6 +338,23 @@ next to the first commit, a green tick (passed) next to the second, and nothing > check all code against a defined house style (for example, PEP 8). {: .callout} +> ## pre-commit +> +> Another helpful developer tool somewhat related to CI is +> [pre-commit][pre-commit] (or, more generally, `git` hooks). Hooks let you +> perform certain actions locally, triggered by various `git`-related events +> such as before or after a commit, merge, or push. A standard use case is +> running automated formatters or code linters before every commit or push, but +> other things are possible too, like updating a version number. One major +> difference with respect to CI is that each developer on your team has to +> manually install the hooks themselves and could therefore choose not to do so.
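As a minimal sketch of the underlying mechanism (a plain `git` hook rather than the `pre-commit` tool itself; the choice of `black --check` is just an illustration), an executable file at `.git/hooks/pre-commit` could be a small Python script whose non-zero exit status makes `git` abort the commit:

~~~
#!/usr/bin/env python3
"""A minimal git pre-commit hook: refuse to commit unformatted code."""
import subprocess
import sys

# Run the formatter in check-only mode; it exits with a non-zero status
# if any file would need reformatting.
result = subprocess.run(["black", "--check", "."])

# Propagate that status: anything non-zero makes git abort the commit.
sys.exit(result.returncode)
~~~
{: .language-python}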
Because hooks live only in each developer's local clone (as +> opposed to CI running in a central repository), they cannot enforce anything; +> they are purely a convenience for the programmer, while CI can be used to +> reject pushes or pull requests automatically. Furthermore, +> you are supposed to commit often and, hence, committing should be a fast and +> lightweight action. Therefore, the pre-commit developers explicitly discourage +> running expensive test suites as a pre-commit hook. +> {: .callout} > ## Try it yourself > @@ -366,3 +383,4 @@ next to the first commit, a green tick (passed) next to the second, and nothing [pypi]: https://pypi.org [starter-workflows]: https://github.com/actions/starter-workflows [yaml]: https://en.wikipedia.org/wiki/YAML +[pre-commit]: https://pre-commit.com diff --git a/_episodes/07-coverage.md b/_episodes/07-coverage.md index 57be482..30f6d6d 100644 --- a/_episodes/07-coverage.md +++ b/_episodes/07-coverage.md @@ -98,6 +98,8 @@ the consistency checks in the `__init__()` method of `Cell`, and methods such as methods) to have at least one test, so this test suite would benefit from being expanded. +> ## How much coverage do I need? +> > It's worth pointing out again that 100% coverage is not essential for a good > test suite. If the coverage is below 100%, then that indicates that it's worth > understanding where the uncovered lines of code are, and whether it is worth > @@ -116,6 +118,22 @@ expanded. > between projects. {: .callout} +> ## Configuring `coverage` +> +> `coverage` and `pytest-cov` are configurable via a configuration file, by +> default an INI-style file called `.coveragerc` (the same settings can also be +> placed in `setup.cfg` or `pyproject.toml`). Various details about behaviour +> and output can be adjusted there. Most notably, explicit exceptions can be +> defined that exclude certain files, blocks or lines from the coverage report. +> +> This is useful in various situations; you can, e.g., exclude the test files +> from the coverage report to reduce noise, or change the command-line output. +> +> Another, more opinionated, idea is to indeed aim for 100% code coverage but to +> explicitly exclude whatever you consider unimportant to test. While +> opponents say that this is just cheating, you have at least made a conscious +> decision to exclude a piece of code and explicitly documented it in a file +> (ideally with a comment explaining the decision). +{: .callout} ## Coverage and continuous integration diff --git a/_episodes/08-exercise.md b/_episodes/08-exercise.md index 691242e..5654f70 100644 --- a/_episodes/08-exercise.md +++ b/_episodes/08-exercise.md @@ -1,12 +1,14 @@ --- title: "Putting it all together" -teaching: 5 +teaching: 10 exercises: 90 questions: - "How can I apply all of these techniques at once to a real application?" objectives: - "Be able to apply testing and CI techniques to a piece of research software." keypoints: +- "Tests can have very different purposes and you should keep in mind the broad + applicability of automated testing." - "Testing and CI work well together to identify problems in research software and allow them to be fixed quickly." - "If anything is unclear, or you get stuck, please ask for help!" --- @@ -15,6 +17,87 @@ Now we have developed a range of skills relating to testing and continuous integration, we can try putting them into practice on a real piece of research software. +But before we do so, let us quickly recap what we learnt today. + +## The purpose of a test + +When you write a test, you do this to gain (and, in the case of automated tests, +maintain) confidence in the correctness of your code.
In detail, however, your tests +can serve a variety of purposes. It can be useful to keep in mind what you could +use tests for during your coding, so we have compiled a (certainly non-exhaustive) list +of test purposes here. The major ones were discussed in the lesson; some more +exotic ones should be seen as suggestions for you to try. A test can have more +than one of these purposes: + +* **test for features** - The first and simplest test that is capable of verifying + the correctness of any feature (in the broadest sense) of your code. This is + the obvious purpose of a test and encountered everywhere in the lesson. +* **test for confidence (in a narrow sense)** - Additional tests of the same + features that are redundant in that they repeat a test for features with + qualitatively similar input just to double-check. This is also encountered in + the lesson: e.g., in the solution of "More parameters" in [pytest + features][pytest_features], large numbers aren't really that different from + other numbers unless you run into overflow errors (one could even argue that + testing the negative numbers in [pytest features][pytest_features] is not + qualitatively different either and is rather there to double-check). One should be aware of + the difference between testing for confidence and necessary feature tests. The + former is a convenience that comes at the cost of longer test runs, so it + is not always desirable to test redundantly (although doing so is certainly better than + missing an aspect). +* **test for edge-/corner-cases** - Test special input or conditions for which a + general algorithm needs to be specialised (e.g., NaNs, infinities, overflows, + empty input, etc.). We did a [whole episode][edge_cases] on this. +* **test for failures** - This is part of feature testing but important enough to + mention explicitly: The conditions under which your code fails are part of + your interface and need to be tested. The user (that probably includes + yourself) might rely on a raised exception or returned default value in the + case of failure. Make sure they can rely on it, and think of all the cases that your + current approach cannot handle. Any changes in these (even those for the + better) are changes of the interface and should only happen intentionally. This was + discussed in [pytest features][pytest_features]. +* **fuzz testing** - This is broader than testing for failures; if you have + inexperienced or even malicious users of your project, they might run your + code with inputs or under conditions that do not make any sense at all and are + almost impossible to predict. Fuzz testing (or fuzzing) is a strategy where you let the + computer run your code with random input (sometimes down to the bit level) and + make sure that not even the most far-fetched input can break your code. There + are libraries for that, so you don't have to set up all the boilerplate + yourself. +* **regression test** - After you have found a bug, you can write a test reproducing + the precise conditions under which the bug appeared. Once you have fixed it, the + test will pass, and if a later change risks introducing this bug again, you + can rest assured that it will be immediately signalled by a failing test. +* **test as a reminder** - In most contemporary test frameworks, you can mark a test + as an "expected failure". Such tests are run during your standard test runs + but the test framework will complain if they don't fail. This can be a + convenient way of marking a to-do or a known bug that you don't have time to + fix at the moment.
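For instance, `pytest` implements this with the `xfail` marker. The sketch below builds on the `add_arrays` function from earlier in the lesson, but the missing feature and the reason string are purely hypothetical:

~~~
import pytest

from arrays import add_arrays


# A "reminder" test: we would like add_arrays to support adding a plain
# number to every element one day, but it does not do so yet.  pytest
# reports this test as "xfail" rather than as a failure, and flags it as
# "XPASS" if it unexpectedly starts passing.
@pytest.mark.xfail(reason="scalar addition is not implemented yet")
def test_add_scalar_to_array():
    assert add_arrays([1, 2, 3], 1) == [2, 3, 4]
~~~
{: .language-python}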
Such a reminder test will preserve your precise intention, e.g., the exact + conditions of the bug in code form, and it can be an important piece of information if + a bug disappears unexpectedly. Maybe another code change had an effect you + did not intend? +* **test for fixing an external interface** - You can even test code you did not write + yourself. If you rely on a particular library, you don't have control over the + evolution of that library, so it can be a good idea to write a few test cases + that just use the interface of that library in the same way that your code does. If the + developers ever change or deprecate something about that interface, you don't have to + chase down a rabbit hole of function calls to get to the bottom of it but + instead have a (hopefully well-named) test that immediately signals where the + problem lies. +* **test for learning an external interface** - When you start using a new library, + you might play around with it for a while before using it in production just + to learn how it's used. Why not preserve this in automated tests? You get the + same effect as if you, e.g., wrote a script or used an interactive session, but + you can come back and have a look at it again later. Also, you immediately fix + the external interface (see the previous item). + +This list is certainly not something you want to implement as a whole. Some of +the purposes might simply not apply (e.g. fuzz testing if you don't have +external users) or might not be worth the extra effort (e.g. fixing an external +interface that is expected to be very stable). But you might find yourself in a +situation where some of these are appropriate tools for your problem, and you +might want to come back from time to time and refresh your memory. That said, +let's dive into the final exercise. + ## The software We are going to work with `pl_curves`, a piece of research software developed by @@ -34,10 +117,16 @@ the different bacteria are. It already has tests written for most functions. > the badges. > 4. Create a virtual environment on your computer for the project, and install > the project's requirements, so you can run the test suite locally. -> 5. Currently, some of the tests for the repository fail. Work out why this is +> 5. The current code is very outdated by now and you will see in a moment that +> it does not work with a standard contemporary Python installation anymore. +> Assuming for a moment that the tests did not exist, how would you feel about +> the task of updating the code to run _correctly_ on a modern machine? Where +> would you start? How confident would you feel that each and every line of +> code works as intended? +> 6. Now we turn to the tests. Some of them currently fail. Work out why this is > happening, and fix the issues. Check that they are fixed in the CI workflow > as well. -> 6. Currently, the code is only tested for Python versions up to 3.6. Since +> 7. Currently, the code is only tested for Python versions up to 3.6. Since > Python has moved on now, add 3.7, 3.8 and 3.9 as targets for the CI. Do the > tests pass now? If not, identify what has caused them to fail, and fix the > issues you identify. This is an important reason for having a test suite: > Without a test suite, you don't know whether this has happened until > someone points out that your new results don't match your older ones! > Having CI set up allows easy testing of multiple different versions. -> 7.
Currently the code is being tested against Ubuntu 18.04 (released April 2018). +> 8. Currently the code is being tested against Ubuntu 18.04 (released April 2018). > A new long term support release of Ubuntu came out in April 2020 (version 20.04). > Upgrade the operating system being tested from Ubuntu 18.04 to Ubuntu 20.04. > As with upgrading Python, the test suite helps us check that the code still > runs on a newer operating system. -> 8. Upgrade to the most recent version of Pandas. Again, see if this breaks +> 9. Upgrade to the most recent version of Pandas. Again, see if this breaks > anything. If it does, then fix the issues, and ensure that the test suite > passes again. > @@ -64,3 +153,5 @@ the different bacteria are. It already has tests written for most functions. [pl-curves]: https://github.com/CDT-AIMLAC/pl_curves +[pytest_features]: https://edbennett.github.io/python-testing-ci/02-pytest-functionality/index.html +[edge_cases]: https://edbennett.github.io/python-testing-ci/04-edges/index.html diff --git a/files/code-testing.zip b/files/code-testing.zip index f946700..cf5e745 100644 Binary files a/files/code-testing.zip and b/files/code-testing.zip differ