diff --git a/README.md b/README.md new file mode 100644 index 0000000..bb3b988 --- /dev/null +++ b/README.md @@ -0,0 +1,57 @@ +# typst-theorems + +An implementation of numbered theorem environments in +[typst](https://github.com/typst/typst). +Copy and import the [theorems.typ](theorems.typ) file to use in your own projects. + +Minimal example below; also see [example.typ](example.typ) for a demonstration of more features, and [differential_calculus.typ](differential_calculus.typ) for a practical use case. + +![basic example](basic.png) + +``` +#import "theorems.typ": * + +#set page(width: 16cm, height: auto, margin: 1.5cm) +#set heading(numbering: "1.1.") + +#let theorem = thmbox("theorem", "Theorem", fill: rgb("#eeffee")) +#let corollary = thmplain( + "corollary", + "Corollary", + base: "theorem", + titlefmt: strong +) +#let definition = thmbox("definition", "Definition") + +#let example = thmplain("example", "Example").with(numbering: none) +#let proof = thmplain( + "proof", + "Proof", + base: "theorem", + bodyfmt: body => [#body #h(1fr) $square$] +).with(numbering: none) + + += Prime numbers + +#definition[ + A natural number is called a _prime number_ if it is greater than 1 + and cannot be written as the product of two smaller natural numbers. +] +#example[The numbers $2$, $3$, and $17$ are prime.] + +#theorem(name: "Euclid")[ + There are infinitely many primes. +] +#proof[ + Suppose to the contrary that $p_1, p_2, dots p_n$ is a finite enumeration + of all primes. Set $P = p_1 p_2 dots p_n$. Since $P + 1$ is not in our list, + it cannot be prime. Thus, some prime factor $p_j$ divides $P + 1$. + Since $p_j$ also divides $P$, it must divide the difference $(P + 1) - P = + 1$, a contradiction. +] + +#corollary[There is no largest prime number.] +#corollary[There are infinitely many composite numbers.] 
+``` + diff --git a/basic.pdf b/basic.pdf new file mode 100644 index 0000000..beef127 Binary files /dev/null and b/basic.pdf differ diff --git a/basic.png b/basic.png new file mode 100644 index 0000000..d2901ae Binary files /dev/null and b/basic.png differ diff --git a/basic.typ b/basic.typ new file mode 100644 index 0000000..b384903 --- /dev/null +++ b/basic.typ @@ -0,0 +1,51 @@ +#import "theorems.typ": * + +#set page(width: 16cm, height: auto, margin: 1.5cm) +#set text(font: "Linux Libertine", lang: "en") +#set heading(numbering: "1.1.") + +#let theorem = thmbox("theorem", "Theorem", fill: rgb("#eeffee")) +#let corollary = thmplain( + "corollary", + "Corollary", + base: "theorem", + titlefmt: strong +) +#let definition = thmbox("definition", "Definition") + +#let example = thmplain("example", "Example").with(numbering: none) +#let proof = thmplain( + "proof", + "Proof", + base: "theorem", + bodyfmt: body => [#body #h(1fr) $square$] +).with(numbering: none) + + += Prime numbers + +#definition[ + A natural number is called a _prime number_ if it is greater than 1 + and cannot be written as the product of two smaller natural numbers. +] +#example[ + The numbers $2$, $3$, and $17$ are prime. +] + +#theorem(name: "Euclid")[ + There are infinitely many primes. +] +#proof[ + Suppose to the contrary that $p_1, p_2, dots p_n$ is a finite enumeration of + all primes. Set $P = p_1 p_2 dots p_n$. Since $P + 1$ is not in + our list, it cannot be prime. Thus, some prime factor $p_j$ divides $P + 1$. + Since $p_j$ also divides $P$, it must divide the difference $(P + 1) - P = + 1$, a contradiction. +] + +#corollary[ + There is no largest prime number. +] +#corollary[ + There are infinitely many composite numbers. 
+] diff --git a/differential_calculus.pdf b/differential_calculus.pdf new file mode 100644 index 0000000..654db5f Binary files /dev/null and b/differential_calculus.pdf differ diff --git a/differential_calculus.typ b/differential_calculus.typ new file mode 100644 index 0000000..dbc9717 --- /dev/null +++ b/differential_calculus.typ @@ -0,0 +1,723 @@ +#import "theorems.typ": * + +// Define theorem environments + +#let theorem = thmbox( + "theorem", + "Theorem", + fill: rgb("#e8e8f8") +) +#let lemma = thmbox( + "theorem", // Lemmas use the same counter as Theorems + "Lemma", + fill: rgb("#efe6ff") +) +#let corollary = thmbox( + "corollary", + "Corollary", + base: "theorem", // Corollaries are 'attached' to Theorems + fill: rgb("#f8e8e8") +) + +#let definition = thmbox( + "definition", // Definitions use their own counter + "Definition", + fill: rgb("#e8f8e8") +) + +#let exercise = thmbox( + "exercise", + "Exercise", + stroke: rgb("#ffaaaa") + 1pt, + base: "", // Unattached: count globally +).with(numbering: "I") // Use Roman numerals + +// Examples and remarks are not numbered +#let example = thmplain("example", "Example").with(numbering: none) +#let remark = thmplain( + "remark", + "Remark", + inset: 0em +).with(numbering: none) + +// Proofs are attached to theorems, although they are not numbered +#let proof = thmplain( + "proof", + "Proof", + base: "theorem", + bodyfmt: body => [ + #body #h(1fr) $square$ // Insert QED symbol + ] +).with(numbering: none) + +#let solution = thmplain( + "solution", + "Solution", + base: "exercise", + inset: 0em, +).with(numbering: none) + + +// Template + +#let project(title: "", authors: (), body) = { + set document(author: authors, title: title) + set text(font: "Linux Libertine", lang: "en") + set heading(numbering: "1.1.", ) + set par(justify: true) + + show heading: it => [ + #v(1em) + #it + ] + + align(center)[ + #block(text(weight: 700, 1.75em, title)) + #v(2em) + ] + + outline(fill: none, indent: true) + + body +} + +// 
Shorthand for vectors +#let va = $bold(a)$ +#let vb = $bold(b)$ +#let vx = $bold(x)$ +#let vy = $bold(y)$ +#let vz = $bold(z)$ +#let vv = $bold(v)$ +#let vw = $bold(w)$ +#let ve = $bold(e)$ + +// Mapping arrow +#let mapsto = $arrow.r.bar$ + +// Operators +#let grad = (x) => $nabla #x$ +#let ip = (x, y) => $angle.l #x, #y angle.r$ +#let pp = (x, y) => $(diff #x) / (diff #y)$ +#let dd = (x, y) => $(d #x) / (d #y)$ +#let Df = $D f$ +#let Dg = $D g$ +#let DT = $D T$ + + +// Document starts here + +#show: project.with( + title: "Notes on Differential Calculus" +) + += Differentiability + +#definition[ + Let $f : (a, b) -> RR^n$, and let $f_i = pi_i compose f$ be its + components. Then, $f$ is differentiable at $t_0 in (a, b)$ if the + following limit exists. $ + f'(t_0) = lim _(h -> 0) frac(f(t_0 + h) - f(t_0), h) . + $ + #remark[ + The vector $f'(t_0)$ represents the tangent to the curve $f$ at the + point $f(t_0)$. The full tangent line is the parametric curve $f(t) + + f'(t_0)(t - t_0)$. + ] +] + +#definition[ + Let $U subset.eq RR^n$ be open, and let $f: U -> RR^m$. Then, $f$ is + differentiable at $x in U$ if there exists a linear transformation $ + lambda : RR^n -> RR^m$ such that $ + lim _(h -> 0) frac(f(x + h) - f(x) - lambda h, norm(h)) = 0. + $ The derivative of $f$ at $x$ is denoted by $lambda = Df(x)$. + #remark[ + In a neighbourhood of $x$, we may approximate $ + f(x + h) approx f(x) + Df(x)(h). + $ + ] + #remark[ + The statement that this quantity goes to zero means that each of the + $m$ components must also go to zero. For each of these limits, there + are $n$ axes along which we can let $h -> 0$. As a result, we obtain + $m times n$ limits, which allow us to identify the $m times n$ + components of the matrix representing the linear transformation + $lambda$ (in the standard basis). These are the partial derivatives of + $f$, and the matrix of $lambda$ is the Jacobian matrix of $f$ + evaluated at $x$. 
+ ] +] + +#example[ + Let $T: RR^n -> RR^m$ be a linear map. By choosing $ lambda = T$, we see + that $T$ is differentiable everywhere, with $DT(x) = T$ for every choice + of $x in RR^n$. This is made obvious by the fact that the best linear + approximation of a linear map at some point is the map itself; indeed, the + 'approximation' is exact. +] + +#lemma[ + If $f: RR^n -> RR^m$ is differentiable at $x in RR^n$, with derivative + $Df(x)$, then + + $f$ is continuous at $x$. + + The linear transformation $Df(x)$ is unique. +] +#proof[ + We prove the second part. Suppose that $lambda$, $mu$ satisfy the + requirements for $Df(x)$; it can be shown that $ lim _(h -> 0) ( lambda - + mu )h \/ norm(h) = 0$. Now, if $ lambda v != mu v$ for some + non-zero vector $v in RR^n$, then $ + lambda v - mu v = frac(lambda (t v) - mu(t v), norm(t v)) + dot.c norm(v) -> 0, + $ a contradiction. +] + += Chain rule + +#exercise[ + Let $T: RR^n -> RR^m$ be a linear transformation. Then, there exists $M + > 0$ such that for all $ vx in RR^n$, we have $ + norm(T vx ) <= M norm( vx ) . + $ + #solution[ + Set $ vv _i = T( ve _i)$ where $ ve _i$ are the standard unit basis + vectors of $RR^n$. Then, $ + norm(T vx ) + = norm( sum _(i) x_i vv _i) + <= sum _(i) norm(x_i vv _i) + <= max norm( vv _i) sum _i |x_i|. + $ Since each $|x_i| <= norm( vx ) $, set $M = n max norm( vv _i) + $ and write $ + norm(T vx ) + <= max norm( vv _i) sum _i |x_i| + <= max norm( vv _i) dot.c n norm( vx ) + = M norm( vx ) . + $ + ] +] +#theorem(name: "Chain Rule")[ + Let $f: RR^n -> RR^m$, $g: RR^m -> RR^k$ where $f$ is differentiable at $a + in RR^n$ and $g$ is differentiable at $f(a) in RR^m$. Then, $g compose + f$ is differentiable, with $D(g compose f)(a) = Dg(f(a)) compose Df(a)$. + Note that this means that the Jacobian matrices simply multiply. +] +#proof[ + Set $b = f(a) in RR^m$, $ lambda = Df(a)$, $ mu = Dg(f(a))$. 
Define + $ + phi : RR^n -> RR^m, quad phi (x) &= f(x) - f(a) - lambda (x + - a), \ + psi : RR^m -> RR^k, quad psi (y) &= g(y) - g(b) - mu (y - b). + $ + We claim that $ + lim _(x -> a) frac(g compose f(x) - g compose f(a) - mu compose lambda (x + - a), norm(x - a) ) = 0. + $ Write the numerator as $ + g compose f(x) - g compose f(a) - mu compose lambda (x - a) = psi (f(x)) + + mu ( phi (x)). + $ Note that $ + lim _(x -> a) frac( phi (x), norm(x - a) ) = 0, quad + lim _(y -> b) frac( psi (y), norm(y - b) ) = 0. + $ Thus, find $M > 0$ such that $ norm( mu ( phi (x))) <= + M norm( phi (x)) $ for all $x in RR^n$, hence $ + lim _(x -> a) frac( norm( mu ( phi (x))) , norm(x - a) ) <= + lim _(x -> a) frac(M norm( phi (x)) , norm(x - a) ) = 0. + $ Now write $ + lim _(f(x) -> b) frac( psi (f(x)), norm(f(x) - b) ) = 0, + $ hence for any $ epsilon > 0$, there is a neighbourhood of $b$ on which $ + norm( psi (f(x))) <= epsilon norm(f(x) - b) = epsilon + norm( phi (x) + lambda (x - a)) . + $ Apply the triangle inequality and find $M' > 0$ such that $ + norm( psi (f(x))) <= + epsilon norm( phi (x)) + epsilon M' norm(x - a) . + $ Thus, $ + lim _(x -> a) frac( norm( psi (f(x))) , norm(x - a) ) <= + lim _(x -> a) frac( epsilon norm( phi (x)) , norm(x - a) ) + epsilon + M' = epsilon M'. + $ Since $ epsilon > 0$ was arbitrary, this limit is zero, completing the proof. +] + + += Partial derivatives + +#definition[ + Let $U subset.eq RR^n$ be open, and let $f: U -> RR$. The partial + derivative of $f$ with respect to the coordinate $x_j$ at some $a in U$ + is defined by the following limit, if it exists. $ + pp(f, x_j) (a) = lim _(h -> 0) frac(f(a + h ve_j) - f(a), h) . + $ +] +#lemma[ + If $f: U -> RR$ is differentiable at a point $a in RR^n$, then $ + Df(a)(x_1, dots , x_n) = x_1 \, pp(f, x_1) (a) + dots + x_n + \, pp(f, x_n) (a). + $ +] +#example[ + Consider $ + f: RR^2 -> RR, quad (x, y) mapsto cases( + (x y) \/ (x^2 + y^2)\, & " if " (x, y) != (0, 0), + 0\, & " if " (x, y) = (0, 0). 
+ ) + $ Note that $f$ is not differentiable at $(0, 0)$; it is not even continuous + there. However, both partial derivatives of $f$ exist at $(0, 0)$. +] + +#lemma[ + If $f: RR^n -> RR^m$ is differentiable at $a in RR^n$, then the matrix + representation of $Df(a)$ in the standard basis is given by $ + [Df(a)] = [ pp(f_i, x_j) (a)]_(i j). + $ +] + +#lemma[ + Let $f: RR^n -> RR^m$ be differentiable at $a in RR^n$, and let $g: RR^m + -> RR^k$ be differentiable at $f(a) in RR^m$. Then, the matrix + representation of $D(g compose f)(a)$ in the standard basis is the + product $ + [D(g compose f)(a)] = [Dg(f(a))][Df(a)] = [ sum _( ell = 1)^m + pp(g_i, y_ ell ) pp(f_ ell , x_j) ]_(i j). + $ In other words, $ + pp(, x_j) (g compose f)_i(a) = sum _( ell = 1)^m pp(g_i, y_ ell ) (f(a)) + pp(f_ ell , x_j) (a). + $ +] + +#example[ + Let $f: RR^2-> RR$ be differentiable, and let $ Gamma (f) = {(x, y, f(x, + y)): x, y in RR}$ be the graph of $f$. Now, let $gamma : [-1, 1] -> + Gamma (f)$ be a differentiable curve, represented by $ + gamma (t) = (g(t), h(t), f(g(t), h(t))). + $ Then, we can compute the derivative $ + gamma '(a) = (g'(a), h'(a), lr(g'(a) pp(f, x) + h'(a) pp(f, y) + \|)_((g(a), h(a)))) + $ +] + +#exercise[ + Consider the inner product map, $ ip( dot.c , dot.c ) : RR^n times RR^n + -> RR$. What is its derivative? + #solution[ + We treat the inner product as a map $g: RR^(2n) -> RR$, which acts + as $ + ip( vx , vy ) colon.eq g(x_1, dots , x_n, y_1, dots , y_n) = x_1y_1 + + dots + x_n y_n. + $ Now, note that $ + pp(g, x_i) = y_i, quad pp(g, y_i) = x_i. + $ Thus, $ + Dg( va , vb )( vx , vy ) &= sum _(i = 1)^n x_i pp(g, x_i) ( va , vb ) + + sum _(i = 1)^n y_i pp(g, y_i) ( va , vb ) \ + &= sum _(i = 1)^n x_i b_i + sum _(i = 1)^n y_i a_i \ + &= ip( vx , vb ) + ip( vy , va ) . + $ + In other words, the matrix representation of the derivative of the inner + product map at the point $( va , vb )$ is given by $[ vb^top + va^top]$. 
+ ] +] + +#exercise[ + Let $gamma : RR -> RR^n$ be a differentiable curve. What is the + derivative of the real map $t mapsto norm( gamma (t)) ^2$? + #solution[ + We write this map as $t mapsto ip( gamma (t), gamma (t)) $. Consider the + scheme $ + RR -> RR^(2n) -> RR, quad + t mapsto mat( + gamma (t) ; gamma (t) + ) mapsto ip( gamma (t), gamma (t)) . + $ Pick a point $t in RR$, whence the derivative of the map at $t$ is $ + mat( + gamma (t)^top , gamma (t)^top + ) mat( + gamma '(t) ; gamma '(t) + ) = 2 ip( gamma (t), gamma '(t)) . + $ + ] + #remark[ + Consider the surface $S^(n - 1) subset RR^n$, and pick an arbitrary + differentiable curve $gamma : RR -> S^(n - 1)$. Now, the tangent + vector $gamma '(t)$ is tangent to the sphere $S^(n - 1)$ at any point + $gamma (t)$. We claim that this tangent drawn at $ gamma (t)$ is always + perpendicular to the position vector $ gamma (t)$. This is made trivial by + our exercise: the map $t mapsto norm( gamma (t)) ^2 = 1$ is a constant + map since $gamma$ is a curve on the unit sphere. This means that it has + zero derivative, forcing $ ip( gamma (t), gamma '(t)) = 0$. + ] +] + +== Directional derivatives + +#definition[ + Let $U subset.eq RR^n$ be open, and let $f: U -> RR$. The directional + derivative of $f$ along a direction $ vv in RR^n$ at a point $a in U$ is + defined by the following limit, if it exists. $ + nabla _v f(a) = lim _(h -> 0) frac(f(a + h vv ) - f(a), h) . + $ +] + +#example[ + Consider $ + f: RR^2 -> RR, quad (x, y) mapsto cases( + x^3\/(x^2 + y^2)\, & " if " (x, y) != (0, 0), + 0\, & " if " (x, y) = (0, 0). + ) + $ Note that $f$ is not differentiable at $(0, 0)$. However, all + directional derivatives of $f$ exist at $(0, 0)$. Indeed, + consider a direction $( cos theta , sin theta )$, and examine the limit + $ + lim_(t -> 0) frac(1, t) [f(t cos theta , t sin theta ) - f(0, + 0)] = cos ^3 theta . + $ +] + +#definition[ + Let $f: RR^n -> RR$ be differentiable. 
The gradient of $f$ is defined + as the map $ + grad(f) : RR^n -> RR^n, quad x mapsto + [ pp(f, x_i) (x)]_i. + $ + #remark[ + The gradient at a point $x in RR^n$ is thought of as a vector. In + contrast, the derivative is thought of as a linear transformation. + Otherwise, we see that $ grad(f) (x) = [Df(x)]$. + ] +] + +#definition[ + Let $C^1(RR^n)$ be the set of real-valued differentiable functions on $RR^n$. + Fix a point $a in RR^n$, then fix a tangent vector $v in RR^n$. Then, the + map $ + nabla _v: C^1(RR^n) -> RR, quad f mapsto Df(a)(v) + $ is a linear functional. The quantity $ nabla _v f$ is called the + directional derivative of $f$ in the direction $v$ at the point $a$. + #remark[ + We can represent $ nabla _v$ as the operator $ + nabla _v ( dot.c ) = D( dot.c )(a)(v) = sum _i v_i lr(pp(, x_i)\|)_a = + v dot.c nabla ( dot.c ). + $ + ] +] + +#lemma[ + The directional derivatives $ nabla _v$ form a vector space called the tangent + space, attached to the point $a in RR^n$. This can be identified with the + vector space $RR^n$ by the natural map $ nabla _v mapsto v$. The standard + basis can be informally denoted by the vectors $ + nabla_( ve _1) colon.eq pp(, x_1) , quad dots quad, nabla _( ve _n) colon.eq pp(, x_n) . + $ +] + + +== Differentiation on manifolds \* + +#definition[ + A homeomorphism is a continuous, bijective map whose inverse is also continuous. +] + +#lemma[ + Let $f: RR^n -> RR$ be continuous. Denote the graph of $f$ as $ + Gamma (f) = \{(x, f(x)): x in RR^n\}. + $ Then, $ Gamma (f)$ is a smooth manifold. +] +#proof[ + Consider the homeomorphism $ + phi : Gamma (f) -> RR^n, quad (x, f(x)) mapsto x. + $ This is clearly bijective, continuous (restriction of a projection map), + with a continuous inverse (from the continuity of $f$). Call this + homeomorphism $phi$ a coordinate map on $Gamma (f)$. +] + +#definition[ + Let $f: M -> RR$ where $M$ is a smooth manifold, with a coordinate map + $phi : M -> RR^n$. 
We say that $f$ is differentiable at a point $a + in M$ if $f compose phi ^(-1): RR^n -> RR$ is differentiable at + $ phi (a)$. +] + +#definition[ + Let $f: M -> RR$ where $M$ is a smooth manifold, let $ phi : M + -> RR^n$ be a coordinate map, and let $a in M$. Let $gamma : RR -> M$ + be a curve such that $ gamma (0) = a$, and further let $gamma$ be + differentiable in the sense that $phi compose gamma : RR -> RR^n$ is + differentiable. The directional derivative of $f$ at $a$ along $gamma$ is + defined as $ + dd(, t) f( gamma (t)) \|_ (t = 0) = lim _(h -> 0) lr(frac(f( gamma (t + + h)) - f( gamma (t)), h) \|)_ (t = 0). + $ Note that we are taking the derivative of $f compose gamma : RR -> RR$ + in the conventional sense. +] + +#lemma[ + Let $ gamma _1$ and $gamma _2$ be two curves in $M$ such that $gamma_1(0) = + gamma _2(0) = a$, and $ + dd(, t) lr( phi compose gamma _1(t) \|)_ (t = 0) + = dd(, t) lr( phi compose gamma _2(t) \|)_ (t = 0). + $ In other words, $gamma _1$ and $ gamma _2$ pass through the same point $a$ + at $t = 0$, and have the same velocities there. Then, the directional + derivatives of $f$ at $a$ along $gamma _1$ and $ gamma _2$ are the same. +] + +#definition[ + Let $M$ be a smooth manifold, and let $a in M$. Consider the following + equivalence relation on the set of all curves $gamma$ in $M$ such that + $gamma (0) = a$. $ + gamma_1 tilde.op gamma_2 quad arrow.r.double.long quad + dd(, t) lr( phi compose gamma _1(t) \|)_ (t = 0) + = dd(, t) lr( phi compose gamma _2(t) \|)_ (t = 0). + $ Each resultant equivalence class of curves is called a tangent vector at + $a in M$. Note that all these curves in a particular equivalence class pass + through $a$ with the same velocity vector. + + The collection of all such tangent vectors, i.e. the space of all curves + through $a$ modulo the equivalence relation which identifies curves with the + same velocity vector through $a$, is called the tangent space to $M$ at $a$, + denoted $T_a M$. 
+ + #remark[ + Each tangent vector $v in T_a M$ acts on a differentiable function + $f: M -> RR$ yielding a (well-defined) directional derivative at + $a$. $ + v: C^1(M) -> RR, quad f mapsto dd(, t) + lr(f( gamma _v(t)) \|)_ (t = 0). + $ + Thus, the tangent space represents all the directions in which taking a + derivative of $f$ makes sense. + ] + #remark[ + The tangent space $T_a M$ is a vector space. Upon fixing $f$, the map + $Df(a): T_a M -> RR$, $v mapsto v f(a)$ is a linear functional on + the tangent space. + ] + #remark[ + Given a tangent vector $v in T_a M$, it can be identified with its + corresponding velocity vector in $RR^n$. Thus, the tangent space $T_a M$ + can be identified with the geometric tangent plane drawn to the manifold + $M$ at the point $a$. + ] +] + + += Mean value theorem +Consider a differentiable function $f: RR^n -> RR$, and fix $a in RR^n$. +Define the functions $ + g_i: RR -> RR, quad g_i(x) = f(a_1, dots , a_(i - 1), x, a_(i + 1), + dots , a_n). +$ Then, each $g_i$ is differentiable, with $ + g_i'(x) = pp(f, x_i) (a_1, dots , a_(i - 1), x, a_(i + 1), dots , a_n). +$ By applying the Mean Value Theorem on some interval $[c, d]$, we can find +$ alpha in (c, d)$ such that $g_i(d) - g_i(c) = g_i'( alpha )(d - c)$. In other +words, +$ + f( dots , d, dots ) - f( dots , c, dots ) = pp(f, x_i) ( dots , alpha , dots )(d + - c). +$ + +#theorem[ + Let $f: RR^n -> RR^m$ and $a in RR^n$. Then, $f$ is differentiable at + $a$ if all the partial derivatives $diff f\/diff x_j$ exist in a + neighbourhood of $a$ and are continuous at $a$. +] +#proof[ + Without loss of generality, let $m = 1$. We claim that $ + lim _(h -> 0) frac(1, norm(h) ) norm(f(a + h) - f(a) - sum _(i = 1) ^n + pp(f, x_i) (a)h_i) = 0. 
+ $ Examine $ + f(a + h) - f(a) &= f(a_1 + h_1, dots , a_n + h_n) - f(a_1, dots , + a_n) \ + &= f(a_1 + h_1, dots , a_n + h_n) - f(a_1 + h_1, dots , a_(n - 1) + + h_(n - 1), a_n) + \ + & quad f(a_1 + h_1, dots , a_(n - 1) + h_(n - 1), a_n) - f(a_1 + h_1, + dots , a_(n - 1), a_n) + \ + & quad dots.h \ + & quad f(a_1 + h_1, a_2, dots , a_n) - f(a_1, dots , a_n) \ + &= pp(f, x_n) (c_n)h_n + dots + pp(f, x_1) (c_1)h_1. + $ + The last step follows from the Mean Value Theorem. As $h -> 0$, each $c_i + -> a$. Thus, $ + frac(1, norm(h)) norm(f(a + h) - f(a) - sum _(i = 1)^n + pp(f, x_i) (a)h_i) + &= frac(1, norm(h)) norm( sum _(i = 1) ^n (pp(f, x_i)(c_i) - + pp(f, x_i) (a))h_i) \ + & <= sum _(i = 1)^n | pp(f, x_i) (c_i) - pp(f, x_i) (a) | + frac(|h_i|, norm(h) ) \ + & <= sum _(i = 1)^n | pp(f, x_i) (c_i) - pp(f, x_i) (a) |. + $ + Taking the limit $h -> 0$, observe that $(diff f\/diff x_i) (c_i) -> + (diff f \/ diff x_i) (a)$ by the continuity of the partial derivatives, + completing the proof. +] + +#corollary[ + All polynomial functions on $RR^n$ are differentiable. +] + +#theorem[ + Let $f: RR^n -> RR$ be differentiable with continuous partial + derivatives, and let $a in RR^n$ be a point of local maximum. Then, $Df(a) = + 0$. +] +#proof[ + We need only show that each $ + pp(f, x_i) (a) = 0. + $ This must be true, since $a$ is also a local maximum of each of the + restrictions $g_i$ as defined earlier. +] + += Inverse and implicit function theorems +#theorem(name: [Inverse function theorem])[ + Let $f: RR^n -> RR^n$ be continuously differentiable on a neighbourhood + of $a in RR^n$, and let $"det" (Df(a)) != 0$. Then, there exist neighbourhoods + $U$ of $a$ and $W$ of $f(a)$ such that the restriction $f: U -> W$ is + invertible. Furthermore, $f^(-1)$ is continuous on $W$ and differentiable on + $W$. +] + +#lemma[ + Consider a continuously differentiable function $f: RR^n -> RR$, and + let $M$ denote the surface defined by the zero set of $f$. 
Then, $M$ can be + represented as the graph of a differentiable function $h: RR^(n - 1) -> + RR$ at those points where $Df != 0$. +] +#proof[ + Without loss of generality, suppose that $diff f \/ diff x_n != 0$ + at some point $a in M$. It can be shown that the map $ + F: RR^n -> RR^n , quad x mapsto (x_1, x_2, dots , x_(n - 1), + f(x)) + $ is invertible in a neighbourhood $W$ of $a$, with a continuous and + differentiable inverse of the form + $ + G: RR^n -> RR^n, quad u mapsto (u_1, u_2, dots , u_(n - 1), + g(u)). + $ + Since $F compose G$ must be the identity map on $W$, we demand $ + (x_1, x_2, dots , x_(n - 1), f(x_1, x_2, dots , x_(n - 1), g(x))) = (x_1, + x_2, dots , x_(n - 1), x_n). + $ Thus, the zero set of $f$ in this neighbourhood of $a$ satisfies $x_n = + 0$, hence $ + f(x_1, x_2, dots , x_(n - 1), g(x_1, x_2, dots , x_(n - 1), 0)) = 0. + $ In other words, the part of the surface $M$ in the neighbourhood of $a$ is + precisely the set of points $ + (x_1, x_2, dots , x_(n - 1), g(x_1, x_2, dots , x_(n - 1), 0)). + $ Simply set $ + h: RR^(n - 1) -> RR, quad x mapsto g(x_1, x_2, dots , x_(n - 1), + 0), + $ whence the surface $M$ is locally represented by the graph of $h$. +] + +#block(inset: 1em)[ +#remark()[ + Note that by using $ + f(x_1, dots , x_(n - 1), h(x_1, dots , x_(n - 1))) = 0 + $ on the surface, we can use the chain rule to conclude that for all $1 <= + i < n$, we have $ + pp(f, x_i) (a) + pp(f, x_n) (a) pp(h, x_i) (a_1, dots , a_(n - 1)) = 0. + $ +] +] + +#theorem(name: [Implicit function theorem])[ + Let $f: RR^n times RR^m -> RR^m$ be continuously differentiable in an open + set containing $(a, b)$, with $f(a, b) = 0$. Let $"det" (diff f^j \/ diff + x_(n + k) (a, b)) != 0$. Then, there exists an open set $U subset RR^n$ + containing $a$, an open set $V subset RR^m$ containing $b$, and a + differentiable function $g: U -> V$ such that $f(x, g(x)) = 0$. + + #remark[ + The condition on the determinant can be rephrased as + $"rank" Df(a, b) = m$. 
+ ] +] + +#theorem[ + Let $f: RR^n -> RR$ be continuously differentiable, and let $M$ be the + surface defined by its zero set. Furthermore, let $ grad(f) (a) != 0$ for + some $a in M$; thus, $M$ can be locally represented by a graph on $RR^(n + - 1)$. Then, $ grad(f) (a)$ is normal to the tangent vectors drawn at $a$ + to $M$; in fact, the perpendicular space of $ grad(f) (a)$ is precisely + the tangent space $T_a M$. +] +#proof[ + Consider a tangent vector drawn at $a$ to $M$, represented by the + differentiable curve $gamma : RR -> M$, $ gamma (0) = a$; note that + we use the identification $gamma '(0) = v in RR^n$. Then, calculate $ + dd(, t) lr(f( gamma (t)) \|)_ (t = 0) = Df( gamma (0))( gamma '(0)) + = Df(a)(v). + $ On the other hand, we have $f( gamma (t)) = 0$ identically. Thus, $ + v dot.c grad(f) (a) = Df(a)(v) = 0 + $ as claimed. +] + += Taylor's theorem + +#theorem(name: "Clairaut")[ + Let $f: RR^n -> RR$ have continuous second order partial derivatives. + Then, $ + frac(diff ^2 f, diff x_i diff x_j) = frac(diff ^2 + f, diff x_j diff x_i). + $ +] + +#theorem(name: "Taylor")[ + Let $f: RR^2 -> RR$ have continuous second order partial derivatives, + and let $(x_0, y_0) in RR^2$. Then, there exists $ epsilon > 0$ such that + for all $norm((x - x_0, y - y_0)) < epsilon$, + $ + f(x, y) = &f(x_0, y_0) + pp(f, x) (x - x_0) + pp(f, y) (y - y_0) \ + & + space frac(1, 2) frac(diff^2 f, diff x^2)(x - x_0)^2 + frac(1, 2) + frac(diff^2 f, diff y^2)(y - y_0)^2 \ + & + space frac(diff ^2 f, diff x diff y) (x - x_0)(y - y_0) + R(x, + y), + $ + where as $(x, y) -> (x_0, y_0)$, the remainder term vanishes as $ + frac(|R(x, y)|, norm((x - x_0, y - y_0) ) ^2) -> 0. + $ All partial derivatives here are evaluated at $(x_0, y_0)$. +] +#proof[ + This follows from applying Taylor's Theorem in one variable to the real + function $g: RR -> RR$, $t mapsto f((1 - t)(x_0, y_0) + t(x, y))$. 
+] + + += Critical points and extrema +#definition[ + We say that $a in RR^n$ is a critical point of $f: RR^n -> RR$ if all + $diff f \/ diff x^j = 0$ there. +] + +#lemma[ + All points of extrema of a differentiable function are critical points. +] +#proof[ + We already know that $Df(a) = 0$ where $a$ is either a point of maximum or + minimum. +] + +#example[ + In order to find a point of extrema of a $C^2$-smooth function $f: RR^2 + -> RR$, we first identify a critical point $(x_0, y_0)$. Next, we must find + a neighbourhood of $(x_0, y_0)$ which contains no other critical points -- to + do this, apply Taylor's Theorem. Indeed, we see that $ + f(x, y) = f(x_0, y_0) + A(x - x_0)^2 + 2B(x - x_0)(y - y_0) + C(y - + y_0)^2 + R_2. + $ For non-degeneracy of solutions, we demand $A C - B^2 != 0$, i.e. at + $(x_0, y_0)$, we want $ + [ frac(diff ^2 f, diff x diff y) ]^2 != + frac(diff ^2 f, diff x^2) + frac(diff ^2 f, diff y^2) . + $ + + If $A C - B^2 > 0$ and $diff ^2f \/ diff x^2 > 0$, then we have found a + point of minima; if $diff ^2 f \/ diff x^2 < 0$, then we have found a + point of maximum. If $A C - B^2 < 0$, then we have found a saddle point. +] + +#example[ + Suppose that we wish to maximize the function $f: RR^2 -> RR$, given an + equation of constraint $g = 0$, where $g: RR^2 -> RR$. Using the method + of Lagrange multipliers, we look for solutions of the system $ + cases( + grad(f) (x, y) + lambda grad(&g) (x, y) &= 0, + &g(x, y) &= 0. 
+ ) + $ +] diff --git a/example.pdf b/example.pdf new file mode 100644 index 0000000..16b55c7 Binary files /dev/null and b/example.pdf differ diff --git a/example.typ b/example.typ new file mode 100644 index 0000000..ce70b7a --- /dev/null +++ b/example.typ @@ -0,0 +1,108 @@ +#import "theorems.typ": * + +// Define theorem environments + +#let theorem = thmbox( + "theorem", // The Theorem counter is attached to headings + "Theorem", + fill: rgb("#e8e8f8") +) +#let lemma = thmbox( + "theorem", // Lemmas use the same counter as Theorems + "Lemma", + fill: rgb("#efe6ff") +) +#let corollary = thmbox( + "corollary", + "Corollary", + base: "theorem", // Corollaries are 'attached' to Theorems + fill: rgb("#f8e8e8") +) +#let definition = thmbox( + "definition", // Definitions use their own counter + "Definition", + stroke: rgb("#68ff68") + 1pt +) + +// Examples and remarks are not numbered +#let example = thmplain("example", "Example").with(numbering: none) +#let remark = thmplain("remark", "Remark").with(numbering: none) + +// Proofs are attached to theorems, although they are not numbered +#let proof = thmplain( + "proof", + "Proof", + base: "theorem", + bodyfmt: body => [ + #body #h(1fr) $square$ // Insert QED symbol + ] +).with(numbering: none) + + +#let project(title: "", authors: (), body) = { + set page(height: auto) + set document(author: authors, title: title) + set text(font: "Linux Libertine", lang: "en") + set heading(numbering: "1.1.") + set par(justify: true) + + align(center)[ + #block(text(weight: 700, 1.75em, title)) + ] + + v(2em) + + body +} + + +// Document starts here + +#show: project.with( + title: "Theorems!", + authors: ( + "Satvik Saha", + ), +) + += Introduction + +#lemma(name: "Pythagoras")[ + In a right angled triangle, $ a^2 + b^2 = c^2. 
$ +] +#theorem(name: "WLLN")[#lorem(20)] +#proof[ + #lorem(30) + $ integral_(-infinity)^infinity sin(x)/x space upright(d) x = pi $ + #lorem(5) +] + +#corollary[#lorem(4)] +#corollary[#lorem(8)] + +#example[#lorem(10)] + +#lemma[#lorem(10)] + + +== Sub-Heading + +#definition[#lorem(16)] + +#example(name: [#lorem(3)])[#lorem(10)] +#remark[#lorem(5)] + +#theorem[#lorem(6)] + +// Numbering can be reactivated +#proof(numbering: "1.1")[#lorem(4)] +#proof(numbering: "1.1")[#lorem(5)] + += Heading +#lemma[#lorem(14)] +#remark[#lorem(8)] + +#corollary(name: [#lorem(4)])[#lorem(12)] +// The base can be overridden +#example(numbering: "1.1.1.a", base: "corollary")[#lorem(20)] +#example(numbering: "1.1.1.a", base: "corollary")[#lorem(10)] diff --git a/theorems.typ b/theorems.typ new file mode 100644 index 0000000..37aeabc --- /dev/null +++ b/theorems.typ @@ -0,0 +1,125 @@ +// Create a theorem environment with counter identified by "identifier", attached to environments with identifier "base". Contents are formatted using "fmt", which maps (name, number, body) to content. +// +// Supplying base: "heading" attaches the environment to the heading counter. +// Supplying base: "" makes the environment count up globally, i.e. keeps it unattached. +// +// A theorem environment is a map (body, name:, numbering:, base:) to content. +// name: none is intended to be shown in the title +// numbering: "1.1" indicates the numbering style, can be none to disable numbering +// base: base defaults to the "base" supplied when creating the +// environment, can be overridden here. 
+
+#let thmenv(identifier, base, fmt) = {
+
+  // Every environment shares the single state keyed "thm". Its "counters"
+  // entry maps an identifier to that environment's current counter (an array
+  // of integers); "latest" holds the counter written by the most recent update.
+  let thmcounters = state("thm",
+    (
+      "counters": ("": (), "heading": ()),
+      "latest": ()
+    )
+  )
+
+  // The closure below takes a parameter called "numbering", which shadows the
+  // built-in function of the same name, so keep a handle on the built-in.
+  let global_numbering = numbering
+
+  return (body, name: none, numbering: "1.1", base: base) => {
+    let number = none
+    if numbering != none {
+      // This expression is not assigned to anything: its content is joined
+      // into the closure's result, which places the counter update in the
+      // document just before the formatted environment.
+      locate(loc => {
+        thmcounters.update(thmpair => {
+          let counters = thmpair.at("counters")
+          // Mirror the built-in heading counter under the key "heading".
+          counters.at("heading") = counter(heading).at(loc)
+          if not identifier in counters.keys() {
+            counters.insert(identifier, (0, ))
+          }
+
+          let tc = counters.at(identifier)
+          // A base environment that has not been numbered yet has no entry.
+          // Treat it as an empty prefix rather than aborting on a missing
+          // dictionary key (e.g. a corollary placed before the first theorem).
+          let bc = if base in counters.keys() { counters.at(base) } else { () }
+          // Same prefix as the base: step the last component. Otherwise the
+          // base has moved on, so restart at 1 below the new prefix.
+          if tc.slice(0, -1) == bc {
+            counters.at(identifier) = (..bc, tc.last() + 1)
+          } else {
+            counters.at(identifier) = (..bc, 1)
+          }
+
+          let latest = counters.at(identifier)
+          return (
+            "counters": counters,
+            "latest": latest
+          )
+        })
+      })
+
+      // Render the counter stored by the update above with the requested
+      // numbering pattern.
+      number = thmcounters.display(x => {
+        return global_numbering(numbering, ..x.at("latest"))
+      })
+    }
+
+    // number stays none for unnumbered environments.
+    fmt(name, number, body)
+  }
+}
+
+
+// Creates a box-like theorem environment with parameters "identifier" and "base" (defaulted to "heading").
+// "head" is the label printed at the start of the title, e.g. "Theorem".
+// Named parameters:
+//   namefmt:  applied to the optional name; by default wraps it in parentheses
+//   titlefmt: applied separately to the head and to the number; bold by default
+//   bodyfmt:  applied to the body; identity by default
+//   padding:  spread into pad(), i.e. the spacing around the box
+//   fill, stroke, inset, radius, breakable:
+//             passed on to the enclosing block
+
+#let thmbox(
+  identifier,
+  head,
+  fill: none,
+  stroke: none,
+  inset: 1.2em,
+  radius: 0.3em,
+  breakable: false,
+  padding: (top: 0.5em, bottom: 0.5em),
+  namefmt: x => [(#x)],
+  titlefmt: strong,
+  bodyfmt: x => x,
+  base: "heading"
+) = {
+  // Formatter handed to thmenv; it is called with the optional name, the
+  // displayed number (or none) and the body.
+  let render(envname, number, inner) = {
+    // A colon always closes the title line; the formatted name precedes it
+    // when one was given.
+    let tag = if envname == none { [:] } else { [#namefmt(envname) :] }
+    // The title is the formatted head, followed by the formatted number when
+    // there is one.
+    let caption = if number == none {
+      titlefmt(head)
+    } else {
+      titlefmt(head) + " " + titlefmt(number)
+    }
+    let formatted = bodyfmt(inner)
+    let frame = block(
+      fill: fill,
+      stroke: stroke,
+      inset: inset,
+      width: 100%,
+      radius: radius,
+      breakable: breakable,
+      [
+        #caption
+        #tag
+        #h(0.5em)
+        #formatted
+      ]
+    )
+    pad(..padding, frame)
+  }
+  thmenv(identifier, base, render)
+}
+
+
+// A plainer variant of thmbox: zero vertical padding, zero top inset, a
+// breakable block, and an emphasized (rather than bold) title and name.
+
+#let thmplain = thmbox.with(
+  titlefmt: emph,
+  namefmt: x => emph[(#x)],
+  inset: (left: 1.2em, right: 1.2em, top: 0em),
+  padding: (bottom: 0em, top: 0em),
+  breakable: true,
+)
+