Compare commits

...

9 Commits

Author SHA1 Message Date
eneller
df511b4a3e update 2025-11-30 16:38:04 +01:00
eneller
597111974e update 2025-11-30 15:24:08 +01:00
eneller
b67fa4db89 update 2025-11-28 12:57:59 +01:00
eneller
e10e311f0f update 2025-11-26 23:27:47 +01:00
eneller
e015e816bd update 2025-11-26 19:24:53 +01:00
eneller
25f725049e clean up latex header 2025-11-26 18:40:11 +01:00
eneller
520d3f8bc6 update 2025-11-26 18:25:10 +01:00
eneller
31a3e956ab begin compression 2025-11-25 13:14:17 +01:00
eneller
5f5ee2b395 minor updates 2025-11-03 11:06:19 +01:00
6 changed files with 447 additions and 2 deletions

5
.latexmkrc Normal file
View File

@@ -0,0 +1,5 @@
# latexmk configuration for this project.
# Enable shell escape for both engines — presumably required by a package that
# shells out (e.g. minted or TikZ externalization); TODO confirm which one.
$latex = 'latex %O --shell-escape %S';
$pdflatex = 'pdflatex %O --shell-escape %S';
# Build a PDF via pdflatex.
$pdf_mode = 1;
# Extra extensions removed on `latexmk -c`/-C: list-of-listings, beamer
# navigation files, list-of-algorithms, and bibliography (.bbl) artifacts.
$clean_ext = "lol nav snm loa bbl*";
# 2 = run bibtex/biber when needed and treat .bbl as regeneratable (deleted on clean).
$bibtex_use = 2;

94
compression.bib Normal file
View File

@@ -0,0 +1,94 @@
@article{shannon1948mathematical,
title={A mathematical theory of communication},
author={Shannon, Claude E},
journal={The Bell system technical journal},
volume={27},
number={3},
pages={379--423},
year={1948},
publisher={Nokia Bell Labs}
}
@misc{ enwiki:shannon-source-coding,
author = "{Wikipedia contributors}",
title = "Shannon's source coding theorem --- {Wikipedia}{,} The Free Encyclopedia",
year = "2025",
url = "https://en.wikipedia.org/w/index.php?title=Shannon%27s_source_coding_theorem&oldid=1301398440",
note = "[Online; accessed 25-November-2025]"
}
@misc{ enwiki:shannon-fano,
author = "{Wikipedia contributors}",
title = "Shannon--Fano coding --- {Wikipedia}{,} The Free Encyclopedia",
year = "2025",
url = "https://en.wikipedia.org/w/index.php?title=Shannon%E2%80%93Fano_coding&oldid=1315776380",
note = "[Online; accessed 26-November-2025]"
}
@misc{ enwiki:huffman-code,
author = "{Wikipedia contributors}",
title = "Huffman coding --- {Wikipedia}{,} The Free Encyclopedia",
year = "2025",
url = "https://en.wikipedia.org/w/index.php?title=Huffman_coding&oldid=1321991625",
note = "[Online; accessed 26-November-2025]"
}
@misc{ enwiki:lzw,
author = "{Wikipedia contributors}",
title = "Lempel--Ziv--Welch --- {Wikipedia}{,} The Free Encyclopedia",
year = "2025",
url = "https://en.wikipedia.org/w/index.php?title=Lempel%E2%80%93Ziv%E2%80%93Welch&oldid=1307959679",
note = "[Online; accessed 26-November-2025]"
}
@misc{ enwiki:arithmetic-code,
author = "{Wikipedia contributors}",
title = "Arithmetic coding --- {Wikipedia}{,} The Free Encyclopedia",
year = "2025",
url = "https://en.wikipedia.org/w/index.php?title=Arithmetic_coding&oldid=1320999535",
note = "[Online; accessed 26-November-2025]"
}
@misc{ enwiki:kraft-mcmillan,
author = "{Wikipedia contributors}",
title = "Kraft--McMillan inequality --- {Wikipedia}{,} The Free Encyclopedia",
year = "2025",
url = "https://en.wikipedia.org/w/index.php?title=Kraft%E2%80%93McMillan_inequality&oldid=1313803157",
note = "[Online; accessed 26-November-2025]"
}
@misc{ enwiki:partition,
author = "{Wikipedia contributors}",
title = "Partition problem --- {Wikipedia}{,} The Free Encyclopedia",
year = "2025",
url = "https://en.wikipedia.org/w/index.php?title=Partition_problem&oldid=1320732818",
note = "[Online; accessed 30-November-2025]"
}
@misc{ dewiki:shannon-fano,
author = "Wikipedia",
title = "Shannon-Fano-Kodierung --- Wikipedia{,} die freie Enzyklopädie",
year = "2024",
url = "https://de.wikipedia.org/w/index.php?title=Shannon-Fano-Kodierung&oldid=246624798",
note = "[Online; Stand 26. November 2025]"
}
@misc{ dewiki:huffman-code,
author = "Wikipedia",
title = "Huffman-Kodierung --- Wikipedia{,} die freie Enzyklopädie",
year = "2025",
url = "https://de.wikipedia.org/w/index.php?title=Huffman-Kodierung&oldid=254369306",
note = "[Online; Stand 26. November 2025]"
}
@misc{ dewiki:lzw,
author = "Wikipedia",
title = "Lempel-Ziv-Welch-Algorithmus --- Wikipedia{,} die freie Enzyklopädie",
year = "2025",
url = "https://de.wikipedia.org/w/index.php?title=Lempel-Ziv-Welch-Algorithmus&oldid=251943809",
note = "[Online; Stand 26. November 2025]"
}
@misc{ dewiki:kraft-mcmillan,
author = "Wikipedia",
title = "Kraft-Ungleichung --- Wikipedia{,} die freie Enzyklopädie",
year = "2018",
url = "https://de.wikipedia.org/w/index.php?title=Kraft-Ungleichung&oldid=172862410",
note = "[Online; Stand 26. November 2025]"
}
@misc{ dewiki:partition,
author = "Wikipedia",
title = "Partitionsproblem --- Wikipedia{,} die freie Enzyklopädie",
year = "2025",
url = "https://de.wikipedia.org/w/index.php?title=Partitionsproblem&oldid=255787013",
note = "[Online; Stand 26. November 2025]"
}

View File

@@ -0,0 +1,333 @@
\documentclass{article}
%%% basic layouting
\usepackage[utf8]{inputenc} % utf8x is deprecated and conflicts with many packages
\usepackage[margin=1in]{geometry} % Adjust margins
\usepackage{caption}
\PassOptionsToPackage{hyphens}{url} % allow breaking urls; must precede hyperref loading url
\usepackage{hyperref}
\usepackage{float}
\usepackage{wrapfig}
\usepackage{subcaption}
\usepackage{parskip} % don't indent after paragraphs, figures
\usepackage{xcolor}
%%% algorithms
\usepackage{algorithm}
\usepackage{algpseudocodex}
% graphs and plots
\usepackage{tikz}
\usepackage{pgfplots}
\usetikzlibrary{positioning}
\usetikzlibrary{trees}
%\usetikzlibrary{graphs, graphdrawing}
%%% math
\usepackage{amsmath}
%%% citations
\usepackage[style=ieee, backend=biber, maxnames=1, minnames=1]{biblatex}
%\usepackage{csquotes} % Recommended for biblatex
\addbibresource{compression.bib}
\title{Compression}
\author{Erik Neller}
\date{\today}
\begin{document}
\maketitle
\section{Introduction}
As the volume of data grows exponentially around the world, compression is only gaining in importance to all disciplines.
Not only does it enable the storage of large amounts of information needed for research in scientific domains
like DNA sequencing and analysis, it also plays a vital role in keeping stored data accessible by
facilitating cataloging, search and retrieval.
The concept of entropy introduced in the previous entry is closely related to the design of efficient codes for compression.
In coding theory, the events of an information source are to be encoded in a manner that minimizes the bits needed to store
the information provided by the source.
The process of encoding can thus be described by a function $C$ transforming from a source alphabet $X$ to a code alphabet $Y$.
Symbols in the alphabets are denoted $x_i$ and $y_j$ respectively; the source symbols $x_i$ occur with underlying probabilities $p_i$.
% TODO fix use of alphabet / symbol / code word: alphabet is usually binary -> code word is 010101
\begin{equation}
C: X \rightarrow Y \qquad X=\{x_1,x_2,\dots,x_n\} \qquad Y=\{y_1,y_2,\dots,y_m\}
\label{eq:formal-code}
\end{equation}
The understanding of entropy as the expected information $E(I)$ of a message provides an intuition that,
given a source with a certain entropy (in bits), any coding cannot have a lower average word length $l_j$ (in bits)
than this entropy without losing information.
\begin{equation}
H = E(I) = - \sum_i p_i \log_2(p_i) \quad \leq \quad E(L) = \sum_j p_j l_j
\label{eq:entropy-information}
\end{equation}
This is the content of Shannon's source coding theorem,
introduced in \citeyear{shannon1948mathematical}.
In his paper, \citeauthor{shannon1948mathematical} proposed two principal ideas to minimize the average length of a code.
The first is to use short codes for symbols with higher probability.
This is an intuitive approach as more frequent symbols have a higher impact on average code length.
The second idea is to encode events that frequently occur together at the same time, artificially increasing
the size of the code alphabet $Y$ to allow for greater flexibility in code design~\cite{enwiki:shannon-source-coding}.
Codes can have several properties. A code where all codewords have equal lengths is called a \textit{block code}.
While easy to construct, they are not well suited for our goal of minimizing average word length
as specified in \autoref{eq:entropy-information} because the source alphabet is generally not equally distributed
in a way that $p_i = \frac{1}{n}$.
In order to send (or store, for that matter) multiple code words in succession, a code $Y$ has to be uniquely decodable.
When receiving 0010 in succession using the nonsingular code $Y_2$ from \autoref{tab:code-properties},
it is not clear to the recipient which source symbols make up the intended message.
For the specified sequence, there are a total of three possibilities to decode the received code:
$s_0 s_3 s_0$, $s_0 s_0 s_1$ or $s_2 s_1$ could all be the intended message, making the code useless.
\begin{table}[H]
\centering
\begin{tabular}{c l l l}
Source Code $X$ & Prefix Code $Y_0$ & Suffix Code $Y_1$ & Nonsingular Code $Y_2$ \\
\hline
$s_0$ & 0 & 0 & 0 \\
$s_1$ & 10 & 01 & 10 \\
$s_2$ & 110 & 011 & 00 \\
$s_3$ & 1110 & 0111 & 01 \\
\end{tabular}
\caption{Examples of different properties of codes}
\label{tab:code-properties}
\end{table}
Another interesting property of a code that is specifically important for transmission but less so for storage, is
being prefix-free.
A prefix code (which is said to be prefix-free) can be decoded by the receiver of the symbol as soon as it is received
because no code word $y_j$ is the prefix of another valid code word.
As shown in \autoref{tab:code-properties}, $Y_0$ is a prefix code, in this case more specifically called a \textit{comma code}
because each code word is separated by a trailing 0 from the next code word.
$Y_1$ in contrast is called a \textit{capital code} (capitalizes the beginning of each word) and is not a prefix code.
In the case of the capital code in fact every word other than the longest possible code word is a prefix of the longer words
lower in the table. As a result, the receiver cannot instantaneously decode each word but rather has to wait for the leading 0
of the next codeword.
Further, a code is said to be \textit{efficient} if it has the smallest possible average word length, i.e. matches
the entropy of the source alphabet.
\section{Kraft-McMillan inequality}
The Kraft-McMillan inequality gives a necessary and sufficient condition for the existence of a prefix code.
In the form shown in \autoref{eq:kraft-mcmillan} it is intuitive to understand given a code tree.
Because prefix codes require code words to only be situated on the leaves of a code tree,
for every code word $i$ using an alphabet of size $r$, it uses up exactly $r^{-l_i}$ of the available code words.
The sum over all of them can thus never be larger than one, else
the code will not be uniquely decodable~\cite{enwiki:kraft-mcmillan}.
\begin{equation}
\sum_i r^{-l_i} \leq 1
\label{eq:kraft-mcmillan}
\end{equation}
\section{Shannon-Fano}
Shannon-Fano coding is one of the earliest methods for constructing prefix codes.
It is a top-down method that divides symbols into equal groups based on their probabilities,
recursively partitioning them to assign shorter codewords to more frequent events.
\begin{algorithm}
\caption{Shannon-Fano compression}
\label{alg:shannon-fano}
\begin{algorithmic}
\Procedure{ShannonFano}{symbols, probabilities}
\If{length(symbols) $= 1$}
\State \Return codeword for single symbol
\EndIf
\State $\text{current\_sum} \gets 0$
\State $\text{split\_index} \gets 0$
\For{$i \gets 1$ \textbf{to} length(symbols)}
\If{$|\text{current\_sum} + \text{probabilities}[i] - 0.5| < |\text{current\_sum} - 0.5|$}
\State $\text{current\_sum} \gets \text{current\_sum} + \text{probabilities}[i]$
\State $\text{split\_index} \gets i$
\EndIf
\EndFor
\State $\text{left\_group} \gets \text{symbols}[1 : \text{split\_index}]$
\State $\text{right\_group} \gets \text{symbols}[\text{split\_index} + 1 : \text{length(symbols)}]$
\State Assign prefix ``0'' to codes from ShannonFano($\text{left\_group}, \ldots$)
\State Assign prefix ``1'' to codes from ShannonFano($\text{right\_group}, \ldots$)
\EndProcedure
\end{algorithmic}
\end{algorithm}
While Shannon-Fano coding guarantees the generation of a prefix-free code with an average word length close to the entropy,
it is not guaranteed to be optimal. In practice, it often generates codewords that are only slightly longer than necessary.
Weaknesses of the algorithm also include the non-trivial partitioning phase \cite{enwiki:partition}, which can in practice however be solved relatively efficiently.
Due to the aforementioned limitations, the two historically somewhat ambiguous Shannon-Fano algorithms are almost never used,
in favor of Huffman coding as described in the next section.
\section{Huffman Coding}
\label{sec:huffman}
Huffman coding is an optimal prefix coding algorithm that minimizes the expected codeword length
for a given set of symbol probabilities. Developed by David Huffman in 1952, it guarantees optimality
by constructing a binary tree where the most frequent symbols are assigned the shortest codewords.
Huffman coding achieves the theoretical limit of entropy for discrete memoryless sources,
making it one of the most important compression techniques in information theory.
Unlike Shannon-Fano, which uses a top-down approach, Huffman coding employs a bottom-up strategy.
The algorithm builds the code tree by iteratively combining the two symbols with the lowest probabilities
into a new internal node. This greedy approach ensures that the resulting tree minimizes the weighted path length,
where the weight of each symbol is its probability.
\begin{algorithm}
\caption{Huffman coding algorithm}
\label{alg:huffman}
\begin{algorithmic}
\Procedure{Huffman}{symbols, probabilities}
\State Create a leaf node for each symbol and add it to a priority queue
\While{priority queue contains more than one node}
\State Extract two nodes with minimum frequency: $\text{left}$ and $\text{right}$
\State Create a new internal node with frequency $\text{freq(left)} + \text{freq(right)}$
\State Set $\text{left}$ as the left child and $\text{right}$ as the right child
\State Add the new internal node to the priority queue
\EndWhile
\State $\text{root} \gets$ remaining node in priority queue
\State Traverse tree and assign codewords: ``0'' for left edges, ``1'' for right edges
\State \Return codewords
\EndProcedure
\end{algorithmic}
\end{algorithm}
The optimality of Huffman coding can be proven by exchange arguments.
The key insight is that if two codewords have the maximum length in an optimal code, they must correspond to the two least frequent symbols.
Moreover, these two symbols can be combined into a single meta-symbol without affecting optimality,
which leads to a recursive structure that guarantees Huffman's method produces an optimal code.
The average codeword length $L_{\text{Huffman}}$ produced by Huffman coding satisfies the following bounds:
\begin{equation}
H(X) \leq L_{\text{Huffman}} < H(X) + 1
\label{eq:huffman-bounds}
\end{equation}
where $H(X)$ is the entropy of the source. This means Huffman coding is guaranteed to be within one bit
of the theoretical optimum. In practice, when symbol probabilities are powers of $\frac{1}{2}$,
Huffman coding achieves perfect compression and $L_{\text{Huffman}} = H(X)$.
The computational complexity of Huffman coding is $O(n \log n)$, where $n$ is the number of distinct symbols.
A priority queue implementation using a binary heap achieves this bound, making Huffman coding
efficient even for large alphabets. Its widespread use in compression formats such as DEFLATE, JPEG, and MP3
testifies to its practical importance.
However, Huffman coding has limitations. First, it requires knowledge of the probability distribution
of symbols before encoding, necessitating a preprocessing pass or transmission of frequency tables.
Second, it assigns an integer number of bits to each symbol, which can be suboptimal
when symbol probabilities do not align well with powers of two.
Symbol-by-symbol coding imposes a constraint that is often unneeded since codes will usually be packed in long sequences,
leaving room for further optimization as provided by Arithmetic Coding.
\section{Arithmetic Coding}
Arithmetic coding is a modern compression technique that encodes an entire message as a single interval
within the range $[0, 1)$, as opposed to symbol-by-symbol coding used by Huffman.
By iteratively refining this interval based on the probabilities of the symbols in the message,
arithmetic coding can achieve compression rates that approach the entropy of the source.
Its ability to handle non-integer bit lengths makes it particularly powerful
for applications requiring high compression efficiency.
In the basic form, a message is first written in the base of the alphabet with a leading '$0.$': $ \text{ABBCAB} = 0.011201_3$,
in this case yielding a ternary number as the alphabet size is $ |\{A,B,C\}| = 3 $.
This number can then be encoded to the target base (usually 2) with sufficient precision to yield back the original number, resulting in $0.0010110001_2$.
The decoder only gets the rational number $q$ and the length $n$ of the original message.
The encoding can then be easily reversed by changing base and rounding to $n$ digits.
In general, arithmetic coding can produce near-optimal output for any given source probability distribution.
This is achieved by adjusting the intervals that are interpreted as a given source symbol.
Given the following source probabilities of $p_A = \frac{6}{8}, p_B = p_C = \frac{1}{8}$ the intervals would be adjusted to
$ A= [0,\frac{6}{8}), B=[\frac{6}{8}, \frac{7}{8}), C=[\frac{7}{8},1)$, so that the intervals partition $[0,1)$.
Instead of transforming the base of the number and rounding to appropriate precision, the encoder recursively refines the interval and in the end chooses a number inside that interval.
\begin{enumerate}
\item \textbf{Symbol:A} $A=[0, \frac{6}{8})$
\item $ A= [0,(\frac{6}{8})^2), B=[(\frac{6}{8})^2, \frac{7}{8} \cdot \frac{6}{8}), C=[\frac{7}{8} \cdot \frac{6}{8}, \frac{6}{8})$.
\item \textbf{Symbol:B} $B=[(\frac{6}{8})^2, \frac{7}{8} \cdot \frac{6}{8}) = [\frac{36}{64}, \frac{42}{64})$
\end{enumerate}
Depending on implementation, the source message can also be encoded in base $n+1$, reserving room for a special \verb|END-OF-DATA| symbol that the decoder
will look for and consequently stop reading from the input $q$.
\section{LZW Algorithm}
The Lempel-Ziv-Welch (LZW) algorithm is a dictionary-based compression method that dynamically builds a dictionary
of recurring patterns in the data as compression proceeds. Unlike entropy-based methods such as Huffman or arithmetic coding,
LZW does not require prior knowledge of symbol probabilities, making it highly adaptable and efficient
for a wide range of applications, including image and text compression.
The algorithm was developed by Abraham Lempel and Jacob Ziv, with refinements by Terry Welch in 1984.
The fundamental insight of LZW is that many data sources contain repeating patterns that can be exploited
by replacing longer sequences with shorter codes. Rather than assigning variable-length codes to individual symbols
based on their frequency, LZW identifies recurring substrings and assigns them fixed-length codes.
As the algorithm processes the data, it dynamically constructs a dictionary that maps these patterns to codes,
without requiring the dictionary to be transmitted with the compressed data.
\begin{algorithm}
\caption{LZW compression algorithm}
\label{alg:lzw}
\begin{algorithmic}
\Procedure{LZWCompress}{data}
\State Initialize dictionary with all single characters
\State $\text{code} \gets$ next available code (typically 256 for byte alphabet)
\State $w \gets$ first symbol from data
\State $\text{output} \gets [\,]$
\For{each symbol $c$ in remaining data}
\If{$w + c$ exists in dictionary}
\State $w \gets w + c$
\Else
\State append $\text{code}(w)$ to output
\If{code $<$ max\_code}
\State Add $w + c$ to dictionary with code $\text{code}$
\State $\text{code} \gets \text{code} + 1$
\EndIf
\State $w \gets c$
\EndIf
\EndFor
\State append $\text{code}(w)$ to output
\State \Return output
\EndProcedure
\end{algorithmic}
\end{algorithm}
The decompression process is equally elegant. The decompressor initializes an identical dictionary
and reconstructs the original data by decoding the transmitted codes. Crucially, the decompressor
can reconstruct the dictionary entries on-the-fly as it processes the compressed data,
recovering the exact sequence of dictionary updates that occurred during compression.
This property is what allows the dictionary to remain implicit rather than explicitly transmitted.
\begin{algorithm}
\caption{LZW decompression algorithm}
\label{alg:lzw-decompress}
\begin{algorithmic}
\Procedure{LZWDecompress}{codes}
\State Initialize dictionary with all single characters
\State $\text{code} \gets$ next available code
\State $w \gets \text{decode}(\text{codes}[0])$
\State $\text{output} \gets w$
\For{each code $c$ in $\text{codes}[1:]$}
\If{$c$ exists in dictionary}
\State $k \gets \text{decode}(c)$
\Else
\State $k \gets w + w[0]$ \quad \{handle special case\}
\EndIf
\State append $k$ to output
\State Add $w + k[0]$ to dictionary with code $\text{code}$
\State $\text{code} \gets \text{code} + 1$
\State $w \gets k$
\EndFor
\State \Return output
\EndProcedure
\end{algorithmic}
\end{algorithm}
LZW's advantages make it particularly valuable for certain applications. First, it requires no statistical modeling
of the input data, making it applicable to diverse data types without prior analysis.
Second, the dictionary is built incrementally and implicitly, eliminating transmission overhead.
Third, it can achieve significant compression on data with repeating patterns, such as text, images, and structured data.
Fourth, the algorithm is relatively simple to implement and computationally efficient, with time complexity $O(n)$
where $n$ is the length of the input.
However, LZW has notable limitations. Its compression effectiveness is highly dependent on the structure and repetitiveness
of the input data. On truly random data with no repeating patterns, LZW can even increase the file size.
Additionally, the fixed size of the dictionary (typically 12 or 16 bits, allowing $2^{12}=4096$ or $2^{16}=65536$ entries)
limits its ability to adapt to arbitrarily large vocabularies of patterns.
When the dictionary becomes full, most implementations stop adding new entries, potentially reducing compression efficiency.
LZW has seen widespread practical deployment in compression standards and applications.
The GIF image format uses LZW compression, as does the TIFF image format in some variants.
The relationship between dictionary-based methods like LZW and entropy-based methods like Huffman
is complementary rather than competitive. LZW excels at capturing structure and repetition,
while entropy-based methods optimize symbol encoding based on probability distributions.
This has led to hybrid approaches that combine both techniques, such as the Deflate algorithm,
which uses LZSS (a variant of LZ77) followed by Huffman coding of the output to achieve better compression ratios.
\printbibliography
\end{document}

View File

@@ -0,0 +1,2 @@
Error correction codes (theorems: Hamming condition, Varsham-Gilbert)

View File

@@ -0,0 +1,2 @@
Cryptography: DES, AES and RSA

View File

@@ -26,7 +26,7 @@
\begin{document} \begin{document}
\maketitle \maketitle
\section{What is entropy?} \section{Introduction}
Across disciplines, entropy is a measure of uncertainty or randomness. Across disciplines, entropy is a measure of uncertainty or randomness.
Originating in classical thermodynamics, Originating in classical thermodynamics,
over time it has been applied in different sciences such as chemistry and information theory. over time it has been applied in different sciences such as chemistry and information theory.
@@ -348,13 +348,22 @@ The capacity of the binary symmetric channel is given by:
where $H_2(p) = -p \log_2(p) - (1-p)\log_2(1-p)$ is the binary entropy function. where $H_2(p) = -p \log_2(p) - (1-p)\log_2(1-p)$ is the binary entropy function.
As $p$ increases, uncertainty grows and channel capacity declines. As $p$ increases, uncertainty grows and channel capacity declines.
When $p = 0.5$, output bits are completely random and no information can be transmitted ($C = 0$). When $p = 0.5$, output bits are completely random and no information can be transmitted ($C = 0$).
As already shown in \autoref{fig:graph-entropy}, an error rate over $p > 0.5$ is equivalent to $ 1-p < 0.5$, As illustrated in \autoref{fig:graph-entropy}, an error rate over $p > 0.5$ is equivalent to $ 1-p < 0.5$,
though not relevant in practice. though not relevant in practice.
Shannons theorem is not constructive as it does not provide an explicit method for constructing such efficient codes, Shannons theorem is not constructive as it does not provide an explicit method for constructing such efficient codes,
but it guarantees their existence. but it guarantees their existence.
In practice, structured codes such as Hamming and ReedSolomon codes are employed to approach channel capacity. In practice, structured codes such as Hamming and ReedSolomon codes are employed to approach channel capacity.
\section{Conclusion}
Entropy provides a fundamental measure of uncertainty and information,
bridging concepts from thermodynamics to modern communication theory.
Beyond the provided examples, the concept of entropy has far-reaching applications in diverse fields:
from cryptography, where it quantifies randomness and security,
to statistical physics, where it characterizes disorder in complex systems,
to biology, connecting molecular information and population diversity.
\printbibliography \printbibliography
\end{document} \end{document}