![](https://www.kde.cs.uni-kassel.de/wp-content/plugins/bibsonomy-csl/img/loading.gif)
List of publications and preprints by Tom Hanika
2024
- 1.Hirth, J., Horn, V., Stumme, G., Hanika, T.: Ordinal motifs in lattices. Information Sciences. 659, 120009 (2024). https://doi.org/10.1016/j.ins.2023.120009.
@article{HIRTH2024120009,
  author   = {Hirth, Johannes and Horn, Viktoria and Stumme, Gerd and Hanika, Tom},
  title    = {Ordinal motifs in lattices},
  journal  = {Information Sciences},
  volume   = {659},
  pages    = {120009},
  year     = {2024},
  doi      = {10.1016/j.ins.2023.120009},
  url      = {https://www.sciencedirect.com/science/article/pii/S0020025523015943},
  keywords = {itegpub}
}
%0 Journal Article
%1 HIRTH2024120009
%A Hirth, Johannes
%A Horn, Viktoria
%A Stumme, Gerd
%A Hanika, Tom
%D 2024
%J Information Sciences
%P 120009
%R https://doi.org/10.1016/j.ins.2023.120009
%T Ordinal motifs in lattices
%U https://www.sciencedirect.com/science/article/pii/S0020025523015943
%V 659
- 1.Budde, K.B., Rellstab, C., Heuertz, M., Gugerli, F., Hanika, T., Verdú, M., Pausas, J.G., González-Martínez, S.C.: Divergent selection in a Mediterranean pine on local spatial scales. Journal of Ecology. 112, (2024). https://doi.org/10.1111/1365-2745.14231. Abstract: The effects of selection on an organism's genome are hard to detect on small spatial scales, as gene flow can swamp signatures of local adaptation. Therefore, most genome scans to detect signatures of environmental selection are performed on large spatial scales; however, divergent selection on the local scale (e.g. between contrasting soil conditions) has also been demonstrated, in particular for herbaceous plants. Here, we hypothesised that in topographically complex landscapes, microenvironment variability is strong enough to leave a selective footprint in the genomes of long-lived organisms. To test this, we investigated paired south- versus north-facing Pinus pinaster stands on the local scale, with trees growing in close vicinity (≤820 m distance between paired south- and north-facing stands), in a Mediterranean mountain area. While trees on north-facing slopes experience less radiation, trees on south-facing slopes suffer from especially harsh conditions, particularly during the dry summer season. Two outlier analyses consistently revealed five putatively adaptive loci (out of 4034), in candidate genes two of which encoded non-synonymous substitutions. Additionally, one locus showed consistent allele frequency differences in all three stand pairs indicating divergent selection despite high gene flow on the local scale. Permutation tests demonstrated that our findings were robust. Functional annotation of these candidate genes revealed biological functions related to abiotic stress response, such as water availability, in other plant species. Synthesis. Our study highlights how divergent selection in heterogeneous microenvironments shapes and maintains the functional genetic variation within populations of long-lived forest tree species, being the first to focus on adaptive genetic divergence between south- and north-facing slopes within continuous forest stands. This is especially relevant in the current context of climate change, as this variation is at the base of plant population responses to future climate.
@article{https://doi.org/10.1111/1365-2745.14231,
  author   = {Budde, Katharina B. and Rellstab, Christian and Heuertz, Myriam and Gugerli, Felix and Hanika, Tom and Verdú, Miguel and Pausas, Juli G. and González-Martínez, Santiago C.},
  title    = {Divergent selection in a {Mediterranean} pine on local spatial scales},
  journal  = {Journal of Ecology},
  volume   = {112},
  number   = {2},
  year     = {2024},
  doi      = {10.1111/1365-2745.14231},
  url      = {https://besjournals.onlinelibrary.wiley.com/doi/abs/10.1111/1365-2745.14231},
  keywords = {itegpub},
  abstract = {The effects of selection on an organism's genome are hard to detect on small spatial scales, as gene flow can swamp signatures of local adaptation. Therefore, most genome scans to detect signatures of environmental selection are performed on large spatial scales; however, divergent selection on the local scale (e.g. between contrasting soil conditions) has also been demonstrated, in particular for herbaceous plants. Here, we hypothesised that in topographically complex landscapes, microenvironment variability is strong enough to leave a selective footprint in the genomes of long-lived organisms. To test this, we investigated paired south- versus north-facing Pinus pinaster stands on the local scale, with trees growing in close vicinity (≤820 m distance between paired south- and north-facing stands), in a Mediterranean mountain area. While trees on north-facing slopes experience less radiation, trees on south-facing slopes suffer from especially harsh conditions, particularly during the dry summer season. Two outlier analyses consistently revealed five putatively adaptive loci (out of 4034), in candidate genes two of which encoded non-synonymous substitutions. Additionally, one locus showed consistent allele frequency differences in all three stand pairs indicating divergent selection despite high gene flow on the local scale. Permutation tests demonstrated that our findings were robust. Functional annotation of these candidate genes revealed biological functions related to abiotic stress response, such as water availability, in other plant species. Synthesis. Our study highlights how divergent selection in heterogeneous microenvironments shapes and maintains the functional genetic variation within populations of long-lived forest tree species, being the first to focus on adaptive genetic divergence between south- and north-facing slopes within continuous forest stands. This is especially relevant in the current context of climate change, as this variation is at the base of plant population responses to future climate.}
}
%0 Journal Article
%1 https://doi.org/10.1111/1365-2745.14231
%A Budde, Katharina B.
%A Rellstab, Christian
%A Heuertz, Myriam
%A Gugerli, Felix
%A Hanika, Tom
%A Verdú, Miguel
%A Pausas, Juli G.
%A González-Martínez, Santiago C.
%D 2024
%J Journal of Ecology
%N 2
%R https://doi.org/10.1111/1365-2745.14231
%T Divergent selection in a Mediterranean pine on local spatial scales
%U https://besjournals.onlinelibrary.wiley.com/doi/abs/10.1111/1365-2745.14231
%V 112
%X Abstract The effects of selection on an organism's genome are hard to detect on small spatial scales, as gene flow can swamp signatures of local adaptation. Therefore, most genome scans to detect signatures of environmental selection are performed on large spatial scales; however, divergent selection on the local scale (e.g. between contrasting soil conditions) has also been demonstrated, in particular for herbaceous plants. Here, we hypothesised that in topographically complex landscapes, microenvironment variability is strong enough to leave a selective footprint in the genomes of long-lived organisms. To test this, we investigated paired south- versus north-facing Pinus pinaster stands on the local scale, with trees growing in close vicinity (≤820 m distance between paired south- and north-facing stands), in a Mediterranean mountain area. While trees on north-facing slopes experience less radiation, trees on south-facing slopes suffer from especially harsh conditions, particularly during the dry summer season. Two outlier analyses consistently revealed five putatively adaptive loci (out of 4034), in candidate genes two of which encoded non-synonymous substitutions. Additionally, one locus showed consistent allele frequency differences in all three stand pairs indicating divergent selection despite high gene flow on the local scale. Permutation tests demonstrated that our findings were robust. Functional annotation of these candidate genes revealed biological functions related to abiotic stress response, such as water availability, in other plant species. Synthesis. Our study highlights how divergent selection in heterogeneous microenvironments shapes and maintains the functional genetic variation within populations of long-lived forest tree species, being the first to focus on adaptive genetic divergence between south- and north-facing slopes within continuous forest stands. 
This is especially relevant in the current context of climate change, as this variation is at the base of plant population responses to future climate.
- 1.Hanika, T., Hille, T.: What is the Intrinsic Dimension of Your Binary Data? - and How to Compute it Quickly. In: Cabrera, I.P., Ferré, S., and Obiedkov, S.A. (eds.) Conceptual Knowledge Structures - First International Joint Conference, CONCEPTS 2024, Cádiz, Spain, September 9-13, 2024, Proceedings. pp. 97–112. Springer (2024). https://doi.org/10.1007/978-3-031-67868-4_7.
@inproceedings{DBLP:conf/concepts/HanikaH24,
  author    = {Hanika, Tom and Hille, Tobias},
  title     = {What is the Intrinsic Dimension of Your Binary Data? - and How to Compute it Quickly},
  editor    = {Cabrera, Inma P. and Ferr{\'e}, S{\'e}bastien and Obiedkov, Sergei A.},
  booktitle = {Conceptual Knowledge Structures - First International Joint Conference, {CONCEPTS} 2024, C{\'a}diz, Spain, September 9-13, 2024, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {14914},
  pages     = {97--112},
  publisher = {Springer},
  year      = {2024},
  doi       = {10.1007/978-3-031-67868-4_7},
  keywords  = {itegpub}
}
%0 Conference Paper
%1 DBLP:conf/concepts/HanikaH24
%A Hanika, Tom
%A Hille, Tobias
%B Conceptual Knowledge Structures - First International Joint Conference, {CONCEPTS} 2024, C{{á}}diz, Spain, September 9-13, 2024, Proceedings
%D 2024
%E Cabrera, Inma P.
%E Ferr{{é}}, S{{é}}bastien
%E Obiedkov, Sergei A.
%I Springer
%P 97--112
%R 10.1007/978-3-031-67868-4_7
%T What is the Intrinsic Dimension of Your Binary Data? - and How to Compute it Quickly
%U https://doi.org/10.1007/978-3-031-67868-4_7
%V 14914 - 1.Hanika, T., Jäschke, R.: A Repository for Formal Contexts. In: Proceedings of the 1st International Joint Conference on Conceptual Knowledge Structures (2024).Data is always at the center of the theoretical development and investigation of the applicability of formal concept analysis. It is therefore not surprising that a large number of data sets are repeatedly used in scholarly articles and software tools, acting as de facto standard data sets. However, the distribution of the data sets poses a problem for the sustainable development of the research field. There is a lack of a central location that provides and describes FCA data sets and links them to already known analysis results. This article analyses the current state of the dissemination of FCA data sets, presents the requirements for a central FCA repository, and highlights the challenges for this.
@inproceedings{hanika2024repository,
  author        = {Hanika, Tom and Jäschke, Robert},
  title         = {A Repository for Formal Contexts},
  booktitle     = {Proceedings of the 1st International Joint Conference on Conceptual Knowledge Structures},
  year          = {2024},
  eprint        = {2404.04344},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2404.04344},
  keywords      = {repository},
  abstract      = {Data is always at the center of the theoretical development and investigation of the applicability of formal concept analysis. It is therefore not surprising that a large number of data sets are repeatedly used in scholarly articles and software tools, acting as de facto standard data sets. However, the distribution of the data sets poses a problem for the sustainable development of the research field. There is a lack of a central location that provides and describes FCA data sets and links them to already known analysis results. This article analyses the current state of the dissemination of FCA data sets, presents the requirements for a central FCA repository, and highlights the challenges for this.}
}
%0 Conference Paper
%1 hanika2024repository
%A Hanika, Tom
%A Jäschke, Robert
%B Proceedings of the 1st International Joint Conference on Conceptual Knowledge Structures
%D 2024
%T A Repository for Formal Contexts
%U https://arxiv.org/abs/2404.04344
%X Data is always at the center of the theoretical development and investigation of the applicability of formal concept analysis. It is therefore not surprising that a large number of data sets are repeatedly used in scholarly articles and software tools, acting as de facto standard data sets. However, the distribution of the data sets poses a problem for the sustainable development of the research field. There is a lack of a central location that provides and describes FCA data sets and links them to already known analysis results. This article analyses the current state of the dissemination of FCA data sets, presents the requirements for a central FCA repository, and highlights the challenges for this. - 1.Ganter, B., Hanika, T., Hirth, J., Obiedkov, S.: Collaborative Hybrid Human {AI} Learning through Conceptual Exploration. In: Ericson, P., Khairova, N., and Vos, M.D. (eds.) Proceedings of the Workshops at the Third International Conference on Hybrid Human-Artificial Intelligence co-located with (HHAI) 2024), Malmö, Sweden, June 10-11, 2024. pp. 1–8. CEUR-WS.org (2024).
@inproceedings{DBLP:conf/hhai/GanterHHO24,
  author    = {Ganter, Bernhard and Hanika, Tom and Hirth, Johannes and Obiedkov, Sergei},
  title     = {Collaborative Hybrid Human {AI} Learning through Conceptual Exploration},
  editor    = {Ericson, Petter and Khairova, Nina and De Vos, Marina},
  booktitle = {Proceedings of the Workshops at the Third International Conference on Hybrid Human-Artificial Intelligence co-located with ({HHAI} 2024), Malmö, Sweden, June 10-11, 2024},
  series    = {{CEUR} Workshop Proceedings},
  volume    = {3825},
  pages     = {1--8},
  publisher = {CEUR-WS.org},
  year      = {2024},
  url       = {https://ceur-ws.org/Vol-3825/tutorial.pdf},
  keywords  = {itegpub}
}
%0 Conference Paper
%1 DBLP:conf/hhai/GanterHHO24
%A Ganter, Bernhard
%A Hanika, Tom
%A Hirth, Johannes
%A Obiedkov, Sergei
%B Proceedings of the Workshops at the Third International Conference on Hybrid Human-Artificial Intelligence co-located with (HHAI 2024), Malmö, Sweden, June 10-11, 2024
%D 2024
%E Ericson, Petter
%E Khairova, Nina
%E De Vos, Marina
%I CEUR-WS.org
%P 1--8
%T Collaborative Hybrid Human {AI} Learning through Conceptual Exploration
%U https://ceur-ws.org/Vol-3825/tutorial.pdf
%V 3825 - 1.Hirth, J., Hanika, T.: The Geometric Structure of Topic Models, (2024).Topic models are a popular tool for clustering and analyzing textual data. They allow texts to be classified on the basis of their affiliation to the previously calculated topics. Despite their widespread use in research and application, an in-depth analysis of topic models is still an open research topic. State-of-the-art methods for interpreting topic models are based on simple visualizations, such as similarity matrices, top-term lists or embeddings, which are limited to a maximum of three dimensions. In this paper, we propose an incidence-geometric method for deriving an ordinal structure from flat topic models, such as non-negative matrix factorization. These enable the analysis of the topic model in a higher (order) dimension and the possibility of extracting conceptual relationships between several topics at once. Due to the use of conceptual scaling, our approach does not introduce any artificial topical relationships, such as artifacts of feature compression. Based on our findings, we present a new visualization paradigm for concept hierarchies based on ordinal motifs. These allow for a top-down view on topic spaces. We introduce and demonstrate the applicability of our approach based on a topic model derived from a corpus of scientific papers taken from 32 top machine learning venues.
@misc{hirth2024geometric,
  author   = {Hirth, Johannes and Hanika, Tom},
  title    = {The Geometric Structure of Topic Models},
  year     = {2024},
  note     = {Preprint},
  keywords = {kde},
  abstract = {Topic models are a popular tool for clustering and analyzing textual data. They allow texts to be classified on the basis of their affiliation to the previously calculated topics. Despite their widespread use in research and application, an in-depth analysis of topic models is still an open research topic. State-of-the-art methods for interpreting topic models are based on simple visualizations, such as similarity matrices, top-term lists or embeddings, which are limited to a maximum of three dimensions. In this paper, we propose an incidence-geometric method for deriving an ordinal structure from flat topic models, such as non-negative matrix factorization. These enable the analysis of the topic model in a higher (order) dimension and the possibility of extracting conceptual relationships between several topics at once. Due to the use of conceptual scaling, our approach does not introduce any artificial topical relationships, such as artifacts of feature compression. Based on our findings, we present a new visualization paradigm for concept hierarchies based on ordinal motifs. These allow for a top-down view on topic spaces. We introduce and demonstrate the applicability of our approach based on a topic model derived from a corpus of scientific papers taken from 32 top machine learning venues.}
}
%0 Generic
%1 hirth2024geometric
%A Hirth, Johannes
%A Hanika, Tom
%D 2024
%T The Geometric Structure of Topic Models
%X Topic models are a popular tool for clustering and analyzing textual data. They allow texts to be classified on the basis of their affiliation to the previously calculated topics. Despite their widespread use in research and application, an in-depth analysis of topic models is still an open research topic. State-of-the-art methods for interpreting topic models are based on simple visualizations, such as similarity matrices, top-term lists or embeddings, which are limited to a maximum of three dimensions. In this paper, we propose an incidence-geometric method for deriving an ordinal structure from flat topic models, such as non-negative matrix factorization. These enable the analysis of the topic model in a higher (order) dimension and the possibility of extracting conceptual relationships between several topics at once. Due to the use of conceptual scaling, our approach does not introduce any artificial topical relationships, such as artifacts of feature compression. Based on our findings, we present a new visualization paradigm for concept hierarchies based on ordinal motifs. These allow for a top-down view on topic spaces. We introduce and demonstrate the applicability of our approach based on a topic model derived from a corpus of scientific papers taken from 32 top machine learning venues. - 1.Hille, T., Stubbemann, M., Hanika, T.: Reproducibility and Geometric Intrinsic Dimensionality: An Investigation on Graph Neural Network Research. Transactions on Machine Learning Research. (2024).
@article{hille2024reproducibility,
  author   = {Hille, Tobias and Stubbemann, Maximilian and Hanika, Tom},
  title    = {Reproducibility and Geometric Intrinsic Dimensionality: An Investigation on Graph Neural Network Research},
  journal  = {Transactions on Machine Learning Research},
  year     = {2024},
  note     = {Reproducibility Certification},
  url      = {https://openreview.net/forum?id=CtEGxIqtud},
  keywords = {itegpub}
}
%0 Journal Article
%1 hille2024reproducibility
%A Hille, Tobias
%A Stubbemann, Maximilian
%A Hanika, Tom
%D 2024
%J Transactions on Machine Learning Research
%T Reproducibility and Geometric Intrinsic Dimensionality: An Investigation on Graph Neural Network Research
%U https://openreview.net/forum?id=CtEGxIqtud
2023
- 1.Stubbemann, M., Hanika, T., Schneider, F.M.: Intrinsic Dimension for Large-Scale Geometric Learning. Transactions on Machine Learning Research. (2023).The concept of dimension is essential to grasp the complexity of data. A naive approach to determine the dimension of a dataset is based on the number of attributes. More sophisticated methods derive a notion of intrinsic dimension (ID) that employs more complex feature functions, e.g., distances between data points. Yet, many of these approaches are based on empirical observations, cannot cope with the geometric character of contemporary datasets, and do lack an axiomatic foundation. A different approach was proposed by V. Pestov, who links the intrinsic dimension axiomatically to the mathematical concentration of measure phenomenon. First methods to compute this and related notions for ID were computationally intractable for large-scale real-world datasets. In the present work, we derive a computationally feasible method for determining said axiomatic ID functions. Moreover, we demonstrate how the geometric properties of complex data are accounted for in our modeling. In particular, we propose a principle way to incorporate neighborhood information, as in graph data, into the ID. This allows for new insights into common graph learning procedures, which we illustrate by experiments on the Open Graph Benchmark.
@article{stubbemann2022intrinsic,
  author   = {Stubbemann, Maximilian and Hanika, Tom and Schneider, Friedrich Martin},
  title    = {Intrinsic Dimension for Large-Scale Geometric Learning},
  journal  = {Transactions on Machine Learning Research},
  year     = {2023},
  url      = {https://openreview.net/forum?id=85BfDdYMBY},
  keywords = {itegpub},
  abstract = {The concept of dimension is essential to grasp the complexity of data. A naive approach to determine the dimension of a dataset is based on the number of attributes. More sophisticated methods derive a notion of intrinsic dimension (ID) that employs more complex feature functions, e.g., distances between data points. Yet, many of these approaches are based on empirical observations, cannot cope with the geometric character of contemporary datasets, and do lack an axiomatic foundation. A different approach was proposed by V. Pestov, who links the intrinsic dimension axiomatically to the mathematical concentration of measure phenomenon. First methods to compute this and related notions for ID were computationally intractable for large-scale real-world datasets. In the present work, we derive a computationally feasible method for determining said axiomatic ID functions. Moreover, we demonstrate how the geometric properties of complex data are accounted for in our modeling. In particular, we propose a principle way to incorporate neighborhood information, as in graph data, into the ID. This allows for new insights into common graph learning procedures, which we illustrate by experiments on the Open Graph Benchmark.}
}
%0 Journal Article
%1 stubbemann2022intrinsic
%A Stubbemann, Maximilian
%A Hanika, Tom
%A Schneider, Friedrich Martin
%D 2023
%J Transactions on Machine Learning Research
%T Intrinsic Dimension for Large-Scale Geometric Learning
%U https://openreview.net/forum?id=85BfDdYMBY
%X The concept of dimension is essential to grasp the complexity of data. A naive approach to determine the dimension of a dataset is based on the number of attributes. More sophisticated methods derive a notion of intrinsic dimension (ID) that employs more complex feature functions, e.g., distances between data points. Yet, many of these approaches are based on empirical observations, cannot cope with the geometric character of contemporary datasets, and do lack an axiomatic foundation. A different approach was proposed by V. Pestov, who links the intrinsic dimension axiomatically to the mathematical concentration of measure phenomenon. First methods to compute this and related notions for ID were computationally intractable for large-scale real-world datasets. In the present work, we derive a computationally feasible method for determining said axiomatic ID functions. Moreover, we demonstrate how the geometric properties of complex data are accounted for in our modeling. In particular, we propose a principle way to incorporate neighborhood information, as in graph data, into the ID. This allows for new insights into common graph learning procedures, which we illustrate by experiments on the Open Graph Benchmark. - 1.Dürrschnabel, D., Hanika, T., Stumme, G.: Drawing Order Diagrams Through Two-Dimension Extension. Journal of Graph Algorithms and Applications. 27, 783–802 (2023). https://doi.org/10.7155/jgaa.00645.
@article{drrschnabel2023drawing,
  author    = {Dürrschnabel, Dominik and Hanika, Tom and Stumme, Gerd},
  title     = {Drawing Order Diagrams Through Two-Dimension Extension},
  journal   = {Journal of Graph Algorithms and Applications},
  volume    = {27},
  number    = {9},
  pages     = {783--802},
  publisher = {Journal of Graph Algorithms and Applications},
  year      = {2023},
  doi       = {10.7155/jgaa.00645},
  keywords  = {itegpub}
}
%0 Journal Article
%1 drrschnabel2023drawing
%A Dürrschnabel, Dominik
%A Hanika, Tom
%A Stumme, Gerd
%D 2023
%I Journal of Graph Algorithms and Applications
%J Journal of Graph Algorithms and Applications
%N 9
%P 783–802
%R 10.7155/jgaa.00645
%T Drawing Order Diagrams Through Two-Dimension Extension
%U http://dx.doi.org/10.7155/jgaa.00645
%V 27
- 1.Hanika, T., Hirth, J.: Conceptual views on tree ensemble classifiers. International Journal of Approximate Reasoning. 159, 108930 (2023). https://doi.org/10.1016/j.ijar.2023.108930. Random Forests and related tree-based methods are popular for supervised learning from table based data. Apart from their ease of parallelization, their classification performance is also superior. However, this performance, especially parallelizability, is offset by the loss of explainability. Statistical methods are often used to compensate for this disadvantage. Yet, their ability for local explanations, and in particular for global explanations, is limited. In the present work we propose an algebraic method, rooted in lattice theory, for the (global) explanation of tree ensembles. In detail, we introduce two novel conceptual views on tree ensemble classifiers and demonstrate their explanatory capabilities on Random Forests that were trained with standard parameters.
@article{HANIKA2023108930,
  author   = {Hanika, Tom and Hirth, Johannes},
  title    = {Conceptual views on tree ensemble classifiers},
  journal  = {International Journal of Approximate Reasoning},
  volume   = {159},
  pages    = {108930},
  year     = {2023},
  doi      = {10.1016/j.ijar.2023.108930},
  url      = {https://www.sciencedirect.com/science/article/pii/S0888613X23000610},
  keywords = {xai},
  abstract = {Random Forests and related tree-based methods are popular for supervised learning from table based data. Apart from their ease of parallelization, their classification performance is also superior. However, this performance, especially parallelizability, is offset by the loss of explainability. Statistical methods are often used to compensate for this disadvantage. Yet, their ability for local explanations, and in particular for global explanations, is limited. In the present work we propose an algebraic method, rooted in lattice theory, for the (global) explanation of tree ensembles. In detail, we introduce two novel conceptual views on tree ensemble classifiers and demonstrate their explanatory capabilities on Random Forests that were trained with standard parameters.}
}
%0 Journal Article
%1 HANIKA2023108930
%A Hanika, Tom
%A Hirth, Johannes
%D 2023
%J International Journal of Approximate Reasoning
%P 108930
%R https://doi.org/10.1016/j.ijar.2023.108930
%T Conceptual views on tree ensemble classifiers
%U https://www.sciencedirect.com/science/article/pii/S0888613X23000610
%V 159
%X Random Forests and related tree-based methods are popular for supervised learning from table based data. Apart from their ease of parallelization, their classification performance is also superior. However, this performance, especially parallelizability, is offset by the loss of explainability. Statistical methods are often used to compensate for this disadvantage. Yet, their ability for local explanations, and in particular for global explanations, is limited. In the present work we propose an algebraic method, rooted in lattice theory, for the (global) explanation of tree ensembles. In detail, we introduce two novel conceptual views on tree ensemble classifiers and demonstrate their explanatory capabilities on Random Forests that were trained with standard parameters.