sdpython
diff --git a/‎.github/workflows/check_urls.yml‎
Lines changed: 6 additions & 6 deletions b/‎.github/workflows/check_urls.yml‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎.github/workflows/documentation.yml‎
Lines changed: 29 additions & 22 deletions b/‎.github/workflows/documentation.yml‎
Lines changed: 29 additions & 22 deletions
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎README.rst‎
Lines changed: 4 additions & 5 deletions b/‎README.rst‎
Lines changed: 4 additions & 5 deletions
diff --git a/‎_doc/_static/project_ico.png‎
-114 Bytes b/‎_doc/_static/project_ico.png‎
-114 Bytes
diff --git a/‎_doc/c_clus/gauss_mixture.rst‎
Lines changed: 1 addition & 1 deletion b/‎_doc/c_clus/gauss_mixture.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎_doc/c_garden/quantization.rst‎
Lines changed: 51 additions & 11 deletions b/‎_doc/c_garden/quantization.rst‎
Lines changed: 51 additions & 11 deletions
diff --git a/‎_doc/c_metric/pvalues.rst‎
Lines changed: 1 addition & 1 deletion b/‎_doc/c_metric/pvalues.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎_doc/c_metric/roc.rst‎
Lines changed: 0 additions & 2 deletions b/‎_doc/c_metric/roc.rst‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎_doc/c_ml/missing_values_mf.rst‎
Lines changed: 3 additions & 3 deletions b/‎_doc/c_ml/missing_values_mf.rst‎
Lines changed: 3 additions & 3 deletions
@@ -17,9 +17,9 @@ jobs:
         print_all: false
         timeout: 2
         retry_count# : 2
-        # exclude_urls: https://github, ...
-        # exclude_patterns: https://github.com/...
-        force_pass : true
+        # exclude_urls: https://dumps.wikimedia.org/other/pageviews/%Y/%Y-%m/pageviews-%Y%m%d-%H0000.gz,https://dumps.wikimedia.org/frwiki/latest/latest-all-titles-in-ns0.gz
+        exclude_patterns: https://dumps.wikimedia.org/
+        # force_pass : true
 
     - name: urls-checker-docs
       uses: urlstechie/urlchecker-action@master
@@ -29,6 +29,6 @@ jobs:
         print_all: false
         timeout: 2
         retry_count# : 2
-        # exclude_urls: https://github, ...
-        # exclude_patterns: https://github.com/...
-        force_pass : true
+        exclude_urls: https://hal.archives-ouvertes.fr/hal-00990252/document
+        exclude_patterns: https://www.data.gouv.fr/fr/datasets/r/e3d83ab3-dc52-4c99-abaf-8a38050cc68c
+        # force_pass : true
@@ -1,24 +1,9 @@
-name: Documentation
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
-  schedule:
-    #        ┌───────────── minute (0 - 59)
-    #        │  ┌───────────── hour (0 - 23)
-    #        │  │ ┌───────────── day of the month (1 - 31)
-    #        │  │ │ ┌───────────── month (1 - 12 or JAN-DEC)
-    #        │  │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
-    #        │  │ │ │ │
-    #        │  │ │ │ │
-    #        │  │ │ │ │
-    #        *  * * * *
-    - cron: '30 1 * * 0'
+name: Documentation and Code Coverage
+
+on: [push]
 
 jobs:
-  build_wheels:
+  run:
     name: Build documentation on ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
     strategy:
@@ -41,12 +26,34 @@ jobs:
       - name: Install requirements
         run: python -m pip install -r requirements.txt
 
-      - name: Install
-        run: python setup.py install
-
       - name: Install requirements dev
         run: python -m pip install -r requirements-dev.txt
 
+      - name: Cache pip
+        uses: actions/cache@v2
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('requirements-dev.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
+            ${{ runner.os }}-
+
+      - name: Generate coverage report
+        run: |
+          pip install pytest
+          pip install pytest-cov
+          export PYTHONPATH=.
+          pytest --cov=./mlstatpy/ --cov-report=xml --durations=10 --ignore-glob=**LONG*.py --ignore-glob=**notebook*.py
+          export PYTHONPATH=
+
+      - name: Upload coverage reports to Codecov
+        uses: codecov/codecov-action@v3
+        env:
+          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+
+      - name: Install
+        run: python setup.py install
+
       - name: Copy license
         run: cp LICENSE* ./_doc
       - name: Copy changelogs
 
@@ -12,6 +12,8 @@ build/*
 onnxruntime_profile*
 prof
 temp_*
+_doc/CHANGELOGS.rst
+_doc/LICENSE.txt
 _doc/auto_examples/*
 _doc/examples/_cache/*
 _doc/examples/onnxruntime_profile*
 
@@ -1,8 +1,4 @@
 
-.. image:: https://travis-ci.com/sdpython/mlstatpy.svg?branch=main
-    :target: https://app.travis-ci.com/github/sdpython/mlstatpy
-    :alt: Build status
-
 .. image:: https://ci.appveyor.com/api/projects/status/5env33qptorgshaq?svg=true
     :target: https://ci.appveyor.com/project/sdpython/mlstatpy
     :alt: Build Status Windows
@@ -24,6 +20,9 @@
     :alt: GitHub Issues
     :target: https://github.com/sdpython/mlstatpy/issues
 
+.. image:: https://codecov.io/gh/sdpython/mlstatpy/branch/main/graph/badge.svg?token=2gKZsIVL3e
+    :target: https://codecov.io/gh/sdpython/mlstatpy
+
 .. _l-README:
 
 mlstatpy: détours mathématiques autour du machine learning
@@ -45,4 +44,4 @@ algorithm, a graph edit distance, some helpers on Wikipedia data,
 an algorithm to convert decision trees into neural network.
 
 * `GitHub/mlstatpy <https://github.com/sdpython/mlstatpy/>`_
-* `documentation <http://www.xavierdupre.fr/app/mlstatpy/helpsphinx/index.html>`_
+* `documentation <https://sdpython.github.io/doc/mlstatpy/>`_
@@ -56,7 +56,7 @@ L'estimation d'une telle densité s'effectue par l'intermédiaire
 d'un algorithme de type `Expectation Maximization (EM) <https://fr.wikipedia.org/wiki/Algorithme_esp%C3%A9rance-maximisation>`_
 (voir [Dempster1977]_) ou de ses variantes
 `SEM <https://fr.wikipedia.org/wiki/Algorithme_esp%C3%A9rance-maximisation#Algorithme_SEM>`_,
-`SAEM <http://wiki.webpopix.org/index.php/The_SAEM_algorithm_for_estimating_population_parameters>`_, ...
+`SAEM <https://wiki.inria.fr/popix/The_SAEM_algorithm_for_estimating_population_parameters>`_, ...
 (voir [Celeux1985]_, [Celeux1985b]_).
 La sélection du nombre de lois dans le mélange reste un
 problème ouvert abordé par l'article [Biernacki2001]_.
 
@@ -47,21 +47,21 @@ dans l'intervalle *[0, 255]*, 0 à gauche, 255 à droite.
 .. math::
 
     \begin{array}{rcl}
-    q_1(z, \lambda, x) &=& c_{0}^{255}\pa{\intf_{i8}{\frac{x}{\lambda}} + z} \text{ quantization}\\
+    q_1(z, \lambda, x) &=& c_{0}^{255}\pa{\intf{\frac{x}{\lambda}}_{i8} + z} \text{ quantization}\\
     q_2(z, \lambda, i) &=& \lambda(i - z) \text{ déquantization} \\
     q(z, \lambda, x) &=& q_2(z, \lambda, q_1(z, \lambda, x)) \\
-    &=& \lambda\pa{c_{0}^{255}\pa{\intf_{i8}{\frac{x}{\lambda}} + z} - z} \\
-    &=& \lambda\intf_{i8,z}{\frac{x}{\lambda}}
+    &=& \lambda\pa{c_{0}^{255}\pa{\intf{\frac{x}{\lambda}}_{i8} + z} - z} \\
+    &=& \lambda\intf{\frac{x}{\lambda}}_{i8,z}
     \end{array}
 
-La fonction :math:`\intf_{i8,z}{x}` est la partie entière asociée à la fonction
+La fonction :math:`\intf{x}_{i8,z}` est la partie entière associée à la fonction
 :math:`c_{0}^{255}(i)`.
 
 .. math::
 
-    \norm{B - q(z,\lambda,B)}^2 = \sum_{ij} \pa{b_{ij} - \lambda\intf_{i8,z}{\frac{x}{\lambda}}}^2
+    \norm{B - q(z,\lambda,B)}^2 = \sum_{ij} \pa{b_{ij} - \lambda\intf{\frac{x}{\lambda}}_{i8,z}}^2
 
-Le problème est la fonction :math:`\intf_{i8,z}{.}` qui n'est pas dérivable.
+Le problème est la fonction :math:`\intf{.}_{i8,z}` qui n'est pas dérivable.
 C'est un problème d'optimisation discrète. Le paramètre :math:`\lambda`
 est appelé *scale* ou *échelle*. Il peut y en avoir un ou plusieurs
 mais dans ce cas, on considère les différentes parties de *B*
@@ -97,20 +97,60 @@ inférieur (ou le plus proche).
 
 .. math::
 
-    \norm{B - q(z,\lambda,B)}^2 = \sum_{ij} \pa{b_{ij} - \lambda\intf_{f8,z}{\frac{x}{\lambda}} }^2
+    \norm{B - q(z,\lambda,B)}^2 = \sum_{ij} \pa{b_{ij} - \lambda\intf{\frac{x}{\lambda}}_{f8,z} }^2
 
 Optimisation
 ============
 
 L'idée est de traiter la discrétisation sur un ensemble fini de valeurs,
-quel qu'il soit, des entiers ou des réels codés sur 8 bits. On note Cette
+quel qu'il soit, des entiers ou des réels codés sur 8 bits. On note cet
 ensemble :math:`(d_1, ..., d_n)`. On réécrit le problème d'optimisation :
 
 .. math::
 
     \begin{array}{rcl}
-    \norm{B - q(z,\lambda,B)}^2 &=& \sum_{ij} \pa{b_{ij} - \lambda\intf_{f8,z}{\frac{x}{\lambda}} }^2 \\
-    &=& \sum_{k=1}^{n} \sum_{ij} \pa{b_{ij} - \lambda\intf_{f8}{\frac{x}{\lambda}} }^2
-    \indicatrice{\intf_{f8}{\frac{x}{\lambda}} = d_k}
+    \norm{B - q(z,\lambda,B)}^2 &=& \sum_{ij} \pa{b_{ij} - \lambda\intf{\frac{x}{\lambda}}_{f8,z} }^2 \\
+    &=& \sum_{k=1}^{n} \sum_{ij} \pa{b_{ij} - \lambda\intf{\frac{x}{\lambda}}_{f8} }^2
+    \indicatrice{\intf{\frac{x}{\lambda}}_{f8} = d_k} \\
+    &=& \sum_{k=1}^{n} \sum_{ij} \pa{b_{ij} - \lambda d_k }^2
+    \indicatrice{\intf{\frac{x}{\lambda}}_{f8} = d_k} \\
     \end{array}
 
+On note :math:`K(u)=\frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}u^2}` le noyau gaussien.
+
+.. math::
+
+    \begin{array}{rcl}
+    \norm{B - q(z,\lambda,B)}^2 &=& \lim_{h\to 0} \sum_{k=1}^{n} \sum_{ij} \pa{b_{ij} - \lambda d_k }^2
+    \frac{1}{h} K\pa{\frac{b_{ij} - \lambda d_k}{h}}\indicatrice{\intf{\frac{x}{\lambda}}_{f8} = d_k}
+    \end{array}
+
+Cette notation ne tient pas compte du décalage *z* qu'on peut ajouter comme suit :
+
+.. math::
+
+    \begin{array}{rcl}
+    \norm{B - q(z,\lambda,B)}^2 &=& \lim_{h\to 0} \sum_{k=1}^{n} \sum_{ij} \pa{b_{ij} - \lambda d_k - z }^2
+    \frac{1}{h} K\pa{\frac{b_{ij} - \lambda d_k - z}{h}}\indicatrice{\intf{\frac{x}{\lambda}}_{?,z} = d_k}
+    \end{array}
+
+Le problème est beaucoup plus simple à résoudre si on enlève l'indicatrice
+et la fonction devient dérivable. L'idée est de regarder l'évolution des valeurs trouvées
+pour :math:`\lambda` et *z* en faisant tendre *h* vers 0.
+On commence par le plus simple, le cas float 8 pour lequel on impose :math:`z=0`.
+
+.. math::
+
+    f(B,\lambda,h) = \frac{1}{h} \sum_{k=1}^{n} \sum_{ij} \pa{b_{ij} - \lambda d_k - z }^2
+    K\pa{\frac{b_{ij} - \lambda d_k - z}{h}}
+
+Si on suppose que les coefficients de *B* suivent une certaine loi de probabilité,
+ce calcul devient une somme d'espérances.
+
+.. math::
+
+    f(X,\lambda,h) = \frac{1}{h} \sum_{k=1}^{n} \esp{\pa{X - \lambda d_k - z }^2
+    K\pa{\frac{X - \lambda d_k - z}{h}}}
+
+Résolution
+==========
@@ -435,7 +435,7 @@ Bibliographie
 =============
 
 * `p-Value and Statistical Practice
-  <https://www.stat.columbia.edu/~gelman/research/published/pvalues3.pdf>`_
+  <http://www.stat.columbia.edu/~gelman/research/published/pvalues3.pdf>`_
 * `An investigation of the false discovery rate and the misinterpretation of p-values
   <https://rsos.royalsocietypublishing.org/content/royopensci/1/3/140216.full.pdf>`_
 * :epkg:`Holm-Bonferroni method`
@@ -620,8 +620,6 @@ courbes ROC obtenues pour chacune des classes prise séparément
 Exemple
 =======
 
-Voir `ROC <http://www.xavierdupre.fr/app/ensae_teaching_cs/helpsphinx/antiseches/ml_basic/plot_regression.html#sphx-glr-antiseches-ml-basic-plot-roc-py>`_.
-
 .. [Agarwal2005] Generalization Bounds for the Area Under the ROC Curve (2005),
    Shivani Agarwal, Thore Graepel, Ralf Herbich, Sariel Har-Peled, Dan Roth
    *Journal of Machine Learning Research, volume 6, pages 393-425*
 
@@ -291,8 +291,7 @@ revient à déterminer les coordonnées de la projection d'un nouveau point :mat
 dans le plan défini par les vecteurs de la matrice :math:`H`.
 Pour de nouvelles observations :math:`M_2=X_{q+1}`,
 la fonction `transform
-<https://scikit-learn.org/stable/modules/generated/sklearn.decomposition
-.NMF.html#sklearn.decomposition.NMF.transform>`_
+<https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html#sklearn.decomposition.NMF.transform>`_
 de la classe :class:`sklearn.decomposition.NMF` réestime une matrice
 :math:`W_2` qui projette les vecteurs lignes de :math:`M_2` sur
 les vecteurs de *H* en conservant des coefficients de projection positifs.
@@ -349,7 +348,8 @@ avec une factorisation de matrices. On peut également se server de la méthode
 pour calculer une ACP avec des valeurs manquantes.
 
 * `Imputation de données manquantes <https://www.math.univ-toulouse.fr/~besse/Wikistat/pdf/st-m-app-idm.pdf>`_
-* `Principal component analysis with missing values: a comparative survey of methods <http://pbil.univ-lyon1.fr/members/dray/files/articles/dray2015a.pdf>`_
+* `Principal component analysis with missing values: a comparative survey of methods
+  <https://www.researchgate.net/publication/273901434_Principal_component_analysis_with_missing_values_a_comparative_survey_of_methods>`_
 
 Interprétation
 ++++++++++++++