summaryrefslogtreecommitdiffstats
path: root/source/t/texlive/prep/texmf_get.sh
blob: fab74ce5f2935ae0e1df276c6e7ea0f3805a36fe (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
#!/bin/bash

# texmf_get.sh (c) 2016-2018 Johannes Schoepfer, Germany, slackbuilds[at]schoepfer[dot]info
# All rights reserved.
#
# Redistribution and use of this script, with or without modification, is
# permitted provided that the following conditions are met:
#
# 1. Redistributions of this script must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
#  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED
#  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
#  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
#  EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
#  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
#  OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
#  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
#  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
#  ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#  V 15.0.0
#
#  Prepare xz-compressed tarballs of texlive-texmf-trees based on texlive.tlpdb
#  This script takes care of dependencies(as far as these are present in texlive.tlpdb)
#  of collections and packages, and that every texlive-package is included only once.
#  The editions(base/extra/docs) should contain no binaries(exception biber)
# -base: the most usefull stuff, most binaries/scripts,
#  manpages for compiled binaries  65mb 2017-11-07
# -docs: -base documentation only, no manpages/GNU infofiles
# -extra: remaining stuff
#
#  texlive netarchive policy: Every package is included as dependency 
#  in exactly one collection. A package may have dependencies on other
#  packages from any collection.

# package source: http://mirror.ctan.org/systems/texlive/tlnet/archive/

#set -e
MAJORVERSION=2018

# -excluded packages, which are
# -useless without tlmgr-installer
# -non-linux
# -covered by an external package e.g. asymptote on SBo
# -obsolete
# -binaries provided already by texlive.Slackbuild
# -does only contain sources, or hyphen directives, e.g. metatype1, patch, ...
# -does only contain hyphen directives, e.g. hyphen-farsi ...

global_exclude="
  hyphen-farsi
  hyphen-arabic
  asymptote
  tlcockpit
  tlshell
  texlive.infra
  texlive-docindex
  texlive-scripts
  texlive-msg-translations
  texosquery
  texliveonfly
  texworks
  aleph
  omega
  antomega
  omegaware
  lambda
  otibet
  ocherokee
  oinuit
  cslatex
  bibarts
  metatype1
  patch
  wintools.win32
  dviout.win32
  "

  # special packages, move their type1 fonts(if metfonts are present)
  # and/or docs to -extra
special_packages="
  koma-script
  velthuis
  armtex
  montex
  vntex
  pl
  cc-pl
  cs
  musixtex-fonts
  tipa
  cbfonts
  ptex-fonts
  uptex-fonts
  "
#Todo: split type1 fonts, or keep subset of type1 fonts in base
#  cm-super

# keep precompiled binaries, list binary, not package name 
keep_precompiled="
  biber
  "

texmf_editions () {
  
  # "excludes from -base", also dependencies are excluded
  PACKAGES="
    cm-super
    biber
    bib2gls
    knitting
    pgfornament
    pgfplots
    pst-cox
    pst-poker
    pst-vectorian
    pst-geo
    bclogo
    $(grep ^"name .*biblatex" $db | cut -d' ' -f2 )
    " texmfget extra || exit 1
 
    # packages/collections and their dependencies for -base
  PACKAGES="
    $(cat $corepackages)
    etoolbox
    xcolor
    memoir
    velthuis
    wasy
    ptex
    platex
    revtex
    uptex
    uplatex
    collection-basic
    collection-latex
    collection-metapost
    collection-plaingeneric
    collection-luatex
    collection-context
    collection-fontutils
    collection-langczechslovak
    collection-langeuropean
    collection-langenglish
    collection-langfrench
    collection-langitalian
    collection-langpolish
    collection-langportuguese
    collection-langspanish
    collection-latexrecommended
    $(collection_by_size fontsextra 70000 || exit 1)
    $(collection_by_size publishers 10000 || exit 1)
    " texmfget base

    # packages/collections and their dependencies for -extra
  PACKAGES="
    amiri
    arabi
    arabi-add
    arara
    arev
    bangorcsthesis
    beamer2thesis
    beamertheme-detlevcm
    beamertheme-epyt
    beamertheme-saintpetersburg
    beebe
    bhcexam
    bxtexlogo
    churchslavonic
    collection-fontsextra
    collection-langchinese
    collection-langcjk
    collection-langjapanese
    collection-langkorean
    collection-publishers
    collection-texworks
    collection-wintools
    ctan-o-mat
    ctanify
    ctanupload
    dad
    ethiop-t1
    fibeamer
    fithesis
    ghsystem
    gregoriotex
    hustthesis
    ijsra
    iwhdp
    jpsj
    kerkis
    ketcindy
    kpfonts
    langsci
    latex2nemeth
    libertine
    lilyglyphs
    lni
    luatexko
    media9
    musuos
    mwe
    newtx
    nwejm
    padauk
    pdfwin
    pdfx
    powerdot-tuliplab
    powerdot-FUBerlin
    quran
    resumecls
    sanskrit-t1
    sapthesis
    sduthesis
    seuthesix
    simurgh
    skaknew
    stellenbosch
    suanpan
    tudscr
    uantwerpendocs
    ucs
    udesoftec
    universalis
    uowthesis
    wasy2-ps
    xduthesis
    xetexko 
    xq
    " texmfget extra || exit 1
 
   # What's left, for base again 
  PACKAGES="
    collection-fontsrecommended
    collection-xetex
    collection-langcyrillic
    collection-langarabic
    collection-langgerman
    collection-langgreek
    collection-langother
    collection-humanities
    collection-mathscience
    collection-pictures
    collection-pstricks
    collection-music
    collection-games
    collection-binextra
    collection-bibtexextra
    collection-formatsextra
    collection-latexextra
    " texmfget base || exit 1

}

# ==== Nothing to edit beyond this line ====

usage () {
  echo
  echo "Generate texmf trees/editions based on collections/packages"
  echo "and their (hard)dependencies."
  echo "./texmf_get.sh [base|docs|extra|lint]"
  echo
  echo "-base:  texfiles, no docs"
  echo "-docs:  docs of -base"
  echo "-extra: remaining texfiles and docs"
  echo "[lint]: compare filename contents of all generated editions,"
  echo " to detect overlapping files"
  echo 
  echo "Only new/updated/missing tex packages are downloaded."
  echo "The first run takes \"long\", tex packages(about 2500Mb)"
  echo "need to be downloaded."
  echo "To check out a new version/release, delete"
  echo "$db"
  echo "A new ascii index file/database(texlive.tlpdb) is will be"
  echo "pulled on the next run, and a new version yymmdd will be set."
  echo
  echo "All generated tarballs, logs etc. are going to"
  echo "$TMP"
  echo
}

collection_by_size () {
  # from collection $1, pull packages smaller $2 bytes 
  start_n="$(grep -n ^"name collection-$1"$ $db | cut -d':' -f1)"
  # find end of package/collection
  for emptyline in $emptylines
  do
    if [ "$emptyline" -gt "$start_n" ]
    then
      end_n=$emptyline
      break
    fi
  done
  extrapackages="$(sed "${start_n},${end_n}!d" $db | grep ^"depend " | grep -v ^"depend collection" | sed "s/^depend//g" )"
  
  # add if smaller than ...
  for checksize in $extrapackages
  do
    package_meta $checksize || exit 1
    size=$(grep ^"containersize " $texmf/$checksize.meta | cut -d' ' -f2)
    # for $2, e.g. 3000 means 3kb
    [ $size -lt $2 ] && echo $checksize
  done
}

package_meta () {
  if [ ! -s "$texmf/$1.meta" ]
  then
    # collection start linenumer
    start_n="$(grep -n ^"name ${1}"$ $db | cut -d':' -f1)"
    [ -z "$start_n" ] && echo "$1 was not found in $db, bye." && exit 1
    # find end of package/collection
    for emptyline in $emptylines
    do
      if [ "$emptyline" -gt "$start_n" ]
      then
        end_n=$emptyline
        break
      fi
    done
    # Don't handle collections as dependency of other collections
    sed "${start_n},${end_n}!d;/^depend collection/d" $db > $texmf/$1.meta 
  fi
}

download () {
  # Download packages, if not already available. Not every packages has a corresponding .doc package.
  # Try three times if package isn't present, with -t1 to get another mirror the second time
  cd $texmf
  if [ ! -s "${1}${flavour}.tar.xz" ]
  then
    for run in {1..10}
    do
      wget -t1 -c ${mirror}archive/${1}${flavour}.tar.xz
      [ -s "${1}${flavour}.tar.xz" ] && break
    done
  fi
  # If no success by downloading, write error log
  [ ! -s ${1}${flavour}.tar.xz ] && echo "Downloading ${1}${flavour}.tar.xz did not work, writing to $errorlog" && echo "$VERSION" >> $errorlog && echo "Error downloading ${1}${flavour}.tar.xz" >> $errorlog && exit 1
    
  # check sha512, give three tries for downloading again(diffrent mirrors are used automatically)
  if [ "$flavour" = ".doc" ]
  then
    sha512="$(grep ^doccontainerchecksum $texmf/$1.meta | cut -d' ' -f2 )"
  else
    sha512="$(grep ^containerchecksum $texmf/$1.meta | cut -d' ' -f2 )"
  fi
  
  for run in {1..10}
  do
    if [ "$(sha512sum ${1}${flavour}.tar.xz | cut -d' ' -f1 )" != "$sha512" ]
    then
      # Download (hopefully) newer file
      rm ${1}${flavour}.tar.xz
      wget -t1 -c ${mirror}archive/${1}${flavour}.tar.xz
    else
      break
    fi
  done
  # check sha512 again, exit if it fails
  if [ "$(sha512sum ${1}${flavour}.tar.xz | cut -d' ' -f1 )" != "$sha512" ]
  then
    echo "sha512sum $(sha512sum ${1}${flavour}.tar.xz | cut -d' ' -f1 ) of"
    echo "${package}${flavour}.tar.xz doesn't match with $db"
    # delete metafile on failure to get generated again on next run, where new $db may be in use
    rm $texmf/$1.meta
    echo "sha512sum $sha512"
    echo "Delete ${db}* to be current again, and try again."
    exit 1
  fi
}

untar () {
  # leave if $1 has no content
  if [ -s "$1" ]
  then
    while read package
    do
      echo "untar $package"
      # untar all packages, check for relocation, "relocate 1" -> untar in texmf-dist
      download $package || exit 1
      # untar the tex package, relocate to texmf-dist if necessary, binpackages always need relocation
      relocated='.'
      [ -n "$(grep -w ^"relocated 1" $texmf/$package.meta)" -o -n "$(grep ^"binfiles " $texmf/$package.meta)" ] && relocated="texmf-dist" 
      # if not .doc package, investigate files for dependencies/provides
      if [ -n "$flavour" ]
      then
        tar xf ${package}${flavour}.tar.xz --exclude tlpkg -C $relocated || exit 1
      else
        #tar vxf ${package}${flavour}.tar.xz --exclude tlpkg -C $relocated 
        tar vxf ${package}${flavour}.tar.xz --exclude tlpkg -C $relocated | egrep '\.sty$|\.bbx$|\.cls$' > $texmf/$package.deps
        if [ -n "$texmf/$package.deps" ]
        then
          unset provide
          unset depends
          for depfile in $(cat $texmf/$package.deps)
          do
            filename="$( echo $depfile | rev | cut -d'.' -f2- | cut -d'/' -f1 | rev)"
            # always add $filename as "ProvidesPackage", if it's a .sty
            echo $depfile | grep '\.sty'$ &>/dev/null
            [ $? = 0 ] && provide+="${filename},"
            # remove comments, if there are backslashes ignore that content, except it is \filename
            #provide+="$(sed "s/%.*//g" $texmf/$relocated/$depfile | sed -n "s/.*\\\ProvidesPackage{\([^}]*\)}.*/\1/p" | sed "s/\\\filename/$filename/g;/\\\/d" | sort -u | tr '\n' ',')"
            #sed -z "s/.*\\\Provides\(Package\|ExplPackage\|File\|Class\)*.\n//g" | sed "s/[[:space:]]//
           #provide+="$(sed "s/%.*//g" $texmf/$relocated/$depfile | sed -z "s/.*\\\Provides\(Package\|ExplPackage\|File\|Class\)*.\n//g" | sed "s/[[:space:]]// | sed -n "s/.*\\\Provides\(Package\|ExplPackage\|File\|Class\){\([^}]*\)}.*/\2/p" | sed "s/\\\filename/$filename/g;s/\\\ExplFileName/$filename/g;/\\\/d" | sed "s/\(\.sty$\|\.cls$\)//g" | sort -u | tr '\n' ',')"
            provide+="$(sed "s/%.*//g" $texmf/$relocated/$depfile | sed -z "s/\(Package\|ExplPackage\|File\|Class\)\n/\1/g" | sed "s/[[:space:]]//" | sed -n "s/.*\\\Provides\(Package\|ExplPackage\|File\|Class\){\([^}]*\)}.*/\2/p" | sed "s/\\\filename/$filename/g;s/\\\ExplFileName/$filename/g" | sed "s/\(\.sty$\|\.cls$\)//g" | sort -u | tr '\n' ',')"
            #depends+="$(sed "s/%.*//g" $texmf/$relocated/$depfile | sed -n "s/.*\(\\\require\|\\\use\)package{\([^}]*\)}.*/\2/p" | sed "/\\\/d" | sort -u | tr '\n' ',')"
            depends+="$(sed "s/%.*//g" $texmf/$relocated/$depfile | sed -n "s/.*\(\\\require\|\\\use\)package{\([^}]*\)}.*/\2/p" | sort -u | tr '\n' ',')"
            #depends+="$(sed "s/%.*//g" $texmf/$relocated/$depfile | sed -n "s/.*\\\\(require\|use\)package{\([^}]*\)}.*/\2/p" | sed "/\\\/d" | sort -u | tr '\n' ',')"
          done
          if  [ -n "$provide" ]
          then
            #echo "$package $provide | sort -u | tr '\n' ',' " >> $TMP/provides.run.$edition
            echo "$package $provide" >> $TMP/provides.run.$edition
          fi
          if [ -n "$depends" ]
          then
            echo "$package $depends" >> $TMP/depends.run.$edition
          fi
        fi
      fi

      # Check for binaries, delete them as these should be provided
      #  by texlive.Slackbuild, keep symlinks and scripts
      
      unset binaries
      unset scripts
      for arch in $platforms
      do
        if [ -d $texmf/texmf-dist/bin/$arch ]
        then
          [ ! -d $texmf/texmf-dist/linked_scripts ] \
            && mkdir $texmf/texmf-dist/linked_scripts 
          # rewrite link target to fit systemwide installation
          for link in $(find $texmf/texmf-dist/bin/$arch -type l)
          do
            ln -sf $(readlink $link | sed "s/^..\/..\(.*\)/..\/share\1/" ) $link || exit 1
          done
          # move symlinks/scripts to linked_scripts
          find $texmf/texmf-dist/bin/$arch -type l -exec mv '{}' $texmf/texmf-dist/linked_scripts/ \; 
        
          binaries="$(find $texmf/texmf-dist/bin/$arch -type f -exec file '{}' + | grep -e "executable" -e "shared object" | grep ELF | cut -f 1 -d : )"
          for bin in $binaries
          do
            # keep binaries of special packages
            for binary in $keep_precompiled
            do
              if [ "$(echo $bin | rev | cut -d'/' -f1 | rev )" != "$binary" ]
              then
                rm $bin
                echo $bin | rev | cut -d'/' -f1 | rev >> $binary_removed.$edition
              fi
            done
          done
          # move scripts to linked-scripts
          scripts="$(find $texmf/texmf-dist/bin/$arch -type f -exec file '{}' + | grep -wv ELF | cut -f 1 -d : )"
          for script in $scripts
          do
            mv $script $texmf/texmf-dist/linked_scripts/ 
          done
        fi
      done

      if [ "$flavour" = ".doc" ]
      then
        size=$(grep ^doccontainersize $texmf/$package.meta | cut -d' ' -f2) 
      else
        size=$(grep ^containersize $texmf/$package.meta | cut -d' ' -f2)
      fi
      shortdesc="$(grep ^shortdesc $texmf/$package.meta | cut -d' ' -f2- )"
      echo "$size byte, $package$flavour: $shortdesc" >> $output.meta
      # make index of uncompressed size of each package
      echo "$(xz -l --verbose ${package}${flavour}.tar.xz | grep "Uncompressed size" | cut -d'(' -f2 | cut -d' ' -f1 ) byte, $package$flavour: $shortdesc" >> $output.meta.uncompressed
    done < $1
    
    # copy packages index to texmf-dist, so included packages are known in later installation
    # don't list binary packages, as the binaries itself are not contained, only the symlinks.
    cat $output.meta | grep -v '\-linux:'  >> $output.$edition.meta
    cat $output.meta.uncompressed | grep -v '\-linux:' >> $output.$edition.meta.uncompressed
    
    # cleanup
    [ -f $output.meta ] && rm $output.meta
    [ -f $output.meta.uncompressed ] && rm $output.meta.uncompressed
  fi
}

remove_cruft () {
  # Remove m$-stuff, ConTeXt single-user-system stuff, source leftovers and pdf-versions of manpages
  rm -rf texmf-dist/source
  rm -rf texmf-dist/scripts/context/stubs/source/
  find texmf-dist/ -type d -name 'win32'         -exec rm -rf {} +
  find texmf-dist/ -type d -name 'win64'         -exec rm -rf {} +
  find texmf-dist/ -type d -name 'mswin'         -exec rm -rf {} +
  find texmf-dist/ -type d -name 'win'           -exec rm -rf {} +
  find texmf-dist/ -type d -name 'setup'         -exec rm -rf {} +
  find texmf-dist/ -type d -name 'install'       -exec rm -rf {} +
  find texmf-dist/ -type f -name 'uninstall*.sh' -delete
  find texmf-dist/ -type f -name '*.bat'         -delete
  find texmf-dist/ -type f -name '*.bat.w95'     -delete
  find texmf-dist/ -type f -name '*win32*'       -delete
  find texmf-dist/ -type f -name 'winansi*'      -delete
  find texmf-dist/ -type f -name '*man1.pdf'     -delete
  find texmf-dist/ -type f -name '*man5.pdf'     -delete
  # Remove zero-length files, as these appear e.g. in hyph-utf8 tex-package.
  # find texmf-dist/ -type f -size 0c -delete
  find texmf-dist/ -type f -empty -delete
  # Remove empty directories recursively
    find texmf-dist/ -type d -empty -delete
}

texmfget () {
  # make sure no package is added more than once.
  echo "Preparing index of packages to be added to -${1} ..."
  echo "$PACKAGES" | sed "s/[[:space:]]//g;/^$/d" > $collections_tobedone
  # Remove outputfile if already present
  >$output
  >$output_doc
  
  # Only do something if $collection wasn't already done before
  while [ -s $collections_tobedone ]
  do
    collection=$(tail -n1 $collections_tobedone)
  
    # continue with next collection if collection was already done
    if [ -s "$collections_done" ]
    then
      grep -w "^${collection}$" $collections_done &> /dev/null
      if [ $? = 0 ]
      then
        # remove from $collections_tobedone
        sed -i "/^$collection$/d" $collections_tobedone
        if [ -n "$(grep "^${collection} added to" $logfile)" ]
        then
          echo "$collection already added " >> $logfile
        fi
        continue
      fi
    fi
    
    package_meta $collection || exit 1
 
    # If $collection is a singel package(not a collection-), add it here
    if [ -n "$(head -n1 $texmf/$collection.meta | fgrep -v "name collection" )" ]
    then
      addpackage=no 
      # if package contains docs, add to docpackages
      if [ -n "$(grep ^docfiles $texmf/$collection.meta)" ]
      then
        echo "$collection" >> $output_doc
        echo "$collection added to docs $1" >> $logfile
        addpackage=yes 
      fi
      if [ -n "$(grep ^runfiles $texmf/$collection.meta)" -o -n "$(grep ^binfiles $texmf/$collection.meta)" ]
      then
        echo "$collection" >> $output
        echo "$collection added to -$1" >> $logfile
        addpackage=yes 
      fi
      # very package should be added to one edition, abort if that didn't work
      if [ $addpackage = no ]
      then
        echo "$collection doesn't contain any docfiles/runfiles/binfiles"
        echo "Please exclude package/report to upstream mailinglist tex-live@tug.org, bye."
        exit 1
      fi
    fi

    # Don't handle collections as dependency of other collections, as this destroys control over what packages to be added
    # add dependend packages, but no binary(ARCH) and no packages conataining a '.'. Packges with dot indicate binary/texlive-manager/windows packages
    
    grep ^"depend " $texmf/$collection.meta | cut -d' ' -f2- > $dependencies
    
    if [ -s "$dependencies" ]
    then
      # check for .ARCH packages which may be binaries, scripts or links
      # Binaries should all come from the sourcebuild(exception biber)
      for dependency in $(cat $dependencies)
      do
        echo $dependency | grep '\.ARCH'$ &>/dev/null
        if [ $? = 0 ] 
        then
          for arch in $platforms
          do
            archpackage="$(echo $dependency | sed "s/\.ARCH$/\.$arch/")"
            grep ^"name $archpackage"$ $db &>/dev/null && echo "$archpackage" >> $dependencies.verified_arch
          done
        else
          echo $dependency >> $dependencies.verified_arch
        fi
      done
      if [ -f $dependencies.verified_arch ]
      then
        mv $dependencies.verified_arch $dependencies
      else
        rm $dependencies
      fi
    fi

    if [ -s "$dependencies" ]
    then
      echo "----------------" >> $logfile
      echo "Dependencies of $collection: $(cat $dependencies | tr '\n' ' ')" >> $logfile
      for dependency in $(cat $dependencies)
      do
        if [ -n "$(grep ^"${dependency}"$ $collections_done)" ]
        then
          sed -i "/^${dependency}$/d" $dependencies
          continue
        else
          for exclude in $global_exclude
          do
            if [ "$exclude" = "$dependency" ]
            then
              sed -i "/^${exclude}$/d" $dependencies
              echo "$exclude excluded, see \$global_exclude" >> $logfile
            fi
          done
        fi
      done
      cat $dependencies >> $collections_tobedone
      echo "----------------" >> $logfile
    fi
    
    sed -i "/^${collection}$/d" $collections_tobedone
    echo "$collection" >> $collections_done
  done
  # handle package index list per edition
  cat $output >> $TMP/packages.$1
  # handle doc package index, one for each edition
  cat $output_doc >> $TMP/packages.$1.doc

  # untar only one $edition, untar docs together with -extra edition
  if [ "$1" = $edition -o docs = $edition ]
  then
    cd $texmf
    
    # Cleanup tar-directory
    [ -d $texmf/texmf-dist ] && rm -rf $texmf/texmf-dist
    mkdir $texmf/texmf-dist
    
    # Make tarball/checksum reproducible by setting mtime(clamp-mtime), owner, group and sort content
    # --clamp-mtime --mtime doesn't work with tar 1.13, when makepkg creates the tarball:
    # tar-1.13: time_t value 9223372036854775808 too large (max=68719476735)
    echo "Adding files to $( echo $tarball | rev | cut -d'/' -f1 | rev ) ..."
    case $edition in
      base)
      unset flavour
      untar $output || exit 1
      remove_cruft || exit 1
      tar rf $tarball --owner=0 --group=0 --sort=name texmf-dist || exit 1
      rm -rf texmf-dist
      ;;
      extra)
      unset flavour
      untar $output || exit 1
      export flavour=".doc"
      untar $output_doc || exit 1
      remove_cruft || exit 1
      #tar vrf $tarball --clamp-mtime --mtime --owner=0 --group=0 --sort=name texmf-dist || exit 1
      tar rf $tarball --owner=0 --group=0 --sort=name texmf-dist || exit 1
      rm -rf texmf-dist
      ;;
      docs)
      export flavour=".doc"
      # only add -base docs to -docs
      if [ $1 = base ]
      then
        untar $output_doc || exit 1
        remove_cruft || exit 1
      #tar vrf $tarball --clamp-mtime --mtime --owner=0 --group=0 --sort=name texmf-dist || exit 1
        tar rf $tarball --owner=0 --group=0 --sort=name texmf-dist || exit 1
        rm -rf texmf-dist
      fi
      ;;
    esac
  fi
}

lint () {

echo "Comparing content of all editions, this may take a while ..."; 
cd $TMP
# check if all editions of same VERSION are there, take -base as reference
lint_version=$( ls texlive-base-*tar.xz | head -n1 | cut -d'.' -f2 || exit 1)
if [ -s texlive-extra-$MAJORVERSION.$lint_version.tar.xz \
  -a -s texlive-docs-$MAJORVERSION.$lint_version.tar.xz  ]
then
  for edition in base extra docs
  do
    echo "Extracting index of texlive-${edition}-$MAJORVERSION.$lint_version.tar.xz ..."
    # don't list directories
    tar tf texlive-${edition}-$MAJORVERSION.$lint_version.tar.xz | grep -v '/'$ > $TMP/packages.$edition.lint
  done

  # compare content
  for edition in base extra docs
  do
    >$TMP/packages.$edition.lint.dup
    case $edition in
      base)
        echo "check if files of base are present in another edition"
        while read line
        do
          grep ^"$line"$ $TMP/packages.extra.lint >> $TMP/packages.base.lint.dup
          grep ^"$line"$ $TMP/packages.docs.lint >> $TMP/packages.base.lint.dup
        done < $TMP/packages.$edition.lint
      ;;
    esac
  done
else
  echo "Not all editions are present to lint them. Create them first by"
  echo "$0 [base|docs|extra]"
  echo "bye."
  exit 1
fi

exit 0

}

# Main

# release mirror
mirror="http://mirror.ctan.org/systems/texlive/tlnet/"
# pre-test mirror 2016
# mirror="http://ftp.cstug.cz/pub/tex/local/tlpretest/"
LANG=C
TMP=$PWD/tmp
output=$TMP/packages
output_doc=$TMP/packages.doc.tmp
errorlog=$TMP/error.log
texmf=$TMP/texmf
db=$TMP/texlive.tlpdb
tmpfile=$TMP/tmpfile
collections_done=$TMP/done
collections_tobedone=$TMP/tobedone
corepackages=$TMP/corepackages
allcollections=$TMP/allcollections
binary_removed=$TMP/binaries.removed
manpages=$TMP/manpages
dependencies=$TMP/deps
packages_base=$TMP/packages.base
packages_extra=$TMP/packages.extra
packages_manpages=$TMP/packages.manpages
files_split=$TMP/files.split
platforms="x86_64-linux i386-linux"

mkdir -p $texmf
cd $TMP

case "$1" in
  base|docs|extra) edition=$1;;
  lint) lint ;;
  *) usage; exit 0 ;;
esac

echo "Building $edition tarball ..."

# Set VERSION, get texlive.tlpdb and keep unshorten $db.orig 
if [ ! -s ${db}.orig -o ! -s $db -o ! -s VERSION ]
then
  echo $MAJORVERSION.$(date +%y%m%d) > VERSION
  wget -c -O ${db}.orig ${mirror}tlpkg/texlive.tlpdb 
  # remove most content from $db to be faster on later processing. 
  # keep dependencies/manpages/binfiles/shortdesc/sizes
  egrep \
    '^\S|^ RELOC/doc/man|^ texmf-dist/doc/man/man|^ RELOC/doc/info/|^ texmf-dist/doc/info/|^ bin|^$' \
    ${db}.orig | grep -v ^longdesc > $db
  
  # As $db (might be)/is new, remove the meta-files, might created again with (pontentionally) new content
  rm -rf $texmf/*.meta
fi

# Get linenumbers of empty lines from $db
emptylines="$(grep -n ^$ $db | cut -d':' -f1)"

# Provide TLCore packages for -base, as these packages(and their dependencies) should be present in any case. 
grep -B1 ^'category TLCore' $db | grep -v ^'category TLCore' |  grep -v ^-- | grep -v '\.' | cut -d' ' -f2 > $corepackages

# Make a list of all collections
grep ^"name collection-" $db | cut -d' ' -f2 > $allcollections

# translate .ARCH to platforms in excludes, to make .ARCH packages excludeable by $global_exclude
for exclude in $global_exclude
do
  if [ -n "$(echo $exclude | grep '\.ARCH'$ )" ]
  then
    for arch in $platforms
    do
      global_exclude+=" $(echo $exclude | sed "s/\.ARCH$/\.$arch/")" 
    done
    global_exclude=${global_exclude/$exclude/}
  fi
done
# globally exclude from $corepackages
for exclude in $global_exclude
do
  sed -i "/^${exclude}$/d" $corepackages
done 
  
VERSION=$(cat $TMP/VERSION)
tarball=$TMP/texlive-$edition-$VERSION.tar
# set logfile
logfile=$TMP/$VERSION.log

# reset some files
>$logfile
>$tarball
>$collections_done
>$files_split
>$manpages
>$packages_manpages
>$packages_base
>$packages_extra
>$packages_base.doc
>$packages_extra.doc
>$TMP/packages.$edition.meta
>$TMP/packages.$edition.meta.uncompressed
>$TMP/provides.run.$edition
>$TMP/depends.run.$edition
>$binary_removed.$edition

# put the editions base/extra together
texmf_editions || exit 1

# Check if all collections are part in at least one edition
while read collection
do
  grep -w "$collection" $collections_done &> /dev/null
  if [ $? != 0 ]
  then
    echo "Error: $collection was not handled."
    echo "Edit packages/collections in the texmfget function." | tee -a $logfile
    exit 1
  fi
done < $allcollections

# cleanup 
rm $allcollections
rm $corepackages
rm $collections_done
rm $collections_tobedone
rm $output
rm $output_doc
rm $dependencies

# untar special- and manpage packages to be splitted/moved to other editions
# splitting special packages, files index
echo "Prepare index of to be splitted/moved files from -base"
[ ! -d texmf-dist ] && mkdir texmf-dist
for package in $special_packages
do
  echo "Splitting $package"
  # special packages have to be in -base, as only here are special
  # tasks done to reduce size of -base edition
  if [ -z "$( grep ^"$package"$ $packages_base )" ]
  then
    echo "$package was not found to be part of -base"
    echo "Edit \$special_packages in $0"
    echo "to contain only packages from -base, bye."
    exit 1
  fi
  unset relocated
  pathprefix="texmf-dist/"
  [ -n "$(grep -w ^"relocated 1" $texmf/$package.meta)" ] && \
    relocated="-C texmf-dist" && unset pathprefix
  # avoid big pdf docs which are also present as html
  # move (big)type1 fonts to -extra
  # $files_split lists files to be moved from -base -extra
  if [ $package = "cm-super" ]
  then
    # cm-super minimal for -base, create index of extended cm-super
    tar tf $texmf/$package.tar.xz | sed \
      "/1000\.pfb$/d;/^tlpkg/d;/\.sty$/d;/\.enc$/d;/\.GS$/d" \
      | tee -a $files_split > $files_split.tmp
  else
    tar tf $texmf/${package}.tar.xz | sed \
    -ne "/.*doc\/latex\/.*\.pdf$/p" \
    -ne "/.*fonts\/map\/.*\.map$/p" \
    -ne "/.*fonts\/enc\/.*\.enc$/p" \
    -ne "/.*fonts\/afm\/.*\.\(afm\|afm\.gz\)$/p" \
    -ne "/.*fonts\/type1\/.*\.pfb$/p" \
    -ne "/.*fonts\/vf\/.*\.vf$/p" \
    | tee -a $files_split > $files_split.tmp
  fi

  if [ $edition = base ]
  then
    # Calculate package-minimal size, uncompressed and compressed
    mkdir -p calculate/texmf-dist
    tar xf $texmf/$package.tar.xz -C calculate/texmf-dist --exclude-from=$files_split.tmp
    tar cf calculate/calc.tar.xz -I 'xz -9' calculate/texmf-dist
    size_minimal=$(du -bc calculate/calc.tar.xz | tail -n1 | sed "s/[[:space:]].*//")
    size_minimal_uncompressed="$(xz -l --verbose calculate/calc.tar.xz | grep "Uncompressed size" | cut -d'(' -f2 | cut -d' ' -f1 )"
    sed -i \
      -e "s/^[0-9]* byte, $package: /$size_minimal byte, $package-minimal: /" \
      $output.base.meta
    sed -i \
      -e "s/^[0-9]* byte, $package: /$size_minimal_uncompressed byte, $package-minimal: /" \
      $output.base.meta.uncompressed
    rm -rf calculate
  fi
  
  if [ $edition = extra ]
  then
    mkdir -p calculate/texmf-dist
    tar xf $texmf/${package}.tar.xz -C calculate/texmf-dist $(paste $files_split.tmp)
    tar cf calculate/calc.tar.xz -I 'xz -9' calculate/texmf-dist
    size_extended=$(du -bc calculate/calc.tar.xz | tail -n1 | sed "s/[[:space:]].*//")
    size_extended_uncompressed="$(xz -l --verbose calculate/calc.tar.xz | \
      grep "Uncompressed size" | cut -d'(' -f2 | cut -d' ' -f1 )"
 
    # put new sizes in package index uncompressed
    sed -i \
      -e "s/^[0-9]* byte, $package: /$size_extended byte, $package-extended: /" \
      $output.extra.meta
    sed -i \
      -e "s/^[0-9]* byte, $package: /$size_extended_uncompressed byte, $package-extended: /" \
      $output.extra.meta.uncompressed
    rm -rf calculate
  fi
 
  # untar to provide files for -extra
  tar xf $texmf/${package}.tar.xz $relocated $(paste $files_split.tmp)
  if [ $package = "cm-super" ]
  then
    # create cm-super- minimal config/maps with 10pt glyphs only
    sed "s/cm-super/cm-super-minimal/g" $texmf/texmf-dist/dvips/cm-super/config.cm-super \
      > $texmf/texmf-dist/dvips/cm-super/config-minimal.cm-super
    for map in t1 t2a t2b t2c ts1 x2
    do
      grep 1000 $texmf/texmf-dist/fonts/map/dvips/cm-super/cm-super-$map.map \
        > $texmf/texmf-dist/fonts/map/dvips/cm-super/cm-super-minimal-$map.map 
      sed -i "/.*1000\.pfb/d" $texmf/texmf-dist/fonts/map/dvips/cm-super/cm-super-$map.map 
    done
  fi

done


# cleanup
rm $files_split.tmp
# fix relocation in index for splitted packages
sed -i \
  -e "s|^doc|texmf-dist\/doc|g" \
  -e "s|^fonts|texmf-dist\/fonts|g" \
  -e "s|^dvips|texmf-dist\/dvips|g" \
  $files_split

# sort meta data about added packages
sort -n $output.$edition.meta > $tmpfile
mv $tmpfile $output.$edition.meta 
sort -n $output.$edition.meta.uncompressed > $tmpfile
mv $tmpfile $output.$edition.meta.uncompressed 

sort -u $binary_removed.$edition > $tmpfile
mv $tmpfile $binary_removed.$edition

# include manpages/GNU infofiles in -base, write index for later exclution from other editions.
# In -extra/-docs there should not be any manpage left.
echo "Looking for manpages/GNU infofiles to be included in -base ..."
for package in $(paste -s $packages_base.doc | sort -u)
do
  if [ -n "$(egrep "(doc/man/man|doc/info/)" $texmf/$package.meta )" ]
  then
    echo "Adding manpage from $package.doc to -base"
    flavour=".doc" download $package || exit 1
    unset relocated
    pathprefix="texmf-dist/"
    [ -n "$(grep -w ^"relocated 1" $texmf/$package.meta)" ] \
      && relocated="-C texmf-dist" && unset pathprefix
    tar tf $texmf/${package}.doc.tar.xz | sed \
    -ne "/.*doc\/man\/.*\.1$/p" \
    -ne "/.*doc\/man\/.*\.5$/p" \
    -ne "/.*doc\/info\/.*\.info$/p" \
    | tee -a $manpages > $manpages.tmp
    # untar to provide files for -/extra/-docs
    tar xf $texmf/${package}.doc.tar.xz $relocated $(paste $manpages.tmp)
    echo "$package" >> $packages_manpages
  fi
done 
# cleanup
rm $manpages.tmp
sed -i \
  -e "s/^doc/texmf-dist\/doc/g" \
  $manpages
  
case $edition in
  base) 
  # Content info
  cat << EOF | gzip -9 >> $texmf/texmf-dist/packages.$edition.gz
Content of -$edition:
$(sed "/-linux$/d" $packages_base | sort)
EOF
#Splitted packages, type1 fonts/docs moved to -extra:
#$(echo $special_packages)
#EOF

# add manpages/GNU infofiles to the tarball
  tar rf $tarball --owner=0 --group=0 --sort=name \
    texmf-dist/doc/man/ texmf-dist/doc/info/ \
    texmf-dist/packages.$edition.gz \
    || exit 1
#  # add cm-super minimal maps/config
#  tar rf $tarball --owner=0 --group=0 --sort=name \
#    texmf-dist/dvips/cm-super/config-minimal.cm-super \
#    --wildcards texmf-dist/fonts/map/dvips/cm-super/cm-super-minimal-*.map \
#    || exit 1
  echo "Removing files -from base, splitted from special packages to be included in -extra"
  tar f $tarball --delete $(paste $files_split) || exit 1
  ;;
  extra)
  echo "Removing manpages from $edition which now reside in -base" 
  tar f $tarball --delete $(paste $manpages) 2>/dev/null 
  # content info
  echo "Content of -$edition, including documentation:" > $texmf/texmf-dist/packages.$edition
  sed "/-linux$/d" $TMP/packages.$edition | sort >> $texmf/texmf-dist/packages.$edition
  gzip -9 $texmf/texmf-dist/packages.$edition
#  # remove cm-super minimal config, which resides in -base
#  rm \
#    $texmf/texmf-dist/dvips/cm-super/config-minimal.cm-super \
#    $texmf/texmf-dist/fonts/map/dvips/cm-super/cm-super-minimal-*.map 
  tar rf $tarball --owner=0 --group=0 --sort=name \
    --exclude texmf-dist/doc \
    texmf-dist \
    || exit 1
  ;;
  docs)
  # add docs splittet from base from special packages, add packages index
  # content info, this edition contains all docs from -base
  echo "Content of -$edition, documentation for -base:" > $texmf/texmf-dist/packages.$edition
  sort $packages_base.doc >> $texmf/texmf-dist/packages.$edition
  gzip -9 $texmf/texmf-dist/packages.$edition
  tar rf $tarball --owner=0 --group=0 --sort=name \
    texmf-dist/doc/ \
    texmf-dist/packages.$edition.gz \
    || exit 1
  echo "Removing manpages from $edition which now reside in -base" 
  tar f $tarball --delete $(paste $manpages) || exit 1
  ;;
esac
  
rm -rf texmf-dist

# compress the tarball as everything is in place now
echo "Compressing $tarball ..."
[ -f $tarball.xz ] && rm $tarball.xz
xz -9 -T0 $tarball || exit 1
md5sum $tarball.xz
ls -lh $tarball.xz
echo "Logfile: $logfile"