#!/bin/bash
#
# xml_cleaner_v2.sh - build a reduced XMLTV guide from the CANADA and USA feeds.
#
# Steps:
#   1. Download the raw CANADA and USA XML files into /data.
#   2. Keep only the <channel> and <programme> elements whose id is listed
#      (one id per line) in /data/xml_tv_ids.txt, wrapped in a <tv> root.
#   3. For every programme whose description starts with "S", has a number in
#      characters 2-3 and "E" as 4th character (e.g. "S01E05 ..."), add an
#      <episode-num system="onscreen"> holding the first word of the
#      description.
#   4. Re-indent the result into /data/formatted_CANADA_USA.xml.
#
# Optional environment overrides: CANADA_XML_URL, USA_XML_URL.

set -uo pipefail

# NOTE(review): the long path segment in these URLs looks like an
# account-specific token; consider supplying the URLs through the environment
# only instead of keeping them in the script.
readonly CANADA_XML_URL="${CANADA_XML_URL:-https://download2.iptvboss.pro/ZNLS-2213-GCPB-A8O7-QDK1-GIXS-4MOI-1NFG/xmlonly/CANADA.xml}"
readonly USA_XML_URL="${USA_XML_URL:-https://download2.iptvboss.pro/R5SQ-4GRG-KK00-0XQT-JZ67-7TSA-D5O7-UC1U/xmlonly/USA.xml}"

readonly id_list="/data/xml_tv_ids.txt"
readonly raw_can_xml="/data/RAW_CANADA.xml"
readonly raw_us_xml="/data/RAW_USA.xml"
readonly parse_output="/data/parsed_CANADA_USA.xml"
readonly update_output="/data/updated_CANADA_USA.xml"
readonly format_output="/data/formatted_CANADA_USA.xml"

# XPath selecting the programmes whose description starts with SnnEnn.
readonly EPISODE_PATTERN='/tv/programme[
    starts-with(desc, "S")
    and substring(desc, 2, 2) >= "00"
    and substring(desc, 4, 1) = "E"
]'

# XPath selecting the episode-num elements that have no "system" attribute yet.
readonly EPISODE_NUM_NOSYSTEM='/tv/programme/episode-num[not(@system)]'

# Print a message on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Download $1 into $2, aborting on failure.
download() {
  local url=$1 dest=$2
  # NOTE(review): --no-check-certificate turns off TLS certificate
  # verification; kept from the original script, drop it if the server
  # certificate validates.
  # The URL is deliberately left out of the error message.
  wget --no-check-certificate -O "$dest" "$url" \
    || die "download failed: $dest"
}

# Print an XPath predicate body "@<attr>='id1' or @<attr>='id2' ..." built
# from the non-blank lines of the id list.
#   $1 - attribute name (id or channel)
#   $2 - path of the id list, one id per line
# An id that itself contains a single quote would produce an invalid XPath.
build_predicate() {
  local attr=$1 list=$2
  awk -v attr="$attr" '
    NF {
      printf "%s@%s='\''%s'\''", sep, attr, $0
      sep = " or "
    }
  ' "$list"
}

# Copy the nodes matching XPath $1 in file $2 to stdout.
# xmlstarlet sel exits with 1 when nothing matched, which is not an error
# here (an id may exist in only one of the two feeds); higher codes are.
copy_matching() {
  local xpath=$1 file=$2 status=0
  xmlstarlet sel -t -c "$xpath" "$file" || status=$?
  (( status <= 1 )) || die "xmlstarlet sel failed ($status) on $file"
}

main() {
  local channel_id_xpath programme_channel_xpath

  download "$CANADA_XML_URL" "$raw_can_xml"
  download "$USA_XML_URL" "$raw_us_xml"

  # -f: the outputs do not exist on a first run.
  rm -f -- "$parse_output" "$update_output" "$format_output"

  # XPath predicates matching the channels/programmes listed in id_list.
  channel_id_xpath=$(build_predicate id "$id_list") \
    || die "cannot read $id_list"
  programme_channel_xpath=$(build_predicate channel "$id_list") \
    || die "cannot read $id_list"
  [[ -n "$channel_id_xpath" ]] || die "$id_list contains no ids"

  # Copy only the listed channels and programmes out of the full feeds.
  # They are wrapped in a single <tv> root: the XPaths used below are rooted
  # at /tv, and several top-level elements would not be well-formed XML.
  {
    printf '%s\n' '<tv>'
    copy_matching "//channel[$channel_id_xpath]" "$raw_can_xml"
    copy_matching "//channel[$channel_id_xpath]" "$raw_us_xml"
    copy_matching "//programme[$programme_channel_xpath]" "$raw_can_xml"
    copy_matching "//programme[$programme_channel_xpath]" "$raw_us_xml"
    printf '%s\n' '</tv>'
  } > "$parse_output"

  # 1. append an empty <episode-num> to every SnnEnn programme,
  # 2. tag every episode-num lacking a "system" attribute as "onscreen",
  # 3. fill the onscreen episode-num of SnnEnn programmes with the first
  #    word of the description. Only system="onscreen" elements are updated
  #    so that an existing episode-num using another system (e.g. xmltv_ns)
  #    is not overwritten with an SnnEnn string.
  xmlstarlet ed \
    -s "$EPISODE_PATTERN" \
      -t elem -n "episode-num" -v "" \
    -i "$EPISODE_NUM_NOSYSTEM" \
      -t attr -n system -v "onscreen" \
    -u "$EPISODE_PATTERN/episode-num[@system='onscreen']" \
      -x "substring-before(../desc, ' ')" \
    "$parse_output" > "$update_output" \
    || die "xmlstarlet ed failed on $parse_output"

  # Re-indent the XML (tabs).
  xmlstarlet fo -t "$update_output" > "$format_output" \
    || die "xmlstarlet fo failed on $update_output"
}

main "$@"