มีเดียวิกิ:Gadget-Fill Index.js

จาก วิกิซอร์ซ

หมายเหตุ: หลังเผยแพร่ คุณอาจต้องล้างแคชเว็บเบราว์เซอร์ของคุณเพื่อดูการเปลี่ยนแปลง

  • ไฟร์ฟอกซ์ / ซาฟารี: กด Shift ค้างขณะคลิก Reload หรือกด Ctrl-F5 หรือ Ctrl-R (⌘-R บนแมค)
  • กูเกิล โครม: กด Ctrl-Shift-R (⌘-Shift-R บนแมค)
  • อินเทอร์เน็ตเอกซ์พลอเรอร์ และ Edge: กด Ctrl ค้างขณะคลิก Refresh หรือกด Ctrl-F5
  • โอเปร่า: กด Ctrl-F5
/*
 * Author: w:fr:Phe
 *
 * Import the contents of the "Book" template from Commons into the Index
 * page fields at Wikisource
 *
 * Modified: 2020-11-10:    More robust template handling to deal with Faebot
 *                          uploads (Inductiveload)
 *           2020-11-27:    Some simple heuristics to improve IA metadata
 */

( function ( mw, $ ) {
    'use strict';

    // Empty placeholder object; nothing in the visible code attaches to it
    // or reads from it.
    var FillIndex = {};

    /**
     * Find the first use of a template in wikitext and split it into its
     * parameters.
     *
     * Nested "{{...}}" and "[[...]]" constructs are tracked on a stack so that
     * "|" and "=" characters inside them are kept as part of the enclosing
     * parameter value instead of being treated as separators.
     *
     * @param {string} text      Wikitext to search.
     * @param {string} template  Template name. It is interpolated as-is into a
     *     case-insensitive regular expression, so regex metacharacters in it
     *     are NOT escaped. Only uses of the template that carry at least one
     *     "|" after the name are recognised.
     * @return {Array} A two-element array:
     *     - [null, null] when the template does not occur in the text;
     *     - [null, raw] when the template starts but is never closed;
     *     - [params, raw] on success, where raw is the template's source text
     *       and params maps parameter names (first letter upper-cased) to
     *       trimmed values. Unnamed parameters are stored under 0, 1, 2...;
     *       index 0 is the template name itself.
     */
    function parse_template(text, template) {
        // find the start of the template in the wikitext
        var re = new RegExp("\\{\\{ *" + template + "[ \\n]*\\|", "i");
        var start = text.search(re);

        // The template is not present in the text
        if (start < 0) {
            return [null, null];
        }

        var index = start;
        var stack = [];           // currently open "{{" / "[[" tokens
        var params = {};
        var pos_param_idx = 0;
        var param_name = "";
        var param_content = "";
        var found_equals = false;
        var closed = false;

        // Store the parameter accumulated so far.
        function save_param() {
            var name = param_name.trim();
            var value = param_content.trim();

            if (name.length === 0) {
                // positional parameter (pos=0 is the template name)
                params[pos_param_idx] = value;
                pos_param_idx += 1;
            } else {
                params[name[0].toUpperCase() + name.slice(1)] = value;
            }
        }

        while (index < text.length) {
            var pair = text.slice(index, index + 2);
            var top = stack[stack.length - 1];

            if (pair === "{{" || pair === "[[") {
                stack.push(pair);
                // the opening braces of the template itself are not content
                if (stack.length > 1) {
                    param_content += pair;
                }
                index += 2;
                continue;
            }

            if ((pair === "}}" && top === "{{") || (pair === "]]" && top === "[[")) {
                stack.pop();
                index += 2;

                if (stack.length === 0) {
                    // end of template: the final parameter is still pending
                    save_param();
                    closed = true;
                    break;
                }

                param_content += pair;
                continue;
            }

            var ch = text[index];

            if (stack.length === 1 && ch === "|") {
                // end of a template parameter, save it and start the next
                save_param();
                param_name = "";
                param_content = "";
                found_equals = false;
            } else if (stack.length === 1 && ch === "=" && !found_equals) {
                // only the first top-level "=" separates name from value
                found_equals = true;
                param_name = param_content;
                param_content = "";
            } else {
                param_content += ch;
            }

            index += 1;
        }

        return [closed ? params : null, text.slice(start, index)];
    }

    // Lookup tables used when copying template values into form fields.
    // Both start out empty and are replaced by setup_extract_dict().
    var extract_dict = {},
        field_names = {};

    /**
     * Point the two lookup tables at the ones held in self.fill_index_data.
     */
    function setup_extract_dict() {
        var settings = self.fill_index_data;

        extract_dict = settings.extract_dict;
        field_names = settings.field_names;
    }

    /**
     * Write a value into one of the Index page form fields.
     *
     * The form control is looked up by name as "wpprpindex-" followed by the
     * entry for idx in field_names. Nothing happens when no such control
     * exists on the page.
     *
     * @param {string} idx      Key into field_names.
     * @param {string} content  Text to place in the field.
     */
    function set_field(idx, content) {

        // Remove the stray space before ";", ":" or "," wherever it occurs
        // (the "g" flag is needed: without it only the first one is fixed).
        content = content.replace(/ ([;:,]) /g, "$1 ");

        var field_name = field_names[idx];
        var f = document.getElementsByName("wpprpindex-" + field_name)[0];

        if( f ) {
            f.value = content;
        }
    }

    /**
     * Ask Wikidata for the page on this wiki that is linked to an item and
     * log its title to the console.
     *
     * The request is asynchronous (JSONP), so nothing is returned; callers
     * cannot use the result synchronously.
     *
     * @param {string} qid  Wikidata item id, e.g. "Q42".
     */
    function get_wd_author(qid) {
        $.ajax( {
            url: '//www.wikidata.org/w/api.php',
            data: {
                'format': 'json',
                'action': 'wbgetentities',
                'ids': qid,
                'props': 'sitelinks',
            },
            dataType: 'jsonp',
            cache: true
        } )
        .done( function(data) {
            // The item may not exist, or may have no sitelink for this wiki;
            // walk the response defensively instead of throwing.
            var entity = data && data.entities && data.entities[qid];
            var sitelink = entity && entity.sitelinks &&
                entity.sitelinks[mw.config.get("wgWikiID")];

            if (!sitelink) {
                console.warn("No sitelink on this wiki for Wikidata item " + qid);
                return;
            }

            console.log(sitelink.title);
        } );
    }

    /**
     * Turn one author entry from the Commons Book template into a wikilink
     * in the configured author namespace.
     *
     * Leading list markers and Creator templates are stripped, then a few
     * heuristics tidy up catalogue-style names ("Last, First, 1800-1870").
     *
     * When the entry is a bare Wikidata id, a lookup is started through
     * get_wd_author(), but that lookup is asynchronous and returns nothing,
     * so the id itself is kept as the link target rather than being replaced
     * by an unusable value.
     *
     * @param {string} str  One line of the author field.
     * @return {string} Wikitext link of the form "[[<ns>:<name>|]]".
     */
    function process_author(str) {
        str = str.replace(/^[*:][ ]*/, "");
        str = str.trim();

        // strip creator templates:
        str = str.replace(/{{[ ]*[Cc]reator[ ]*:[ ]*(.*)[ ]*}}/, "$1");

        // TODO: fix wikidata here
        str = str.replace(/{{[ ]*[Cc]reator[ ]*\|[ ]*[Ww]ikidata[ ]*=[ ]*(Q[0-9]*)}}/, "$1");

        // the greedy capture above can leave trailing spaces behind
        str = str.trim();

        if (/^Q[0-9]+$/.test(str)) {
            // Fire the lookup for its side effect only; its return value is
            // undefined and must not overwrite the id.
            get_wd_author(str);
        } else {
            // strip dates - these are nearly always not needed
            str = str.replace(/(?:, )?(\(?\d+-\d+\)?)$/, "");

            // strip birth date
            str = str.replace(/(?:, )b\. \d{3,4}$/, "");

            // strip initial expansions
            str = str.replace(/(?:[A-Z]. ?)+ \((.*)\)/, "$1");

            // Last, First -> First Last
            str = str.replace(/^([^,]+), ([^,]+)$/, "$2 $1");

            // Fix initials without dots
            str = str.replace(/ ([A-Z]) /, " $1. ");

            // removing a trailing "(dates)" group leaves a space behind
            str = str.trim();
        }

        // pipe trick auto-removes dates
        return "[[" + self.fill_index_data.ns_author_name + ":" + str + "|]]";
    }

    /**
     * Convert a multi-line author field into a comma-separated list of
     * author links, one per non-blank line.
     *
     * @param {string} str  Field value with one author per line.
     * @return {string} The links produced by process_author(), joined by ", ".
     */
    function process_authors(str) {
        return str.split("\n")
            // blank lines would otherwise become empty author links
            .filter(function(line) { return line.trim().length > 0; })
            .map(function(line) { return process_author(line); })
            .join(", ");
    }

    /**
     * Separate a combined "place + publisher" string into its two parts.
     *
     * A colon anywhere in the string is taken to mean "city: publisher".
     * Otherwise the string is split on ", " / "; " and the first piece is
     * accepted as the city only when it matches one of a short list of
     * well-known publishing cities.
     *
     * @param {string} str  Raw publisher field.
     * @return {Array} [publisher, city], both trimmed; city is "" when none
     *     was recognised.
     */
    function split_city_publisher(str) {

        // most books are published in a few cities
        var known_cities = [/London/, /Edinburgh/, /Oxford/, /Cambridge/,
            /New York/, /Boston/, /Philadelphia/, /Washington D. ?C./,
            /Paris/,
            /Berlin/, /Stuttgart/, /Jena/,
            /Hong Kong/,
            /Calcutta/, /Bombay/, /Delhi/];

        var colon_at = str.indexOf(":");

        if (colon_at > -1) {
            // everything before the first colon is the city
            return [str.slice(colon_at + 1).trim(), str.slice(0, colon_at).trim()];
        }

        var pieces = str.split(/[,;:] /);
        var lead = pieces[0];

        var lead_is_city = pieces.length > 1 && known_cities.some(function (city_re) {
            return city_re.test(lead);
        });

        if (lead_is_city) {
            return [pieces.slice(1).join(", ").trim(), lead.trim()];
        }

        return [str.trim(), ""];
    }

    /**
     * Read the "Book" template from the file description page(s) in an API
     * response and copy its values into the Index page form.
     *
     * For each page that exists and has revision text:
     *   - every entry of extract_dict is looked up in the parsed template and,
     *     when non-empty, written with set_field() (with special handling for
     *     people, publisher/city, title and volume);
     *   - the "Source" field is set to the file extension taken from wgTitle;
     *   - a sort key is set when the title starts with an article.
     *
     * A page whose Book template cannot be parsed still gets its "Source"
     * field set; the template-derived fields are simply skipped.
     *
     * @param {Object} data  Response of an action=query&prop=revisions call.
     * @return {boolean} true when at least one existing page was processed.
     */
    function extract_content( data ) {
        var importationDone = false;

        // Return the first non-empty value found under the given template
        // parameter name (string) or list of candidate names (array).
        function pick_param( params, keys ) {
            var names = ( typeof keys === "string" ) ? [ keys ] : keys;

            for ( var i = 0; i < names.length; i++ ) {
                var value = params[names[i]];

                if ( value !== undefined && value.length > 0 ) {
                    return value;
                }
            }
            return undefined;
        }

        $.each( data.query.pages, function( ids, page ) {
            // skip entries with a negative id or without any revision
            if( ids < 0 || !page.revisions || page.revisions.length === 0 ) {
                return;
            }

            var content = page.revisions[0]['*'];
            if( typeof content !== "string" ) {
                return;
            }

            // null when the template is absent or unterminated
            var params = parse_template(content, "Book")[0];

            if (params === null) {
                console.error("Failed to parse Book template");
            } else {
                for( var idx in extract_dict ) {

                    var template_content = pick_param( params, extract_dict[idx] );

                    if (template_content === undefined) {
                        continue;
                    }

                    switch (idx) {
                    case "Editor":
                    case "Author":
                    case "Translator":
                    case "Illustrator":
                        set_field(idx, process_authors(template_content));
                        break;
                    case "Publisher":
                        // it is very common for the Commons publisher field
                        // to contain the location
                        var pub_city = split_city_publisher(template_content);

                        set_field("Publisher", pub_city[0]);
                        if (pub_city[1].length > 0) {
                            set_field("City", pub_city[1]);
                        }
                        break;
                    case "Title":
                        set_field("Title", "''[[" + template_content + "]]''");
                        break;
                    case "Volume":
                        var book_title = params[extract_dict["Title"]];
                        if (book_title !== undefined) {
                            // link the volume as a subpage of the work
                            set_field(idx, "[[" + book_title +
                                "/" + template_content + "|" + template_content + "]]");
                        } else {
                            // fallback
                            set_field(idx, template_content);
                        }
                        break;
                    default:
                        set_field(idx, template_content);
                    }
                }
            }

            // set the file type selector
            set_field("Source", mw.config.get("wgTitle").split(".").slice(-1)[0]);

            // set the sort key; only possible when the template was parsed
            if (params !== null) {
                var title = params[extract_dict["Title"]];
                if (title !== undefined) {
                    var titlewords = title.split(" ");
                    if (["The", "A", "An", "Of"].indexOf(titlewords[0]) >= 0) {
                        // move the leading article to the end: "Book, The"
                        title = titlewords.slice(1).join(" ") + ", " + titlewords[0];
                        title = title[0].toUpperCase() + title.slice(1);
                        set_field("Key", title);
                    }
                }
            }
            importationDone = true;
        } );

        return importationDone;
    }

    /**
     * Handle the Commons response: try to import from it, and when nothing
     * was imported repeat the same revisions query against the local wiki's
     * API, handing that response to extract_content().
     *
     * @param {Object} data  Response of the Commons revisions query.
     */
    function common_content( data ) {
        if( extract_content( data ) ) {
            // the Commons page supplied the data; nothing more to do
            return;
        }

        var local_api = mw.util.wikiScript( 'api' );
        var query = {
            'format': 'json',
            'action': 'query',
            'prop': 'revisions',
            'rvprop': 'content',
            'titles': 'File:' + mw.config.get( 'wgTitle' )
        };

        $.ajax( { url: local_api, data: query } ).done( extract_content );
    }

    /**
     * Entry point of the gadget: load the lookup tables and, when the page
     * shows the ".mw-newarticletext" notice, fetch the file description page
     * from Commons (JSONP) and pass the response to common_content().
     */
    function setup() {
        setup_extract_dict();

        var has_new_article_notice = $( ".mw-newarticletext" ).length !== 0;

        if( has_new_article_notice ) {
            var request = {
                url: '//commons.wikimedia.org/w/api.php',
                data: {
                    'format': 'json',
                    'action': 'query',
                    'prop': 'revisions',
                    'rvprop': 'content',
                    'titles': 'File:' + mw.config.get( 'wgTitle' )
                },
                dataType: 'jsonp',
                cache: true
            };

            $.ajax( request ).done( common_content );
        }
    }

/* Localisation section, you can provide your own data before loading this script to
 * change the script behavior
 */
if( !self.fill_index_data ) {
    self.fill_index_data = {};
}

// Install each default below only where no (truthy) value was supplied
// before this script was loaded.
( function ( settings, defaults ) {
    Object.keys( defaults ).forEach( function ( key ) {
        if( !settings[key] ) {
            settings[key] = defaults[key];
        }
    } );
}( self.fill_index_data, {
    // Name of the namespace used for author links
    ns_author_name: 'Author',

    // Commons Book template field names
    // Should not need to be internationalised
    extract_dict: {
        'Editor'      : 'Editor',
        'Publisher'   : 'Publisher',
        'Author'      : 'Author',
        'Translator'  : 'Translator',
        'Volume'      : 'Volume',
        'Illustrator' : 'Illustrator',
        'Image page'  : 'Image page',
        'Title'       : 'Title',
        'Date'        : ['Publication date', 'Date'],
        'City'        : 'City',
        'Source'      : 'Source',
        'LCCN'        : 'LCCN',
        'OCLC'        : 'OCLC',
    },

    // Proofread page field names
    field_names: {
        'Editor'      : 'Editor',
        'Publisher'   : 'Publisher',
        'Author'      : 'Author',
        'Translator'  : 'Translator',
        'Volume'      : 'Volume',
        'Illustrator' : 'Illustrator',
        'Image page'  : 'Image',
        'Title'       : 'Title',
        'Date'        : 'Year',
        'City'        : 'Address',
        'Source'      : 'Source',
        'LCCN'        : 'LCCN',
        'OCLC'        : 'OCLC',
        'Key'         : 'Key',
    }
} ) );
/* end of localisation section */

// Once the DOM is ready, run the gadget only when editing a page in the
// Index namespace.
$( function() {
    if( mw.config.get( "wgCanonicalNamespace" ) !== "Index" ) {
        return;
    }
    if( mw.config.get( "wgAction" ) !== "edit" ) {
        return;
    }
    setup();
} );
} ( mediaWiki, jQuery ) );