Python script to upload images and descriptions to a mediawiki

From Squirrel's Lair
Jump to navigation Jump to search

  • Cargo:

  • Categories:
  • Default form

This is the code that was used to upload the images and metadata, acquired using the "Python script to scrape images and metadata from a website for upload to a mediawiki", to the "Offline Museum Kiosk With MediaWiki and Raspberry Pi".

# Known bugs:
## uploading a very similar image makes MediaWiki show a different interface page; this program doesn't handle that page, so the second image won't get uploaded
## the same happens for images with the same title, which would lead to a duplicate file page, so MediaWiki won't upload the second one
## fails without error for titles that can't be filenames, such as those that contain a "#"

import json
import os
import time

import requests
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select

# Configuration: location of the scrape results and the wiki pages to drive.
scrape_dir = 'C:\\Users\\Tina\\Desktop\\CHS_Scrape_results\\'
meta_file = f'{scrape_dir}CHS_image_metadata.json'   # .json file generated by the scrape script
# NOTE(review): the wiki base URL is empty in this copy (presumably redacted);
# it has to be set before the page URLs built from it can be opened.
wiki = ''
login, upload = (
    '/index.php?title=Special:UserLogin',
    '/index.php?title=Special:Upload',
)

# Launch the Edge browser and let element lookups wait up to 20 seconds,
# which gives pages time to load before a lookup fails.
driver = webdriver.Edge()
driver.implicitly_wait(20)

# Log in to the wiki through the Special:UserLogin form.
driver.get(wiki + login)               # take selenium to the login page
username_box = driver.find_element(By.ID, "wpName1")
username_box.send_keys("ttenbergen bot")
password_box = driver.find_element(By.ID, "wpPassword1")
# The password is read from the WIKI_BOT_PASSWORD environment variable so it
# never has to be stored in the script; a missing variable raises KeyError
# here, before any upload is attempted.
password_box.send_keys(os.environ["WIKI_BOT_PASSWORD"])
# Submit the form -- without this click the session is never logged in.
login_button = driver.find_element(By.ID, "wpLoginAttempt")
login_button.click()

# Read the metadata produced by the scrape script into memory.
with open(meta_file) as meta_handle:
    CHS_meta = json.loads(meta_handle.read())

# Upload one image per metadata record through the Special:Upload form.
# The browser is closed in `finally` so a failed upload does not leave an
# orphaned Edge window / driver process behind.
try:
    for a_meta in CHS_meta:

        # generate the local image name from the record ID
        img_file = 'p15931coll2_' + a_meta["ID"] + '_extralarge.jpg'
        driver.get(wiki + upload)           # take selenium to upload page

        # Attach the local file by typing its path into the file input.
        # Assumes the scraped images sit in scrape_dir next to the metadata
        # file -- TODO confirm against the scrape script.
        file_input = driver.find_element(By.ID, 'wpUploadFile')
        file_input.send_keys(scrape_dir + img_file)

        # set filename to be used on wiki
        file_nm = driver.find_element(By.ID, "wpDestFile")
        # triple click to select original name to overwrite
        ActionChains(driver).double_click(file_nm).click(file_nm).perform()
        # NOTE(review): the text typed over the selected name is not present
        # in this copy of the script, so the wiki's default destination name
        # is kept -- restore the intended name here if it is known.

        # Generate the wiki markup for the description template.
        # NOTE(review): only these three template parameters are visible in
        # this copy; the original may have passed more.
        mkup = ''.join([
            '{{Image description \n',
            '|Description=' + a_meta["description"] + '\n',
            '|Creator=' + a_meta["creator"] + '\n',
            '|Date_taken=' + a_meta["date"] + '\n',
            '}}',                           # close the template call
        ])
        description_box = driver.find_element(By.ID, "wpUploadDescription")
        description_box.send_keys(mkup)

        # submit the upload form
        upload_button = driver.find_element(By.NAME, 'wpUpload')
        upload_button.click()
finally:
    driver.quit()                           # Close the browser

print("program complete")