Python script to upload images and descriptions to a mediawiki

From Squirrel's Lair
Jump to navigation Jump to search


  • Cargo:


  • Categories:
  • Default form


This is the code that was used to upload the images and metadata, acquired using the "Python script to scrape images and metadata from a website for upload to a mediawiki", to the "Offline Museum Kiosk With MediaWiki and Raspberry Pi".

# Known bugs:
## very similar images will go to a different interface page in MediaWiki; this program doesn't capture that, so the second image won't get uploaded
## same for images with the same title, which would lead to a duplicate file page, so MediaWiki won't upload the second one
## fails without error for titles that can't be filenames, such as those that contain a "#"

import os
import time
import json
import requests
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys

# --- configuration -----------------------------------------------------------
# Folder that holds the scraped images together with their metadata file.
scrape_dir = 'C:\\Users\\Tina\\Desktop\\CHS_Scrape_results\\'
# The .json metadata file generated by the scrape script, inside that folder.
meta_file = f'{scrape_dir}CHS_image_metadata.json'

# Base address of the wiki and the two special pages this script visits.
wiki = 'https://chs.squirrelslair.ca'
login = '/index.php?title=Special:UserLogin'
upload = '/index.php?title=Special:Upload'

# Launch the Edge browser and let every element lookup wait up to 20 seconds,
# which gives pages time to load before a lookup is considered failed.
driver = webdriver.Edge()
driver.implicitly_wait(20)

# log in to the wiki
# The bot credentials can be supplied through the CHS_WIKI_USER and
# CHS_WIKI_PASSWORD environment variables so they do not have to live in the
# script. When a variable is not set, the value the script has always used is
# taken instead, so existing runs keep working unchanged.
# NOTE(review): the fallback password is still stored in plain text here;
# consider rotating it and removing the fallback once the variables are set.
bot_username = os.environ.get("CHS_WIKI_USER", "ttenbergen bot")
bot_password = os.environ.get("CHS_WIKI_PASSWORD", "456yui456456yui456")

driver.get(wiki + login)               # take selenium to the login page

# fill in the login form fields by their element ids, then submit
username_box = driver.find_element(By.ID, "wpName1")
username_box.send_keys(bot_username)
password_box = driver.find_element(By.ID, "wpPassword1")
password_box.send_keys(bot_password)
login_button = driver.find_element(By.ID, "wpLoginAttempt")
login_button.click()

# read the metadata file written by the scrape script and parse it as JSON;
# the parsed result is what the upload loop iterates over
with open(meta_file) as meta_handle:
    CHS_meta = json.loads(meta_handle.read())

# Characters that MediaWiki does not accept in a page title. A destination
# filename containing one of them (for example "#") makes the upload fail
# without this script noticing, so each is swapped for "-" before the name is
# typed into the form.
_ILLEGAL_TITLE_CHARS = str.maketrans({ch: '-' for ch in '#<>[]|{}'})

# iterate through the meta data file, uploading one image per entry
# The loop sits in try/finally so the browser is closed even when one of the
# uploads raises; the exception itself still propagates afterwards.
try:
    for a_meta in CHS_meta:

        # name of the scraped image file on disk for this entry
        img_file = 'p15931coll2_' + a_meta["ID"] + '_extralarge.jpg'

        driver.get(wiki + upload)       # take selenium to upload page

        # hand the local file path to the file chooser input
        file_input = driver.find_element(By.ID, 'wpUploadFile')
        file_input.send_keys(scrape_dir + img_file)

        # set filename to be used on wiki
        dest_name_box = driver.find_element(By.ID, "wpDestFile")
        # double click followed by a click acts as a triple click, selecting
        # the pre-filled original name so the typed name overwrites it
        ActionChains(driver).double_click(dest_name_box).click(dest_name_box).perform()
        wiki_title = a_meta["title"].translate(_ILLEGAL_TITLE_CHARS)
        dest_name_box.send_keys(wiki_title + ".jpg")

        # add description template: generate the wiki markup, one template
        # parameter per line (the last three parameters are left empty)
        mkup = "\n".join([
            '{{Image description ',
            '|Description=' + a_meta["description"],
            '|Creator=' + a_meta["creator"],
            '|Date_taken=' + a_meta["date"],
            '|Location_taken=',
            '|Topics=',
            '|Original=',
            '}}',
        ])
        description_box = driver.find_element(By.ID, "wpUploadDescription")
        description_box.send_keys(mkup)

        # submit the upload form, then pause briefly before the next entry
        upload_button = driver.find_element(By.NAME, 'wpUpload')
        upload_button.click()
        time.sleep(1)
finally:
    driver.quit()                      # Close the browser

print("program complete")