Acciones

Módulo

Módulo:Format ISBN/data/doc

De La Venciclopedia

Esta es la página de documentación de Módulo:Format ISBN/data

Python script to update this data module

#!/usr/bin/env python

# This work has been released into the public domain by its author, User:Cobaltcigs.
# This applies worldwide.  In some countries this may not be legally possible; if so:
# User:Cobaltcigs grants anyone the right to use this work for any purpose, without any
# conditions, unless such conditions are required by law.

import xml.dom.minidom
import urllib2

# Download the range message XML from isbn-international.org.
# NOTE: urllib2 exists only on Python 2; on Python 3 the equivalent calls live in
# urllib.request, so this script must be run with a Python 2 interpreter as written.
url = "https://www.isbn-international.org/export_rangemessage.xml"
req = urllib2.Request(url)
response = urllib2.urlopen(req)
try:
    xmlTxt = response.read()
finally:
    # Release the HTTP connection even if reading the body fails.
    response.close()

# Keep a local copy of exactly what was downloaded.  Binary mode writes the payload
# byte-for-byte (no newline translation), and the ``with`` block guarantees the file
# is closed even if the write raises.
with open("RangeMessage.xml", "wb") as f:
    f.write(xmlTxt)

# Parse the downloaded payload into a DOM tree.
document = xml.dom.minidom.parseString(xmlTxt)


def _first_text(tag_name):
    """Return the value of the first child node of the first <tag_name> element."""
    first_match = document.getElementsByTagName(tag_name)[0]
    return first_match.firstChild.nodeValue


# Header fields of the range message; they are echoed into the generated comment.
source = _first_text("MessageSource")
version = _first_text("MessageSerialNumber")
timestamp = _first_text("MessageDate")

# Emit a Lua-style block comment ("--[[" ... "--]]") at the top of the generated
# module, recording where the data came from and how to read the table rows.
print("--[[")
print("""\tGenerated from %s's RangeMessage.xml file at:
\t\t%s
\tusing a script.
\tVersion: %s (%s)""" % (source, url, version, timestamp))
# The backslashes of the regular expression in the notes are doubled so that Python
# emits them literally: "\d" and "\-" are not valid Python string escapes and trigger
# an invalid-escape-sequence warning on newer interpreters.  The printed text is
# unchanged.
print("""\tNotes:
\t* Integer tuples at right represent the middle three (of five) digit-group quantities
\t  for any ISBN in the specified (quasi-numeric but actually lexicographical) range.
\t* The "specified range" for the tuple specified on row[N] can be thought of as any
\t  ISBN Q where (Q <= row[N].isbn) && (Q > (row[n-1].isbn or 0))
\t* Omitted for brevity are the first group ("978"/"979", always 3) and the last group
\t  (check digit, always 1).
\t* In other words, an ISBN whose digit grouping is specified below as {x,y,z} will match
\t  the regular expression "^\\d{3}\\-\\d{x}\\-\\d{y}\\-\\d{z}\\-\\d{1}$" once properly formatted.
""")
print("--]]")

# Emit the Lua table itself: one row per rule, giving the highest 13-character ISBN
# string covered by that rule and the lengths of the three middle digit groups.
print("return {")
for prefix_node in document.getElementsByTagName("Prefix"):
    prefix_parts = prefix_node.firstChild.nodeValue.split("-")
    # Only prefixes made of exactly two dash-separated parts are processed;
    # anything else is skipped.
    if len(prefix_parts) != 2:
        continue

    leading_part, group_part = prefix_parts
    prefix_digits = leading_part + group_part
    leading_len = len(leading_part)
    group_len = len(group_part)

    for rule_node in prefix_node.parentNode.getElementsByTagName("Rule"):
        range_text = rule_node.getElementsByTagName("Range")[0].firstChild.nodeValue
        range_upper = range_text.split("-")[1]
        # Pad with nines and cut to 13 characters to get the upper bound of the range.
        upper_bound = (prefix_digits + range_upper + "9" * 9)[:13]

        length_text = rule_node.getElementsByTagName("Length")[0].firstChild.nodeValue
        registrant_len = int(length_text)
        # disregard (yet unallocated?) blocks where (publisher identifier's) <Length>0</Length>
        if registrant_len == 0:
            continue

        # Whatever remains of the 12 digits before the check digit is the last of the
        # three middle groups; the check digit's own length (1) is not needed here.
        remaining_len = 12 - (leading_len + group_len + registrant_len)
        row_values = (upper_bound, group_len, registrant_len, remaining_len)
        print('\t{"%s", {%s,%s,%s}},' % row_values)
print("\t}")