Skip to content

Commit 9b62f99

Browse files
committed
add --force and use newly available data_source
1 parent 066aa1a commit 9b62f99

File tree

1 file changed

+10
-66
lines changed

1 file changed

+10
-66
lines changed

scripts/1-fetch/smithsonian_fetch.py

Lines changed: 10 additions & 66 deletions
Original file line number | Diff line number | Diff line change
@@ -47,69 +47,6 @@
47 47
]
48 48
QUARTER = os.path.basename(PATHS["data_quarter"])
49 49

50-
# Manually compiled unit code and name from:
51-
# https://github.com/Smithsonian/OpenAccess
52-
UNIT_MAP = {
53-
"AAA": "Archives of American Art",
54-
"AAG": "Archives of American Gardens",
55-
"ACM": "Anacostia Community Museum",
56-
"ACMA": "Anacostia Community Museum Archives",
57-
"CFCHFOLKLIFE": "Ralph Rinzler Folklife Archives and Collections",
58-
"CHNDM": "Cooper Hewitt, Smithsonian Design Museum",
59-
"FBR": "Smithsonian Field Book Project",
60-
"FSG": "Freer Gallery of Art and Arthur M. Sackler Gallery",
61-
"HAC": "Smithsonian Gardens",
62-
"HMSG": "Hirshhorn Museum and Sculpture Garden",
63-
"HSFA": "Human Studies Film Archives",
64-
"NASM": "National Air and Space Museum",
65-
"NMAAHC": "National Museum of African American History and Culture",
66-
"NMAH": "National Museum of American History",
67-
"NMAI": "National Museum of the American Indian",
68-
"NMAfA": "National Museum of African Art",
69-
"NMNHANTHRO": ("National Museum of Natural History - Anthropology Dept."),
70-
"NMNHBIRDS": (
71-
"National Museum of Natural History - Vertebrate Zoology - Birds"
72-
" Division"
73-
),
74-
"NMNHBOTANY": ("National Museum of Natural History - Botany Dept."),
75-
"NMNHEDUCATION": (
76-
"National Museum of Natural History - Education & Outreach"
77-
),
78-
"NMNHENTO": ("National Museum of Natural History - Entomology Dept."),
79-
"NMNHFISHES": (
80-
"National Museum of Natural History - Vertebrate Zoology - Fishes"
81-
" Division"
82-
),
83-
"NMNHHERPS": (
84-
"National Museum of Natural History - Vertebrate Zoology - Herpetology"
85-
" Division"
86-
),
87-
"NMNHINV": (
88-
"National Museum of Natural History - Invertebrate Zoology Dept."
89-
),
90-
"NMNHMAMMALS": (
91-
"National Museum of Natural History"
92-
" - Vertebrate Zoology - Mammals Division"
93-
),
94-
"NMNHMINSCI": (
95-
"National Museum of Natural History" " - Mineral Sciences Dept."
96-
),
97-
"NMNHPALEO": ("National Museum of Natural History - Paleobiology Dept."),
98-
"NPG": "National Portrait Gallery",
99-
"NPM": "National Postal Museum",
100-
"NZP": "Smithsonian's National Zoo & Conservation Biology Institute",
101-
"OCIO_DPO3D": "OCIO Digital Preservation & 3D Team",
102-
"OFEO-SG": "Office of Facilities Engineering &"
103-
" Operations – Smithsonian Gardens",
104-
"SAAM": "Smithsonian American Art Museum",
105-
"SIA": "Smithsonian Institution Archives",
106-
"SIL": "Smithsonian Libraries",
107-
"SILAF": "Smithsonian Institution Libraries, African Section",
108-
"SILNMAHTL": "Smithsonian Institution Libraries,"
109-
" National Museum of American History, Library",
110-
"SLA_SRO": "Smithsonian Libraries Archives, Special Research/Operations",
111-
}
112-
113 50

114 51
def parse_arguments():
115 52
"""
@@ -127,13 +64,20 @@ def parse_arguments():
127 64
action="store_true",
128 65
help="Enable git actions (fetch, merge, add, commit, and push)",
129 66
)
67+
parser.add_argument(
68+
"--force",
69+
action="store_true",
70+
help="Write data even if already exists",
71+
)
130 72
args = parser.parse_args()
131 73
if not args.enable_save and args.enable_git:
132 74
parser.error("--enable-git requires --enable-save")
133 75
return args
134 76

135 77

136-
def check_for_completion():
78+
def check_for_completion(args):
79+
if args.force:
80+
return
137 81
completed_metrics = False
138 82
completed_units = False
139 83

@@ -197,7 +141,7 @@ def query_smithsonian(args, session):
197 141
data_units.append(
198 142
{
199 143
"UNIT_CODE": unit["unit"],
200-
"DATA_SOURCE": UNIT_MAP.get(unit["unit"], unit["unit"]),
144+
"DATA_SOURCE": unit["data_source"],
201 145
"CC0_RECORDS": unit["metrics"]["CC0_records"],
202 146
"CC0_RECORDS_WITH_CC0_MEDIA": unit["metrics"][
203 147
"CC0_records_with_CC0_media"
@@ -213,7 +157,7 @@ def query_smithsonian(args, session):
213 157
def main():
214 158
args = parse_arguments()
215 159
shared.paths_log(LOGGER, PATHS)
216-
check_for_completion()
160+
check_for_completion(args)
217 161
session = shared.get_session()
218 162
data_metrics, data_units = query_smithsonian(args, session)
219 163
shared.rows_to_csv(args, FILE_1_METRICS, HEADER_1_METRICS, data_metrics)

0 commit comments

Comments
 (0)