Skip to content

Commit f6b06e1

Browse files
committed
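Renamed the UNIT_NAME column to DATA_SOURCE (and the derived Unit_name column to Data_source), added y-axis labels to the Smithsonian report plots, and changed the stacked bar plot x-axis label to "Percentage of works"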
1 parent f26fdbb commit f6b06e1

File tree

4 files changed

+18
-14
lines changed

4 files changed

+18
-14
lines changed

scripts/1-fetch/smithsonian_fetch.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
]
4141
HEADER_2_UNITS = [
4242
"UNIT_CODE",
43-
"UNIT_NAME",
43+
"DATA_SOURCE",
4444
"CC0_RECORDS",
4545
"CC0_RECORDS_WITH_CC0_MEDIA",
4646
"TOTAL_OBJECTS",
@@ -250,7 +250,7 @@ def query_smithsonian(args, session):
250250
data_units.append(
251251
{
252252
"UNIT_CODE": unit["unit"],
253-
"UNIT_NAME": UNIT_MAP.get(unit["unit"], unit["unit"]),
253+
"DATA_SOURCE": UNIT_MAP.get(unit["unit"], unit["unit"]),
254254
"CC0_RECORDS": unit["metrics"]["CC0_records"],
255255
"CC0_RECORDS_WITH_CC0_MEDIA": unit["metrics"][
256256
"CC0_records_with_CC0_media"

scripts/2-process/smithsonian_process.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -82,13 +82,13 @@ def process_totals_by_units(args, count_data):
8282
data = {}
8383

8484
for row in count_data.itertuples(index=False):
85-
unit = str(row.UNIT_NAME)
85+
unit = str(row.DATA_SOURCE)
8686
total_objects = int(row.TOTAL_OBJECTS)
8787

8888
data[unit] = total_objects
8989

90-
data = pd.DataFrame(data.items(), columns=["Unit_name", "Total_objects"])
91-
data.sort_values("Unit_name", ascending=True, inplace=True)
90+
data = pd.DataFrame(data.items(), columns=["Data_source", "Total_objects"])
91+
data.sort_values("Data_source", ascending=True, inplace=True)
9292
data.reset_index(drop=True, inplace=True)
9393
file_path = shared.path_join(
9494
PATHS["data_phase"], "smithsonian_totals_by_units.csv"
@@ -104,7 +104,7 @@ def process_totals_by_records(args, count_data):
104104
data = {}
105105

106106
for row in count_data.itertuples(index=False):
107-
unit = str(row.UNIT_NAME)
107+
unit = str(row.DATA_SOURCE)
108108
CC0_records = int(row.CC0_RECORDS)
109109
CC0_records_with_CC0_media = int(row.CC0_RECORDS_WITH_CC0_MEDIA)
110110
total_objects = int(row.TOTAL_OBJECTS)
@@ -123,7 +123,7 @@ def process_totals_by_records(args, count_data):
123123
data = (
124124
pd.DataFrame.from_dict(data, orient="index")
125125
.reset_index()
126-
.rename(columns={"index": "Unit_name"})
126+
.rename(columns={"index": "Data_source"})
127127
)
128128
data["CC0_without_media_percentage"] = (
129129
(
@@ -142,7 +142,7 @@ def process_totals_by_records(args, count_data):
142142
* 100
143143
).round(2)
144144

145-
data.sort_values("Unit_name", ascending=True, inplace=True)
145+
data.sort_values("Data_source", ascending=True, inplace=True)
146146
data.reset_index(drop=True, inplace=True)
147147

148148
file_path = shared.path_join(
@@ -164,7 +164,7 @@ def main():
164164
file_count,
165165
usecols=[
166166
"UNIT_CODE",
167-
"UNIT_NAME",
167+
"DATA_SOURCE",
168168
"CC0_RECORDS",
169169
"CC0_RECORDS_WITH_CC0_MEDIA",
170170
"TOTAL_OBJECTS",

scripts/3-report/smithsonian_report.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,7 @@ def plot_totals_by_top10_units(args):
143143
"smithsonian_totals_by_units.csv",
144144
)
145145
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
146-
name_label = "Unit_name"
146+
name_label = "Data_source"
147147
data_label = "Total_objects"
148148
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
149149
data["Total_objects"] = data["Total_objects"].astype(int)
@@ -157,6 +157,7 @@ def plot_totals_by_top10_units(args):
157157
title=title,
158158
name_label=name_label,
159159
data_label=data_label,
160+
bar_ylabel="Data Sources",
160161
)
161162

162163
image_path = shared.path_join(
@@ -193,7 +194,7 @@ def plot_totals_by_lowest10_units(args):
193194
"smithsonian_totals_by_units.csv",
194195
)
195196
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
196-
name_label = "Unit_name"
197+
name_label = "Data_source"
197198
data_label = "Total_objects"
198199
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
199200
data["Total_objects"] = data["Total_objects"].astype(int)
@@ -207,6 +208,7 @@ def plot_totals_by_lowest10_units(args):
207208
title=title,
208209
name_label=name_label,
209210
data_label=data_label,
211+
bar_ylabel="Data Sources",
210212
)
211213

212214
image_path = shared.path_join(
@@ -243,7 +245,7 @@ def plot_totals_by_top10_unit_records(args):
243245
"smithsonian_totals_by_records.csv",
244246
)
245247
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
246-
name_label = "Unit_name"
248+
name_label = "Data_source"
247249
data_label = "Total_objects"
248250
stack_labels = [
249251
"CC0_without_media_percentage",
@@ -260,6 +262,7 @@ def plot_totals_by_top10_unit_records(args):
260262
title=title,
261263
name_label=name_label,
262264
stack_labels=stack_labels,
265+
ylabel="Data Sources",
263266
)
264267
image_path = shared.path_join(
265268
PATHS["data_phase"], "smithsonian_by_top10_unit_records.png"
@@ -293,7 +296,7 @@ def plot_totals_by_lowest10_unit_records(args):
293296
"smithsonian_totals_by_records.csv",
294297
)
295298
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
296-
name_label = "Unit_name"
299+
name_label = "Data_source"
297300
data_label = "Total_objects"
298301
stack_labels = [
299302
"CC0_without_media_percentage",
@@ -310,6 +313,7 @@ def plot_totals_by_lowest10_unit_records(args):
310313
title=title,
311314
name_label=name_label,
312315
stack_labels=stack_labels,
316+
ylabel="Data Sources",
313317
)
314318
image_path = shared.path_join(
315319
PATHS["data_phase"], "smithsonian_by_lowest10_unit_records.png"

scripts/plot.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -208,7 +208,7 @@ def stacked_barh_plot(
208208
for current_left, width in zip(left, data[label])
209209
]
210210

211-
ax.set_xlabel("Number of works")
211+
ax.set_xlabel("Percentage of works")
212212
ax.xaxis.set_major_formatter(ticker.FuncFormatter(number_formatter))
213213
ax.set_yticks(range(len(data.index)))
214214
ax.set_yticklabels([wrap_label(label) for label in data.index])

0 commit comments

Comments
 (0)