Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 49 additions & 20 deletions beets/autotag/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,22 @@ class Info(AttrDict[Any]):

IGNORED_FIELDS: ClassVar[set[str]] = {"data_url"}
MEDIA_FIELD_MAP: ClassVar[dict[str, str]] = {}
LEGACY_TO_LIST_FIELD: ClassVar[dict[str, str]] = {"genre": "genres"}

@cached_classproperty
def nullable_fields(cls) -> set[str]:
    """Return the fields that may be cleared when new metadata is applied.

    The names are read from the ``overwrite_null`` configuration section,
    using this class's lower-cased ``type`` as the key, and returned as a
    set.
    """
    section = config["overwrite_null"][cls.type.lower()]
    return {*section.as_str_seq()}

def __setitem__(self, key: str, value: Any) -> None:
    """Store ``value`` under ``key``, redirecting legacy string fields.

    Keys listed in ``LEGACY_TO_LIST_FIELD`` are not stored themselves.
    Their value is passed to ``_get_list_from_string_value`` together
    with the current value of the mapped list field, and the result is
    stored under that list field instead. This handles legacy
    ``info.str_field = "abc"`` and ``info["str_field"] = "abc"``.

    Every other key is stored unchanged.
    """
    list_field = self.LEGACY_TO_LIST_FIELD.get(key)
    if not list_field:
        super().__setitem__(key, value)
        return

    # Read the current list with .get(): if the list field has not been
    # stored on this object yet, a plain self[list_field] lookup would
    # raise KeyError instead of treating the list as unset.
    current_list = self.get(list_field)
    self[list_field] = self._get_list_from_string_value(
        key, list_field, value, current_list
    )

@property
def id(self) -> str | None:
"""Return the provider-specific identifier for this metadata object."""
Expand All @@ -165,6 +175,7 @@ def raw_data(self) -> JSONDict:
artist=self.artist_credit or self.artist,
artists=self.artists_credit or self.artists,
)

return correct_list_fields(data)

@cached_property
Expand Down Expand Up @@ -201,23 +212,10 @@ def __init__(
artists_sort: list[str] | None = None,
data_source: str | None = None,
data_url: str | None = None,
genre: str | None = None,
genres: list[str] | None = None,
media: str | None = None,
**kwargs,
) -> None:
if genre is not None:
deprecate_for_maintainers(
"The 'genre' parameter", "'genres' (list)", stacklevel=3
)
if not genres:
try:
sep = next(s for s in ["; ", ", ", " / "] if s in genre)
except StopIteration:
genres = [genre]
else:
genres = list(map(str.strip, genre.split(sep)))

self.album = album
self.artist = artist
self.artist_credit = artist_credit
Expand All @@ -229,11 +227,33 @@ def __init__(
self.artists_sort = artists_sort
self.data_source = data_source
self.data_url = data_url
self.genre = None
self.genres = genres
self.media = media
self.update(kwargs)

@staticmethod
def _get_list_from_string_value(
    str_field: str,
    list_field: str,
    str_value: str | None,
    list_value: list[str] | None,
) -> list[str] | None:
    """Derive a list field's value from its legacy single-string field.

    Args:
        str_field: Name of the legacy string field (used in the
            deprecation message only).
        list_field: Name of the list field that replaces it (used in the
            deprecation message only).
        str_value: Value given for the legacy field, or ``None``.
        list_value: Current value of the list field, if any.

    Returns:
        ``list_value`` unchanged when ``str_value`` is ``None``, when
        ``list_value`` is already non-empty, or when ``str_value``
        contains no non-blank entry. Otherwise the entries parsed from
        ``str_value``, stripped of surrounding whitespace.
    """
    if str_value is None:
        return list_value

    deprecate_for_maintainers(
        f"The '{str_field}' field",
        f"'{list_field}' (list)",
        stacklevel=3,
    )
    if list_value:
        # An explicitly provided list takes precedence over the string.
        return list_value

    # Split on the first known separator found in the string; without
    # one, the whole string is a single entry.
    sep = next((s for s in ("; ", ", ", " / ") if s in str_value), None)
    parts = str_value.split(sep) if sep else [str_value]

    # Strip every entry (including the single-entry case) and drop blanks
    # so inputs such as "Rock; " or "" never produce empty list items.
    entries = [part for part in map(str.strip, parts) if part]
    return entries or list_value


class AlbumInfo(Info):
"""Metadata snapshot representing a single album candidate.
Expand Down Expand Up @@ -368,6 +388,13 @@ class TrackInfo(Info):
"track_id": "mb_trackid",
"medium_index": "track",
}
LEGACY_TO_LIST_FIELD: ClassVar[dict[str, str]] = {
**Info.LEGACY_TO_LIST_FIELD,
"remixer": "remixers",
"lyricist": "lyricists",
"composer": "composers",
"arranger": "arrangers",
}

@property
def id(self) -> str | None:
Expand Down Expand Up @@ -402,41 +429,43 @@ def raw_data(self) -> JSONDict:
def __init__(
self,
*,
arranger: str | None = None,
arrangers: list[str] | None = None,
bpm: str | None = None,
composer: str | None = None,
composers: list[str] | None = None,
composer_sort: str | None = None,
disctitle: str | None = None,
index: int | None = None,
initial_key: str | None = None,
length: float | None = None,
lyricist: str | None = None,
lyricists: list[str] | None = None,
mb_workid: str | None = None,
medium: int | None = None,
medium_index: int | None = None,
medium_total: int | None = None,
release_track_id: str | None = None,
remixers: list[str] | None = None,
title: str | None = None,
track_alt: str | None = None,
track_id: str | None = None,
work: str | None = None,
work_disambig: str | None = None,
**kwargs,
) -> None:
self.arranger = arranger
self.arrangers = arrangers
self.bpm = bpm
self.composer = composer
self.composers = composers
self.composer_sort = composer_sort
self.disctitle = disctitle
self.index = index
self.initial_key = initial_key
self.length = length
self.lyricist = lyricist
self.lyricists = lyricists
self.mb_workid = mb_workid
self.medium = medium
self.medium_index = medium_index
self.medium_total = medium_total
self.release_track_id = release_track_id
self.remixers = remixers
self.title = title
self.track_alt = track_alt
self.track_id = track_id
Expand Down
2 changes: 1 addition & 1 deletion beets/dbcore/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def normalize(self, value: Any) -> list[str]:
else:
result.append(item)
return result
return list(value)
return self.model_type(value)

def to_sql(self, model_value: list[str]):
    """Join the strings in ``model_value`` with ``self.db_delimiter``."""
    delimiter = self.db_delimiter
    return delimiter.join(model_value)
Expand Down
4 changes: 4 additions & 0 deletions beets/library/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ class Library(dbcore.Database):
_migrations = (
(migrations.MultiGenreFieldMigration, (Item, Album)),
(migrations.LyricsMetadataInFlexFieldsMigration, (Item,)),
(migrations.MultiRemixerFieldMigration, (Item,)),
(migrations.MultiLyricistFieldMigration, (Item,)),
(migrations.MultiComposerFieldMigration, (Item,)),
(migrations.MultiArrangerFieldMigration, (Item,)),
)

def __init__(
Expand Down
104 changes: 72 additions & 32 deletions beets/library/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from contextlib import suppress
from functools import cached_property
from typing import TYPE_CHECKING, NamedTuple, TypeVar
from typing import TYPE_CHECKING, ClassVar, NamedTuple, TypeVar

from confuse.exceptions import ConfigError

Expand All @@ -21,71 +21,66 @@
T = TypeVar("T")


class GenreRow(NamedTuple):
id: int
genre: str
genres: str | None


def chunks(lst: list[T], n: int) -> Iterator[list[T]]:
    """Split ``lst`` into consecutive slices of at most ``n`` items.

    Each yielded slice holds ``n`` items, except possibly the last one,
    which holds whatever remains. An empty ``lst`` yields nothing.
    """
    starts = range(0, len(lst), n)
    yield from (lst[start : start + n] for start in starts)


class MultiGenreFieldMigration(Migration):
"""Backfill multi-value genres from legacy single-string genre data."""
class MultiValueFieldMigration(Migration):
"""Backfill multi-value field from legacy single-string values."""

str_field: ClassVar[str]
list_field: ClassVar[str]

@cached_property
def separators(self) -> list[str]:
"""Return known separators that indicate multiple legacy genres."""
separators = []
with suppress(ConfigError):
separators.append(beets.config["lastgenre"]["separator"].as_str())
return ["; ", ", ", " / "]

separators.extend(["; ", ", ", " / "])
return unique_list(filter(None, separators))

def get_genres(self, genre: str) -> str:
"""Normalize legacy genre separators to the canonical delimiter."""
def convert_to_list_value(self, str_value: str) -> str:
"""Normalize legacy str value separators to the canonical delimiter."""
for separator in self.separators:
if separator in genre:
return genre.replace(separator, MULTI_VALUE_DELIMITER)
if separator in str_value:
return str_value.replace(separator, MULTI_VALUE_DELIMITER)

return genre
return str_value

def _migrate_data(
self, model_cls: type[Model], current_fields: set[str]
) -> None:
"""Migrate legacy genre values to the multi-value genres field."""
if "genre" not in current_fields:
# No legacy genre field, so nothing to migrate.
"""Migrate legacy single-valued field to multi-valued field."""
str_field, list_field = self.str_field, self.list_field
if str_field not in current_fields:
# No legacy single-value field, so nothing to migrate.
return

table = model_cls._table

with self.db.transaction() as tx, self.with_row_factory(GenreRow):
rows: list[GenreRow] = tx.query( # type: ignore[assignment]
with self.db.transaction() as tx:
rows = tx.query( # type: ignore[assignment]
f"""
SELECT id, genre, genres
SELECT id, {str_field}, {list_field}
FROM {table}
WHERE genre IS NOT NULL AND genre != ''
WHERE {str_field} IS NOT NULL AND {str_field} != ''
"""
)

total = len(rows)
to_migrate = [e for e in rows if not e.genres]
to_migrate = [e for e in rows if not e[list_field]]
if not to_migrate:
return

migrated = total - len(to_migrate)

ui.print_(f"Migrating genres for {total} {table}...")
ui.print_(f"Migrating {list_field} for {total} {table}...")
for batch in chunks(to_migrate, 1000):
with self.db.transaction() as tx:
tx.mutate_many(
f"UPDATE {table} SET genres = ? WHERE id = ?",
[(self.get_genres(e.genre), e.id) for e in batch],
f"UPDATE {table} SET {list_field} = ? WHERE id = ?",
[
(self.convert_to_list_value(e[str_field]), e["id"])
for e in batch
],
)

migrated += len(batch)
Expand All @@ -98,6 +93,51 @@ def _migrate_data(
ui.print_(f"Migration complete: {migrated} of {total} {table} updated")


class MultiGenreFieldMigration(MultiValueFieldMigration):
    """Populate the ``genres`` list field from legacy ``genre`` strings."""

    list_field = "genres"
    str_field = "genre"

    @cached_property
    def separators(self) -> list[str]:
        """Return the separators that mark multiple legacy genres.

        The ``lastgenre.separator`` setting comes first when it can be
        read (a ``ConfigError`` is ignored), followed by the inherited
        separators. Empty entries are dropped and the result is passed
        through ``unique_list``.
        """
        try:
            configured = [beets.config["lastgenre"]["separator"].as_str()]
        except ConfigError:
            configured = []

        candidates = [*configured, *super().separators]
        return unique_list(sep for sep in candidates if sep)


class MultiRemixerFieldMigration(MultiValueFieldMigration):
    """Populate the ``remixers`` list field from legacy ``remixer`` strings."""

    list_field = "remixers"
    str_field = "remixer"


class MultiLyricistFieldMigration(MultiValueFieldMigration):
    """Populate the ``lyricists`` list field from legacy ``lyricist`` strings."""

    list_field = "lyricists"
    str_field = "lyricist"


class MultiComposerFieldMigration(MultiValueFieldMigration):
    """Populate the ``composers`` list field from legacy ``composer`` strings."""

    list_field = "composers"
    str_field = "composer"


class MultiArrangerFieldMigration(MultiValueFieldMigration):
    """Populate the ``arrangers`` list field from legacy ``arranger`` strings."""

    list_field = "arrangers"
    str_field = "arranger"


class LyricsRow(NamedTuple):
id: int
lyrics: str
Expand Down
8 changes: 4 additions & 4 deletions beets/library/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ class Item(LibModel):
"artists_sort": types.MULTI_VALUE_DSV,
"artist_credit": types.STRING,
"artists_credit": types.MULTI_VALUE_DSV,
"remixer": types.STRING,
"remixers": types.MULTI_VALUE_DSV,
"album": types.STRING,
"albumartist": types.STRING,
"albumartists": types.MULTI_VALUE_DSV,
Expand All @@ -656,13 +656,13 @@ class Item(LibModel):
"discogs_albumid": types.INTEGER,
"discogs_artistid": types.INTEGER,
"discogs_labelid": types.INTEGER,
"lyricist": types.STRING,
"composer": types.STRING,
"lyricists": types.MULTI_VALUE_DSV,
"composers": types.MULTI_VALUE_DSV,
"composer_sort": types.STRING,
"work": types.STRING,
"mb_workid": types.STRING,
"work_disambig": types.STRING,
"arranger": types.STRING,
"arrangers": types.MULTI_VALUE_DSV,
"grouping": types.STRING,
"year": types.PaddedInt(4),
"month": types.PaddedInt(2),
Expand Down
6 changes: 3 additions & 3 deletions beets/test/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ def item(lib=None, **kwargs):
albumartist="the album artist",
album="the album",
genres=["the genre"],
lyricist="the lyricist",
composer="the composer",
arranger="the arranger",
lyricists=["the lyricist"],
composers=["the composer"],
arrangers=["the arranger"],
grouping="the grouping",
work="the work title",
mb_workid="the work musicbrainz id",
Expand Down
2 changes: 1 addition & 1 deletion beetsplug/aura.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
"genres": "genres",
"recording-mbid": "mb_trackid", # beets trackid is MB recording
"track-mbid": "mb_releasetrackid",
"composer": "composer",
"composer": "composers",
"albumartist": "albumartist",
"comments": "comments",
# Optional for Audio Metadata
Expand Down
2 changes: 1 addition & 1 deletion beetsplug/bpd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1357,7 +1357,7 @@ def cmd_decoders(self, conn):
"Genre": "genres",
"Date": "year",
"OriginalDate": "original_year",
"Composer": "composer",
"Composer": "composers",
"Disc": "disc",
"Comment": "comments",
"MUSICBRAINZ_TRACKID": "mb_trackid",
Expand Down
Loading
Loading