+9
README.md
+9
README.md
+87
examples/cat_videos_feed.json
+87
examples/cat_videos_feed.json
···
1
+
{
2
+
"order": "blend",
3
+
"manifest": {
4
+
"filter": {
5
+
"and": [
6
+
{
7
+
"embed_type": [
8
+
"==",
9
+
"video"
10
+
]
11
+
},
12
+
{
13
+
"or": [
14
+
{
15
+
"regex_any": [
16
+
"text",
17
+
[
18
+
"catsky",
19
+
"kittensky"
20
+
],
21
+
true,
22
+
false
23
+
]
24
+
},
25
+
{
26
+
"and": [
27
+
{
28
+
"regex_any": [
29
+
"text",
30
+
[
31
+
"photography",
32
+
"photo"
33
+
],
34
+
true,
35
+
false
36
+
]
37
+
},
38
+
{
39
+
"regex_matches": [
40
+
"text",
41
+
"\\b(cat|kitten)s?",
42
+
true
43
+
]
44
+
}
45
+
]
46
+
}
47
+
]
48
+
},
49
+
{
50
+
"entity_excludes": [
51
+
"hashtags",
52
+
[
53
+
"Furry",
54
+
"Anthro"
55
+
]
56
+
]
57
+
},
58
+
{
59
+
"regex_negation_matches": [
60
+
"text",
61
+
"\\bmemes?\\b",
62
+
true
63
+
]
64
+
},
65
+
{
66
+
"content_moderation": [
67
+
"sexual",
68
+
"<",
69
+
0.1
70
+
]
71
+
}
72
+
]
73
+
}
74
+
},
75
+
"custom_sort_settings": {
76
+
"time_window": 72,
77
+
"decay_penalty": 0.15,
78
+
"like_count_multiplier": 1,
79
+
"reply_count_multiplier": 1,
80
+
"repost_count_multiplier": 1,
81
+
"reader_like_count_multiplier": 1,
82
+
"reader_reply_count_multiplier": 0,
83
+
"reader_repost_count_multiplier": 0,
84
+
"request_less_count_multiplier": 1,
85
+
"request_more_count_multiplier": 1
86
+
}
87
+
}
+64
examples/minimal_or_group.py
+64
examples/minimal_or_group.py
···
1
+
from pasturepy.core import FeedConfig
2
+
from pasturepy.nodes.text import TextNode
3
+
from pasturepy.nodes.entity import EntityNode
4
+
from pasturepy.nodes.embed import EmbedNode
5
+
from pasturepy.nodes.ml import MLNode
6
+
7
+
# Build a "cat videos" feed definition and export it as Graze JSON.
feed = FeedConfig(order="blend")

# Top-level filters are AND-ed together; start by requiring a video embed.
EmbedNode.embed(feed.filters, "==", "video")

# Positive matches live in an OR group: either branch below qualifies a post.
cat_matches = feed.filters.nest_filters("or")

# Branch 1: the post text contains one of the cat-community keywords.
TextNode.word_list(
    cat_matches,
    method="regex_any",
    field="text",
    terms=["catsky", "kittensky"],
)

# Branch 2: the post text mentions photography AND a cat/kitten word.
cat_photos = cat_matches.nest_filters("and")
TextNode.word_list(
    cat_photos,
    method="regex_any",
    field="text",
    terms=["photography", "photo"],
)
TextNode.regex(
    cat_photos,
    method="regex_matches",
    field="text",
    term=r"\b(cat|kitten)s?",
)

# Drop posts tagged with hashtags we do not want in this feed.
EntityNode.entity(
    feed.filters,
    method="entity_excludes",
    entity_type="hashtags",
    terms=["Furry", "Anthro"],
)

# Drop posts whose text mentions "meme" / "memes".
TextNode.regex(
    feed.filters,
    method="regex_negation_matches",
    field="text",
    term=r"\bmemes?\b",
)

# Keep only posts whose "sexual" moderation score is below 0.1.
MLNode.content_moderation(
    feed.filters,
    content_type="sexual",
    comparison="<",
    value=0.1,
)

# Blend ordering with custom weights; both setters return the feed itself,
# so they can be chained.
feed.set_sort_order("blend").set_sort_settings(
    like_count_multiplier=1,
    reply_count_multiplier=1,
    repost_count_multiplier=1,
    decay_penalty=0.15,
    time_window=72,
)

# Write the JSON file and report where it went.
output_path = "cat_videos_feed.json"
feed.generate(output_path)
print(f"✓ Feed generated: {output_path}")
+17
pasturepy/__init__.py
+17
pasturepy/__init__.py
···
1
+
from pasturepy.core import FeedConfig, FilterGroup, SortSettings
2
+
from pasturepy.nodes.text import TextNode
3
+
from pasturepy.nodes.entity import EntityNode
4
+
from pasturepy.nodes.embed import EmbedNode
5
+
from pasturepy.nodes.ml import MLNode
6
+
7
+
__version__ = "0.1.0"
8
+
9
+
__all__ = [
10
+
"FeedConfig",
11
+
"FilterGroup",
12
+
"SortSettings",
13
+
"TextNode",
14
+
"EntityNode",
15
+
"EmbedNode",
16
+
"MLNode",
17
+
]
+54
pasturepy/constants/__init__.py
+54
pasturepy/constants/__init__.py
···
1
+
from .fields import (
2
+
TEXT_FIELDS,
3
+
OPTION_FIELDS,
4
+
NUMBER_FIELDS,
5
+
ENTITY_TYPES,
6
+
EMBED_TYPES
7
+
)
8
+
from .graze_json import (
9
+
REGEX_METHODS,
10
+
WORD_METHODS,
11
+
ENTITY_METHODS,
12
+
COMPARISONS,
13
+
EMBED_COMPARISONS,
14
+
MEMBER_TYPES,
15
+
SOCIAL_LISTS,
16
+
)
17
+
18
+
from .values import (
19
+
CONTENT_MODS,
20
+
IMAGE_MODS,
21
+
TOPICS,
22
+
LANGUAGES,
23
+
SENTIMENTS,
24
+
TOXICITY,
25
+
EMOTIONS,
26
+
CATEGORIES,
27
+
SPAM_TYPE,
28
+
LABELS,
29
+
)
30
+
31
+
__all__ = [
32
+
"TEXT_FIELDS",
33
+
"OPTION_FIELDS",
34
+
"NUMBER_FIELDS",
35
+
"ENTITY_TYPES",
36
+
"EMBED_TYPES",
37
+
"REGEX_METHODS",
38
+
"WORD_METHODS",
39
+
"ENTITY_METHODS",
40
+
"COMPARISONS",
41
+
"EMBED_COMPARISONS",
42
+
"MEMBER_TYPES",
43
+
"SOCIAL_LISTS",
44
+
"CONTENT_MODS",
45
+
"IMAGE_MODS",
46
+
"TOPICS",
47
+
"LANGUAGES",
48
+
"SENTIMENTS",
49
+
"TOXICITY",
50
+
"EMOTIONS",
51
+
"CATEGORIES",
52
+
"SPAM_TYPE",
53
+
"LABELS",
54
+
]
+52
pasturepy/constants/fields.py
+52
pasturepy/constants/fields.py
···
1
+
# Building blocks shared by the field sets below.
_PROFILE_ATTRS = ("description", "handle", "display_name")  # bio, handle, name
_LINKED_POSTS = ("parent_post", "quote_post", "reply_post")

# Free-text fields that text filters may target.
TEXT_FIELDS = {
    # Post body
    "text",
    # Alt text. The ".media." variants were annotated "if qp" in the original
    # list -- presumably "quote post"; TODO confirm.
    "embed.alt",                         # video alt
    "embed.media.alt",                   # video alt (qp)
    "embed.images[*].alt",               # image alt
    "embed.media.images[*].alt",         # image alt (qp)
    "embed.external.description",        # link alt
    "embed.media.external.description",  # link alt (qp)
    # Video captions
    "inferences.video.audio_transcription.text",
    # Posting user's bio, handle and display name
    *(f"hydrated_metadata.user.{attr}" for attr in _PROFILE_ATTRS),
    # Same three attributes for the author of each linked post
    *(
        f"hydrated_metadata.{post}.author.{attr}"
        for post in _LINKED_POSTS
        for attr in _PROFILE_ATTRS
    ),
    # NOTE(review): the hydrated_metadata.mentions.[*].{description, handle,
    # display_name} paths were commented out of this set -- presumably not
    # supported yet; confirm before enabling.
}

# Text fields whose values come from a fixed list of options rather than
# being open-ended.
OPTION_FIELDS = {
    # Labels
    "hydrated_metadata.user.labels[*].val",
    *(f"hydrated_metadata.{post}.*.labels[*].val" for post in _LINKED_POSTS),
    # NOTE(review): hydrated_metadata.mentions.[*].labels[*].val was likewise
    # commented out of this set.
}

# Numeric fields.
NUMBER_FIELDS = {
    "hydrated_metadata.user.followers_count",  # follower count
}

# Entity kinds accepted by entity filters.
ENTITY_TYPES = {
    "hashtags",
    "langs",
    "urls",
    "mentions",
    "domains",
    "labels",
}

# Embed kinds accepted by embed filters.
EMBED_TYPES = {
    "image",
    "link",
    "post",
    "image_group",
    "video",
    "gif",
}
+8
pasturepy/constants/graze_json.py
+8
pasturepy/constants/graze_json.py
···
1
+
# Operator and method names that Graze uses when processing filters.

# Text filters: single-pattern regex methods and word-list methods.
REGEX_METHODS = {
    "regex_matches",
    "regex_negation_matches",
}
WORD_METHODS = {
    "regex_any",
    "regex_none",
}

# Entity filters.
ENTITY_METHODS = {
    "entity_matches",
    "entity_excludes",
}

# Embed filters only test (in)equality; the general comparison set adds the
# ordering operators on top of those two.
EMBED_COMPARISONS = {"==", "!="}
COMPARISONS = EMBED_COMPARISONS | {">=", "<=", ">", "<"}

# Membership operators and the social lists they can refer to.
MEMBER_TYPES = {"in", "not_in"}
SOCIAL_LISTS = {"follows", "followers"}
+43
pasturepy/constants/values.py
+43
pasturepy/constants/values.py
···
1
+
# Allowed label values for the ML filters (moderation, sentiment, etc.).

# Text content-moderation categories.
CONTENT_MODS = {
    "OK",
    "sexual",
    "sexual/minors",
    "violence",
    "violence/graphic",
    "self-harm",
    "hate",
    "hate/threatening",
    "harassment",
}

# Image moderation classes.
IMAGE_MODS = {
    "SFW",
    "NSFW",
}

# Topic labels.
TOPICS = {
    "Arts & Culture",
    "Business & Entrepreneurs",
    "Celebrity & Pop Culture",
    "Diaries & Daily Life",
    "Family",
    "Fashion & Style",
    "Film, TV & Video",
    "Fitness & Health",
    "Food & Dining",
    "Gaming",
    "Learning & Educational",
    "Music",
    "News & Social Concern",
    "Other Hobbies",
    "Relationships",
    "Science & Technology",
    "Sports",
    "Travel & Adventure",
    "Youth & Student Life",
}

# Language labels.
LANGUAGES = {
    "Arabic",
    "Bulgarian",
    "Chinese",
    "Dutch",
    "English",
    "French",
    "German",
    "Greek",
    "Hindi",
    "Italian",
    "Japanese",
    "Polish",
    "Portuguese",
    "Russian",
    "Spanish",
    "Swahili",
    "Thai",
    "Turkish",
    "Urdu",
    "Vietnamese",
}

# Sentiment labels.
SENTIMENTS = {
    "Positive",
    "Negative",
    "Neutral",
}

# Toxicity labels.
TOXICITY = {
    "Toxic",
    "Severe Toxicity",
    "Obscene",
    "Threat",
    "Insult",
    "Identity Hate",
}

# Emotion labels.
EMOTIONS = {
    "Admiration",
    "Amusement",
    "Anger",
    "Annoyance",
    "Approval",
    "Caring",
    "Confusion",
    "Curiosity",
    "Desire",
    "Disappointment",
    "Disapproval",
    "Disgust",
    "Embarrassment",
    "Excitement",
    "Fear",
    "Gratitude",
    "Grief",
    "Joy",
    "Love",
    "Nervousness",
    "Neutral",
    "Optimism",
    "Pride",
    "Realization",
    "Relief",
    "Remorse",
    "Sadness",
    "Surprise",
}

# Category labels (one shared list).
CATEGORIES = {
    "Academic & Intellectual",
    "Adult & Sexual Content",
    "AI & Machine Learning",
    "Animals",
    "Arts & Creative",
    "Aviation & Maritime",
    "Data & Computing",
    "Entertainment & Culture",
    "Film & TV",
    "Food & Beverages",
    "Food & Lifestyle",
    "Game Development",
    "Gaming",
    "Healthcare & Medicine",
    "Medical Education",
    "Medical Specialties",
    "Music",
    "Nature & Outdoors",
    "News & Media",
    "Politics",
    "Programming",
    "Society & Culture",
    "Software Development",
    "Sports",
    "Visual Arts",
}

# Spam-check classes.
SPAM_TYPE = {
    "Marketing Spam",
    "Organic Content",
}

# Label values.
LABELS = {
    "porn",
    "sexual",
    "nudity",
    "graphic-media",
    "spam",
    "rude",
    "!no-unauthenticated",
    "bridged-from-bridgy-fed-web",
    "bridged-from-bridgy-fed-activitypub",
}
+5
pasturepy/core/__init__.py
+5
pasturepy/core/__init__.py
+50
pasturepy/core/config.py
+50
pasturepy/core/config.py
···
1
+
from typing import Dict, Any, Optional, Literal
2
+
from .filter import FilterGroup
3
+
from .sort import SortSettings
4
+
5
+
OrderType = Literal["new", "trending", "blend"]


class FeedConfig:
    """Configure feed filtering and sorting settings.

    Attributes:
        order: One of ``"new"``, ``"trending"`` or ``"blend"``.
        filters: Root ``FilterGroup`` (logic ``"and"``) that filter builders
            append to.
    """

    # Runtime mirror of ``OrderType``; the Literal alias is not enforced when
    # the code runs.
    _VALID_ORDERS = ("new", "trending", "blend")

    def __init__(self, order: OrderType = "blend"):
        """Create a feed with an empty AND filter group.

        No sort settings are attached here; call ``set_sort_order`` or
        ``set_sort_settings`` to add them.

        Raises:
            ValueError: If ``order`` is not a supported order type.
        """
        self._check_order(order)
        self.order = order
        self.filters = FilterGroup("and")
        self._sort_settings = None

    @classmethod
    def _check_order(cls, order) -> None:
        """Raise ``ValueError`` unless ``order`` is a supported order type."""
        if order not in cls._VALID_ORDERS:
            raise ValueError(
                f"Invalid order '{order}'. Must be one of {cls._VALID_ORDERS}"
            )

    def set_sort_order(self, order: OrderType) -> 'FeedConfig':
        """Set the feed sorting order.

        Switching to ``"new"`` discards any sort settings. Switching to
        ``"trending"`` or ``"blend"`` creates default ``SortSettings`` only
        when none are present, so previously customised settings survive.

        Returns:
            This ``FeedConfig``, to allow chaining.

        Raises:
            ValueError: If ``order`` is not a supported order type; the
                current order is left untouched in that case.
        """
        self._check_order(order)
        self.order = order
        if order == "new":
            self._sort_settings = None
        elif self._sort_settings is None:
            self._sort_settings = SortSettings()
        return self

    def set_sort_settings(self, **kwargs) -> 'FeedConfig':
        """Set custom sort settings for trending/blend orders.

        The keyword arguments are forwarded to ``SortSettings`` and replace
        any settings already attached to this feed.

        Returns:
            This ``FeedConfig``, to allow chaining.

        Raises:
            ValueError: If the current order is ``"new"``.
        """
        if self.order == "new":
            raise ValueError("No settings needed for 'new' order type")

        self._sort_settings = SortSettings(**kwargs)
        return self

    def to_dict(self) -> Dict[str, Any]:
        """Convert to Graze JSON format.

        ``custom_sort_settings`` is only included when sort settings have
        been attached.
        """
        config = {
            "order": self.order,
            "manifest": {"filter": self.filters.to_dict()}
        }
        if self._sort_settings is not None:
            config["custom_sort_settings"] = self._sort_settings.to_dict()
        return config

    def generate(self, output_file: Optional[str] = None) -> Dict[str, Any]:
        """Generate config and optionally write to file.

        Args:
            output_file: Path to write the JSON to (UTF-8, 2-space indent).
                Nothing is written when this is ``None`` or empty.

        Returns:
            The same dictionary ``to_dict`` produces.
        """
        config = self.to_dict()
        if output_file:
            import json
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(config, f, indent=2)
        return config
+27
pasturepy/core/filter.py
+27
pasturepy/core/filter.py
···
1
+
from typing import Dict, Any, List, Union, Literal
2
+
3
+
LogicType = Literal["and", "or"]


class FilterGroup:
    """Represents a group of filters connected by a logical operator.

    Attributes:
        logic_type: ``"and"`` or ``"or"``; used as the JSON key in ``to_dict``.
        filters: Members in insertion order -- plain filter dictionaries
            and/or nested ``FilterGroup`` instances.
    """

    # Runtime mirror of ``LogicType``; the Literal alias is not enforced when
    # the code runs.
    _VALID_LOGIC = ("and", "or")

    def __init__(self, logic_type: LogicType):
        """Create an empty group.

        Raises:
            ValueError: If ``logic_type`` is not ``"and"`` or ``"or"``.
        """
        if logic_type not in self._VALID_LOGIC:
            raise ValueError(
                f"Invalid logic_type '{logic_type}'. "
                f"Must be one of {self._VALID_LOGIC}"
            )
        self.logic_type = logic_type
        self.filters: List[Union[Dict[str, Any], 'FilterGroup']] = []

    def nest_filters(self, logic_type: LogicType) -> 'FilterGroup':
        """Create a nested filter group, append it, and return the new group.

        Raises:
            ValueError: If ``logic_type`` is invalid; nothing is appended.
        """
        nested = FilterGroup(logic_type)
        self.filters.append(nested)
        return nested

    def add_filter(self, filter_dict: Dict[str, Any]) -> 'FilterGroup':
        """Append a filter dictionary and return this group for chaining."""
        self.filters.append(filter_dict)
        return self

    def to_dict(self) -> Dict[str, Any]:
        """Convert to JSON format.

        Returns:
            ``{logic_type: [...]}`` with nested groups converted recursively,
            or ``{}`` when the group has no members. An empty nested group
            therefore shows up as ``{}`` inside its parent's list.
        """
        filters = [f.to_dict() if isinstance(f, FilterGroup) else f
                   for f in self.filters]
        return {self.logic_type: filters} if filters else {}
+27
pasturepy/core/sort.py
+27
pasturepy/core/sort.py
···
1
+
from typing import Dict, Any, Union
2
+
3
+
Number = Union[int, float]


class SortSettings:
    """Configure sort settings for trending and blend orders.

    Each key of ``DEFAULTS`` becomes an instance attribute, taken from the
    matching keyword argument when given and from ``DEFAULTS`` otherwise.
    """

    # Supported settings and their default values; the key order here is the
    # key order of ``to_dict``.
    DEFAULTS: Dict[str, Number] = {
        'time_window': 168,
        'decay_penalty': 0.4,
        'like_count_multiplier': 1,
        'reply_count_multiplier': 1,
        'repost_count_multiplier': 1,
        'reader_like_count_multiplier': 1,
        'reader_reply_count_multiplier': 0,
        'reader_repost_count_multiplier': 0,
        'request_less_count_multiplier': 1,
        'request_more_count_multiplier': 1,
    }

    def __init__(self, **kwargs):
        """Store the given settings, filling the rest from ``DEFAULTS``.

        Raises:
            ValueError: If a keyword is not a key of ``DEFAULTS``. A
                misspelled setting would otherwise be dropped silently and
                the default used in its place.
        """
        unknown = sorted(set(kwargs) - set(self.DEFAULTS))
        if unknown:
            raise ValueError(
                f"Unknown sort setting(s) {unknown}. "
                f"Must be one of {sorted(self.DEFAULTS)}"
            )
        for key, default in self.DEFAULTS.items():
            setattr(self, key, kwargs.get(key, default))

    def to_dict(self) -> Dict[str, Any]:
        """Convert to JSON format: a new dict with one entry per setting."""
        return {key: getattr(self, key) for key in self.DEFAULTS}
+6
pasturepy/nodes/__init__.py
+6
pasturepy/nodes/__init__.py
+16
pasturepy/nodes/embed.py
+16
pasturepy/nodes/embed.py
···
1
+
from pasturepy.constants.fields import EMBED_TYPES
2
+
from pasturepy.constants.graze_json import EMBED_COMPARISONS
3
+
4
+
class EmbedNode:
    """Builder for ``embed_type`` filters."""

    @staticmethod
    def embed(filter_group, comparison: str, embed_type: str):
        """Filter by embeds (images, videos, gifs, links).

        Appends ``{"embed_type": [comparison, embed_type]}`` to
        ``filter_group``.

        Args:
            filter_group: Object exposing ``add_filter(dict)``.
            comparison: A member of ``EMBED_COMPARISONS``.
            embed_type: A member of ``EMBED_TYPES``.

        Returns:
            Whatever ``filter_group.add_filter`` returns.

        Raises:
            ValueError: If ``embed_type`` or ``comparison`` is not allowed.
        """
        # Each message names the argument that actually failed; the two
        # labels were previously "method" and "entity_type".
        if embed_type not in EMBED_TYPES:
            raise ValueError(f"Invalid embed_type '{embed_type}'. Must be one of {EMBED_TYPES}")
        if comparison not in EMBED_COMPARISONS:
            raise ValueError(f"Invalid comparison '{comparison}'. Must be one of {EMBED_COMPARISONS}")

        return filter_group.add_filter({
            "embed_type": [comparison, embed_type]
        })
+16
pasturepy/nodes/entity.py
+16
pasturepy/nodes/entity.py
···
1
+
from pasturepy.constants.fields import ENTITY_TYPES
2
+
from pasturepy.constants.graze_json import ENTITY_METHODS
3
+
4
+
class EntityNode:
    """Builder for entity filters."""

    @staticmethod
    def entity(filter_group, method: str, entity_type: str, terms: list):
        """Filter by entities (hashtags, mentions, domains, etc.).

        Appends ``{method: [entity_type, terms]}`` to ``filter_group``.

        Args:
            filter_group: Object exposing ``add_filter(dict)``.
            method: A member of ``ENTITY_METHODS``.
            entity_type: A member of ``ENTITY_TYPES``.
            terms: Values to match; stored as given, without copying.

        Returns:
            Whatever ``filter_group.add_filter`` returns.

        Raises:
            ValueError: If ``method`` or ``entity_type`` is not allowed;
                ``method`` is checked first.
        """
        checks = (
            ("method", method, ENTITY_METHODS),
            ("entity_type", entity_type, ENTITY_TYPES),
        )
        for arg_name, given, allowed in checks:
            if given not in allowed:
                raise ValueError(f"Invalid {arg_name} '{given}'. Must be one of {allowed}")

        payload = {method: [entity_type, terms]}
        return filter_group.add_filter(payload)
+129
pasturepy/nodes/ml.py
+129
pasturepy/nodes/ml.py
···
1
+
from pasturepy.constants.graze_json import COMPARISONS
2
+
from pasturepy.constants.values import (
3
+
CONTENT_MODS, IMAGE_MODS, TOPICS, LANGUAGES,
4
+
SENTIMENTS, TOXICITY, EMOTIONS, CATEGORIES, SPAM_TYPE
5
+
)
6
+
7
+
class MLNode:
    """Builders for ML-score filters.

    Every public method validates its arguments and then appends a filter of
    the form ``{key: [label, comparison, value]}`` to ``filter_group``,
    returning whatever ``filter_group.add_filter`` returns. Each raises
    ``ValueError`` when the label, the comparison, or the probability is
    rejected (checked in that order).
    """

    @staticmethod
    def _validate_comparison(comparison: str) -> None:
        """Raise ``ValueError`` unless ``comparison`` is in ``COMPARISONS``."""
        if comparison not in COMPARISONS:
            raise ValueError(f"Invalid comparison '{comparison}'. Must be one of {COMPARISONS}")

    @staticmethod
    def _validate_probability(value: float) -> None:
        """Raise ``ValueError`` unless ``value`` is in [0, 1] with at most two decimals."""
        if not (0 <= value <= 1):
            raise ValueError(f"Probability must be between 0 and 1, got {value}")
        if round(value, 2) != value:
            raise ValueError("Probability can only have up to 2 decimal places")

    @staticmethod
    def _add_scored_filter(filter_group, key: str, arg_name: str, label: str,
                           allowed, comparison: str, value: float):
        """Validate one label/comparison/probability triple and append it under ``key``.

        ``arg_name`` is only used to name the offending argument in the
        error message.
        """
        if label not in allowed:
            raise ValueError(f"Invalid {arg_name}. Must be one of {allowed}")
        MLNode._validate_comparison(comparison)
        MLNode._validate_probability(value)

        return filter_group.add_filter({
            key: [label, comparison, value]
        })

    @staticmethod
    def content_moderation(filter_group, content_type: str, comparison: str, value: float):
        """Add a ``content_moderation`` filter; ``content_type`` must be in ``CONTENT_MODS``."""
        return MLNode._add_scored_filter(
            filter_group, "content_moderation", "content_type",
            content_type, CONTENT_MODS, comparison, value)

    @staticmethod
    def image_nsfw(filter_group, mod_type: str, comparison: str, value: float):
        """Add an ``image_nsfw`` filter; ``mod_type`` must be in ``IMAGE_MODS``."""
        return MLNode._add_scored_filter(
            filter_group, "image_nsfw", "mod_type",
            mod_type, IMAGE_MODS, comparison, value)

    @staticmethod
    def language(filter_group, language: str, comparison: str, value: float):
        """Add a ``language_analysis`` filter; ``language`` must be in ``LANGUAGES``."""
        return MLNode._add_scored_filter(
            filter_group, "language_analysis", "language",
            language, LANGUAGES, comparison, value)

    @staticmethod
    def sentiment(filter_group, sentiment: str, comparison: str, value: float):
        """Add a ``sentiment_analysis`` filter; ``sentiment`` must be in ``SENTIMENTS``."""
        return MLNode._add_scored_filter(
            filter_group, "sentiment_analysis", "sentiment",
            sentiment, SENTIMENTS, comparison, value)

    @staticmethod
    def toxicity(filter_group, toxicity_type: str, comparison: str, value: float):
        """Add a ``toxicity_analysis`` filter; ``toxicity_type`` must be in ``TOXICITY``."""
        return MLNode._add_scored_filter(
            filter_group, "toxicity_analysis", "toxicity_type",
            toxicity_type, TOXICITY, comparison, value)

    @staticmethod
    def topic(filter_group, topic_type: str, comparison: str, value: float):
        """Add a ``topic_analysis`` filter; ``topic_type`` must be in ``TOPICS``."""
        return MLNode._add_scored_filter(
            filter_group, "topic_analysis", "topic_type",
            topic_type, TOPICS, comparison, value)

    @staticmethod
    def emotion(filter_group, emotion: str, comparison: str, value: float):
        """Add an emotion filter; ``emotion`` must be in ``EMOTIONS``."""
        # This used to be emitted under "topic_analysis" (copied from
        # ``topic``), which paired a topic key with an emotion label.
        # NOTE(review): "emotion_sentiment_analysis" is believed to be the
        # Graze key for the emotion model -- confirm against a Graze export.
        return MLNode._add_scored_filter(
            filter_group, "emotion_sentiment_analysis", "emotion",
            emotion, EMOTIONS, comparison, value)

    @staticmethod
    def spam(filter_group, marketing_type: str, comparison: str, value: float):
        """Add a ``marketing_check`` filter; ``marketing_type`` must be in ``SPAM_TYPE``."""
        return MLNode._add_scored_filter(
            filter_group, "marketing_check", "marketing_type",
            marketing_type, SPAM_TYPE, comparison, value)

    @staticmethod
    def img_category(filter_group, img_topic: str, comparison: str, value: float):
        """Add an image category filter; ``img_topic`` must be in ``CATEGORIES``."""
        # NOTE(review): the key is spelled "image_arbitary" (not "arbitrary");
        # kept unchanged -- verify the spelling Graze expects.
        return MLNode._add_scored_filter(
            filter_group, "image_arbitary", "img_topic",
            img_topic, CATEGORIES, comparison, value)

    @staticmethod
    def txt_category(filter_group, txt_topic: str, comparison: str, value: float):
        """Add a text category filter; ``txt_topic`` must be in ``CATEGORIES``."""
        # NOTE(review): the key is spelled "text_arbitary" (not "arbitrary");
        # kept unchanged -- verify the spelling Graze expects.
        return MLNode._add_scored_filter(
            filter_group, "text_arbitary", "txt_topic",
            txt_topic, CATEGORIES, comparison, value)
+34
pasturepy/nodes/text.py
+34
pasturepy/nodes/text.py
···
1
+
from pasturepy.constants.fields import TEXT_FIELDS, OPTION_FIELDS
2
+
from pasturepy.constants.graze_json import REGEX_METHODS, WORD_METHODS
3
+
4
+
class TextNode:
    """Builders for text filters (word lists and single regex patterns)."""

    @staticmethod
    def _validate_field(field: str) -> None:
        """Raise ``ValueError`` unless ``field`` is in ``TEXT_FIELDS`` or ``OPTION_FIELDS``."""
        allowed = TEXT_FIELDS | OPTION_FIELDS
        if field in allowed:
            return
        choices = ', '.join(sorted(allowed))
        raise ValueError(f"Invalid text field '{field}'. Must be one of: {choices}")

    @staticmethod
    def word_list(filter_group, method: str, field: str, terms: list,
                  ignore_case: bool = True, regex_list: bool = False):
        """Append a word-list filter to ``filter_group``.

        The filter has the form
        ``{method: [field, terms, ignore_case, regex_list]}``.

        Args:
            filter_group: Object exposing ``add_filter(dict)``.
            method: A member of ``WORD_METHODS``.
            field: A member of ``TEXT_FIELDS`` or ``OPTION_FIELDS``.
            terms: Terms to look for; stored as given, without copying.
            ignore_case: Passed through as the third element.
            regex_list: Passed through as the fourth element.

        Returns:
            Whatever ``filter_group.add_filter`` returns.

        Raises:
            ValueError: If ``method`` or ``field`` is not allowed; ``method``
                is checked first.
        """
        if method not in WORD_METHODS:
            raise ValueError(f"Invalid method '{method}'. Must be one of {WORD_METHODS}")
        TextNode._validate_field(field)

        arguments = [field, terms, ignore_case, regex_list]
        return filter_group.add_filter({method: arguments})

    @staticmethod
    def regex(filter_group, method: str, field: str, term: str,
              ignore_case: bool = True):
        """Append a single-pattern regex filter to ``filter_group``.

        The filter has the form ``{method: [field, term, ignore_case]}``.

        Args:
            filter_group: Object exposing ``add_filter(dict)``.
            method: A member of ``REGEX_METHODS``.
            field: A member of ``TEXT_FIELDS`` or ``OPTION_FIELDS``.
            term: The pattern, passed through unchanged.
            ignore_case: Passed through as the third element.

        Returns:
            Whatever ``filter_group.add_filter`` returns.

        Raises:
            ValueError: If ``method`` or ``field`` is not allowed; ``method``
                is checked first.
        """
        if method not in REGEX_METHODS:
            raise ValueError(f"Invalid method '{method}'. Must be one of {REGEX_METHODS}")
        TextNode._validate_field(field)

        arguments = [field, term, ignore_case]
        return filter_group.add_filter({method: arguments})
})