Skip to content

Instantly share code, notes, and snippets.

View tdiggelm's full-sized avatar

Thomas Diggelmann tdiggelm

  • ETH Zürich
  • Zurich
View GitHub Profile
@tdiggelm
tdiggelm / union_root_model.py
Created September 30, 2024 21:42
Pydantic union root model
from types import UnionType
from typing import Annotated, Any, ClassVar, TypeVar, Union, get_args, get_origin
from pydantic import Discriminator, Field, RootModel
T = TypeVar("T")
__all__ = ["UnionRootModel"]
from logging import getLogger
from functools import wraps, lru_cache
from typing import Callable, Any
from datetime import datetime
log = getLogger(__name__)
def ttl_cache(
@tdiggelm
tdiggelm / pydantic_notrequired.ipynb
Created January 31, 2024 22:45
Pydantic_NotRequired.ipynb
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@tdiggelm
tdiggelm / analyzer.py
Created March 24, 2020 00:31
Analyzer that does a few sane default transformations
import re
import nltk
class Analyzer:
''' This analyzer applies the following transformation to an input text:
1. tokenize text into words
2. lowercase transform words
3. stem words
4. filter stopwords
5. replace all numbers with _num_
@tdiggelm
tdiggelm / tpu_test.py
Created December 3, 2019 10:15
Test TPU
! pip3 install nltk --user
import os
import tensorflow as tf
import numpy as np
print(tf.__version__)
import nltk
nltk.download('movie_reviews')
nltk.download('punkt')
@tdiggelm
tdiggelm / tokenization.py
Last active November 14, 2019 09:14
FullTokenizer for ALBERT (TensorFlow 2.0 compatible)
# coding=utf-8
# Copyright 2019 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
@tdiggelm
tdiggelm / tf_glove_embeddings.py
Last active October 21, 2019 22:42
Helper functions to initialise Embedding layer with pre-trained GloVe embeddings.
import zipfile
from io import TextIOWrapper
import numpy as np
import tensorflow as tf
import os
GLOVE_EMBEDDINGS = {
'glove.6B.50d' : ('http://nlp.stanford.edu/data/glove.6B.zip' , 50),
'glove.6B.100d' : ('http://nlp.stanford.edu/data/glove.6B.zip' , 100),
'glove.6B.200d' : ('http://nlp.stanford.edu/data/glove.6B.zip' , 200),
@tdiggelm
tdiggelm / tf_multiheadattention.py
Last active October 25, 2019 08:10
TensorFlow 2.0-compatible MultiHeadAttention layer, as used in BERT and described in the paper "Attention Is All You Need"; see https://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf
import tensorflow as tf
class MultiHeadAttention(tf.keras.layers.Layer):
""" Mirrors the implementation from paper 'Attention Is All You Need' and
corresponding source code in
https://github.com/google-research/bert/blob/master/modeling.py. """
def __init__(self,
size_per_head=16,
num_attention_heads=12,
dropout_rate=0,
@tdiggelm
tdiggelm / catplot.py
Last active February 20, 2019 10:53
Categorical scatter plot with matplotlib
import numpy as np
import matplotlib.pyplot as plt
def catplot(x, y, c, labels=None, title=None, n_categories=None,
s=10, alpha=0.65, cmap='rainbow', fig=None, ax=None,
border={'color': '0.7', 'linewidth': 1}, facecolor='white'):
if not fig:
fig, ax = plt.subplots(1, 1)
try:
N = int(n_categories)
except:
@tdiggelm
tdiggelm / gist:bee47b4fda60564118dbec3f32f8d4e8
Last active February 14, 2017 11:48
TextMate: replace LaTeX \url{...} commands with Markdown <...> links
Find: \\url{([^}]+)}
Replace: <$1>